Updated Ambito and Ambito Financiero by DM. Fixes #795158 (Updated recipe for Ambito.com and new recipe for Ambito Financiero, both in Spanish)

This commit is contained in:
Kovid Goyal 2011-06-09 13:33:46 -06:00
parent 3d7d916e51
commit 67adcb92f3
3 changed files with 116 additions and 26 deletions

View File

@ -1,7 +1,5 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
ambito.com
'''
@ -11,51 +9,56 @@ from calibre.web.feeds.news import BasicNewsRecipe
# Recipe class for the Ambito.com news portal, derived from BasicNewsRecipe.
# NOTE(review): this span reads like a diff hunk whose +/- markers and
# indentation were lost: several attributes are assigned twice in a row
# (description, publisher, category, encoding, remove_tags, language) and
# print_version has two consecutive return statements. Presumably the first
# of each pair is the superseded line -- confirm against the real file.
class Ambito(BasicNewsRecipe):
title = 'Ambito.com'
__author__ = 'Darko Miletic'
description = 'Informacion Libre las 24 horas'
publisher = 'Ambito.com'
category = 'news, politics, Argentina'
description = 'Ambito.com con noticias del Diario Ambito Financiero de Buenos Aires'
publisher = 'Editorial Nefir S.A.'
category = 'news, politics, economy, finances, Argentina'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
# Two encodings are assigned ('iso-8859-1' here, 'cp1252' a few lines below).
encoding = 'iso-8859-1'
cover_url = 'http://www.ambito.com/img/logo_.jpg'
remove_javascript = True
encoding = 'cp1252'
# Same image URL as cover_url above.
masthead_url = 'http://www.ambito.com/img/logo_.jpg'
use_embedded_content = False
language = 'es_AR'
publication_type = 'newsportal'
extra_css = """
body{font-family: "Trebuchet MS",Verdana,sans-serif}
.volanta{font-size: small}
.t2_portada{font-size: xx-large; font-family: Georgia,serif; color: #026698}
"""
# Three metadata containers follow, all built from description, category and
# publisher: an LRF option list, an EPUB option string and a
# conversion_options dict (which also carries the language).
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
# Tag filters: the keep rule matches <div align="justify">; presumably the
# article body lives there -- verify against the site's markup.
keep_only_tags = [dict(name='div', attrs={'align':'justify'})]
remove_tags = [dict(name=['object','link'])]
# 'link' appears twice in the list below.
remove_tags = [dict(name=['object','link','embed','iframe','meta','link','table','img'])]
remove_attributes = ['align']
# (section title, RSS URL) pairs. Non-ASCII letters in the S= query value are
# percent-encoded as single bytes (%ED, %F3, %E1).
# NOTE(review): 'Agro' and 'Campo' point at the same URL, and 'Tecnologia'
# is listed twice with different S= spellings.
feeds = [
(u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' )
,(u'Economia' , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa' )
,(u'Politica' , u'http://www.ambito.com/rss/noticias.asp?S=Pol%EDtica' )
,(u'Informacion General' , u'http://www.ambito.com/rss/noticias.asp?S=Informaci%F3n%20General')
,(u'Agro' , u'http://www.ambito.com/rss/noticias.asp?S=Agro' )
,(u'Campo' , u'http://www.ambito.com/rss/noticias.asp?S=Agro' )
,(u'Internacionales' , u'http://www.ambito.com/rss/noticias.asp?S=Internacionales' )
,(u'Deportes' , u'http://www.ambito.com/rss/noticias.asp?S=Deportes' )
,(u'Espectaculos' , u'http://www.ambito.com/rss/noticias.asp?S=Espect%E1culos' )
,(u'Tecnologia' , u'http://www.ambito.com/rss/noticias.asp?S=Tecnologia' )
,(u'Salud' , u'http://www.ambito.com/rss/noticias.asp?S=Salud' )
,(u'Tecnologia' , u'http://www.ambito.com/rss/noticias.asp?S=Tecnolog%EDa' )
,(u'Ambito Nacional' , u'http://www.ambito.com/rss/noticias.asp?S=Ambito%20Nacional' )
]
# Rewrites an article URL to the site's imprimir.asp page by plain substring
# replacement. The first return matches the full absolute URL prefix; the
# second matches only the '/noticia.asp?' path fragment.
def print_version(self, url):
return url.replace('http://www.ambito.com/noticia.asp?','http://www.ambito.com/noticias/imprimir.asp?')
return url.replace('/noticia.asp?','/noticias/imprimir.asp?')
# Cleans a parsed article: inserts a Content-Language meta tag at the start
# of <head>, deletes every inline style attribute, and replaces each <a>
# element with its text. Returns the same soup object.
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('a'):
# NOTE: the local name `str` shadows the builtin inside this method.
str = item.string
if str is None:
# .string was None, so fall back to tag_to_string for the anchor's text.
str = self.tag_to_string(item)
item.replaceWith(str)
return soup
# Repeats the language assignment made earlier in the class.
language = 'es_AR'

View File

@ -0,0 +1,87 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
ambito.com/diario
'''
import time
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Ambito_Financiero(BasicNewsRecipe):
    """Recipe for the Ambito Financiero newspaper (ambito.com/diario).

    Articles are discovered by scraping the front page (``INDEX``) rather
    than by reading RSS feeds.  Credentials are optional: when both a
    username and a password are set, the browser submits the site's login
    form before any article is fetched.
    """

    title = 'Ambito Financiero'
    __author__ = 'Darko Miletic'
    description = 'Informacion Libre las 24 horas'
    publisher = 'Editorial Nefir S.A.'
    category = 'news, politics, economy, Argentina'
    no_stylesheets = True
    encoding = 'cp1252'
    masthead_url = 'http://www.ambito.com/diario/img/logo_af.gif'
    publication_type = 'newspaper'
    needs_subscription = 'optional'
    use_embedded_content = False
    language = 'es_AR'

    # Site root, front page used as the article index, and login form page.
    PREFIX = 'http://www.ambito.com'
    INDEX = PREFIX + '/diario/index.asp'
    LOGIN = PREFIX + '/diario/login/entrada.asp'

    # Written as concatenated literals so the stylesheet text stays exactly
    # the same while the source can be indented with the class body.
    extra_css = (
        '\n'
        'body{font-family: "Trebuchet MS",Verdana,sans-serif}\n'
        '.volanta{font-size: small}\n'
        '.t2_portada{font-size: xx-large; font-family: Georgia,serif; color: #026698}\n'
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    keep_only_tags = [dict(name='div', attrs={'align': 'justify'})]
    remove_tags = [
        dict(name=['object', 'link', 'embed', 'iframe', 'meta', 'table', 'img'])
    ]
    remove_attributes = ['align']

    def get_browser(self):
        """Return a browser that has opened the index and, if possible, logged in.

        The login form is submitted only when both ``self.username`` and
        ``self.password`` are set.
        """
        # Pass self explicitly so the call also works where
        # BasicNewsRecipe.get_browser is a plain instance method.
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            br.select_form(name='frmlogin')
            br['USER_NAME'] = self.username
            br['USER_PASS'] = self.password
            br.submit()
        return br

    def print_version(self, url):
        """Return the printer-friendly URL for the article at ``url``."""
        return url.replace('/diario/noticia.asp?', '/noticias/imprimir.asp?')

    def preprocess_html(self, soup):
        """Strip inline styles and replace every link with its text.

        Returns the same ``soup`` object after modifying it in place.
        """
        for styled in soup.findAll(style=True):
            del styled['style']
        for anchor in soup.findAll('a'):
            text = anchor.string
            if text is None:
                # .string is None here, so fall back to tag_to_string.
                text = self.tag_to_string(anchor)
            anchor.replaceWith(text)
        return soup

    def parse_index(self):
        """Build the article list from the front page.

        Sets ``self.cover_url`` when the front-page cover image is found.
        Returns a one-element list ``[(self.title, articles)]`` in which each
        article is a dict with ``title``, ``date``, ``url`` and
        ``description`` keys; each URL appears only once.
        """
        soup = self.index_to_soup(self.INDEX)

        cover_item = soup.find('img', attrs={'class': 'fotodespliegue'})
        if cover_item is not None and cover_item.get('src'):
            self.cover_url = self.PREFIX + cover_item['src']

        # Every article carries the same fetch timestamp, so format it once.
        date = strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())

        articles = []
        seen = set()
        link_classes = ['t0_portada', 't2_portada', 'bajada']
        for feed_link in soup.findAll('a', attrs={'class': link_classes}):
            href = feed_link.get('href')
            if not href:
                # An anchor without a target cannot be turned into an article.
                continue
            url = self.PREFIX + href
            if url in seen:
                # Several anchors can share a URL; keep the first one only.
                continue
            seen.add(url)
            articles.append({
                'title': self.tag_to_string(feed_link),
                'date': date,
                'url': url,
                'description': u'',
            })
        return [(self.title, articles)]

Binary file not shown.

After

Width:  |  Height:  |  Size: 508 B