Updated La Nacion. Fixes #826008 (Updated recipe for La Nacion)

This commit is contained in:
Kovid Goyal 2011-08-13 18:36:47 -06:00
parent 16e2d554ff
commit c3d46f1460

View File

@@ -17,18 +17,15 @@ class Lanacion(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
no_stylesheets = True no_stylesheets = True
language = 'es_AR' language = 'es_AR'
delay = 14
publication_type = 'newspaper' publication_type = 'newspaper'
remove_empty_feeds = True remove_empty_feeds = True
masthead_url = 'http://www.lanacion.com.ar/_ui/desktop/imgs/layout/logos/ln341x47.gif' masthead_url = 'http://www.lanacion.com.ar/_ui/desktop/imgs/layout/logos/ln-home.gif'
extra_css = """ extra_css = """
h1{font-family: Georgia,serif} h1{font-family: TheSans,Arial,sans-serif}
h2{color: #626262; font-weight: normal; font-size: 1.1em}
body{font-family: Arial,sans-serif} body{font-family: Arial,sans-serif}
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block} img{display: block}
.notaFecha{color: #808080; font-size: small} .firma,.fecha{font-size: small}
.notaEpigrafe{font-size: x-small} .epigrafe-columna{font-size: x-small}
.topNota h1{font-family: Arial,sans-serif}
""" """
@@ -39,21 +36,13 @@ class Lanacion(BasicNewsRecipe):
, 'language' : language , 'language' : language
} }
keep_only_tags = [
dict(name='div', attrs={'class':['topNota','itemHeader','nota','itemBody']})
,dict(name='div', attrs={'id':'content'})
]
remove_tags = [ remove_tags = [
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' }) dict(name=['iframe','embed','object','meta','link'])
,dict(name='ul' , attrs={'class':['cajaHerramientas cajaTop noprint','herramientas noprint']}) ,dict(attrs={'id':['herramientas','relacionadas','ampliar']})
,dict(name='div' , attrs={'class':['titulosMultimedia','herramientas noprint','cajaHerramientas noprint','cajaHerramientas floatFix'] })
,dict(attrs={'class':['izquierda','espacio17','espacio10','espacio20','floatFix ultimasNoticias','relacionadas','titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
,dict(name=['iframe','embed','object','form','base','hr','meta','link','input'])
] ]
remove_tags_after = dict(attrs={'class':['tags','nota-destacado']}) remove_tags_before = dict(attrs={'id':'encabezado'})
remove_attributes = ['height','width','visible','onclick','data-count','name'] remove_tags_after = dict(attrs={'id':'relacionadas'})
feeds = [ feeds = [
(u'Politica' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=30' ) (u'Politica' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=30' )
@@ -91,6 +80,15 @@ class Lanacion(BasicNewsRecipe):
if link.rfind('galeria=') > 0: if link.rfind('galeria=') > 0:
return None return None
return link return link
def get_cover_url(self):
    """Return the cover image URL from the print-edition page, or None.

    Loads the 'edicion-impresa' page through ``self.index_to_soup``, finds
    the first element whose class is 'tapa', and returns the ``src`` of the
    first ``<img>`` inside it. Returns None when the element, the image,
    or the ``src`` attribute is missing.
    """
    soup = self.index_to_soup('http://www.lanacion.com.ar/edicion-impresa')
    cover_box = soup.find(attrs={'class':'tapa'})
    if not cover_box:
        return None
    cover_img = cover_box.find('img')
    if not cover_img:
        return None
    # Use .get() instead of ['src'] so an <img> lacking a src attribute
    # yields None (no cover) rather than raising KeyError.
    return cover_img.get('src')
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):