Fix #9295 (problem with article redirects on La Nacion website)

This commit is contained in:
Kovid Goyal 2011-03-10 09:34:24 -07:00
parent c93547878c
commit 283a41916e

View File

@ -17,6 +17,7 @@ class Lanacion(BasicNewsRecipe):
use_embedded_content = False
no_stylesheets = True
language = 'es_AR'
delay = 14
publication_type = 'newspaper'
remove_empty_feeds = True
masthead_url = 'http://www.lanacion.com.ar/_ui/desktop/imgs/layout/logos/ln341x47.gif'
@ -25,7 +26,7 @@ class Lanacion(BasicNewsRecipe):
h2{color: #626262; font-weight: normal; font-size: 1.1em}
body{font-family: Arial,sans-serif}
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
.notaFecha{color: #808080}
.notaFecha{color: #808080; font-size: small}
.notaEpigrafe{font-size: x-small}
.topNota h1{font-family: Arial,sans-serif}
"""
@ -38,7 +39,10 @@ class Lanacion(BasicNewsRecipe):
, 'language' : language
}
keep_only_tags = [dict(name='div', attrs={'id':'content'})]
keep_only_tags = [
dict(name='div', attrs={'class':['topNota','itemHeader','nota','itemBody']})
,dict(name='div', attrs={'id':'content'})
]
remove_tags = [
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
@ -52,8 +56,7 @@ class Lanacion(BasicNewsRecipe):
remove_attributes = ['height','width','visible','onclick','data-count','name']
feeds = [
(u'Ultimas Noticias' , u'http://servicios.lanacion.com.ar/herramientas/rss/origen=2' )
,(u'Politica' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=30' )
(u'Politica' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=30' )
,(u'Deportes' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=131' )
,(u'Economia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=272' )
,(u'Informacion General' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=21' )
@ -81,17 +84,12 @@ class Lanacion(BasicNewsRecipe):
]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.set_debug_redirects(True)
br.set_debug_responses(True)
br.set_debug_http(True)
return br
def get_article_url(self, article):
link = BasicNewsRecipe.get_article_url(self,article)
if link.startswith('http://blogs.lanacion') and not link.endswith('/'):
return None
return self.browser.open_novisit(link).geturl()
if link.rfind('galeria=') > 0:
return None
return link
def preprocess_html(self, soup):