diff --git a/resources/recipes/lanacion.recipe b/resources/recipes/lanacion.recipe index 425aa9b193..cdee0e5e66 100644 --- a/resources/recipes/lanacion.recipe +++ b/resources/recipes/lanacion.recipe @@ -17,6 +17,7 @@ class Lanacion(BasicNewsRecipe): use_embedded_content = False no_stylesheets = True language = 'es_AR' + delay = 14 publication_type = 'newspaper' remove_empty_feeds = True masthead_url = 'http://www.lanacion.com.ar/_ui/desktop/imgs/layout/logos/ln341x47.gif' @@ -25,7 +26,7 @@ class Lanacion(BasicNewsRecipe): h2{color: #626262; font-weight: normal; font-size: 1.1em} body{font-family: Arial,sans-serif} img{margin-top: 0.5em; margin-bottom: 0.2em; display: block} - .notaFecha{color: #808080} + .notaFecha{color: #808080; font-size: small} .notaEpigrafe{font-size: x-small} .topNota h1{font-family: Arial,sans-serif} """ @@ -38,7 +39,10 @@ class Lanacion(BasicNewsRecipe): , 'language' : language } - keep_only_tags = [dict(name='div', attrs={'id':'content'})] + keep_only_tags = [ + dict(name='div', attrs={'class':['topNota','itemHeader','nota','itemBody']}) + ,dict(name='div', attrs={'id':'content'}) + ] remove_tags = [ dict(name='div' , attrs={'class':'notaComentario floatFix noprint' }) @@ -52,8 +56,7 @@ class Lanacion(BasicNewsRecipe): remove_attributes = ['height','width','visible','onclick','data-count','name'] feeds = [ - (u'Ultimas Noticias' , u'http://servicios.lanacion.com.ar/herramientas/rss/origen=2' ) - ,(u'Politica' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=30' ) + (u'Politica' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=30' ) ,(u'Deportes' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=131' ) ,(u'Economia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=272' ) ,(u'Informacion General' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=21' ) @@ -81,17 +84,12 @@ class Lanacion(BasicNewsRecipe): ] - def get_browser(self): - br = BasicNewsRecipe.get_browser() - br.set_debug_redirects(True) - br.set_debug_responses(True) - br.set_debug_http(True) - return br - def get_article_url(self, article): link = BasicNewsRecipe.get_article_url(self,article) if link.startswith('http://blogs.lanacion') and not link.endswith('/'): - return None + return self.browser.open_novisit(link).geturl() + if link.rfind('galeria=') > 0: + return None return link def preprocess_html(self, soup):