diff --git a/resources/recipes/clarin.recipe b/resources/recipes/clarin.recipe index 7c0bf7b7ef..3a96bca162 100644 --- a/resources/recipes/clarin.recipe +++ b/resources/recipes/clarin.recipe @@ -1,6 +1,6 @@ __license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' +__copyright__ = '2008-2010, Darko Miletic ' ''' clarin.com ''' @@ -21,7 +21,8 @@ class Clarin(BasicNewsRecipe): cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg') encoding = 'cp1252' language = 'es' - extra_css = ' .Txt{ font-family: sans-serif } .Volan{ font-family: sans-serif; font-size: x-small} .Pie{ font-family: sans-serif; font-size: x-small} .Copete{font-family: sans-serif; font-size: large} .Hora{font-family: sans-serif; font-size: large} .Autor{font-family: sans-serif; font-size: small} ' + masthead_url = 'http://www.clarin.com/shared/v10/img/Hd/lg_Clarin.gif' + extra_css = ' body{font-family: Arial,Helvetica,sans-serif} h2{font-family: Georgia,"Times New Roman",Times,serif; font-size: xx-large} .Volan,.Pie,.Autor{ font-size: x-small} .Copete,.Hora{font-size: large} ' conversion_options = { 'comment' : description diff --git a/resources/recipes/lanacion.recipe b/resources/recipes/lanacion.recipe index 298c980f00..000b4fb0f6 100644 --- a/resources/recipes/lanacion.recipe +++ b/resources/recipes/lanacion.recipe @@ -1,7 +1,5 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' +__copyright__ = '2008-2010, Darko Miletic ' ''' lanacion.com.ar ''' @@ -12,28 +10,34 @@ class Lanacion(BasicNewsRecipe): title = 'La Nacion' __author__ = 'Darko Miletic' description = 'Noticias de Argentina y el resto del mundo' - publisher = 'La Nacion' + publisher = 'La Nacion S.A.' category = 'news, politics, Argentina' oldest_article = 2 max_articles_per_feed = 100 use_embedded_content = False - remove_javascript = True no_stylesheets = True + language = 'es' + encoding = 'cp1252' + masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif' + extra_css = ' h1{font-family: Georgia,serif} body{font-family: Arial,sans-serif} img{margin-top: 0.5em; margin-bottom: 0.2em} .notaEpigrafe{font-size: x-small} ' - html2lrf_options = [ - '--comment', description - , '--category', category - , '--publisher', publisher - ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher': publisher + , 'language' : language + } keep_only_tags = [dict(name='div', attrs={'class':'nota floatFix'})] remove_tags = [ dict(name='div' , attrs={'class':'notaComentario floatFix noprint' }) ,dict(name='ul' , attrs={'class':'cajaHerramientas cajaTop noprint'}) ,dict(name='div' , attrs={'class':'cajaHerramientas noprint' }) + ,dict(attrs={'class':['titulosMultimedia','derecha','techo color']}) + ,dict(name=['iframe','embed','object']) ] + remove_attributes = ['height','width'] feeds = [ (u'Ultimas noticias' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?origen=2' ) @@ -51,10 +55,4 @@ class Lanacion(BasicNewsRecipe): ] def preprocess_html(self, soup): - mtag = '' - soup.head.insert(0,mtag) - for item in soup.findAll(style=True): - del item['style'] - return soup - - language = 'es' + return self.adeify_images(soup)