diff --git a/resources/recipes/clarin.recipe b/resources/recipes/clarin.recipe
index 90ba562482..7c0bf7b7ef 100644
--- a/resources/recipes/clarin.recipe
+++ b/resources/recipes/clarin.recipe
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 
 __license__ = 'GPL v3'
 __copyright__ = '2008-2009, Darko Miletic '
@@ -8,7 +7,6 @@ clarin.com
 
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag
 
 class Clarin(BasicNewsRecipe):
     title = 'Clarin'
@@ -21,21 +19,16 @@ class Clarin(BasicNewsRecipe):
     use_embedded_content = False
     no_stylesheets = True
     cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
-    remove_javascript = True
     encoding = 'cp1252'
-    language = 'es'
-
-    lang = 'es-AR'
-    direction = 'ltr'
+    language = 'es'
     extra_css = ' .Txt{ font-family: sans-serif } .Volan{ font-family: sans-serif; font-size: x-small} .Pie{ font-family: sans-serif; font-size: x-small} .Copete{font-family: sans-serif; font-size: large} .Hora{font-family: sans-serif; font-size: large} .Autor{font-family: sans-serif; font-size: small} '
 
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
+    conversion_options = {
+                          'comment' : description
+                        , 'tags' : category
+                        , 'publisher': publisher
+                        , 'language' : language
+                        }
 
     remove_tags = [
                     dict(name='a' , attrs={'class':'Imp' })
@@ -60,14 +53,4 @@ class Clarin(BasicNewsRecipe):
         lurl = u'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
         return lurl
 
-    def preprocess_html(self, soup):
-        soup.html['lang'] = self.lang
-        soup.html['dir' ] = self.direction
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
-        soup.head.insert(0,mlang)
-        soup.head.insert(1,mcharset)
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
diff --git a/resources/recipes/pagina12.recipe b/resources/recipes/pagina12.recipe
index 5f3391ec25..3a271e055d 100644
--- a/resources/recipes/pagina12.recipe
+++ b/resources/recipes/pagina12.recipe
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 
 __license__ = 'GPL v3'
 __copyright__ = '2008-2009, Darko Miletic '
@@ -6,7 +5,6 @@ __copyright__ = '2008-2009, Darko Miletic '
 pagina12.com.ar
 '''
 
-from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class Pagina12(BasicNewsRecipe):
@@ -19,36 +17,33 @@ class Pagina12(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets = True
     encoding = 'cp1252'
-    cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/tapagn.jpg')
-    remove_javascript = True
     use_embedded_content = False
-    language = 'es'
+    language = 'es'
+
+    conversion_options = {
+                          'comment' : description
+                        , 'tags' : category
+                        , 'publisher' : publisher
+                        , 'language' : language
+                        }
+
+    remove_tags = [dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})]
-
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
-
-
-    remove_tags = [
-                     dict(name='div', attrs={'id':'volver'})
-                    ,dict(name='div', attrs={'id':'logo' })
-                  ]
-
-
-    feeds = [(u'Pagina/12', u'http://www.pagina12.com.ar/diario/rss/principal.xml')]
+    feeds = [
+              (u'Edicion impresa', u'http://www.pagina12.com.ar/diario/rss/principal.xml'   )
+             ,(u'Espectaculos'   , u'http://www.pagina12.com.ar/diario/rss/espectaculos.xml')
+             ,(u'Radar'          , u'http://www.pagina12.com.ar/diario/rss/radar.xml'       )
+             ,(u'Radar libros'   , u'http://www.pagina12.com.ar/diario/rss/libros.xml'      )
+             ,(u'Cash'           , u'http://www.pagina12.com.ar/diario/rss/cash.xml'        )
+             ,(u'Turismo'        , u'http://www.pagina12.com.ar/diario/rss/turismo.xml'     )
+             ,(u'Libero'         , u'http://www.pagina12.com.ar/diario/rss/libero.xml'      )
+             ,(u'NO'             , u'http://www.pagina12.com.ar/diario/rss/no.xml'          )
+             ,(u'Las/12'         , u'http://www.pagina12.com.ar/diario/rss/las12.xml'       )
+             ,(u'Soy'            , u'http://www.pagina12.com.ar/diario/rss/soy.xml'         )
+             ,(u'M2'             , u'http://www.pagina12.com.ar/diario/rss/futuro.xml'      )
+            ]
 
     def print_version(self, url):
         return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
 
-    def preprocess_html(self, soup):
-        mtag = ''
-        soup.head.insert(0,mtag)
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
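
For reference, a minimal sketch of the recipe shape both patches converge on: recipe metadata exposed through a single conversion_options dict instead of the removed html2lrf_options list and html2epub_options string. The class name, metadata values, and feed URL below are placeholders and are not part of either recipe; only the option keys ('comment', 'tags', 'publisher', 'language') are taken from the diffs above.

from calibre.web.feeds.news import BasicNewsRecipe

class ExampleRecipe(BasicNewsRecipe):
    # Placeholder metadata -- a real recipe fills these in for its publication.
    title       = 'Example paper'
    description = 'Example description'
    category    = 'news'
    publisher   = 'Example publisher'
    language    = 'es'

    # One dict read by the conversion pipeline, carrying what the removed
    # html2lrf_options list and html2epub_options string used to carry.
    conversion_options = {
        'comment'   : description,
        'tags'      : category,
        'publisher' : publisher,
        'language'  : language,
    }

    # Hypothetical feed; each entry is (section title, RSS URL).
    feeds = [(u'Example section', u'http://example.com/rss.xml')]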