Updated recipes for Clarin and Pagina 12. Fix #4199 (Updated recipes in Spanish)

2025-08-30 23:00:21 -04:00 · 2009-12-13 12:23:06 -07:00 · 2009-12-13 12:23:06 -07:00 · 5e55b31ab3
commit 5e55b31ab3
parent afc66727fd
2 changed files with 30 additions and 52 deletions
--- a/resources/recipes/clarin.recipe
+++ b/resources/recipes/clarin.recipe
@ -1,4 +1,3 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
@ -8,7 +7,6 @@ clarin.com
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import Tag
 class Clarin(BasicNewsRecipe):
    title                 = 'Clarin'
@ -21,21 +19,16 @@ class Clarin(BasicNewsRecipe):
    use_embedded_content  = False
    no_stylesheets        = True
    cover_url             = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
    remove_javascript     = True
    encoding              = 'cp1252'
    language              = 'es'
    lang                  = 'es-AR'
    direction             = 'ltr'
    extra_css             = ' .Txt{ font-family: sans-serif } .Volan{ font-family: sans-serif; font-size: x-small} .Pie{ font-family: sans-serif; font-size: x-small} .Copete{font-family: sans-serif; font-size: large} .Hora{font-family: sans-serif; font-size: large} .Autor{font-family: sans-serif; font-size: small} '
-    html2lrf_options = [
+    conversion_options = {
-                          '--comment', description
+                          'comment'  : description
-                        , '--category', category
+                        , 'tags'     : category
-                        , '--publisher', publisher
+                        , 'publisher': publisher
-                        ]
+                        , 'language' : language
-
+                        }
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
    remove_tags = [
                     dict(name='a'   , attrs={'class':'Imp'   })
@ -60,14 +53,4 @@ class Clarin(BasicNewsRecipe):
        lurl = u'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
        return lurl
    def preprocess_html(self, soup):
        soup.html['lang'] = self.lang
        soup.html['dir' ] = self.direction
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        for item in soup.findAll(style=True):
            del item['style']
        return soup
--- a/resources/recipes/pagina12.recipe
+++ b/resources/recipes/pagina12.recipe
@ -1,4 +1,3 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
@ -6,7 +5,6 @@ __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
 pagina12.com.ar
 '''
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 class Pagina12(BasicNewsRecipe):
@ -19,36 +17,33 @@ class Pagina12(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'cp1252'
    cover_url             = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/tapagn.jpg')
    remove_javascript     = True
    use_embedded_content  = False
    language              = 'es'
    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
                        , 'publisher' : publisher
                        , 'language'  : language
                        }
    remove_tags = [dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})]
-    html2lrf_options = [
+    feeds = [
-                          '--comment', description
+              (u'Edicion impresa', u'http://www.pagina12.com.ar/diario/rss/principal.xml'   )
-                        , '--category', category
+             ,(u'Espectaculos'   , u'http://www.pagina12.com.ar/diario/rss/espectaculos.xml')
-                        , '--publisher', publisher
+             ,(u'Radar'          , u'http://www.pagina12.com.ar/diario/rss/radar.xml'       )
             ,(u'Radar libros'   , u'http://www.pagina12.com.ar/diario/rss/libros.xml'      )
             ,(u'Cash'           , u'http://www.pagina12.com.ar/diario/rss/cash.xml'        )
             ,(u'Turismo'        , u'http://www.pagina12.com.ar/diario/rss/turismo.xml'     )
             ,(u'Libero'         , u'http://www.pagina12.com.ar/diario/rss/libero.xml'      )
             ,(u'NO'             , u'http://www.pagina12.com.ar/diario/rss/no.xml'          )
             ,(u'Las/12'         , u'http://www.pagina12.com.ar/diario/rss/las12.xml'       )
             ,(u'Soy'            , u'http://www.pagina12.com.ar/diario/rss/soy.xml'         )
             ,(u'M2'             , u'http://www.pagina12.com.ar/diario/rss/futuro.xml'      )
            ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
    remove_tags = [
                     dict(name='div', attrs={'id':'volver'})
                    ,dict(name='div', attrs={'id':'logo'  })
                  ]
    feeds = [(u'Pagina/12', u'http://www.pagina12.com.ar/diario/rss/principal.xml')]
    def print_version(self, url):
        return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
            del item['style']
        return soup