Fix #2786 (Updated recipe for Clarin)

2025-08-11 09:13:57 -04:00 · 2009-07-06 12:36:55 -06:00 · 2009-07-06 12:36:55 -06:00 · 3bd2627645
commit 3bd2627645
parent ce9c61580b
1 changed file with 21 additions and 13 deletions
--- a/src/calibre/web/feeds/recipes/recipe_clarin.py
+++ b/src/calibre/web/feeds/recipes/recipe_clarin.py
@@ -7,9 +7,9 @@ clarin.com
 '''
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import Tag
 from calibre.web.feeds.news import BasicNewsRecipe
 class Clarin(BasicNewsRecipe):
    title                 = 'Clarin'
    __author__            = 'Darko Miletic'
@@ -22,14 +22,19 @@ class Clarin(BasicNewsRecipe):
    no_stylesheets        = True
    cover_url             = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
    remove_javascript     = True
-    
+    encoding              = 'cp1252'
    language              = _('Spanish')
    lang                  = 'es-AR'
    direction             = 'ltr'
    extra_css             = ' .Txt{ font-family: sans-serif } .Volan{ font-family: sans-serif; font-size: x-small} .Pie{ font-family: sans-serif; font-size: x-small} .Copete{font-family: sans-serif; font-size: large} .Hora{font-family: sans-serif; font-size: large} .Autor{font-family: sans-serif; font-size: small} '
    html2lrf_options = [
-                          '--comment', description
+                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        ]
-    
+
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
    remove_tags = [
                     dict(name='a'   , attrs={'class':'Imp'   })
@@ -48,17 +53,20 @@ class Clarin(BasicNewsRecipe):
              ,(u'Deportes'      , u'http://www.clarin.com/diario/hoy/deportes.xml'     )
            ]
-    def get_article_url(self, article):
+    def print_version(self, url):
-        artl  = article.get('link',  None)
+        rest  = url.partition('-0')[-1]
        rest  = artl.partition('-0')[-1]
        lmain = rest.partition('.')[0]
-        return 'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
+        lurl = u'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
        return lurl
    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
+        soup.html['lang'] = self.lang
-        soup.head.insert(0,mtag)    
+        soup.html['dir' ] = self.direction
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        for item in soup.findAll(style=True):
            del item['style']
        return soup
    language = _('Spanish')