Fix #1891 (Updated various recipes for better EPUB support)

2025-07-09 03:04:10 -04:00 · 2009-02-19 10:24:48 -08:00 · 2009-02-19 10:24:48 -08:00 · f179a74a07
commit f179a74a07
parent 3c05e850d5
4 changed files with 58 additions and 56 deletions
--- a/src/calibre/web/feeds/recipes/recipe_harpers.py
+++ b/src/calibre/web/feeds/recipes/recipe_harpers.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env  python

 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 harpers.org
 '''
@@ -12,18 +12,29 @@ class Harpers(BasicNewsRecipe):
    __author__            = u'Darko Miletic'
    language              = _('English')
    description           = u"Harper's Magazine: Founded June 1850."
+    publisher             = "Harper's Magazine "
+    category              = 'news, politics, USA'
    oldest_article        = 30
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
-    timefmt               = ' [%A, %d %B, %Y]' 
+    remove_javascript     = True
+
+    html2lrf_options = [
+                          '--comment', description
+                        , '--category', category
+                        , '--publisher', publisher
+                        ]
+    
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"' 
+    
    
    keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ]
    remove_tags = [
                     dict(name='table', attrs={'class':'rcnt'})
                    ,dict(name='table', attrs={'class':'rcnt topline'})
+                    ,dict(name=['link','object','embed'])
                  ]

-    feeds       = [
-                   (u"Harper's Magazine", u'http://www.harpers.org/rss/frontpage-rss20.xml')
-                   ]
+    feeds       = [(u"Harper's Magazine", u'http://www.harpers.org/rss/frontpage-rss20.xml')]
+
--- a/src/calibre/web/feeds/recipes/recipe_harpers_full.py
+++ b/src/calibre/web/feeds/recipes/recipe_harpers_full.py
@@ -24,6 +24,7 @@ class Harpers_full(BasicNewsRecipe):
    use_embedded_content  = False
    simultaneous_downloads = 1
    delay                  = 1
+    language               = _('English')
    needs_subscription = True
    INDEX = strftime('http://www.harpers.org/archive/%Y/%m')
    LOGIN = 'http://www.harpers.org'
@@ -36,7 +37,7 @@ class Harpers_full(BasicNewsRecipe):
                        , '--publisher', publisher
                        ]
    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"' 

    keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ]
    remove_tags = [
@@ -72,9 +73,3 @@ class Harpers_full(BasicNewsRecipe):
                                })
        return [(soup.head.title.string, articles)]
        
-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
-        
-    language = _('English')
--- a/src/calibre/web/feeds/recipes/recipe_pobjeda.py
+++ b/src/calibre/web/feeds/recipes/recipe_pobjeda.py
@@ -17,9 +17,6 @@ class Pobjeda(BasicNewsRecipe):
    description           = 'News from Montenegro'
    publisher             = 'Pobjeda a.d.'
    category              = 'news, politics, Montenegro'    
-    language              = _('Serbian')
-    oldest_article        = 2
-    max_articles_per_feed = 100
    no_stylesheets        = True
    remove_javascript     = True
    encoding              = 'utf8'
@@ -30,11 +27,13 @@ class Pobjeda(BasicNewsRecipe):
    
    html2lrf_options = [
                          '--comment', description
+                        , '--base-font-size', '10'
                        , '--category', category
                        , '--publisher', publisher
                        ]
    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
+
    
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

@@ -64,8 +63,6 @@ class Pobjeda(BasicNewsRecipe):
        soup.html['lang']     = 'sr-Latn-ME'
        mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
        soup.head.insert(0,mtag)
-        for item in soup.findAll(style=True):
-            del item['style']
        return soup

    def get_cover_url(self):
@@ -87,7 +84,7 @@ class Pobjeda(BasicNewsRecipe):
            for item in soup.findAll('div', attrs={'class':'vijest'}):
                description = self.tag_to_string(item.h2)
                atag = item.h1.find('a')
-                if atag:
+                if atag and atag.has_key('href'):
                    url         = self.INDEX + '/' + atag['href']
                    title       = self.tag_to_string(atag)
                    date        = strftime(self.timefmt)                
--- a/src/calibre/web/feeds/recipes/recipe_pressonline.py
+++ b/src/calibre/web/feeds/recipes/recipe_pressonline.py
@@ -32,7 +32,7 @@ class PressOnline(BasicNewsRecipe):
                        , '--publisher', publisher
                        ]
    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"' 
     
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

@@ -61,7 +61,6 @@ class PressOnline(BasicNewsRecipe):
        soup.html['lang']     = 'sr-Latn-RS'
        mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
        soup.head.insert(0,mtag)
-        img = soup.find('img')
-        if img:
+        for img in soup.findAll('img', align=True):
            del img['align']
        return soup