Updated the FAZ.NET recipe to produce EPUB output that does not crash the SONY readers.

Kovid Goyal 2009-03-25 08:48:29 -07:00
parent a5a5420944
commit 2a7c0bab5e
3 changed files with 11714 additions and 29 deletions

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -1,29 +1,50 @@
 __license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+__copyright__ = '2008-2009, Kovid Goyal <kovid at kovidgoyal.net>, Darko Miletic <darko at gmail.com>'
 '''
 Profile to download FAZ.net
 '''
-import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class FazNet(BasicNewsRecipe):
     title = 'FAZ NET'
-    __author__ = 'Kovid Goyal'
+    __author__ = 'Kovid Goyal, Darko Miletic'
     description = 'Frankfurter Allgemeine Zeitung'
+    publisher = 'FAZ Electronic Media GmbH'
+    category = 'news, politics, Germany'
     use_embedded_content = False
     language = _('German')
     max_articles_per_feed = 30
+    no_stylesheets = True
+    encoding = 'utf-8'
+    remove_javascript = True
 
-    preprocess_regexps = [
-        (re.compile(r'Zum Thema</span>.*?</BODY>', re.IGNORECASE | re.DOTALL),
-         lambda match : ''),
-    ]
+    html2lrf_options = [
+          '--comment', description
+        , '--category', category
+        , '--publisher', publisher
+    ]
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
+
+    keep_only_tags = [dict(name='div', attrs={'class':'Article'})]
+
+    remove_tags = [
+         dict(name=['object','link','embed','base'])
+        ,dict(name='div', attrs={'class':['LinkBoxModulSmall','ModulVerlagsInfo']})
+    ]
 
     feeds = [ ('FAZ.NET', 'http://www.faz.net/s/Rub/Tpl~Epartner~SRss_.xml') ]
 
     def print_version(self, url):
-        return url.replace('.html?rss_aktuell', '~Afor~Eprint.html')
+        article, sep, rest = url.partition('?')
+        return article.replace('.html', '~Afor~Eprint.html')
+
+    def preprocess_html(self, soup):
+        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
+        soup.head.insert(0,mtag)
+        del soup.body['onload']
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
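
For context, the two behavioral changes are a more tolerant print_version (the old code only rewrote URLs ending in the literal ?rss_aktuell query) and the new preprocess_html, which declares UTF-8 in the head and strips inline styles and the body's onload handler, presumably the markup that tripped the SONY readers. Below is a minimal standalone sketch of the new URL rewriting; the article URL is hypothetical and only illustrates the transform.

    # Sketch of the new print_version logic; the URL below is hypothetical.
    def print_version(url):
        # Drop any query string, not just '?rss_aktuell', then switch to
        # FAZ.NET's print template.
        article, sep, rest = url.partition('?')
        return article.replace('.html', '~Afor~Eprint.html')

    print(print_version('http://www.faz.net/s/Rub/Doc~ABC123.html?rss_aktuell'))
    # -> http://www.faz.net/s/Rub/Doc~ABC123~Afor~Eprint.html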