Fix #7776 (Updated recipe for NZZ site)

2025-07-09 03:04:10 -04:00 · 2010-12-03 09:20:56 -07:00 · 2010-12-03 09:20:56 -07:00 · 72a41ea70c
commit 72a41ea70c
parent 40780b84ea
1 changed files with 25 additions and 10 deletions
--- a/resources/recipes/nzz_ger.recipe
+++ b/resources/recipes/nzz_ger.recipe
@@ -1,6 +1,6 @@
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.nzz.ch
@@ -20,6 +20,19 @@ class Nzz(BasicNewsRecipe):
    encoding              = 'utf-8'
    use_embedded_content  = False
    language              = 'de'
    extra_css             = """
                               body{font-family: Georgia,"Times New Roman",Times,serif }
                               .artikel h3,.artikel h4,.bildLegende,.question,.autor{font-family: Arial,Verdana,Helvetica,sans-serif}
                               .bildLegende{font-size: small}
                               .autor{font-size: 0.9375em; color: #666666}
                               .quote{font-size: large !important; 
                                      font-style: italic; 
                                      font-weight: normal !important; 
                                      border-bottom: 1px dotted #BFBFBF; 
                                      border-top: 1px dotted #BFBFBF; 
                                      line-height: 1.25em}
                                .quelle{color: #666666; font-style: italic; white-space: nowrap}
                            """
    conversion_options = {
                             'comments'  : description
@@ -28,12 +41,14 @@ class Nzz(BasicNewsRecipe):
                            ,'publisher' : publisher
                         }
-    keep_only_tags = [dict(name='div', attrs={'class':'article'})]
+    keep_only_tags = [dict(name='div', attrs={'class':'zone'})]
-
+    remove_tags_before = dict(name='p', attrs={'class':'dachzeile'})
    remove_tags_after=dict(name='p', attrs={'class':'fussnote'})
    remove_attributes=['width','height','lang']
    remove_tags = [
-                     dict(name=['object','link','base'])
+                     dict(name=['object','link','base','meta','iframe'])
-                    ,dict(name='div',attrs={'class':['more','teaser','advXertXoriXals','legal']})
+                    ,dict(attrs={'id':'content_rectangle_1'})
-                    ,dict(name='div',attrs={'id':['popup-src','readercomments','google-ad','advXertXoriXals']})
+                    ,dict(attrs={'class':['weiterfuehrendeLinks','fussnote','video']})                    
                  ]
    feeds = [
@@ -50,7 +65,7 @@ class Nzz(BasicNewsRecipe):
              ,(u'Reisen'        , u'http://www.nzz.ch/magazin/reisen?rss=true')
            ]
-    def print_version(self, url):
+    def preprocess_html(self, soup):
-        return url + '?printview=true'
+        for item in soup.findAll(style=True):
-
+            del item['style']
-
+        return self.adeify_images(soup)