Updated recipe for The New Yorker

Kovid Goyal 2009-05-25 20:31:00 -07:00
parent 8c6801d9c8
commit fe7876189c


@@ -1,53 +1,57 @@
 #!/usr/bin/env python
 __license__ = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 newyorker.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

 class NewYorker(BasicNewsRecipe):
-    title = u'The New Yorker'
+    title = 'The New Yorker'
     __author__ = 'Darko Miletic'
     description = 'The best of US journalism'
     oldest_article = 7
     language = _('English')
     max_articles_per_feed = 100
-    no_stylesheets = False
+    no_stylesheets = True
     use_embedded_content = False
     extra_css = '''
                   .calibre_feed_list {font-size:xx-small}
                   .calibre_article_list {font-size:xx-small}
                   .calibre_feed_title {font-size:normal}
                   .calibre_recipe_title {font-size:normal}
                   .calibre_feed_description {font-size:xx-small}
                 '''
     publisher = 'Conde Nast Publications'
     category = 'news, politics, USA'
     encoding = 'cp1252'
keep_only_tags = [
dict(name='div' , attrs={'id':'printbody' })
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'printbody'})]
remove_tags_after = dict(name='div',attrs={'id':'articlebody'})
remove_tags = [
dict(name='div' , attrs={'class':'utils' })
,dict(name='div' , attrs={'id':'bottomFeatures' })
,dict(name='div' , attrs={'id':'articleBottom' })
dict(name='div', attrs={'class':['utils','articleRailLinks','icons'] })
,dict(name='link')
]
feeds = [
(u'The New Yorker', u'http://feeds.newyorker.com/services/rss/feeds/everything.xml')
]
feeds = [(u'The New Yorker', u'http://feeds.newyorker.com/services/rss/feeds/everything.xml')]
     def print_version(self, url):
         return url + '?printable=true'

     def get_article_url(self, article):
         return article.get('guid', None)

     def postprocess_html(self, soup, x):
         body = soup.find('body')
         if body:
             html = soup.find('html')
             if html:
                 body.extract()
-                html.insert(-1, body)
+                html.insert(2, body)
         mcharset = Tag(soup, 'meta', [("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
         soup.head.insert(1, mcharset)
         return soup
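
For readers unfamiliar with calibre recipes, the two URL hooks above work together: get_article_url() returns the RSS entry's guid as the article link, and print_version() turns that link into the printer-friendly page whose div id="printbody" is the only block keep_only_tags retains. A minimal sketch of that flow, using an invented guid value (the feed entry below is illustrative, not taken from the commit):

    # Hypothetical example: how get_article_url() and print_version() compose.
    # The guid URL is invented for illustration only.
    feed_entry = {'guid': 'http://www.newyorker.com/online/2009/05/example-article'}

    article_url = feed_entry.get('guid', None)     # what get_article_url() returns
    print_url = article_url + '?printable=true'    # what print_version() fetches

    # calibre downloads print_url; keep_only_tags then keeps only the
    # <div id="printbody"> block from that printer-friendly page.
    print(print_url)

The postprocess_html() hook re-attaches the body element under html and injects a Content-Type meta tag, presumably so pages fetched as cp1252 end up declaring the utf-8 encoding of the converted output.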