Improved NYT recipes

2025-07-09 03:04:10 -04:00 · 2010-01-28 10:19:21 -07:00 · 2010-01-28 10:19:21 -07:00 · affc72895b
commit affc72895b
parent 356d66482a
2 changed files with 55 additions and 9 deletions
--- a/resources/recipes/nytimes.recipe
+++ b/resources/recipes/nytimes.recipe
@ -79,13 +79,30 @@ class NYTimes(BasicNewsRecipe):
                 .authorId      {text-align:    left;       \
                                 font-style:    italic;}\n  '
-#     def get_cover_url(self):
+    def get_cover_url(self):
-#        st = time.localtime()
+        cover = None
-#        year = str(st.tm_year)
+        st = time.localtime()
-#        month = "%.2d" % st.tm_mon
+        year = str(st.tm_year)
-#        day = "%.2d" % st.tm_mday
+        month = "%.2d" % st.tm_mon
-#        cover = 'http://graphics8.nytimes.com/images/' + year + '/' +  month +'/' + day +'/nytfrontpage/' + 'scan.jpg'
+        day = "%.2d" % st.tm_mday
-#        return cover
+        cover = 'http://graphics8.nytimes.com/images/' + year + '/' +  month +'/' + day +'/nytfrontpage/scan.jpg'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover)
        except:
            self.log("\nCover unavailable")
            cover = None
        return cover
    def get_masthead_url(self):
        """Return the URL of the NYT masthead image, or None if unavailable.

        The URL is opened once with a recipe browser to confirm it can be
        fetched; on failure the problem is logged and None is returned.
        """
        masthead = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(masthead)
        except Exception:
            # Best effort: a missing masthead must not abort the download.
            # Catch Exception (not a bare except) so that KeyboardInterrupt
            # and SystemExit still propagate.
            self.log("\nMasthead unavailable")
            masthead = None
        return masthead
    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
--- a/resources/recipes/nytimes_sub.recipe
+++ b/resources/recipes/nytimes_sub.recipe
@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 nytimes.com
 '''
-import string, re
+import string, re, time
 from calibre import strftime
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
@ -31,7 +31,8 @@ class NYTimes(BasicNewsRecipe):
    remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}),
                   dict(id=['footer', 'toolsRight', 'articleInline',
                       'navigation', 'archive', 'side_search', 'blog_sidebar',
-                       'side_tool', 'side_index',
+                       'side_tool', 'side_index', 'login', 'businessSearchBar',
                       'adxLeaderboard',
                       'relatedArticles', 'relatedTopics', 'adxSponLink']),
                   dict(name=['script', 'noscript', 'style'])]
    encoding = decode
@ -51,11 +52,39 @@ class NYTimes(BasicNewsRecipe):
            #open('/t/log.html', 'wb').write(raw)
        return br
    def get_masthead_url(self):
        """Return the URL of the NYT masthead image, or None if unavailable.

        The URL is opened once with a recipe browser to confirm it can be
        fetched; on failure the problem is logged and None is returned.
        """
        masthead = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(masthead)
        except Exception:
            # Best effort: a missing masthead must not abort the download.
            # Catch Exception (not a bare except) so that KeyboardInterrupt
            # and SystemExit still propagate.
            self.log("\nMasthead unavailable")
            masthead = None
        return masthead
    def get_cover_url(self):
        """Return the URL of today's front-page scan, or None if unavailable.

        The URL is built from the local date (``time.localtime()``) and then
        opened once with a recipe browser to confirm it can be fetched; on
        failure the problem is logged and None is returned.
        """
        st = time.localtime()
        # Zero-padded YYYY/MM/DD path segment used by the image server.
        date_path = '%d/%.2d/%.2d' % (st.tm_year, st.tm_mon, st.tm_mday)
        cover = ('http://graphics8.nytimes.com/images/' + date_path +
                 '/nytfrontpage/scan.jpg')
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover)
        except Exception:
            # Best effort: a missing cover must not abort the download.
            # Catch Exception (not a bare except) so that KeyboardInterrupt
            # and SystemExit still propagate.
            self.log("\nCover unavailable")
            cover = None
        return cover
    def short_title(self):
        """Return the abbreviated title used for this recipe."""
        title = 'NY Times'
        return title
    def parse_index(self):
        self.encoding = 'cp1252'
        soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
        self.encoding = decode
        def feed_title(div):
            return ''.join(div.findAll(text=True, recursive=False)).strip()