Improved NYT recipes

This commit is contained in:
Kovid Goyal 2010-01-28 10:19:21 -07:00
parent 356d66482a
commit affc72895b
2 changed files with 55 additions and 9 deletions

View File

@ -79,13 +79,30 @@ class NYTimes(BasicNewsRecipe):
.authorId {text-align: left; \ .authorId {text-align: left; \
font-style: italic;}\n ' font-style: italic;}\n '
def get_cover_url(self):
    """Return the URL of today's NYT front-page scan, or None if unavailable.

    The URL is built from the local date as
    .../images/YYYY/MM/DD/nytfrontpage/scan.jpg and opened once with a
    browser from BasicNewsRecipe.get_browser(). If opening it raises, the
    failure is logged and None is returned instead of the URL.
    """
    # strftime with no time tuple formats the current local time, giving the
    # same zero-padded year/month/day path the manual formatting produced.
    cover = ('http://graphics8.nytimes.com/images/'
             + time.strftime('%Y/%m/%d') + '/nytfrontpage/scan.jpg')
    br = BasicNewsRecipe.get_browser()
    try:
        br.open(cover)
    except Exception:
        # Best effort: any fetch error just means there is no cover.
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        self.log("\nCover unavailable")
        cover = None
    return cover
def get_masthead_url(self):
    """Return the URL of the NYT masthead logo, or None if unavailable.

    The logo URL is opened once with a browser from
    BasicNewsRecipe.get_browser(). If opening it raises, the failure is
    logged and None is returned instead of the URL.
    """
    masthead = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
    br = BasicNewsRecipe.get_browser()
    try:
        br.open(masthead)
    except Exception:
        # Best effort: any fetch error just means there is no masthead.
        # The message names the masthead (it previously said "Cover").
        self.log("\nMasthead unavailable")
        masthead = None
    return masthead
def get_browser(self): def get_browser(self):
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()

View File

@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' '''
nytimes.com nytimes.com
''' '''
import string, re import string, re, time
from calibre import strftime from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
@ -31,7 +31,8 @@ class NYTimes(BasicNewsRecipe):
remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}), remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}),
dict(id=['footer', 'toolsRight', 'articleInline', dict(id=['footer', 'toolsRight', 'articleInline',
'navigation', 'archive', 'side_search', 'blog_sidebar', 'navigation', 'archive', 'side_search', 'blog_sidebar',
'side_tool', 'side_index', 'side_tool', 'side_index', 'login', 'businessSearchBar',
'adxLeaderboard',
'relatedArticles', 'relatedTopics', 'adxSponLink']), 'relatedArticles', 'relatedTopics', 'adxSponLink']),
dict(name=['script', 'noscript', 'style'])] dict(name=['script', 'noscript', 'style'])]
encoding = decode encoding = decode
@ -51,11 +52,39 @@ class NYTimes(BasicNewsRecipe):
#open('/t/log.html', 'wb').write(raw) #open('/t/log.html', 'wb').write(raw)
return br return br
def get_masthead_url(self):
    """Return the URL of the NYT masthead logo, or None if unavailable.

    The logo URL is opened once with a browser from
    BasicNewsRecipe.get_browser(). If opening it raises, the failure is
    logged and None is returned instead of the URL.
    """
    masthead = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
    br = BasicNewsRecipe.get_browser()
    try:
        br.open(masthead)
    except Exception:
        # Best effort: any fetch error just means there is no masthead.
        # The message names the masthead (it previously said "Cover").
        self.log("\nMasthead unavailable")
        masthead = None
    return masthead
def get_cover_url(self):
    """Return the URL of today's NYT front-page scan, or None if unavailable.

    The URL is built from the local date as
    .../images/YYYY/MM/DD/nytfrontpage/scan.jpg and opened once with a
    browser from BasicNewsRecipe.get_browser(). If opening it raises, the
    failure is logged and None is returned instead of the URL.
    """
    # strftime with no time tuple formats the current local time, giving the
    # same zero-padded year/month/day path the manual formatting produced.
    cover = ('http://graphics8.nytimes.com/images/'
             + time.strftime('%Y/%m/%d') + '/nytfrontpage/scan.jpg')
    br = BasicNewsRecipe.get_browser()
    try:
        br.open(cover)
    except Exception:
        # Best effort: any fetch error just means there is no cover.
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        self.log("\nCover unavailable")
        cover = None
    return cover
def short_title(self):
    """Return the short form of this recipe's title."""
    return 'NY Times'
def parse_index(self): def parse_index(self):
self.encoding = 'cp1252'
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html') soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
self.encoding = decode
def feed_title(div): def feed_title(div):
return ''.join(div.findAll(text=True, recursive=False)).strip() return ''.join(div.findAll(text=True, recursive=False)).strip()