This commit is contained in:
GRiker 2010-06-19 10:29:51 -06:00
parent 5562b12d09
commit 7a157dc3b6
4 changed files with 127 additions and 90 deletions

View File

@ -79,6 +79,7 @@ class NYTimes(BasicNewsRecipe):
'doubleRule', 'doubleRule',
'dottedLine', 'dottedLine',
'entry-meta', 'entry-meta',
'entry-response module',
'icon enlargeThis', 'icon enlargeThis',
'leftNavTabs', 'leftNavTabs',
'module box nav', 'module box nav',
@ -110,6 +111,7 @@ class NYTimes(BasicNewsRecipe):
'navigation', 'navigation',
'portfolioInline', 'portfolioInline',
'relatedArticles', 'relatedArticles',
'respond',
'side_search', 'side_search',
'side_index', 'side_index',
'side_tool', 'side_tool',

View File

@ -66,6 +66,7 @@ class NYTimes(BasicNewsRecipe):
'doubleRule', 'doubleRule',
'dottedLine', 'dottedLine',
'entry-meta', 'entry-meta',
'entry-response module',
'icon enlargeThis', 'icon enlargeThis',
'leftNavTabs', 'leftNavTabs',
'module box nav', 'module box nav',
@ -97,6 +98,7 @@ class NYTimes(BasicNewsRecipe):
'navigation', 'navigation',
'portfolioInline', 'portfolioInline',
'relatedArticles', 'relatedArticles',
'respond',
'side_search', 'side_search',
'side_index', 'side_index',
'side_tool', 'side_tool',
@ -417,12 +419,11 @@ class NYTimes(BasicNewsRecipe):
return soup return soup
def postprocess_book(self, oeb, opts, log) : def populate_article_metadata(self,article,soup,first):
print "\npostprocess_book()\n" '''
Extract author and description from article, add to article metadata
def extract_byline(href) : '''
# <meta name="byline" content= def extract_author(soup):
soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
byline = soup.find('meta',attrs={'name':['byl','CLMST']}) byline = soup.find('meta',attrs={'name':['byl','CLMST']})
if byline : if byline :
author = byline['content'] author = byline['content']
@ -432,50 +433,32 @@ class NYTimes(BasicNewsRecipe):
if byline: if byline:
author = byline.renderContents() author = byline.renderContents()
else: else:
print "couldn't find byline in %s" % href
print soup.prettify() print soup.prettify()
return None return None
# Kill commas - Kindle switches to '&' return author
return re.sub(',','',author)
def extract_description(href) : def extract_description(soup):
soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
description = soup.find('meta',attrs={'name':['description','description ']}) description = soup.find('meta',attrs={'name':['description','description ']})
if description : if description :
# print repr(description['content'])
# print self.massageNCXText(description['content'])
return self.massageNCXText(description['content']) return self.massageNCXText(description['content'])
else: else:
# Take first paragraph of article # Take first paragraph of article
articleBody = soup.find('div',attrs={'id':'articleBody'}) articlebody = soup.find('div',attrs={'id':'articlebody'})
if not articleBody: if not articlebody:
# Try again with class instead of id # Try again with class instead of id
articleBody = soup.find('div',attrs={'class':'articleBody'}) articlebody = soup.find('div',attrs={'class':'articlebody'})
if not articleBody: if not articlebody:
print 'postprocess_book.extract_description(): Did not find <div id="articleBody">:' print 'postprocess_book.extract_description(): Did not find <div id="articlebody">:'
print soup.prettify() print soup.prettify()
return None return None
paras = articleBody.findAll('p') paras = articlebody.findAll('p')
for p in paras: for p in paras:
if p.renderContents() > '' : if p.renderContents() > '' :
return self.massageNCXText(self.tag_to_string(p,use_alt=False)) return self.massageNCXText(self.tag_to_string(p,use_alt=False))
return None return None
# Method entry point here article.author = extract_author(soup)
# Single section toc looks different than multi-section tocs article.summary = article.text_summary = extract_description(soup)
if oeb.toc.depth() == 2 :
for article in oeb.toc :
if article.author is None :
article.author = extract_byline(article.href)
if article.description is None :
article.description = extract_description(article.href).decode('utf-8')
elif oeb.toc.depth() == 3 :
for section in oeb.toc :
for article in section :
if article.author is None :
article.author = extract_byline(article.href)
if article.description is None :
article.description = extract_description(article.href)
def strip_anchors(self,soup): def strip_anchors(self,soup):
paras = soup.findAll(True) paras = soup.findAll(True)

View File

@ -752,7 +752,8 @@ class BasicNewsRecipe(Recipe):
def feed2index(self, feed): def feed2index(self, f, feeds):
feed = feeds[f]
if feed.image_url is not None: # Download feed image if feed.image_url is not None: # Download feed image
imgdir = os.path.join(self.output_dir, 'images') imgdir = os.path.join(self.output_dir, 'images')
if not os.path.isdir(imgdir): if not os.path.isdir(imgdir):
@ -808,7 +809,8 @@ class BasicNewsRecipe(Recipe):
templ = templates.TouchscreenFeedTemplate() templ = templates.TouchscreenFeedTemplate()
css = touchscreen_css + '\n\n' + (self.extra_css if self.extra_css else '') css = touchscreen_css + '\n\n' + (self.extra_css if self.extra_css else '')
return templ.generate(feed, self.description_limiter,
return templ.generate(f, feeds, self.description_limiter,
extra_css=css).render(doctype='xhtml') extra_css=css).render(doctype='xhtml')
@ -951,7 +953,7 @@ class BasicNewsRecipe(Recipe):
#feeds.restore_duplicates() #feeds.restore_duplicates()
for f, feed in enumerate(feeds): for f, feed in enumerate(feeds):
html = self.feed2index(feed) html = self.feed2index(f,feeds)
feed_dir = os.path.join(self.output_dir, 'feed_%d'%f) feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi: with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
fi.write(html) fi.write(html)

View File

@ -107,6 +107,8 @@ class TouchscreenNavBarTemplate(Template):
align = 'center' if center else 'left' align = 'center' if center else 'left'
navbar = DIV(CLASS('calibre_navbar', 'calibre_rescale_100', navbar = DIV(CLASS('calibre_navbar', 'calibre_rescale_100',
style='text-align:'+align)) style='text-align:'+align))
if False:
if bottom: if bottom:
navbar.append(DIV(style="border-top:1px solid gray;border-bottom:1em solid white")) navbar.append(DIV(style="border-top:1px solid gray;border-bottom:1em solid white"))
text = 'This article was downloaded by ' text = 'This article was downloaded by '
@ -128,11 +130,43 @@ class TouchscreenNavBarTemplate(Template):
navbar.append(A('Section Menu', href=href)) navbar.append(A('Section Menu', href=href))
href = '%s../../index.html#feed_%d'%(prefix, feed) href = '%s../../index.html#feed_%d'%(prefix, feed)
navbar.iterchildren(reversed=True).next().tail = ' | ' navbar.iterchildren(reversed=True).next().tail = ' | '
navbar.append(A('Main Menu', href=href)) navbar.append(A("Sections", href=href))
if art > 0 and not bottom: if art > 0 and not bottom:
href = '%s../article_%d/index.html'%(prefix, art-1) href = '%s../article_%d/index.html'%(prefix, art-1)
navbar.iterchildren(reversed=True).next().tail = ' | ' navbar.iterchildren(reversed=True).next().tail = ' | '
navbar.append(A('Previous', href=href)) navbar.append(A('Previous', href=href))
else:
if bottom:
navbar.append(DIV(style="border-top:1px solid gray;border-bottom:1em solid white"))
text = 'This article was downloaded by '
p = PT(text, STRONG(__appname__), A(url, href=url), style='text-align:left')
p[0].tail = ' from '
navbar.append(p)
navbar.append(BR())
navbar.append(BR())
else:
# | Previous
if art > 0 and not bottom:
href = '%s../article_%d/index.html'%(prefix, art-1)
navbar.text = '| '
navbar.append(A('Previous', href=href))
# | Section | Main |
href = '%s../index.html#article_%d'%(prefix, art)
if art > 0:
navbar.iterchildren(reversed=True).next().tail = ' | '
navbar.append(A('Articles', href=href))
href = '%s../../index.html#feed_%d'%(prefix, feed)
navbar.iterchildren(reversed=True).next().tail = ' | '
navbar.append(A("Sections", href=href))
# | Next
next = 'feed_%d'%(feed+1) if art == number_of_articles_in_feed - 1 \
else 'article_%d'%(art+1)
up = '../..' if art == number_of_articles_in_feed - 1 else '..'
href = '%s%s/%s/index.html'%(prefix, up, next)
navbar.iterchildren(reversed=True).next().tail = ' | '
navbar.append(A('Next', href=href))
navbar.iterchildren(reversed=True).next().tail = ' | ' navbar.iterchildren(reversed=True).next().tail = ' | '
if not bottom: if not bottom:
@ -200,7 +234,8 @@ class TouchscreenIndexTemplate(Template):
class FeedTemplate(Template): class FeedTemplate(Template):
def _generate(self, feed, cutoff, extra_css=None, style=None): def _generate(self, f, feeds, cutoff, extra_css=None, style=None):
feed = feeds[f]
head = HEAD(TITLE(feed.title)) head = HEAD(TITLE(feed.title))
if style: if style:
head.append(STYLE(style, type='text/css')) head.append(STYLE(style, type='text/css'))
@ -250,7 +285,41 @@ class FeedTemplate(Template):
class TouchscreenFeedTemplate(Template): class TouchscreenFeedTemplate(Template):
def _generate(self, feed, cutoff, extra_css=None, style=None): def _generate(self, f, feeds, cutoff, extra_css=None, style=None):
def trim_title(title, clip=15):
    """Shorten *title* for use as compact navbar link text.

    Titles of at most *clip* characters are returned unchanged. Longer
    titles are cut on a word boundary (words are split on single
    spaces): leading words are kept while the running length stays
    below *clip*, and ``'...'`` is appended as a final token.

    If even the first word does not fit, the title is hard-truncated to
    *clip* characters and ``'...'`` is appended directly, so the result
    always keeps some recognisable part of the title instead of being a
    bare ellipsis.
    """
    if len(title) <= clip:
        return title

    kept = []
    used = 0  # length of the kept words, counting one separator per word
    for token in title.split(' '):
        if len(token) + used >= clip:
            break
        kept.append(token)
        used += len(token) + 1

    if not kept:
        # First word alone is too long: fall back to a character cut
        # rather than returning only '...'.
        return title[:max(clip, 0)] + '...'

    kept.append('...')
    return ' '.join(kept)
feed = feeds[f]
# Construct the navbar
navbar = DIV('| ', CLASS('calibre_navbar', 'calibre_rescale_100'),style='text-align:center')
if f > 0:
link = A(trim_title(feeds[f-1].title), href = '../feed_%d/index.html' % int(f-1))
link.tail = ' |'
navbar.append(link)
link = A("Sections", href="../index.html")
link.tail = ' |'
navbar.append(link)
if f < len(feeds)-1:
link = A(trim_title(feeds[f+1].title), href = '../feed_%d/index.html' % int(f+1))
link.tail = ' |'
navbar.append(link)
# Build the page
head = HEAD(TITLE(feed.title)) head = HEAD(TITLE(feed.title))
if style: if style:
head.append(STYLE(style, type='text/css')) head.append(STYLE(style, type='text/css'))
@ -262,6 +331,7 @@ class TouchscreenFeedTemplate(Template):
DIV(style="border-top:1px solid gray;border-bottom:1em solid white") DIV(style="border-top:1px solid gray;border-bottom:1em solid white")
) )
body.append(div) body.append(div)
if getattr(feed, 'image', None): if getattr(feed, 'image', None):
div.append(DIV(IMG( div.append(DIV(IMG(
alt = feed.image_alt if feed.image_alt else '', alt = feed.image_alt if feed.image_alt else '',
@ -280,7 +350,6 @@ class TouchscreenFeedTemplate(Template):
continue continue
tr = TR() tr = TR()
if True:
div_td = DIV( div_td = DIV(
A(article.title, CLASS('summary_headline','calibre_rescale_120', A(article.title, CLASS('summary_headline','calibre_rescale_120',
href=article.url)), href=article.url)),
@ -292,29 +361,10 @@ class TouchscreenFeedTemplate(Template):
div_td.append(DIV(cutoff(article.text_summary), div_td.append(DIV(cutoff(article.text_summary),
CLASS('summary_text', 'calibre_rescale_100'))) CLASS('summary_text', 'calibre_rescale_100')))
tr.append(TD(div_td)) tr.append(TD(div_td))
else:
td = TD(
A(article.title, CLASS('summary_headline','calibre_rescale_120',
href=article.url))
)
if article.author:
td.append(DIV(article.author,
CLASS('summary_byline', 'calibre_rescale_100')))
if article.summary:
td.append(DIV(cutoff(article.text_summary),
CLASS('summary_text', 'calibre_rescale_100')))
tr.append(td)
toc.append(tr) toc.append(tr)
div.append(toc) div.append(toc)
navbar = DIV('| ', CLASS('calibre_navbar', 'calibre_rescale_100'),style='text-align:center')
link = A('Up one level', href="../index.html")
link.tail = ' |'
navbar.append(link)
div.append(navbar) div.append(navbar)
self.root = HTML(head, body) self.root = HTML(head, body)
class EmbeddedContent(Template): class EmbeddedContent(Template):