This commit is contained in:
GRiker 2010-06-19 10:29:51 -06:00
parent 5562b12d09
commit 7a157dc3b6
4 changed files with 127 additions and 90 deletions

View File

@ -79,6 +79,7 @@ class NYTimes(BasicNewsRecipe):
'doubleRule',
'dottedLine',
'entry-meta',
'entry-response module',
'icon enlargeThis',
'leftNavTabs',
'module box nav',
@ -110,6 +111,7 @@ class NYTimes(BasicNewsRecipe):
'navigation',
'portfolioInline',
'relatedArticles',
'respond',
'side_search',
'side_index',
'side_tool',

View File

@ -66,6 +66,7 @@ class NYTimes(BasicNewsRecipe):
'doubleRule',
'dottedLine',
'entry-meta',
'entry-response module',
'icon enlargeThis',
'leftNavTabs',
'module box nav',
@ -97,6 +98,7 @@ class NYTimes(BasicNewsRecipe):
'navigation',
'portfolioInline',
'relatedArticles',
'respond',
'side_search',
'side_index',
'side_tool',
@ -417,12 +419,11 @@ class NYTimes(BasicNewsRecipe):
return soup
def postprocess_book(self, oeb, opts, log) :
print "\npostprocess_book()\n"
def extract_byline(href) :
# <meta name="byline" content=
soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
def populate_article_metadata(self,article,soup,first):
'''
Extract author and description from article, add to article metadata
'''
def extract_author(soup):
byline = soup.find('meta',attrs={'name':['byl','CLMST']})
if byline :
author = byline['content']
@ -432,50 +433,32 @@ class NYTimes(BasicNewsRecipe):
if byline:
author = byline.renderContents()
else:
print "couldn't find byline in %s" % href
print soup.prettify()
return None
# Kill commas - Kindle switches to '&'
return re.sub(',','',author)
return author
def extract_description(href) :
soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
def extract_description(soup):
description = soup.find('meta',attrs={'name':['description','description ']})
if description :
# print repr(description['content'])
# print self.massageNCXText(description['content'])
return self.massageNCXText(description['content'])
else:
# Take first paragraph of article
articleBody = soup.find('div',attrs={'id':'articleBody'})
if not articleBody:
articlebody = soup.find('div',attrs={'id':'articlebody'})
if not articlebody:
# Try again with class instead of id
articleBody = soup.find('div',attrs={'class':'articleBody'})
if not articleBody:
print 'postprocess_book.extract_description(): Did not find <div id="articleBody">:'
articlebody = soup.find('div',attrs={'class':'articlebody'})
if not articlebody:
print 'postprocess_book.extract_description(): Did not find <div id="articlebody">:'
print soup.prettify()
return None
paras = articleBody.findAll('p')
paras = articlebody.findAll('p')
for p in paras:
if p.renderContents() > '' :
return self.massageNCXText(self.tag_to_string(p,use_alt=False))
return None
# Method entry point here
# Single section toc looks different than multi-section tocs
if oeb.toc.depth() == 2 :
for article in oeb.toc :
if article.author is None :
article.author = extract_byline(article.href)
if article.description is None :
article.description = extract_description(article.href).decode('utf-8')
elif oeb.toc.depth() == 3 :
for section in oeb.toc :
for article in section :
if article.author is None :
article.author = extract_byline(article.href)
if article.description is None :
article.description = extract_description(article.href)
article.author = extract_author(soup)
article.summary = article.text_summary = extract_description(soup)
def strip_anchors(self,soup):
paras = soup.findAll(True)

View File

@ -752,7 +752,8 @@ class BasicNewsRecipe(Recipe):
def feed2index(self, feed):
def feed2index(self, f, feeds):
feed = feeds[f]
if feed.image_url is not None: # Download feed image
imgdir = os.path.join(self.output_dir, 'images')
if not os.path.isdir(imgdir):
@ -808,7 +809,8 @@ class BasicNewsRecipe(Recipe):
templ = templates.TouchscreenFeedTemplate()
css = touchscreen_css + '\n\n' + (self.extra_css if self.extra_css else '')
return templ.generate(feed, self.description_limiter,
return templ.generate(f, feeds, self.description_limiter,
extra_css=css).render(doctype='xhtml')
@ -951,7 +953,7 @@ class BasicNewsRecipe(Recipe):
#feeds.restore_duplicates()
for f, feed in enumerate(feeds):
html = self.feed2index(feed)
html = self.feed2index(f,feeds)
feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
fi.write(html)

View File

@ -107,32 +107,66 @@ class TouchscreenNavBarTemplate(Template):
align = 'center' if center else 'left'
navbar = DIV(CLASS('calibre_navbar', 'calibre_rescale_100',
style='text-align:'+align))
if bottom:
navbar.append(DIV(style="border-top:1px solid gray;border-bottom:1em solid white"))
text = 'This article was downloaded by '
p = PT(text, STRONG(__appname__), A(url, href=url), style='text-align:left')
p[0].tail = ' from '
navbar.append(p)
navbar.append(BR())
navbar.append(BR())
if False:
if bottom:
navbar.append(DIV(style="border-top:1px solid gray;border-bottom:1em solid white"))
text = 'This article was downloaded by '
p = PT(text, STRONG(__appname__), A(url, href=url), style='text-align:left')
p[0].tail = ' from '
navbar.append(p)
navbar.append(BR())
navbar.append(BR())
else:
next = 'feed_%d'%(feed+1) if art == number_of_articles_in_feed - 1 \
else 'article_%d'%(art+1)
up = '../..' if art == number_of_articles_in_feed - 1 else '..'
href = '%s%s/%s/index.html'%(prefix, up, next)
navbar.text = '| '
navbar.append(A('Next', href=href))
href = '%s../index.html#article_%d'%(prefix, art)
navbar.iterchildren(reversed=True).next().tail = ' | '
navbar.append(A('Section Menu', href=href))
href = '%s../../index.html#feed_%d'%(prefix, feed)
navbar.iterchildren(reversed=True).next().tail = ' | '
navbar.append(A("Sections", href=href))
if art > 0 and not bottom:
href = '%s../article_%d/index.html'%(prefix, art-1)
navbar.iterchildren(reversed=True).next().tail = ' | '
navbar.append(A('Previous', href=href))
else:
if bottom:
navbar.append(DIV(style="border-top:1px solid gray;border-bottom:1em solid white"))
text = 'This article was downloaded by '
p = PT(text, STRONG(__appname__), A(url, href=url), style='text-align:left')
p[0].tail = ' from '
navbar.append(p)
navbar.append(BR())
navbar.append(BR())
else:
# | Previous
if art > 0 and not bottom:
href = '%s../article_%d/index.html'%(prefix, art-1)
navbar.text = '| '
navbar.append(A('Previous', href=href))
# | Section | Main |
href = '%s../index.html#article_%d'%(prefix, art)
if art > 0:
navbar.iterchildren(reversed=True).next().tail = ' | '
navbar.append(A('Articles', href=href))
href = '%s../../index.html#feed_%d'%(prefix, feed)
navbar.iterchildren(reversed=True).next().tail = ' | '
navbar.append(A("Sections", href=href))
# | Next
next = 'feed_%d'%(feed+1) if art == number_of_articles_in_feed - 1 \
else 'article_%d'%(art+1)
up = '../..' if art == number_of_articles_in_feed - 1 else '..'
href = '%s%s/%s/index.html'%(prefix, up, next)
navbar.text = '| '
navbar.append(A('Next', href=href))
href = '%s../index.html#article_%d'%(prefix, art)
navbar.iterchildren(reversed=True).next().tail = ' | '
navbar.append(A('Section Menu', href=href))
href = '%s../../index.html#feed_%d'%(prefix, feed)
navbar.iterchildren(reversed=True).next().tail = ' | '
navbar.append(A('Main Menu', href=href))
if art > 0 and not bottom:
href = '%s../article_%d/index.html'%(prefix, art-1)
navbar.iterchildren(reversed=True).next().tail = ' | '
navbar.append(A('Previous', href=href))
navbar.append(A('Next', href=href))
navbar.iterchildren(reversed=True).next().tail = ' | '
if not bottom:
@ -200,7 +234,8 @@ class TouchscreenIndexTemplate(Template):
class FeedTemplate(Template):
def _generate(self, feed, cutoff, extra_css=None, style=None):
def _generate(self, f, feeds, cutoff, extra_css=None, style=None):
feed = feeds[f]
head = HEAD(TITLE(feed.title))
if style:
head.append(STYLE(style, type='text/css'))
@ -250,7 +285,41 @@ class FeedTemplate(Template):
class TouchscreenFeedTemplate(Template):
def _generate(self, feed, cutoff, extra_css=None, style=None):
def _generate(self, f, feeds, cutoff, extra_css=None, style=None):
def trim_title(title, clip=15):
    """Shorten ``title`` for use as a compact navbar link label.

    Titles of at most ``clip`` characters are returned unchanged.  Longer
    titles are cut on space boundaries: whole words are kept while they
    still fit within ``clip`` characters (counting one separator per kept
    word), and a trailing ``'...'`` token marks the cut.

    If even the first word does not fit, that word is hard-truncated to
    ``clip`` characters and followed by ``'...'`` so the label never
    collapses to a bare ellipsis.

    :param title: the full title string
    :param clip: maximum length budget for the kept words (default 15)
    :return: the original or shortened title
    """
    if len(title) <= clip:
        return title

    kept_tokens = []
    used_len = 0
    for token in title.split(' '):
        if len(token) + used_len < clip:
            kept_tokens.append(token)
            # +1 accounts for the space that will follow this word
            used_len += len(token) + 1
            continue
        if not kept_tokens:
            # The very first word already overflows the budget; keep a
            # readable prefix rather than returning only '...'.
            return token[:clip] + '...'
        kept_tokens.append('...')
        break
    return ' '.join(kept_tokens)
feed = feeds[f]
# Construct the navbar
navbar = DIV('| ', CLASS('calibre_navbar', 'calibre_rescale_100'),style='text-align:center')
if f > 0:
link = A(trim_title(feeds[f-1].title), href = '../feed_%d/index.html' % int(f-1))
link.tail = ' |'
navbar.append(link)
link = A("Sections", href="../index.html")
link.tail = ' |'
navbar.append(link)
if f < len(feeds)-1:
link = A(trim_title(feeds[f+1].title), href = '../feed_%d/index.html' % int(f+1))
link.tail = ' |'
navbar.append(link)
# Build the page
head = HEAD(TITLE(feed.title))
if style:
head.append(STYLE(style, type='text/css'))
@ -262,6 +331,7 @@ class TouchscreenFeedTemplate(Template):
DIV(style="border-top:1px solid gray;border-bottom:1em solid white")
)
body.append(div)
if getattr(feed, 'image', None):
div.append(DIV(IMG(
alt = feed.image_alt if feed.image_alt else '',
@ -280,41 +350,21 @@ class TouchscreenFeedTemplate(Template):
continue
tr = TR()
if True:
div_td = DIV(
A(article.title, CLASS('summary_headline','calibre_rescale_120',
href=article.url)),
style="display:inline-block")
if article.author:
div_td.append(DIV(article.author,
CLASS('summary_byline', 'calibre_rescale_100')))
if article.summary:
div_td.append(DIV(cutoff(article.text_summary),
CLASS('summary_text', 'calibre_rescale_100')))
tr.append(TD(div_td))
else:
td = TD(
A(article.title, CLASS('summary_headline','calibre_rescale_120',
href=article.url))
)
if article.author:
td.append(DIV(article.author,
CLASS('summary_byline', 'calibre_rescale_100')))
if article.summary:
td.append(DIV(cutoff(article.text_summary),
CLASS('summary_text', 'calibre_rescale_100')))
tr.append(td)
div_td = DIV(
A(article.title, CLASS('summary_headline','calibre_rescale_120',
href=article.url)),
style="display:inline-block")
if article.author:
div_td.append(DIV(article.author,
CLASS('summary_byline', 'calibre_rescale_100')))
if article.summary:
div_td.append(DIV(cutoff(article.text_summary),
CLASS('summary_text', 'calibre_rescale_100')))
tr.append(TD(div_td))
toc.append(tr)
div.append(toc)
navbar = DIV('| ', CLASS('calibre_navbar', 'calibre_rescale_100'),style='text-align:center')
link = A('Up one level', href="../index.html")
link.tail = ' |'
navbar.append(link)
div.append(navbar)
self.root = HTML(head, body)
class EmbeddedContent(Template):