mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
GwR wip
This commit is contained in:
parent
5562b12d09
commit
7a157dc3b6
@ -79,6 +79,7 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
'doubleRule',
|
'doubleRule',
|
||||||
'dottedLine',
|
'dottedLine',
|
||||||
'entry-meta',
|
'entry-meta',
|
||||||
|
'entry-response module',
|
||||||
'icon enlargeThis',
|
'icon enlargeThis',
|
||||||
'leftNavTabs',
|
'leftNavTabs',
|
||||||
'module box nav',
|
'module box nav',
|
||||||
@ -110,6 +111,7 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
'navigation',
|
'navigation',
|
||||||
'portfolioInline',
|
'portfolioInline',
|
||||||
'relatedArticles',
|
'relatedArticles',
|
||||||
|
'respond',
|
||||||
'side_search',
|
'side_search',
|
||||||
'side_index',
|
'side_index',
|
||||||
'side_tool',
|
'side_tool',
|
||||||
|
@ -66,6 +66,7 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
'doubleRule',
|
'doubleRule',
|
||||||
'dottedLine',
|
'dottedLine',
|
||||||
'entry-meta',
|
'entry-meta',
|
||||||
|
'entry-response module',
|
||||||
'icon enlargeThis',
|
'icon enlargeThis',
|
||||||
'leftNavTabs',
|
'leftNavTabs',
|
||||||
'module box nav',
|
'module box nav',
|
||||||
@ -97,6 +98,7 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
'navigation',
|
'navigation',
|
||||||
'portfolioInline',
|
'portfolioInline',
|
||||||
'relatedArticles',
|
'relatedArticles',
|
||||||
|
'respond',
|
||||||
'side_search',
|
'side_search',
|
||||||
'side_index',
|
'side_index',
|
||||||
'side_tool',
|
'side_tool',
|
||||||
@ -417,12 +419,11 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def postprocess_book(self, oeb, opts, log) :
|
def populate_article_metadata(self,article,soup,first):
|
||||||
print "\npostprocess_book()\n"
|
'''
|
||||||
|
Extract author and description from article, add to article metadata
|
||||||
def extract_byline(href) :
|
'''
|
||||||
# <meta name="byline" content=
|
def extract_author(soup):
|
||||||
soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
|
|
||||||
byline = soup.find('meta',attrs={'name':['byl','CLMST']})
|
byline = soup.find('meta',attrs={'name':['byl','CLMST']})
|
||||||
if byline :
|
if byline :
|
||||||
author = byline['content']
|
author = byline['content']
|
||||||
@ -432,50 +433,32 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
if byline:
|
if byline:
|
||||||
author = byline.renderContents()
|
author = byline.renderContents()
|
||||||
else:
|
else:
|
||||||
print "couldn't find byline in %s" % href
|
|
||||||
print soup.prettify()
|
print soup.prettify()
|
||||||
return None
|
return None
|
||||||
# Kill commas - Kindle switches to '&'
|
return author
|
||||||
return re.sub(',','',author)
|
|
||||||
|
|
||||||
def extract_description(href) :
|
def extract_description(soup):
|
||||||
soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
|
|
||||||
description = soup.find('meta',attrs={'name':['description','description ']})
|
description = soup.find('meta',attrs={'name':['description','description ']})
|
||||||
if description :
|
if description :
|
||||||
# print repr(description['content'])
|
|
||||||
# print self.massageNCXText(description['content'])
|
|
||||||
return self.massageNCXText(description['content'])
|
return self.massageNCXText(description['content'])
|
||||||
else:
|
else:
|
||||||
# Take first paragraph of article
|
# Take first paragraph of article
|
||||||
articleBody = soup.find('div',attrs={'id':'articleBody'})
|
articlebody = soup.find('div',attrs={'id':'articlebody'})
|
||||||
if not articleBody:
|
if not articlebody:
|
||||||
# Try again with class instead of id
|
# Try again with class instead of id
|
||||||
articleBody = soup.find('div',attrs={'class':'articleBody'})
|
articlebody = soup.find('div',attrs={'class':'articlebody'})
|
||||||
if not articleBody:
|
if not articlebody:
|
||||||
print 'postprocess_book.extract_description(): Did not find <div id="articleBody">:'
|
print 'postprocess_book.extract_description(): Did not find <div id="articlebody">:'
|
||||||
print soup.prettify()
|
print soup.prettify()
|
||||||
return None
|
return None
|
||||||
paras = articleBody.findAll('p')
|
paras = articlebody.findAll('p')
|
||||||
for p in paras:
|
for p in paras:
|
||||||
if p.renderContents() > '' :
|
if p.renderContents() > '' :
|
||||||
return self.massageNCXText(self.tag_to_string(p,use_alt=False))
|
return self.massageNCXText(self.tag_to_string(p,use_alt=False))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Method entry point here
|
article.author = extract_author(soup)
|
||||||
# Single section toc looks different than multi-section tocs
|
article.summary = article.text_summary = extract_description(soup)
|
||||||
if oeb.toc.depth() == 2 :
|
|
||||||
for article in oeb.toc :
|
|
||||||
if article.author is None :
|
|
||||||
article.author = extract_byline(article.href)
|
|
||||||
if article.description is None :
|
|
||||||
article.description = extract_description(article.href).decode('utf-8')
|
|
||||||
elif oeb.toc.depth() == 3 :
|
|
||||||
for section in oeb.toc :
|
|
||||||
for article in section :
|
|
||||||
if article.author is None :
|
|
||||||
article.author = extract_byline(article.href)
|
|
||||||
if article.description is None :
|
|
||||||
article.description = extract_description(article.href)
|
|
||||||
|
|
||||||
def strip_anchors(self,soup):
|
def strip_anchors(self,soup):
|
||||||
paras = soup.findAll(True)
|
paras = soup.findAll(True)
|
||||||
|
@ -752,7 +752,8 @@ class BasicNewsRecipe(Recipe):
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
def feed2index(self, feed):
|
def feed2index(self, f, feeds):
|
||||||
|
feed = feeds[f]
|
||||||
if feed.image_url is not None: # Download feed image
|
if feed.image_url is not None: # Download feed image
|
||||||
imgdir = os.path.join(self.output_dir, 'images')
|
imgdir = os.path.join(self.output_dir, 'images')
|
||||||
if not os.path.isdir(imgdir):
|
if not os.path.isdir(imgdir):
|
||||||
@ -808,7 +809,8 @@ class BasicNewsRecipe(Recipe):
|
|||||||
|
|
||||||
templ = templates.TouchscreenFeedTemplate()
|
templ = templates.TouchscreenFeedTemplate()
|
||||||
css = touchscreen_css + '\n\n' + (self.extra_css if self.extra_css else '')
|
css = touchscreen_css + '\n\n' + (self.extra_css if self.extra_css else '')
|
||||||
return templ.generate(feed, self.description_limiter,
|
|
||||||
|
return templ.generate(f, feeds, self.description_limiter,
|
||||||
extra_css=css).render(doctype='xhtml')
|
extra_css=css).render(doctype='xhtml')
|
||||||
|
|
||||||
|
|
||||||
@ -951,7 +953,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
#feeds.restore_duplicates()
|
#feeds.restore_duplicates()
|
||||||
|
|
||||||
for f, feed in enumerate(feeds):
|
for f, feed in enumerate(feeds):
|
||||||
html = self.feed2index(feed)
|
html = self.feed2index(f,feeds)
|
||||||
feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
|
feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
|
||||||
with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
|
with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
|
||||||
fi.write(html)
|
fi.write(html)
|
||||||
|
@ -107,6 +107,8 @@ class TouchscreenNavBarTemplate(Template):
|
|||||||
align = 'center' if center else 'left'
|
align = 'center' if center else 'left'
|
||||||
navbar = DIV(CLASS('calibre_navbar', 'calibre_rescale_100',
|
navbar = DIV(CLASS('calibre_navbar', 'calibre_rescale_100',
|
||||||
style='text-align:'+align))
|
style='text-align:'+align))
|
||||||
|
|
||||||
|
if False:
|
||||||
if bottom:
|
if bottom:
|
||||||
navbar.append(DIV(style="border-top:1px solid gray;border-bottom:1em solid white"))
|
navbar.append(DIV(style="border-top:1px solid gray;border-bottom:1em solid white"))
|
||||||
text = 'This article was downloaded by '
|
text = 'This article was downloaded by '
|
||||||
@ -128,11 +130,43 @@ class TouchscreenNavBarTemplate(Template):
|
|||||||
navbar.append(A('Section Menu', href=href))
|
navbar.append(A('Section Menu', href=href))
|
||||||
href = '%s../../index.html#feed_%d'%(prefix, feed)
|
href = '%s../../index.html#feed_%d'%(prefix, feed)
|
||||||
navbar.iterchildren(reversed=True).next().tail = ' | '
|
navbar.iterchildren(reversed=True).next().tail = ' | '
|
||||||
navbar.append(A('Main Menu', href=href))
|
navbar.append(A("Sections", href=href))
|
||||||
if art > 0 and not bottom:
|
if art > 0 and not bottom:
|
||||||
href = '%s../article_%d/index.html'%(prefix, art-1)
|
href = '%s../article_%d/index.html'%(prefix, art-1)
|
||||||
navbar.iterchildren(reversed=True).next().tail = ' | '
|
navbar.iterchildren(reversed=True).next().tail = ' | '
|
||||||
navbar.append(A('Previous', href=href))
|
navbar.append(A('Previous', href=href))
|
||||||
|
else:
|
||||||
|
if bottom:
|
||||||
|
navbar.append(DIV(style="border-top:1px solid gray;border-bottom:1em solid white"))
|
||||||
|
text = 'This article was downloaded by '
|
||||||
|
p = PT(text, STRONG(__appname__), A(url, href=url), style='text-align:left')
|
||||||
|
p[0].tail = ' from '
|
||||||
|
navbar.append(p)
|
||||||
|
navbar.append(BR())
|
||||||
|
navbar.append(BR())
|
||||||
|
else:
|
||||||
|
# | Previous
|
||||||
|
if art > 0 and not bottom:
|
||||||
|
href = '%s../article_%d/index.html'%(prefix, art-1)
|
||||||
|
navbar.text = '| '
|
||||||
|
navbar.append(A('Previous', href=href))
|
||||||
|
|
||||||
|
# | Section | Main |
|
||||||
|
href = '%s../index.html#article_%d'%(prefix, art)
|
||||||
|
if art > 0:
|
||||||
|
navbar.iterchildren(reversed=True).next().tail = ' | '
|
||||||
|
navbar.append(A('Articles', href=href))
|
||||||
|
href = '%s../../index.html#feed_%d'%(prefix, feed)
|
||||||
|
navbar.iterchildren(reversed=True).next().tail = ' | '
|
||||||
|
navbar.append(A("Sections", href=href))
|
||||||
|
|
||||||
|
# | Next
|
||||||
|
next = 'feed_%d'%(feed+1) if art == number_of_articles_in_feed - 1 \
|
||||||
|
else 'article_%d'%(art+1)
|
||||||
|
up = '../..' if art == number_of_articles_in_feed - 1 else '..'
|
||||||
|
href = '%s%s/%s/index.html'%(prefix, up, next)
|
||||||
|
navbar.iterchildren(reversed=True).next().tail = ' | '
|
||||||
|
navbar.append(A('Next', href=href))
|
||||||
|
|
||||||
navbar.iterchildren(reversed=True).next().tail = ' | '
|
navbar.iterchildren(reversed=True).next().tail = ' | '
|
||||||
if not bottom:
|
if not bottom:
|
||||||
@ -200,7 +234,8 @@ class TouchscreenIndexTemplate(Template):
|
|||||||
|
|
||||||
class FeedTemplate(Template):
|
class FeedTemplate(Template):
|
||||||
|
|
||||||
def _generate(self, feed, cutoff, extra_css=None, style=None):
|
def _generate(self, f, feeds, cutoff, extra_css=None, style=None):
|
||||||
|
feed = feeds[f]
|
||||||
head = HEAD(TITLE(feed.title))
|
head = HEAD(TITLE(feed.title))
|
||||||
if style:
|
if style:
|
||||||
head.append(STYLE(style, type='text/css'))
|
head.append(STYLE(style, type='text/css'))
|
||||||
@ -250,7 +285,41 @@ class FeedTemplate(Template):
|
|||||||
|
|
||||||
class TouchscreenFeedTemplate(Template):
|
class TouchscreenFeedTemplate(Template):
|
||||||
|
|
||||||
def _generate(self, feed, cutoff, extra_css=None, style=None):
|
def _generate(self, f, feeds, cutoff, extra_css=None, style=None):
|
||||||
|
|
||||||
|
def trim_title(title,clip=15):
|
||||||
|
if len(title)>clip:
|
||||||
|
tokens = title.split(' ')
|
||||||
|
new_title_tokens = []
|
||||||
|
new_title_len = 0
|
||||||
|
for token in tokens:
|
||||||
|
if len(token) + new_title_len < clip:
|
||||||
|
new_title_tokens.append(token)
|
||||||
|
new_title_len += len(token) + 1
|
||||||
|
else:
|
||||||
|
new_title_tokens.append('...')
|
||||||
|
title = ' '.join(new_title_tokens)
|
||||||
|
break
|
||||||
|
return title
|
||||||
|
|
||||||
|
feed = feeds[f]
|
||||||
|
|
||||||
|
# Construct the navbar
|
||||||
|
navbar = DIV('| ', CLASS('calibre_navbar', 'calibre_rescale_100'),style='text-align:center')
|
||||||
|
if f > 0:
|
||||||
|
link = A(trim_title(feeds[f-1].title), href = '../feed_%d/index.html' % int(f-1))
|
||||||
|
link.tail = ' |'
|
||||||
|
navbar.append(link)
|
||||||
|
|
||||||
|
link = A("Sections", href="../index.html")
|
||||||
|
link.tail = ' |'
|
||||||
|
navbar.append(link)
|
||||||
|
if f < len(feeds)-1:
|
||||||
|
link = A(trim_title(feeds[f+1].title), href = '../feed_%d/index.html' % int(f+1))
|
||||||
|
link.tail = ' |'
|
||||||
|
navbar.append(link)
|
||||||
|
|
||||||
|
# Build the page
|
||||||
head = HEAD(TITLE(feed.title))
|
head = HEAD(TITLE(feed.title))
|
||||||
if style:
|
if style:
|
||||||
head.append(STYLE(style, type='text/css'))
|
head.append(STYLE(style, type='text/css'))
|
||||||
@ -262,6 +331,7 @@ class TouchscreenFeedTemplate(Template):
|
|||||||
DIV(style="border-top:1px solid gray;border-bottom:1em solid white")
|
DIV(style="border-top:1px solid gray;border-bottom:1em solid white")
|
||||||
)
|
)
|
||||||
body.append(div)
|
body.append(div)
|
||||||
|
|
||||||
if getattr(feed, 'image', None):
|
if getattr(feed, 'image', None):
|
||||||
div.append(DIV(IMG(
|
div.append(DIV(IMG(
|
||||||
alt = feed.image_alt if feed.image_alt else '',
|
alt = feed.image_alt if feed.image_alt else '',
|
||||||
@ -280,7 +350,6 @@ class TouchscreenFeedTemplate(Template):
|
|||||||
continue
|
continue
|
||||||
tr = TR()
|
tr = TR()
|
||||||
|
|
||||||
if True:
|
|
||||||
div_td = DIV(
|
div_td = DIV(
|
||||||
A(article.title, CLASS('summary_headline','calibre_rescale_120',
|
A(article.title, CLASS('summary_headline','calibre_rescale_120',
|
||||||
href=article.url)),
|
href=article.url)),
|
||||||
@ -292,29 +361,10 @@ class TouchscreenFeedTemplate(Template):
|
|||||||
div_td.append(DIV(cutoff(article.text_summary),
|
div_td.append(DIV(cutoff(article.text_summary),
|
||||||
CLASS('summary_text', 'calibre_rescale_100')))
|
CLASS('summary_text', 'calibre_rescale_100')))
|
||||||
tr.append(TD(div_td))
|
tr.append(TD(div_td))
|
||||||
else:
|
|
||||||
td = TD(
|
|
||||||
A(article.title, CLASS('summary_headline','calibre_rescale_120',
|
|
||||||
href=article.url))
|
|
||||||
)
|
|
||||||
if article.author:
|
|
||||||
td.append(DIV(article.author,
|
|
||||||
CLASS('summary_byline', 'calibre_rescale_100')))
|
|
||||||
if article.summary:
|
|
||||||
td.append(DIV(cutoff(article.text_summary),
|
|
||||||
CLASS('summary_text', 'calibre_rescale_100')))
|
|
||||||
|
|
||||||
tr.append(td)
|
|
||||||
|
|
||||||
toc.append(tr)
|
toc.append(tr)
|
||||||
div.append(toc)
|
div.append(toc)
|
||||||
|
|
||||||
navbar = DIV('| ', CLASS('calibre_navbar', 'calibre_rescale_100'),style='text-align:center')
|
|
||||||
link = A('Up one level', href="../index.html")
|
|
||||||
link.tail = ' |'
|
|
||||||
navbar.append(link)
|
|
||||||
div.append(navbar)
|
div.append(navbar)
|
||||||
|
|
||||||
self.root = HTML(head, body)
|
self.root = HTML(head, body)
|
||||||
|
|
||||||
class EmbeddedContent(Template):
|
class EmbeddedContent(Template):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user