News download: Support for touchscreen specific optimizations. Also add a new populate_article_metadata callback

This commit is contained in:
Kovid Goyal 2010-06-02 17:35:09 -06:00
commit 4ebc67bf0d
6 changed files with 210 additions and 10 deletions

View File

@ -240,6 +240,9 @@ class OutputProfile(Plugin):
# Device supports displaying a nested TOC
supports_nested_toc = True
# If True output should be optimized for a touchscreen interface
touchscreen = False
@classmethod
def tags_to_string(cls, tags):
return escape(', '.join(tags))
@ -254,6 +257,7 @@ class iPadOutput(OutputProfile):
comic_screen_size = (768, 1024)
dpi = 132.0
supports_nested_toc = False
touchscreen = True
class SonyReaderOutput(OutputProfile):

View File

@ -1229,8 +1229,10 @@ class ITUNES(DevicePlugin):
self.iTunes.delete(cached_book['lib_book'])
elif iswindows:
# Assume we're wrapped in a pythoncom
# Windows stores the book under a common author directory, so we just delete the .epub
'''
Assume we're wrapped in a pythoncom
Windows stores the book under a common author directory, so we just delete the .epub
'''
if DEBUG:
self.log.info("ITUNES._remove_from_iTunes(): '%s'" % cached_book['title'])
book = self._find_library_book(cached_book)

View File

@ -115,7 +115,7 @@ Pre/post processing of downloaded HTML
.. automethod:: BasicNewsRecipe.postprocess_html
.. automethod:: BasicNewsRecipe.populate_article_metadata
Convenience methods

View File

@ -254,7 +254,7 @@ class BasicNewsRecipe(Recipe):
#: will remove everything from `<!--Article ends here-->` to `</body>`.
preprocess_regexps = []
#: The CSS that is used to styles the templates, i.e., the navigation bars and
#: The CSS that is used to style the templates, i.e., the navigation bars and
#: the Tables of Contents. Rather than overriding this variable, you should
#: use `extra_css` in your recipe to customize look and feel.
template_css = u'''
@ -517,6 +517,19 @@ class BasicNewsRecipe(Recipe):
'''
raise NotImplementedError
def populate_article_metadata(self, article, soup, first):
    '''
    Called when each HTML page belonging to article is downloaded.
    Intended to be used to get article metadata like author/summary/etc.
    from the parsed HTML (soup).

    The default implementation does nothing; override it in your recipe.

    :param article: An object of class :class:`calibre.web.feeds.Article`.
                    If you change the summary, remember to also change the
                    text_summary.
    :param soup: Parsed HTML belonging to this article
    :param first: True iff the parsed HTML is the first page of the article.
    '''
    return None
def postprocess_book(self, oeb, opts, log):
'''
Run any needed post processing on the parsed downloaded e-book.
@ -544,6 +557,8 @@ class BasicNewsRecipe(Recipe):
self.username = options.username
self.password = options.password
self.lrf = options.lrf
self.output_profile = options.output_profile
self.touchscreen = getattr(self.output_profile, 'touchscreen', False)
self.output_dir = os.path.abspath(self.output_dir)
if options.test:
@ -597,7 +612,7 @@ class BasicNewsRecipe(Recipe):
if self.delay > 0:
self.simultaneous_downloads = 1
self.navbar = templates.NavBarTemplate()
self.navbar = templates.TouchscreenNavBarTemplate() if self.touchscreen else templates.NavBarTemplate()
self.failed_downloads = []
self.partial_failures = []
@ -638,7 +653,15 @@ class BasicNewsRecipe(Recipe):
for base in list(soup.findAll(['base', 'iframe'])):
base.extract()
return self.postprocess_html(soup, first_fetch)
ans = self.postprocess_html(soup, first_fetch)
try:
article = self.feed_objects[f].articles[a]
except:
self.log.exception('Failed to get article object for postprocessing')
pass
else:
self.populate_article_metadata(article, ans, first_fetch)
return ans
def download(self):
@ -674,7 +697,11 @@ class BasicNewsRecipe(Recipe):
def feeds2index(self, feeds):
templ = templates.IndexTemplate()
css = self.template_css + '\n\n' +(self.extra_css if self.extra_css else '')
return templ.generate(self.title, self.timefmt, feeds,
timefmt = self.timefmt
if self.touchscreen:
templ = templates.TouchscreenIndexTemplate()
timefmt = '%A, %d %b %Y'
return templ.generate(self.title, "mastheadImage.jpg", timefmt, feeds,
extra_css=css).render(doctype='xhtml')
@classmethod
@ -727,6 +754,44 @@ class BasicNewsRecipe(Recipe):
templ = templates.FeedTemplate()
css = self.template_css + '\n\n' +(self.extra_css if self.extra_css else '')
if self.touchscreen:
touchscreen_css = u'''
.summary_headline {
font-size:large; font-weight:bold; margin-top:0px; margin-bottom:0px;
}
.summary_byline {
font-size:small; margin-top:0px; margin-bottom:0px;
}
.summary_text {
margin-top:0px; margin-bottom:0px;
}
.feed {
font-family:sans-serif; font-weight:bold; font-size:larger;
}
.calibre_navbar {
font-family:monospace;
}
hr {
border-color:gray;
border-style:solid;
border-width:thin;
}
table.toc {
font-size:large;
}
td.article_count {
text-align:right;
}
'''
templ = templates.TouchscreenFeedTemplate()
css = touchscreen_css + '\n\n' + (self.extra_css if self.extra_css else '')
return templ.generate(feed, self.description_limiter,
extra_css=css).render(doctype='xhtml')
@ -820,6 +885,7 @@ class BasicNewsRecipe(Recipe):
if hasattr(feed, 'reverse'):
feed.reverse()
self.feed_objects = feeds
for f, feed in enumerate(feeds):
feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
if not os.path.isdir(feed_dir):
@ -1053,6 +1119,9 @@ class BasicNewsRecipe(Recipe):
mi = MetaInformation(self.short_title() + strftime(self.timefmt), [__appname__])
mi.publisher = __appname__
mi.author_sort = __appname__
if self.output_profile.name == 'iPad':
mi.authors = [strftime('%A, %d %B %Y')]
mi.author_sort = strftime('%Y-%m-%d')
mi.publication_type = 'periodical:'+self.publication_type
mi.timestamp = nowf()
mi.comments = self.description

View File

@ -5,7 +5,8 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
from lxml import html, etree
from lxml.html.builder import HTML, HEAD, TITLE, STYLE, DIV, BODY, \
STRONG, BR, H1, SPAN, A, HR, UL, LI, H2, IMG, P as PT
STRONG, BR, H1, SPAN, A, HR, UL, LI, H2, IMG, P as PT, \
TABLE, TD, TR
from calibre import preferred_encoding, strftime, isbytestring
@ -89,12 +90,55 @@ class NavBarTemplate(Template):
self.root = HTML(head, BODY(navbar))
class TouchscreenNavBarTemplate(Template):
    '''
    Per-article navigation bar used when the output is optimized for a
    touchscreen.

    The generated document contains a single ``calibre_navbar`` DIV. The top
    bar (``bottom`` False) holds Next / Section Menu / Main Menu / Previous
    links followed by a rule; the bottom bar holds a rule, the download
    attribution and the Section Menu / Main Menu links.
    '''

    def _generate(self, bottom, feed, art, number_of_articles_in_feed,
                  two_levels, url, __appname__, prefix='', center=True,
                  extra_css=None, style=None):
        '''
        Build the navigation bar and store it in ``self.root``.

        :param bottom: True for the bar placed after the article, False for
            the bar placed before it.
        :param feed: Index of the feed, used to build ``feed_%d`` links.
        :param art: Index of the article inside its feed, used to build
            ``article_%d`` links.
        :param number_of_articles_in_feed: Used to detect the last article
            of a feed, whose "Next" link points at the following feed.
        :param two_levels: Not used by this template.
        :param url: URL shown (and linked) in the bottom bar attribution.
        :param __appname__: Application name shown in the bottom bar.
        :param prefix: Optional prefix for every generated link; a trailing
            '/' is added when missing.
        :param center: Center the bar if True, otherwise align it left.
        :param extra_css: Optional CSS, added to the head after ``style``.
        :param style: Optional CSS, added to the head.
        '''
        head = HEAD(TITLE('navbar'))
        if style:
            head.append(STYLE(style, type='text/css'))
        if extra_css:
            head.append(STYLE(extra_css, type='text/css'))

        if prefix and not prefix.endswith('/'):
            prefix += '/'
        align = 'center' if center else 'left'
        navbar = DIV(CLASS('calibre_navbar', 'calibre_rescale_100',
                           style='text-align:' + align))

        def add_link(text, href):
            # Put a separator after the current last child, then add the link.
            navbar[-1].tail = ' | '
            navbar.append(A(text, href=href))

        if bottom:
            navbar.append(HR())
            text = 'This article was downloaded by '
            p = PT(text, STRONG(__appname__), A(url, href=url),
                   style='text-align:left')
            p[0].tail = ' from '
            # The attribution paragraph was previously built but never
            # attached to the navbar, so it was silently dropped.
            navbar.append(p)
            navbar.append(BR())
            navbar.append(BR())
        else:
            # The last article of a feed links forward to the next feed.
            is_last = art == number_of_articles_in_feed - 1
            if is_last:
                target, up = 'feed_%d' % (feed + 1), '../..'
            else:
                target, up = 'article_%d' % (art + 1), '..'
            navbar.text = '| '
            navbar.append(A('Next',
                            href='%s%s/%s/index.html' % (prefix, up, target)))

        add_link('Section Menu', '%s../index.html#article_%d' % (prefix, art))
        add_link('Main Menu', '%s../../index.html#feed_%d' % (prefix, feed))
        if art > 0 and not bottom:
            add_link('Previous',
                     '%s../article_%d/index.html' % (prefix, art - 1))
        # Trailing separator after the last link.
        navbar[-1].tail = ' | '
        if not bottom:
            navbar.append(HR())

        self.root = HTML(head, BODY(navbar))
class IndexTemplate(Template):
def _generate(self, title, datefmt, feeds, extra_css=None, style=None):
def _generate(self, title, masthead, datefmt, feeds, extra_css=None, style=None):
if isinstance(datefmt, unicode):
datefmt = datefmt.encode(preferred_encoding)
date = strftime(datefmt)
@ -110,12 +154,40 @@ class IndexTemplate(Template):
href='feed_%d/index.html'%i)), id='feed_%d'%i)
ul.append(li)
div = DIV(
H1(title, CLASS('calibre_recipe_title', 'calibre_rescale_180')),
PT(IMG(src=masthead,alt="masthead"),style='text-align:center'),
PT(date, style='text-align:right'),
ul,
CLASS('calibre_rescale_100'))
self.root = HTML(head, BODY(div))
class TouchscreenIndexTemplate(Template):
    '''
    Top-level index page for touchscreen output: a masthead image, the
    formatted date and a table with one row per feed (title link plus
    article count).
    '''

    def _generate(self, title, masthead, datefmt, feeds, extra_css=None, style=None):
        '''
        Build the index document and store it in ``self.root``.

        :param title: Used as the document <title>.
        :param masthead: ``src`` of the masthead image.
        :param datefmt: Format string passed to strftime.
        :param feeds: Sequence of feeds; falsy entries are skipped but still
            count towards the ``feed_%d`` numbering.
        :param extra_css: Optional CSS, added to the head after ``style``.
        :param style: Optional CSS, added to the head.
        '''
        head = HEAD(TITLE(title))
        for css in (style, extra_css):
            if css:
                head.append(STYLE(css, type='text/css'))

        # Unicode format strings are encoded before being given to strftime.
        if isinstance(datefmt, unicode):
            datefmt = datefmt.encode(preferred_encoding)
        date_text = strftime(datefmt)

        feed_table = TABLE(CLASS('toc'),width="100%",border="0",cellpadding="3px")
        for idx, feed in enumerate(feeds):
            if not feed:
                continue
            feed_link = A(feed.title, href='feed_%d/index.html'%idx)
            article_count = '%d' % len(feed.articles)
            feed_table.append(TR(
                TD(CLASS('toc_item'), feed_link),
                TD(CLASS('article_count'), article_count)))

        page = DIV(
            PT(IMG(src=masthead,alt="masthead"),style='text-align:center'),
            PT(date_text, style='text-align:center'),
            feed_table,
            CLASS('calibre_rescale_100'))
        self.root = HTML(head, BODY(page))
class FeedTemplate(Template):
def _generate(self, feed, cutoff, extra_css=None, style=None):
@ -166,6 +238,56 @@ class FeedTemplate(Template):
self.root = HTML(head, body)
class TouchscreenFeedTemplate(Template):
    '''
    Per-feed index page for touchscreen output: feed title, optional feed
    image and description, a table of the downloaded articles and an
    "Up one level" navigation bar.
    '''

    def _generate(self, feed, cutoff, extra_css=None, style=None):
        '''
        Build the feed document and store it in ``self.root``.

        :param feed: Feed object; its title, articles and (when present)
            image and description are rendered.
        :param cutoff: Callable applied to each article's text_summary
            before it is shown.
        :param extra_css: Optional CSS, added to the head after ``style``.
        :param style: Optional CSS, added to the head.
        '''
        head = HEAD(TITLE(feed.title))
        for css in (style, extra_css):
            if css:
                head.append(STYLE(css, type='text/css'))

        heading = H2(feed.title,
                     CLASS('calibre_feed_title', 'calibre_rescale_160'))
        container = DIV(heading, CLASS('calibre_rescale_100'))
        body = BODY(style='page-break-before:always')
        body.append(container)

        if getattr(feed, 'image', None):
            image = IMG(alt=feed.image_alt or '', src=feed.image_url)
            container.append(DIV(image, CLASS('calibre_feed_image')))

        if getattr(feed, 'description', None):
            description = DIV(feed.description,
                              CLASS('calibre_feed_description',
                                    'calibre_rescale_80'))
            description.append(BR())
            container.append(description)

        toc = TABLE(CLASS('toc'),width="100%",border="0",cellpadding="3px")
        for article in feed.articles:
            # Only articles that were actually downloaded get a row.
            if not getattr(article, 'downloaded', False):
                continue
            cell = TD(
                A(article.title, CLASS('article calibre_rescale_100',
                                       href=article.url)))
            if article.summary:
                cell.append(DIV(cutoff(article.text_summary),
                                CLASS('article_description',
                                      'calibre_rescale_80')))
            toc.append(TR(cell))
        container.append(toc)

        up_link = A('Up one level', href="../index.html")
        up_link.tail = ' |'
        navbar = DIV('| ', CLASS('calibre_navbar', 'calibre_rescale_100'),style='text-align:center')
        navbar.append(up_link)
        container.append(navbar)

        self.root = HTML(head, body)
class EmbeddedContent(Template):

View File

@ -328,6 +328,9 @@ class RecursiveFetcher(object):
continue
try:
data = self.fetch_url(iurl)
if data == 'GIF89a\x01':
# Skip empty GIF files as PIL errors on them anyway
continue
except Exception:
self.log.exception('Could not fetch image %s'% iurl)
continue