MOBI news download: Allow recipes to set a thumbnail for entries in the periodical table of contents. Fixes #900130 (Kindle MOBI TOC thumbnail)

This commit is contained in:
Kovid Goyal 2011-12-12 12:31:43 +05:30
parent 156e0970c9
commit 6f4b31fe13
7 changed files with 60 additions and 14 deletions

View File

@ -31,7 +31,7 @@ class TOC(list):
def __init__(self, href=None, fragment=None, text=None, parent=None, play_order=0, def __init__(self, href=None, fragment=None, text=None, parent=None, play_order=0,
base_path=os.getcwd(), type='unknown', author=None, base_path=os.getcwd(), type='unknown', author=None,
description=None): description=None, toc_thumbnail=None):
self.href = href self.href = href
self.fragment = fragment self.fragment = fragment
if not self.fragment: if not self.fragment:
@ -43,6 +43,7 @@ class TOC(list):
self.type = type self.type = type
self.author = author self.author = author
self.description = description self.description = description
self.toc_thumbnail = toc_thumbnail
def __str__(self): def __str__(self):
lines = ['TOC: %s#%s'%(self.href, self.fragment)] lines = ['TOC: %s#%s'%(self.href, self.fragment)]
@ -72,12 +73,12 @@ class TOC(list):
entry.parent = None entry.parent = None
def add_item(self, href, fragment, text, play_order=None, type='unknown', def add_item(self, href, fragment, text, play_order=None, type='unknown',
author=None, description=None): author=None, description=None, toc_thumbnail=None):
if play_order is None: if play_order is None:
play_order = (self[-1].play_order if len(self) else self.play_order) + 1 play_order = (self[-1].play_order if len(self) else self.play_order) + 1
self.append(TOC(href=href, fragment=fragment, text=text, parent=self, self.append(TOC(href=href, fragment=fragment, text=text, parent=self,
base_path=self.base_path, play_order=play_order, base_path=self.base_path, play_order=play_order,
type=type, author=author, description=description)) type=type, author=author, description=description, toc_thumbnail=toc_thumbnail))
return self[-1] return self[-1]
def top_level_items(self): def top_level_items(self):
@ -269,6 +270,9 @@ class TOC(list):
if desc: if desc:
desc = re.sub(r'\s+', ' ', desc) desc = re.sub(r'\s+', ' ', desc)
elem.append(C.meta(desc, name='description')) elem.append(C.meta(desc, name='description'))
idx = getattr(np, 'toc_thumbnail', None)
if idx:
elem.append(C.meta(idx, name='toc_thumbnail'))
parent.append(elem) parent.append(elem)
for np2 in np: for np2 in np:
navpoint(elem, np2) navpoint(elem, np2)

View File

@ -656,11 +656,11 @@ class Tag(object): # {{{
' image record associated with this article', ' image record associated with this article',
'image_index'), 'image_index'),
70 : ('Description offset in cncx', 'desc_offset'), 70 : ('Description offset in cncx', 'desc_offset'),
71 : ('Image attribution offset in cncx', 71 : ('Author offset in cncx', 'author_offset'),
'image_attr_offset'),
72 : ('Image caption offset in cncx', 72 : ('Image caption offset in cncx',
'image_caption_offset'), 'image_caption_offset'),
73 : ('Author offset in cncx', 'author_offset'), 73 : ('Image attribution offset in cncx',
'image_attr_offset'),
}, },
'chapter_with_subchapters' : { 'chapter_with_subchapters' : {

View File

@ -136,7 +136,8 @@ class IndexEntry(object):
'last_child_index': 23, 'last_child_index': 23,
'image_index': 69, 'image_index': 69,
'desc_offset': 70, 'desc_offset': 70,
'author_offset': 73, 'author_offset': 71,
} }
RTAG_MAP = {v:k for k, v in TAG_VALUES.iteritems()} RTAG_MAP = {v:k for k, v in TAG_VALUES.iteritems()}
@ -754,6 +755,13 @@ class Indexer(object): # {{{
normalized_articles.append(article) normalized_articles.append(article)
article.author_offset = self.cncx[art.author] article.author_offset = self.cncx[art.author]
article.desc_offset = self.cncx[art.description] article.desc_offset = self.cncx[art.description]
if getattr(art, 'toc_thumbnail', None) is not None:
try:
ii = self.serializer.images[art.toc_thumbnail] - 1
if ii > -1:
article.image_index = ii
except KeyError:
pass # Image not found in serializer
if normalized_articles: if normalized_articles:
normalized_articles.sort(key=lambda x:x.offset) normalized_articles.sort(key=lambda x:x.offset)

View File

@ -1629,9 +1629,10 @@ class TOC(object):
:attr:`id`: Optional unique identifier for this node. :attr:`id`: Optional unique identifier for this node.
:attr:`author`: Optional author attribution for periodicals <mbp:> :attr:`author`: Optional author attribution for periodicals <mbp:>
:attr:`description`: Optional description attribute for periodicals <mbp:> :attr:`description`: Optional description attribute for periodicals <mbp:>
:attr:`toc_thumbnail`: Optional toc thumbnail image
""" """
def __init__(self, title=None, href=None, klass=None, id=None, def __init__(self, title=None, href=None, klass=None, id=None,
play_order=None, author=None, description=None): play_order=None, author=None, description=None, toc_thumbnail=None):
self.title = title self.title = title
self.href = urlnormalize(href) if href else href self.href = urlnormalize(href) if href else href
self.klass = klass self.klass = klass
@ -1643,10 +1644,11 @@ class TOC(object):
self.play_order = play_order self.play_order = play_order
self.author = author self.author = author
self.description = description self.description = description
self.toc_thumbnail = toc_thumbnail
def add(self, title, href, klass=None, id=None, play_order=0, author=None, description=None): def add(self, title, href, klass=None, id=None, play_order=0, author=None, description=None, toc_thumbnail=None):
"""Create and return a new sub-node of this node.""" """Create and return a new sub-node of this node."""
node = TOC(title, href, klass, id, play_order, author, description) node = TOC(title, href, klass, id, play_order, author, description, toc_thumbnail)
self.nodes.append(node) self.nodes.append(node)
return node return node

View File

@ -371,8 +371,15 @@ class OEBReader(object):
else : else :
description = None description = None
index_image = xpath(child,
'descendant::calibre:meta[@name = "toc_thumbnail"]')
toc_thumbnail = (index_image[0].text if index_image else None)
if not toc_thumbnail or not toc_thumbnail.strip():
toc_thumbnail = None
node = toc.add(title, href, id=id, klass=klass, node = toc.add(title, href, id=id, klass=klass,
play_order=po, description=description, author=author) play_order=po, description=description, author=author,
toc_thumbnail=toc_thumbnail)
self._toc_from_navpoint(item, node, child) self._toc_from_navpoint(item, node, child)

View File

@ -31,6 +31,7 @@ class Article(object):
self._title = clean_ascii_chars(self._title) self._title = clean_ascii_chars(self._title)
self.url = url self.url = url
self.author = author self.author = author
self.toc_thumbnail = None
if author and not isinstance(author, unicode): if author and not isinstance(author, unicode):
author = author.decode('utf-8', 'replace') author = author.decode('utf-8', 'replace')
self.summary = summary self.summary = summary
@ -88,8 +89,10 @@ URL : %s
Author : %s Author : %s
Summary : %s Summary : %s
Date : %s Date : %s
TOC thumb : %s
Has content : %s Has content : %s
'''%(self.title, self.url, self.author, self.summary[:20]+'...', self.localtime.strftime('%a, %d %b, %Y %H:%M'), '''%(self.title, self.url, self.author, self.summary[:20]+'...',
self.toc_thumbnail, self.localtime.strftime('%a, %d %b, %Y %H:%M'),
bool(self.content))).encode('utf-8') bool(self.content))).encode('utf-8')
def __str__(self): def __str__(self):

View File

@ -653,6 +653,25 @@ class BasicNewsRecipe(Recipe):
''' '''
raise NotImplementedError raise NotImplementedError
def add_toc_thumbnail(self, article, src):
    '''
    Call this from populate_article_metadata with the src attribute of an
    <img> tag from the article that is appropriate for use as the thumbnail
    representing the article in the Table of Contents. Whether the
    thumbnail is actually used is device dependent (currently only used by
    the Kindles). Note that the referenced image must be one that was
    successfully downloaded, otherwise it will be ignored.

    :param article: Object whose ``toc_thumbnail`` attribute is set. Objects
        that do not have this attribute are left untouched.
    :param src: Path of the image. It must contain
        ``feed_N/article_N/images/img`` (either slash style, any case);
        anything else is logged as invalid and ignored. The stored value is
        the part of the path starting at that ``feed_N`` component, with
        forward slashes.
    '''
    if not src or not hasattr(article, 'toc_thumbnail'):
        return

    # Use forward slashes so that Windows style paths match the pattern below
    src = src.replace('\\', '/')
    found = re.search(r'feed_\d+/article_\d+/images/img', src, flags=re.I)
    if found is None:
        self.log.warn('Ignoring invalid TOC thumbnail image: %r'%src)
        return

    # Cut at the validated feed_N/article_N component rather than at the
    # first occurrence of "feed" anywhere in the path: a parent directory
    # such as /tmp/newsfeeds/ would otherwise leak into the stored value.
    # The leading "feed" is always stored in lower case, whatever the case
    # used in src.
    article.toc_thumbnail = 'feed' + src[found.start() + len('feed'):]
def populate_article_metadata(self, article, soup, first): def populate_article_metadata(self, article, soup, first):
''' '''
Called when each HTML page belonging to article is downloaded. Called when each HTML page belonging to article is downloaded.
@ -1285,13 +1304,16 @@ class BasicNewsRecipe(Recipe):
desc = None desc = None
else: else:
desc = self.description_limiter(desc) desc = self.description_limiter(desc)
tt = a.toc_thumbnail if a.toc_thumbnail else None
entries.append('%sindex.html'%adir) entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None) po = self.play_order_map.get(entries[-1], None)
if po is None: if po is None:
self.play_order_counter += 1 self.play_order_counter += 1
po = self.play_order_counter po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'), parent.add_item('%sindex.html'%adir, None,
play_order=po, author=auth, description=desc) a.title if a.title else _('Untitled Article'),
play_order=po, author=auth,
description=desc, toc_thumbnail=tt)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep)) last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages: for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp]) prefix = os.path.commonprefix([opf_path, sp])