MOBI news download: Allow recipes to set a thumbnail for entries in the periodical table of contents. Fixes #900130 (Kindle MOBI TOC thumbnail)

This commit is contained in:
Kovid Goyal 2011-12-12 12:31:43 +05:30
parent 156e0970c9
commit 6f4b31fe13
7 changed files with 60 additions and 14 deletions

View File

@ -31,7 +31,7 @@ class TOC(list):
def __init__(self, href=None, fragment=None, text=None, parent=None, play_order=0,
base_path=os.getcwd(), type='unknown', author=None,
description=None):
description=None, toc_thumbnail=None):
self.href = href
self.fragment = fragment
if not self.fragment:
@ -43,6 +43,7 @@ class TOC(list):
self.type = type
self.author = author
self.description = description
self.toc_thumbnail = toc_thumbnail
def __str__(self):
lines = ['TOC: %s#%s'%(self.href, self.fragment)]
@ -72,12 +73,12 @@ class TOC(list):
entry.parent = None
def add_item(self, href, fragment, text, play_order=None, type='unknown',
author=None, description=None):
author=None, description=None, toc_thumbnail=None):
if play_order is None:
play_order = (self[-1].play_order if len(self) else self.play_order) + 1
self.append(TOC(href=href, fragment=fragment, text=text, parent=self,
base_path=self.base_path, play_order=play_order,
type=type, author=author, description=description))
type=type, author=author, description=description, toc_thumbnail=toc_thumbnail))
return self[-1]
def top_level_items(self):
@ -269,6 +270,9 @@ class TOC(list):
if desc:
desc = re.sub(r'\s+', ' ', desc)
elem.append(C.meta(desc, name='description'))
idx = getattr(np, 'toc_thumbnail', None)
if idx:
elem.append(C.meta(idx, name='toc_thumbnail'))
parent.append(elem)
for np2 in np:
navpoint(elem, np2)

View File

@ -656,11 +656,11 @@ class Tag(object): # {{{
' image record associated with this article',
'image_index'),
70 : ('Description offset in cncx', 'desc_offset'),
71 : ('Image attribution offset in cncx',
'image_attr_offset'),
71 : ('Author offset in cncx', 'author_offset'),
72 : ('Image caption offset in cncx',
'image_caption_offset'),
73 : ('Author offset in cncx', 'author_offset'),
73 : ('Image attribution offset in cncx',
'image_attr_offset'),
},
'chapter_with_subchapters' : {

View File

@ -136,7 +136,8 @@ class IndexEntry(object):
'last_child_index': 23,
'image_index': 69,
'desc_offset': 70,
'author_offset': 73,
'author_offset': 71,
}
RTAG_MAP = {v:k for k, v in TAG_VALUES.iteritems()}
@ -754,6 +755,13 @@ class Indexer(object): # {{{
normalized_articles.append(article)
article.author_offset = self.cncx[art.author]
article.desc_offset = self.cncx[art.description]
if getattr(art, 'toc_thumbnail', None) is not None:
try:
ii = self.serializer.images[art.toc_thumbnail] - 1
if ii > -1:
article.image_index = ii
except KeyError:
pass # Image not found in serializer
if normalized_articles:
normalized_articles.sort(key=lambda x:x.offset)

View File

@ -1629,9 +1629,10 @@ class TOC(object):
:attr:`id`: Optional unique identifier for this node.
:attr:`author`: Optional author attribution for periodicals <mbp:>
:attr:`description`: Optional description attribute for periodicals <mbp:>
:attr:`toc_thumbnail`: Optional toc thumbnail image
"""
def __init__(self, title=None, href=None, klass=None, id=None,
play_order=None, author=None, description=None):
play_order=None, author=None, description=None, toc_thumbnail=None):
self.title = title
self.href = urlnormalize(href) if href else href
self.klass = klass
@ -1643,10 +1644,11 @@ class TOC(object):
self.play_order = play_order
self.author = author
self.description = description
self.toc_thumbnail = toc_thumbnail
def add(self, title, href, klass=None, id=None, play_order=0, author=None, description=None):
def add(self, title, href, klass=None, id=None, play_order=0, author=None, description=None, toc_thumbnail=None):
"""Create and return a new sub-node of this node."""
node = TOC(title, href, klass, id, play_order, author, description)
node = TOC(title, href, klass, id, play_order, author, description, toc_thumbnail)
self.nodes.append(node)
return node

View File

@ -371,8 +371,15 @@ class OEBReader(object):
else :
description = None
index_image = xpath(child,
'descendant::calibre:meta[@name = "toc_thumbnail"]')
toc_thumbnail = (index_image[0].text if index_image else None)
if not toc_thumbnail or not toc_thumbnail.strip():
toc_thumbnail = None
node = toc.add(title, href, id=id, klass=klass,
play_order=po, description=description, author=author)
play_order=po, description=description, author=author,
toc_thumbnail=toc_thumbnail)
self._toc_from_navpoint(item, node, child)

View File

@ -31,6 +31,7 @@ class Article(object):
self._title = clean_ascii_chars(self._title)
self.url = url
self.author = author
self.toc_thumbnail = None
if author and not isinstance(author, unicode):
author = author.decode('utf-8', 'replace')
self.summary = summary
@ -88,8 +89,10 @@ URL : %s
Author : %s
Summary : %s
Date : %s
TOC thumb : %s
Has content : %s
'''%(self.title, self.url, self.author, self.summary[:20]+'...', self.localtime.strftime('%a, %d %b, %Y %H:%M'),
'''%(self.title, self.url, self.author, self.summary[:20]+'...',
self.toc_thumbnail, self.localtime.strftime('%a, %d %b, %Y %H:%M'),
bool(self.content))).encode('utf-8')
def __str__(self):

View File

@ -653,6 +653,25 @@ class BasicNewsRecipe(Recipe):
'''
raise NotImplementedError
def add_toc_thumbnail(self, article, src):
    '''
    Call this from populate_article_metadata with the src attribute of an
    <img> tag from the article that is appropriate for use as the thumbnail
    representing the article in the Table of Contents. Whether the
    thumbnail is actually used is device dependent (currently only used by
    the Kindles). Note that the referenced image must be one that was
    successfully downloaded, otherwise it will be ignored.

    :param article: The article object. It is left untouched unless it
                    already has a ``toc_thumbnail`` attribute.
    :param src: Path of the image. Backslashes are converted to forward
                slashes and the path must contain
                ``feed_N/article_N/images/img`` (case insensitive);
                anything else is logged as a warning and ignored. An empty
                or None value is ignored without a warning.
    '''
    if not src or not hasattr(article, 'toc_thumbnail'):
        return
    src = src.replace('\\', '/')
    match = re.search(r'feed_\d+/article_\d+/images/img', src, flags=re.I)
    if match is None:
        self.log.warn('Ignoring invalid TOC thumbnail image: %r'%src)
        return
    # Cut the path at the validated feed_N/article_N/... match rather than
    # at the first "feed" substring, so a parent directory whose name
    # happens to contain "feed" cannot end up in the stored path. The
    # leading "feed" is written in lower case regardless of how it was
    # spelled in src.
    article.toc_thumbnail = 'feed' + src[match.start() + len('feed'):]
def populate_article_metadata(self, article, soup, first):
'''
Called when each HTML page belonging to article is downloaded.
@ -1285,13 +1304,16 @@ class BasicNewsRecipe(Recipe):
desc = None
else:
desc = self.description_limiter(desc)
tt = a.toc_thumbnail if a.toc_thumbnail else None
entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
parent.add_item('%sindex.html'%adir, None,
a.title if a.title else _('Untitled Article'),
play_order=po, author=auth,
description=desc, toc_thumbnail=tt)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])