EPUB Input: Implement reading of Table of Contents from EPUB 3 files that do not specify a fallback NCX ToC

This commit is contained in:
Kovid Goyal 2016-06-15 23:50:58 +05:30
parent f315b1cacb
commit 921a579d35
2 changed files with 89 additions and 16 deletions

View File

@ -218,6 +218,10 @@ class EPUBInput(InputFormatPlugin):
raise DRMError(os.path.basename(path)) raise DRMError(os.path.basename(path))
self.encrypted_fonts = self._encrypted_font_uris self.encrypted_fonts = self._encrypted_font_uris
epub3_nav = opf.epub3_nav
if epub3_nav is not None:
self.convert_epub3_nav(epub3_nav, opf, log)
if len(parts) > 1 and parts[0]: if len(parts) > 1 and parts[0]:
delta = '/'.join(parts[:-1])+'/' delta = '/'.join(parts[:-1])+'/'
for elem in opf.itermanifest(): for elem in opf.itermanifest():
@ -252,11 +256,65 @@ class EPUBInput(InputFormatPlugin):
if len(list(opf.iterspine())) == 0: if len(list(opf.iterspine())) == 0:
raise ValueError('No valid entries in the spine of this EPUB') raise ValueError('No valid entries in the spine of this EPUB')
with open('content.opf', 'wb') as nopf: with lopen('content.opf', 'wb') as nopf:
nopf.write(opf.render()) nopf.write(opf.render())
return os.path.abspath(u'content.opf') return os.path.abspath(u'content.opf')
def convert_epub3_nav(self, nav_path, opf, log):
    """Build an NCX table of contents from an EPUB 3 navigation document.

    The first ``<nav epub:type="toc">`` element in the document at
    *nav_path* that contains an ``<ol>`` is walked recursively and turned
    into NCX ``navPoint`` elements.  The resulting NCX is written to a new
    file next to the navigation document, registered in the OPF manifest,
    and every ``<spine>`` element of the OPF gets its ``toc`` attribute
    pointed at it.

    If the navigation document contains no usable table of contents,
    nothing is written and the OPF is left untouched, so that an empty
    generated NCX never replaces an existing ``toc`` reference.

    :param nav_path: Filesystem path of the EPUB 3 navigation document.
    :param opf: OPF object; must provide ``add_path_to_manifest()`` and
        ``root``.
    :param log: Logger handed through to the HTML parser.
    """
    from lxml import etree
    from calibre.ebooks.chardet import xml_to_unicode
    from calibre.ebooks.oeb.polish.parsing import parse
    from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX
    from calibre.ebooks.oeb.polish.toc import first_child
    from tempfile import NamedTemporaryFile

    with lopen(nav_path, 'rb') as f:
        raw = f.read()
    raw = xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0]
    root = parse(raw, log=log)
    ncx = etree.fromstring('<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="eng"><navMap/></ncx>')
    navmap = ncx[0]
    et = '{%s}type' % EPUB_NS
    bn = os.path.basename(nav_path)

    def add_from_li(li, parent):
        # Create a navPoint under ``parent`` from the first <a>/<span>
        # child of ``li``; only that first child is ever inspected.
        href = text = None
        for x in li.iterchildren(XHTML('a'), XHTML('span')):
            # Prefer the visible text, falling back to any title attributes
            text = etree.tostring(x, method='text', encoding=unicode, with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip()
            href = x.get('href')
            if href:
                if href.startswith('#'):
                    # Fragment-only links point into the nav document
                    # itself, so they need its file name prepended.
                    href = bn + href
            break
        np = parent.makeelement(NCX('navPoint'))
        parent.append(np)
        np.append(np.makeelement(NCX('navLabel')))
        np[0].append(np.makeelement(NCX('text')))
        np[0][0].text = text
        if href:
            np.append(np.makeelement(NCX('content'), attrib={'src':href}))
        return np

    def process_nav_node(node, toc_parent):
        # Mirror the nested <ol>/<li> structure as nested navPoints
        for li in node.iterchildren(XHTML('li')):
            child = add_from_li(li, toc_parent)
            ol = first_child(li, XHTML('ol'))
            if ol is not None:
                process_nav_node(ol, child)

    for nav in root.iterdescendants(XHTML('nav')):
        if nav.get(et) == 'toc':
            ol = first_child(nav, XHTML('ol'))
            if ol is not None:
                process_nav_node(ol, navmap)
                break
    else:
        # No usable ToC in the nav document: do not create an empty NCX
        # and do not touch the spine's existing toc attribute.
        return

    # The NCX is created in the same directory as the nav document so that
    # the relative hrefs copied from it remain valid.
    with NamedTemporaryFile(suffix='.ncx', dir=os.path.dirname(nav_path), delete=False) as f:
        f.write(etree.tostring(ncx, encoding='utf-8'))
    ncx_id = opf.add_path_to_manifest(f.name, NCX_MIME)
    for spine in opf.root.xpath('//*[local-name()="spine"]'):
        spine.set('toc', ncx_id)
def postprocess_book(self, oeb, opts, log): def postprocess_book(self, oeb, opts, log):
rc = getattr(self, 'removed_cover', None) rc = getattr(self, 'removed_cover', None)
if rc: if rc:

View File

@ -219,17 +219,20 @@ class ManifestItem(Resource): # {{{
class Manifest(ResourceCollection): # {{{ class Manifest(ResourceCollection): # {{{
def append_from_opf_manifest_item(self, item, dir):
    """Append a manifest entry built from the OPF ``<item>`` element *item*.

    The new entry takes the ``id`` attribute of *item*; when that is
    missing or empty an id of the form ``id<N>`` is generated from
    ``self.next_id``.  ``self.next_id`` is advanced after every
    successful append.

    :param item: Element-like object for the OPF manifest item.
    :param dir: Passed unchanged to ``ManifestItem.from_opf_manifest_item``.
    """
    entry = ManifestItem.from_opf_manifest_item(item, dir)
    self.append(entry)
    # Fall back to a generated id only when the element supplies none
    item_id = item.get('id', '') or 'id%d' % self.next_id
    self[-1].id = item_id
    self.next_id += 1
@staticmethod @staticmethod
def from_opf_manifest_element(items, dir): def from_opf_manifest_element(items, dir):
m = Manifest() m = Manifest()
for item in items: for item in items:
try: try:
m.append(ManifestItem.from_opf_manifest_item(item, dir)) m.append_from_opf_manifest_item(item, dir)
id = item.get('id', '')
if not id:
id = 'id%d'%m.next_id
m[-1].id = id
m.next_id += 1
except ValueError: except ValueError:
continue continue
return m return m
@ -660,7 +663,6 @@ class OPF(object): # {{{
for item in self.manifest: for item in self.manifest:
if 'toc' in item.href().lower(): if 'toc' in item.href().lower():
toc = item.path toc = item.path
if toc is None: if toc is None:
return return
self.toc = TOC(base_path=self.base_dir) self.toc = TOC(base_path=self.base_dir)
@ -721,18 +723,17 @@ class OPF(object): # {{{
return [i.get('id') for i in items] return [i.get('id') for i in items]
def add_path_to_manifest(self, path, media_type): def add_path_to_manifest(self, path, media_type):
has_path = False
path = os.path.abspath(path) path = os.path.abspath(path)
for i in self.itermanifest(): for i in self.itermanifest():
xpath = os.path.join(self.base_dir, *(i.get('href', '').split('/'))) xpath = os.path.join(self.base_dir, *(i.get('href', '').split('/')))
if os.path.abspath(xpath) == path: if os.path.abspath(xpath) == path:
has_path = True return i.get('id')
break
if not has_path:
href = os.path.relpath(path, self.base_dir).replace(os.sep, '/') href = os.path.relpath(path, self.base_dir).replace(os.sep, '/')
item = self.create_manifest_item(href, media_type) item = self.create_manifest_item(href, media_type)
manifest = self.manifest_ppath(self.root)[0] manifest = self.manifest_ppath(self.root)[0]
manifest.append(item) manifest.append(item)
self.manifest.append_from_opf_manifest_item(item, self.basedir)
return item.get('id')
def iterspine(self): def iterspine(self):
return self.spine_path(self.root) return self.spine_path(self.root)
@ -1184,6 +1185,20 @@ class OPF(object): # {{{
if mt and 'xml' not in mt and 'html' not in mt: if mt and 'xml' not in mt and 'html' not in mt:
return item.get('href', None) return item.get('href', None)
@property
def epub3_nav(self):
    """Path of the EPUB 3 navigation document, or None.

    Only consulted when ``package_version`` is at least 3.0.  Returns the
    path of the first manifest item that has ``nav`` among its
    ``properties``, has a media type containing ``html``, has an id, and
    whose path (as resolved by the manifest) exists on disk.
    """
    if not (self.package_version >= 3.0):
        return None
    for entry in self.itermanifest():
        properties = (entry.get('properties') or '').lower().split()
        if 'nav' not in properties:
            continue
        media_type = (entry.get('media-type') or '').lower()
        if 'html' not in media_type:
            continue
        manifest_id = entry.get('id')
        if not manifest_id:
            continue
        candidate = self.manifest.path_for_id(manifest_id)
        if candidate and os.path.exists(candidate):
            return candidate
    return None
@dynamic_property @dynamic_property
def cover(self): def cover(self):