Mirror of https://github.com/kovidgoyal/calibre.git (last synced 2025-07-09 03:04:10 -04:00)
EPUB Input: Implement reading of Table of Contents from EPUB 3 files that do not specify a fallback NCX ToC
This commit is contained in:
Parent commit: f315b1cacb
This commit: 921a579d35
@ -218,6 +218,10 @@ class EPUBInput(InputFormatPlugin):
|
|||||||
raise DRMError(os.path.basename(path))
|
raise DRMError(os.path.basename(path))
|
||||||
self.encrypted_fonts = self._encrypted_font_uris
|
self.encrypted_fonts = self._encrypted_font_uris
|
||||||
|
|
||||||
|
epub3_nav = opf.epub3_nav
|
||||||
|
if epub3_nav is not None:
|
||||||
|
self.convert_epub3_nav(epub3_nav, opf, log)
|
||||||
|
|
||||||
if len(parts) > 1 and parts[0]:
|
if len(parts) > 1 and parts[0]:
|
||||||
delta = '/'.join(parts[:-1])+'/'
|
delta = '/'.join(parts[:-1])+'/'
|
||||||
for elem in opf.itermanifest():
|
for elem in opf.itermanifest():
|
||||||
@ -252,11 +256,65 @@ class EPUBInput(InputFormatPlugin):
|
|||||||
if len(list(opf.iterspine())) == 0:
|
if len(list(opf.iterspine())) == 0:
|
||||||
raise ValueError('No valid entries in the spine of this EPUB')
|
raise ValueError('No valid entries in the spine of this EPUB')
|
||||||
|
|
||||||
with open('content.opf', 'wb') as nopf:
|
with lopen('content.opf', 'wb') as nopf:
|
||||||
nopf.write(opf.render())
|
nopf.write(opf.render())
|
||||||
|
|
||||||
return os.path.abspath(u'content.opf')
|
return os.path.abspath(u'content.opf')
|
||||||
|
|
||||||
|
def convert_epub3_nav(self, nav_path, opf, log):
    '''
    Build an NCX Table of Contents from an EPUB 3 navigation document.

    The first ``<nav epub:type="toc">`` element in the document at
    ``nav_path`` that contains an ``<ol>`` is walked recursively. Every
    ``<li>`` becomes a ``<navPoint>`` whose label and target come from the
    first ``<a>`` or ``<span>`` child of that ``<li>``. The resulting NCX is
    written to a new ``.ncx`` file next to the navigation document, added
    to the manifest of ``opf`` and set as the ``toc`` of every ``<spine>``.

    If no such ``<nav>`` exists nothing is written and ``opf`` is left
    untouched, so that a ToC the OPF already references is not replaced by
    an empty one.

    :param nav_path: Path to the (X)HTML navigation document
    :param opf: The OPF object whose manifest and spine are updated
    :param log: Logger passed through to the HTML parser
    '''
    from lxml import etree
    from calibre.ebooks.chardet import xml_to_unicode
    from calibre.ebooks.oeb.polish.parsing import parse
    from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX
    from calibre.ebooks.oeb.polish.toc import first_child
    from tempfile import NamedTemporaryFile

    with lopen(nav_path, 'rb') as f:
        raw = f.read()
    raw = xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0]
    root = parse(raw, log=log)
    ncx = etree.fromstring('<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="eng"><navMap/></ncx>')
    navmap = ncx[0]
    et = '{%s}type' % EPUB_NS
    bn = os.path.basename(nav_path)

    def add_from_li(li, parent):
        # Create a navPoint under parent from the first <a>/<span> in li
        href = text = None
        for x in li.iterchildren(XHTML('a'), XHTML('span')):
            # Prefer the visible text, fall back to any title attributes
            text = etree.tostring(x, method='text', encoding=unicode, with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip()
            href = x.get('href')
            if href:
                if href.startswith('#'):
                    # Fragment-only links point into the nav document itself
                    href = bn + href
            break
        np = parent.makeelement(NCX('navPoint'))
        parent.append(np)
        np.append(np.makeelement(NCX('navLabel')))
        np[0].append(np.makeelement(NCX('text')))
        np[0][0].text = text
        if href:
            np.append(np.makeelement(NCX('content'), attrib={'src':href}))
        return np

    def process_nav_node(node, toc_parent):
        # Mirror the nested <ol>/<li> structure as nested navPoints
        for li in node.iterchildren(XHTML('li')):
            child = add_from_li(li, toc_parent)
            ol = first_child(li, XHTML('ol'))
            if ol is not None:
                process_nav_node(ol, child)

    for nav in root.iterdescendants(XHTML('nav')):
        if nav.get(et) == 'toc':
            ol = first_child(nav, XHTML('ol'))
            if ol is not None:
                process_nav_node(ol, navmap)
                break
    else:
        # No usable ToC in the nav document: do not clobber whatever ToC
        # the OPF already references with an empty NCX
        return

    with NamedTemporaryFile(suffix='.ncx', dir=os.path.dirname(nav_path), delete=False) as f:
        f.write(etree.tostring(ncx, encoding='utf-8'))
    # Register the file only after it has been closed, and therefore flushed
    ncx_id = opf.add_path_to_manifest(f.name, NCX_MIME)
    for spine in opf.root.xpath('//*[local-name()="spine"]'):
        spine.set('toc', ncx_id)
|
||||||
|
|
||||||
def postprocess_book(self, oeb, opts, log):
|
def postprocess_book(self, oeb, opts, log):
|
||||||
rc = getattr(self, 'removed_cover', None)
|
rc = getattr(self, 'removed_cover', None)
|
||||||
if rc:
|
if rc:
|
||||||
|
@ -219,17 +219,20 @@ class ManifestItem(Resource): # {{{
|
|||||||
|
|
||||||
class Manifest(ResourceCollection): # {{{
|
class Manifest(ResourceCollection): # {{{
|
||||||
|
|
||||||
|
def append_from_opf_manifest_item(self, item, dir):
    '''
    Append the entry created by ``ManifestItem.from_opf_manifest_item(item,
    dir)`` to this collection and give it an id.

    The id is the ``id`` attribute of ``item`` when that is non-empty,
    otherwise one generated from the ``next_id`` counter. The counter is
    incremented on every call.
    '''
    self.append(ManifestItem.from_opf_manifest_item(item, dir))
    # An absent or empty id attribute falls back to a generated one
    self[-1].id = item.get('id', '') or 'id%d' % self.next_id
    self.next_id += 1
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def from_opf_manifest_element(items, dir):
|
def from_opf_manifest_element(items, dir):
|
||||||
m = Manifest()
|
m = Manifest()
|
||||||
for item in items:
|
for item in items:
|
||||||
try:
|
try:
|
||||||
m.append(ManifestItem.from_opf_manifest_item(item, dir))
|
m.append_from_opf_manifest_item(item, dir)
|
||||||
id = item.get('id', '')
|
|
||||||
if not id:
|
|
||||||
id = 'id%d'%m.next_id
|
|
||||||
m[-1].id = id
|
|
||||||
m.next_id += 1
|
|
||||||
except ValueError:
|
except ValueError:
|
||||||
continue
|
continue
|
||||||
return m
|
return m
|
||||||
@ -660,7 +663,6 @@ class OPF(object): # {{{
|
|||||||
for item in self.manifest:
|
for item in self.manifest:
|
||||||
if 'toc' in item.href().lower():
|
if 'toc' in item.href().lower():
|
||||||
toc = item.path
|
toc = item.path
|
||||||
|
|
||||||
if toc is None:
|
if toc is None:
|
||||||
return
|
return
|
||||||
self.toc = TOC(base_path=self.base_dir)
|
self.toc = TOC(base_path=self.base_dir)
|
||||||
@ -721,18 +723,17 @@ class OPF(object): # {{{
|
|||||||
return [i.get('id') for i in items]
|
return [i.get('id') for i in items]
|
||||||
|
|
||||||
def add_path_to_manifest(self, path, media_type):
|
def add_path_to_manifest(self, path, media_type):
|
||||||
has_path = False
|
|
||||||
path = os.path.abspath(path)
|
path = os.path.abspath(path)
|
||||||
for i in self.itermanifest():
|
for i in self.itermanifest():
|
||||||
xpath = os.path.join(self.base_dir, *(i.get('href', '').split('/')))
|
xpath = os.path.join(self.base_dir, *(i.get('href', '').split('/')))
|
||||||
if os.path.abspath(xpath) == path:
|
if os.path.abspath(xpath) == path:
|
||||||
has_path = True
|
return i.get('id')
|
||||||
break
|
href = os.path.relpath(path, self.base_dir).replace(os.sep, '/')
|
||||||
if not has_path:
|
item = self.create_manifest_item(href, media_type)
|
||||||
href = os.path.relpath(path, self.base_dir).replace(os.sep, '/')
|
manifest = self.manifest_ppath(self.root)[0]
|
||||||
item = self.create_manifest_item(href, media_type)
|
manifest.append(item)
|
||||||
manifest = self.manifest_ppath(self.root)[0]
|
self.manifest.append_from_opf_manifest_item(item, self.basedir)
|
||||||
manifest.append(item)
|
return item.get('id')
|
||||||
|
|
||||||
def iterspine(self):
|
def iterspine(self):
|
||||||
return self.spine_path(self.root)
|
return self.spine_path(self.root)
|
||||||
@ -1184,6 +1185,20 @@ class OPF(object): # {{{
|
|||||||
if mt and 'xml' not in mt and 'html' not in mt:
|
if mt and 'xml' not in mt and 'html' not in mt:
|
||||||
return item.get('href', None)
|
return item.get('href', None)
|
||||||
|
|
||||||
|
@property
def epub3_nav(self):
    '''
    Path to the EPUB 3 navigation document, or None.

    Only consulted when ``package_version`` is at least 3.0. The result is
    the path of the first manifest item that has ``nav`` among its
    ``properties``, an HTML media type, an id, and a path that exists on
    disk.
    '''
    if not self.package_version >= 3.0:
        return None
    for entry in self.itermanifest():
        properties = (entry.get('properties') or '').lower().split()
        if 'nav' not in properties:
            continue
        media_type = (entry.get('media-type') or '').lower()
        if 'html' not in media_type:
            continue
        item_id = entry.get('id')
        if not item_id:
            continue
        nav_path = self.manifest.path_for_id(item_id)
        if nav_path and os.path.exists(nav_path):
            return nav_path
    return None
|
||||||
|
|
||||||
@dynamic_property
|
@dynamic_property
|
||||||
def cover(self):
|
def cover(self):
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user