Edit Book: Add support for EPUB 3 to the Table of Contents tool. Now, when editing EPUB 3 files, the tool will automatically generate both the EPUB 3 navigation document and the NCX table of contents, for backwards compatibility.

This commit is contained in:
Kovid Goyal 2016-06-15 18:00:50 +05:30
parent e622cee5ed
commit 16870db206
8 changed files with 203 additions and 38 deletions

View File

@ -0,0 +1,9 @@
<?xml version='1.0' encoding='utf-8'?>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
<head>
<title>Navigation</title>
</head>
<body>
</body>
</html>

View File

@ -7,12 +7,26 @@ __docformat__ = 'restructuredtext en'
Provides abstraction for metadata reading/writing from a variety of ebook formats.
"""
import os, sys, re
from collections import namedtuple
from urlparse import urlparse
from calibre import relpath, guess_type, remove_bracketed_text, prints
from calibre.utils.config_base import tweaks
OPFVersion = namedtuple('OPFVersion', 'major minor patch')


def parse_opf_version(raw):
    """Turn a dotted version string into an ``OPFVersion`` triple.

    Missing components are filled with zeros and anything beyond the third
    component is dropped. If ``raw`` cannot be split into integers (including
    when it is not a string at all) the version falls back to 2.0.0.
    """
    try:
        parts = [int(piece) for piece in raw.split('.')]
    except Exception:
        # Best effort: any unparseable value is treated as OPF 2.0.0
        parts = [2, 0, 0]
    # Pad short versions such as '3' or '3.0' up to three components
    parts += [0] * (3 - len(parts))
    return OPFVersion(*parts[:3])
try:
_author_pat = re.compile(tweaks['authors_split_regex'])
except:

View File

@ -24,6 +24,7 @@ from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.conversion.plugins.epub_input import (
ADOBE_OBFUSCATION, IDPF_OBFUSCATION, decrypt_font_data)
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor, CSSPreProcessor as cssp
from calibre.ebooks.metadata import parse_opf_version
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.reader.headers import MetadataHeader
from calibre.ebooks.mobi.tweak import set_cover
@ -585,6 +586,11 @@ class Container(ContainerBase): # {{{
except IndexError:
return ''
@property
def opf_version_parsed(self):
    ' The value of self.opf_version run through parse_opf_version(), i.e. a (major, minor, patch) tuple of integers '
    raw_version = self.opf_version
    return parse_opf_version(raw_version)
@property
def manifest_id_map(self):
' Mapping of manifest id to canonical names '
@ -600,6 +606,14 @@ class Container(ContainerBase): # {{{
item.get('href'), self.opf_name))
return {mt:tuple(v) for mt, v in ans.iteritems()}
def manifest_items_with_property(self, property_name):
    ' Yield the canonical name of every manifest item whose properties attribute contains property_name (compared case-insensitively) '
    wanted = property_name.lower()
    for entry in self.opf_xpath('//opf:manifest/opf:item[@href and @properties]'):
        # properties is a whitespace separated list of tokens
        declared = entry.get('properties') or ''
        if wanted not in declared.lower().split():
            continue
        yield self.href_to_name(entry.get('href'), self.opf_name)
@property
def guide_type_map(self):
' Mapping of guide type to canonical name '

View File

@ -15,7 +15,7 @@ from calibre.spell.dictionary import parse_lang_code
from calibre.ebooks.oeb.base import barename
from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, get_container
from calibre.ebooks.oeb.polish.parsing import parse
from calibre.ebooks.oeb.polish.toc import find_existing_toc
from calibre.ebooks.oeb.polish.toc import find_existing_ncx_toc, find_existing_nav_toc
_patterns = None
@ -177,8 +177,9 @@ def group_sort(locations):
def get_checkable_file_names(container):
file_names = [name for name, linear in container.spine_names] + [container.opf_name]
toc = find_existing_toc(container)
if toc is not None and container.exists(toc):
for f in (find_existing_ncx_toc, find_existing_nav_toc):
toc = f(container)
if toc is not None and container.exists(toc) and toc not in file_names:
file_names.append(toc)
return file_names, toc

View File

@ -11,13 +11,15 @@ import re
from urlparse import urlparse
from collections import Counter, OrderedDict
from functools import partial
from future_builtins import map
from operator import itemgetter
from lxml import etree
from lxml.builder import ElementMaker
from calibre import __version__
from calibre.ebooks.oeb.base import XPath, uuid_id, xml2text, NCX, NCX_NS, XML, XHTML, XHTML_NS, serialize
from calibre.ebooks.oeb.base import (
XPath, uuid_id, xml2text, NCX, NCX_NS, XML, XHTML, XHTML_NS, serialize, EPUB_NS)
from calibre.ebooks.oeb.polish.errors import MalformedMarkup
from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.ebooks.oeb.polish.opf import set_guide_item, get_book_language
@ -32,6 +34,8 @@ ns['lower-case'] = lambda c, x: x.lower() if hasattr(x, 'lower') else x
class TOC(object):
toc_title = None
def __init__(self, title=None, dest=None, frag=None):
self.title, self.dest, self.frag = title, dest, frag
self.dest_exists = self.dest_error = None
@ -150,6 +154,47 @@ def parse_ncx(container, ncx_name):
break
return toc_root
def add_from_li(container, li, parent, ncx_name):
    """Add one TOC entry to ``parent`` from an ``<li>`` of a navigation list.

    Only the first ``<a>`` or ``<span>`` child of ``li`` is examined. Its text
    content becomes the entry title; when the element has no text, the
    ``title`` attributes found on it and its descendants are used instead.

    :param container: Book container, used to resolve the href to a name
    :param li: The ``<li>`` element to read
    :param parent: TOC node that receives the new entry via ``parent.add()``
    :param ncx_name: Name of the file that contains ``li``; relative hrefs
        are resolved against it (process_nav_node passes the nav document
        name here)
    :return: The value returned by ``parent.add()``
    """
    dest = frag = text = None
    for x in li.iterchildren(XHTML('a'), XHTML('span')):
        text = etree.tostring(x, method='text', encoding=unicode, with_tail=False)
        if not text:
            # Evaluate the XPath on the element. Joining the bare expression
            # string would produce its characters separated by spaces.
            text = ' '.join(x.xpath('descendant-or-self::*/@title'))
        href = x.get('href')
        if href:
            dest = container.href_to_name(href, base=ncx_name)
            frag = urlparse(href).fragment or None
        # NOTE(review): loop-level break (first <a>/<span> only) is inferred;
        # the listing this was taken from had lost its indentation.
        break
    return parent.add(text or None, dest or None, frag or None)
def first_child(parent, tagname):
    """Return the first child of ``parent`` matching ``tagname``, or None if there is no such child."""
    matches = parent.iterchildren(tagname)
    return next(matches, None)
def process_nav_node(container, node, toc_parent, nav_name):
    """Recursively convert the ``<li>`` children of ``node`` into TOC entries under ``toc_parent``.

    Each ``<li>`` is added with add_from_li(); if it also has an ``<ol>``
    child, that list is processed with the new entry as its parent.
    """
    for item in node.iterchildren(XHTML('li')):
        entry = add_from_li(container, item, toc_parent, nav_name)
        nested = first_child(item, XHTML('ol'))
        if entry is None or nested is None:
            # Nothing to descend into (or nothing to attach children to)
            continue
        process_nav_node(container, nested, entry, nav_name)
def parse_nav(container, nav_name):
    """Build a TOC tree from the ``<nav epub:type="toc">`` element of ``nav_name``.

    The returned root has ``lang`` and ``uid`` set to None. The first toc nav
    that has an ``<ol>`` child is used; the text (or ``title`` attribute) of
    its first non-empty heading child is stored as ``toc_title``.
    """
    root = container.parsed(nav_name)
    toc_root = TOC()
    toc_root.lang = toc_root.uid = None
    type_attr = '{%s}type' % EPUB_NS
    for nav in root.iterdescendants(XHTML('nav')):
        if nav.get(type_attr) != 'toc':
            continue
        entries = first_child(nav, XHTML('ol'))
        if entries is None:
            # A toc nav without a list: keep looking at later nav elements
            continue
        process_nav_node(container, entries, toc_root, nav_name)
        for heading in nav.iterchildren(*map(XHTML, 'h1 h2 h3 h4 h5 h6'.split())):
            heading_text = etree.tostring(heading, method='text', encoding=unicode, with_tail=False) or heading.get('title')
            if heading_text:
                toc_root.toc_title = heading_text
                break
        break
    return toc_root
def verify_toc_destinations(container, toc):
anchor_map = {}
@ -181,31 +226,38 @@ def verify_toc_destinations(container, toc):
'The anchor %(a)s does not exist in file %(f)s')%dict(
a=item.frag, f=name)
def find_existing_toc(container):
def find_existing_ncx_toc(container):
toc = container.opf_xpath('//opf:spine/@toc')
if toc:
toc = container.manifest_id_map.get(toc[0], None)
if not toc:
ncx = guess_type('a.ncx')
toc = container.manifest_type_map.get(ncx, [None])[0]
if not toc:
return None
return toc
return toc or None
def find_existing_nav_toc(container):
    """Return the name of the first manifest item with the ``nav`` property, or None if there is none."""
    candidates = container.manifest_items_with_property('nav')
    return next(iter(candidates), None)
def get_toc(container, verify_destinations=True):
toc = find_existing_toc(container)
if toc is None or not container.has_name(toc):
def get_x_toc(container, find_toc, parse_toc, verify_destinations=True):
def empty_toc():
ans = TOC()
ans.lang = ans.uid = ans.toc_file_name = None
ans.lang = ans.uid = None
return ans
ans = parse_ncx(container, toc)
ans.toc_file_name = toc
toc = find_toc(container)
ans = empty_toc() if toc is None or not container.has_name(toc) else parse_toc(container, toc)
ans.toc_file_name = toc if toc and container.has_name(toc) else None
if verify_destinations:
verify_toc_destinations(container, ans)
return ans
def get_toc(container, verify_destinations=True):
    """Read the book's table of contents.

    Books whose OPF major version is below 3 are read from the NCX; all
    others are read from the EPUB 3 navigation document.
    """
    if container.opf_version_parsed.major < 3:
        finder, parser = find_existing_ncx_toc, parse_ncx
    else:
        finder, parser = find_existing_nav_toc, parse_nav
    return get_x_toc(container, finder, parser, verify_destinations=verify_destinations)
def ensure_id(elem):
if elem.tag == XHTML('a'):
anchor = elem.get('name', None)
@ -452,12 +504,13 @@ def create_ncx(toc, to_href, btitle, lang, uid):
return ncx
def commit_toc(container, toc, lang=None, uid=None):
tocname = find_existing_toc(container)
def commit_ncx_toc(container, toc, lang=None, uid=None):
tocname = find_existing_ncx_toc(container)
if tocname is None:
item = container.generate_item('toc.ncx', id_prefix='toc')
tocname = container.href_to_name(item.get('href'),
base=container.opf_name)
tocname = container.href_to_name(item.get('href'), base=container.opf_name)
ncx_id = item.get('id')
[s.set('toc', ncx_id) for s in container.opf_xpath('//opf:spine')]
if not lang:
lang = get_lang()
for l in container.opf_xpath('//dc:language'):
@ -486,12 +539,78 @@ def commit_toc(container, toc, lang=None, uid=None):
container.replace(tocname, root)
container.pretty_print.add(tocname)
# Write ``toc`` into the book's EPUB 3 navigation document: the
# <nav epub:type="toc"> element is emptied (or created) and refilled with
# nested <ol>/<li> entries, then the document is stored back in the container.
# ``lang`` is accepted but is not read anywhere in this function.
# NOTE(review): this listing has lost its indentation, so block nesting is not
# visible here; the comments describe individual statements, not their nesting.
def commit_nav_toc(container, toc, lang=None):
from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree
tocname = find_existing_nav_toc(container)
if tocname is None:
# No manifest item carries the 'nav' property yet: generate one and mark it
item = container.generate_item('nav.html', id_prefix='nav')
item.set('properties', 'nav')
tocname = container.href_to_name(item.get('href'), base=container.opf_name)
try:
root = container.parsed(tocname)
except KeyError:
# container.parsed() raised KeyError for this name: start from the
# templates/new_nav.html resource instead
root = container.parse_xhtml(P('templates/new_nav.html', data=True).decode('utf-8'))
et = '{%s}type' % EPUB_NS
# All <nav> elements whose epub:type is exactly 'toc', in document order
navs = [n for n in root.iterdescendants(XHTML('nav')) if n.get(et) == 'toc']
# Every toc nav after the first is removed from the tree
for x in navs[1:]:
x.getparent().remove(x)
if navs:
nav = navs[0]
# clear() is followed by restoring the saved tail and attributes, so only the
# children and text of the existing nav are discarded
tail = nav.tail
attrib = dict(nav.attrib)
nav.clear()
nav.attrib.update(attrib)
nav.tail = tail
else:
# No toc nav exists: create one and append it to <body>
nav = root.makeelement(XHTML('nav'))
first_child(root, XHTML('body')).append(nav)
nav.set('{%s}type' % EPUB_NS, 'toc')
if toc.toc_title:
# The TOC title is written as an <h1> appended to the nav
nav.append(nav.makeelement(XHTML('h1')))
nav[-1].text = toc.toc_title
# Root <ol> that receives the top-level entries
rnode = nav.makeelement(XHTML('ol'))
nav.append(rnode)
# hrefs are computed relative to the nav document itself
to_href = partial(container.name_to_href, base=tocname)
spat = re.compile(r'\s+')
# Append one <li> per child of toc_parent to xml_parent, recursing into a
# nested <ol> for children that themselves have children
def process_node(xml_parent, toc_parent):
for child in toc_parent:
li = xml_parent.makeelement(XHTML('li'))
xml_parent.append(li)
title = child.title or ''
# Collapse runs of whitespace to single spaces and trim the ends
title = spat.sub(' ', title).strip()
# Entries with a destination become <a>, entries without one become <span>
a = li.makeelement(XHTML('a' if child.dest else 'span'))
a.text = title
li.append(a)
if child.dest:
href = to_href(child.dest)
if child.frag:
href += '#'+child.frag
a.set('href', href)
if len(child):
ol = li.makeelement(XHTML('ol'))
li.append(ol)
process_node(ol, child)
process_node(rnode, toc)
pretty_xml_tree(rnode)
container.replace(tocname, root)
def commit_toc(container, toc, lang=None, uid=None):
    """Write ``toc`` into the book.

    The NCX is always written. When the OPF major version is greater than 2
    the EPUB 3 navigation document is written as well.
    """
    commit_ncx_toc(container, toc, lang=lang, uid=uid)
    needs_nav = container.opf_version_parsed.major > 2
    if needs_nav:
        commit_nav_toc(container, toc, lang=lang)
def remove_names_from_toc(container, names):
toc = get_toc(container)
if len(toc) == 0:
return False
remove = []
changed = []
names = frozenset(names)
for find_toc, parse_toc, commit_toc in (
(find_existing_ncx_toc, parse_ncx, commit_ncx_toc),
(find_existing_nav_toc, parse_nav, commit_nav_toc),
):
toc = get_x_toc(container, find_toc, parse_toc, verify_destinations=False)
if len(toc) > 0:
remove = []
for node in toc.iterdescendants():
if node.dest in names:
remove.append(node)
@ -499,8 +618,8 @@ def remove_names_from_toc(container, names):
for node in reversed(remove):
node.remove_from_parent()
commit_toc(container, toc)
return True
return False
changed.append(find_toc(container))
return changed
def find_inline_toc(container):
for name, linear in container.spine_names:

View File

@ -575,6 +575,7 @@ class TOCView(QWidget): # {{{
def __init__(self, parent, prefs):
QWidget.__init__(self, parent)
self.toc_title = None
self.prefs = prefs
l = self.l = QGridLayout()
self.setLayout(l)
@ -748,6 +749,7 @@ class TOCView(QWidget): # {{{
self.item_view.hide_azw3_warning()
self.toc = get_toc(self.ebook)
self.toc_lang, self.toc_uid = self.toc.lang, self.toc.uid
self.toc_title = self.toc.toc_title
self.blank = QIcon(I('blank.png'))
self.ok = QIcon(I('ok.png'))
self.err = QIcon(I('dot_red.png'))
@ -972,6 +974,7 @@ class TOCEditor(QDialog): # {{{
tb = None
try:
toc = self.toc_view.create_toc()
toc.toc_title = getattr(self.toc_view, 'toc_title', None)
commit_toc(self.ebook, toc, lang=self.toc_view.toc_lang,
uid=self.toc_view.toc_uid)
self.ebook.commit()

View File

@ -25,7 +25,7 @@ from calibre.ebooks.oeb.polish.css import filter_css
from calibre.ebooks.oeb.polish.pretty import fix_all_html, pretty_all
from calibre.ebooks.oeb.polish.replace import rename_files, replace_file, get_recommended_folders, rationalize_folders
from calibre.ebooks.oeb.polish.split import split, merge, AbortError, multisplit
from calibre.ebooks.oeb.polish.toc import remove_names_from_toc, find_existing_toc, create_inline_toc
from calibre.ebooks.oeb.polish.toc import remove_names_from_toc, create_inline_toc
from calibre.ebooks.oeb.polish.utils import link_stylesheets, setup_cssutils_serialization as scs
from calibre.gui2 import error_dialog, choose_files, question_dialog, info_dialog, choose_save_file, open_url, choose_dir
from calibre.gui2.dialogs.confirm_delete import confirm
@ -397,9 +397,10 @@ class Boss(QObject):
if not editors:
self.gui.preview.clear()
self.gui.live_css.clear()
if remove_names_from_toc(current_container(), spine_names + list(other_items)):
changed = remove_names_from_toc(current_container(), spine_names + list(other_items))
if changed:
self.gui.toc_view.update_if_visible()
toc = find_existing_toc(current_container())
for toc in changed:
if toc and toc in editors:
editors[toc].replace_data(c.raw_data(toc))
if c.opf_name in editors:
@ -514,6 +515,7 @@ class Boss(QObject):
self.set_modified()
self.update_editors_from_container()
self.gui.toc_view.update_if_visible()
self.gui.file_list.build(current_container())
def insert_inline_toc(self):
self.commit_all_editors_to_container()

View File

@ -95,6 +95,7 @@ class TOCEditor(QDialog):
def write_toc(self):
toc = self.toc_view.create_toc()
toc.toc_title = getattr(self.toc_view, 'toc_title', None)
commit_toc(current_container(), toc, lang=self.toc_view.toc_lang,
uid=self.toc_view.toc_uid)
@ -115,6 +116,7 @@ class TOCViewer(QWidget):
def __init__(self, parent=None):
QWidget.__init__(self, parent)
self.l = l = QGridLayout(self)
self.toc_title = None
self.setLayout(l)
l.setContentsMargins(0, 0, 0, 0)
@ -194,6 +196,7 @@ class TOCViewer(QWidget):
return
toc = get_toc(c, verify_destinations=False)
self.toc_name = getattr(toc, 'toc_file_name', None)
self.toc_title = toc.toc_title
def process_node(toc, parent):
for child in toc: