Edit Book: Add support for EPUB 3 to the Table of Contents tool. Now, when editing EPUB 3 files, the tool automatically generates both the EPUB 3 navigation document and the NCX table of contents for backwards compatibility.

This commit is contained in:
Kovid Goyal 2016-06-15 18:00:50 +05:30
parent e622cee5ed
commit 16870db206
8 changed files with 203 additions and 38 deletions

View File

@ -0,0 +1,9 @@
<?xml version='1.0' encoding='utf-8'?>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
<head>
<title>Navigation</title>
</head>
<body>
</body>
</html>

View File

@ -7,12 +7,26 @@ __docformat__ = 'restructuredtext en'
Provides abstraction for metadata reading.writing from a variety of ebook formats. Provides abstraction for metadata reading.writing from a variety of ebook formats.
""" """
import os, sys, re import os, sys, re
from collections import namedtuple
from urlparse import urlparse from urlparse import urlparse
from calibre import relpath, guess_type, remove_bracketed_text, prints from calibre import relpath, guess_type, remove_bracketed_text, prints
from calibre.utils.config_base import tweaks from calibre.utils.config_base import tweaks
OPFVersion = namedtuple('OPFVersion', 'major minor patch')


def parse_opf_version(raw):
    """Parse a dotted version string into an ``OPFVersion`` tuple.

    Each dot-separated component of *raw* is converted with ``int``. If
    anything goes wrong during that conversion (including *raw* not being a
    string), the version falls back to 2.0.0. Missing components are filled
    with zeros and components beyond the third are dropped.

    :param raw: The version string, for example ``'3.0'``.
    :return: An ``OPFVersion(major, minor, patch)`` of integers.
    """
    try:
        parts = [int(piece) for piece in raw.split('.')]
    except Exception:
        # Unparseable input is treated as the 2.0.0 default.
        parts = [2, 0, 0]
    # Pad short versions with zeros (a negative count adds nothing).
    parts.extend([0] * (3 - len(parts)))
    return OPFVersion(*parts[:3])
try: try:
_author_pat = re.compile(tweaks['authors_split_regex']) _author_pat = re.compile(tweaks['authors_split_regex'])
except: except:

View File

@ -24,6 +24,7 @@ from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.conversion.plugins.epub_input import ( from calibre.ebooks.conversion.plugins.epub_input import (
ADOBE_OBFUSCATION, IDPF_OBFUSCATION, decrypt_font_data) ADOBE_OBFUSCATION, IDPF_OBFUSCATION, decrypt_font_data)
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor, CSSPreProcessor as cssp from calibre.ebooks.conversion.preprocess import HTMLPreProcessor, CSSPreProcessor as cssp
from calibre.ebooks.metadata import parse_opf_version
from calibre.ebooks.mobi import MobiError from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.reader.headers import MetadataHeader from calibre.ebooks.mobi.reader.headers import MetadataHeader
from calibre.ebooks.mobi.tweak import set_cover from calibre.ebooks.mobi.tweak import set_cover
@ -585,6 +586,11 @@ class Container(ContainerBase): # {{{
except IndexError: except IndexError:
return '' return ''
@property
def opf_version_parsed(self):
    """The OPF version as parsed by ``parse_opf_version``.

    Reads ``self.opf_version`` and hands it to ``parse_opf_version``,
    returning whatever that function produces.
    """
    version_string = self.opf_version
    return parse_opf_version(version_string)
@property @property
def manifest_id_map(self): def manifest_id_map(self):
' Mapping of manifest id to canonical names ' ' Mapping of manifest id to canonical names '
@ -600,6 +606,14 @@ class Container(ContainerBase): # {{{
item.get('href'), self.opf_name)) item.get('href'), self.opf_name))
return {mt:tuple(v) for mt, v in ans.iteritems()} return {mt:tuple(v) for mt, v in ans.iteritems()}
def manifest_items_with_property(self, property_name):
    """Yield the names of manifest items that declare *property_name*.

    The comparison is case-insensitive: both *property_name* and the
    whitespace-separated tokens of each item's ``properties`` attribute are
    lower-cased before matching.

    :param property_name: The property token to look for, e.g. ``'nav'``.
    :return: A generator of names produced by ``self.href_to_name`` for the
        matching items' ``href`` values, resolved against ``self.opf_name``.
    """
    wanted = property_name.lower()
    candidates = self.opf_xpath('//opf:manifest/opf:item[@href and @properties]')
    for entry in candidates:
        declared = (entry.get('properties') or '').lower().split()
        if wanted not in declared:
            continue
        yield self.href_to_name(entry.get('href'), self.opf_name)
@property @property
def guide_type_map(self): def guide_type_map(self):
' Mapping of guide type to canonical name ' ' Mapping of guide type to canonical name '

View File

@ -15,7 +15,7 @@ from calibre.spell.dictionary import parse_lang_code
from calibre.ebooks.oeb.base import barename from calibre.ebooks.oeb.base import barename
from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, get_container from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, get_container
from calibre.ebooks.oeb.polish.parsing import parse from calibre.ebooks.oeb.polish.parsing import parse
from calibre.ebooks.oeb.polish.toc import find_existing_toc from calibre.ebooks.oeb.polish.toc import find_existing_ncx_toc, find_existing_nav_toc
_patterns = None _patterns = None
@ -177,9 +177,10 @@ def group_sort(locations):
def get_checkable_file_names(container): def get_checkable_file_names(container):
file_names = [name for name, linear in container.spine_names] + [container.opf_name] file_names = [name for name, linear in container.spine_names] + [container.opf_name]
toc = find_existing_toc(container) for f in (find_existing_ncx_toc, find_existing_nav_toc):
if toc is not None and container.exists(toc): toc = f(container)
file_names.append(toc) if toc is not None and container.exists(toc) and toc not in file_names:
file_names.append(toc)
return file_names, toc return file_names, toc
def get_all_words(container, book_locale, get_word_count=False): def get_all_words(container, book_locale, get_word_count=False):

View File

@ -11,13 +11,15 @@ import re
from urlparse import urlparse from urlparse import urlparse
from collections import Counter, OrderedDict from collections import Counter, OrderedDict
from functools import partial from functools import partial
from future_builtins import map
from operator import itemgetter from operator import itemgetter
from lxml import etree from lxml import etree
from lxml.builder import ElementMaker from lxml.builder import ElementMaker
from calibre import __version__ from calibre import __version__
from calibre.ebooks.oeb.base import XPath, uuid_id, xml2text, NCX, NCX_NS, XML, XHTML, XHTML_NS, serialize from calibre.ebooks.oeb.base import (
XPath, uuid_id, xml2text, NCX, NCX_NS, XML, XHTML, XHTML_NS, serialize, EPUB_NS)
from calibre.ebooks.oeb.polish.errors import MalformedMarkup from calibre.ebooks.oeb.polish.errors import MalformedMarkup
from calibre.ebooks.oeb.polish.utils import guess_type from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.ebooks.oeb.polish.opf import set_guide_item, get_book_language from calibre.ebooks.oeb.polish.opf import set_guide_item, get_book_language
@ -32,6 +34,8 @@ ns['lower-case'] = lambda c, x: x.lower() if hasattr(x, 'lower') else x
class TOC(object): class TOC(object):
toc_title = None
def __init__(self, title=None, dest=None, frag=None): def __init__(self, title=None, dest=None, frag=None):
self.title, self.dest, self.frag = title, dest, frag self.title, self.dest, self.frag = title, dest, frag
self.dest_exists = self.dest_error = None self.dest_exists = self.dest_error = None
@ -150,6 +154,47 @@ def parse_ncx(container, ncx_name):
break break
return toc_root return toc_root
def add_from_li(container, li, parent, ncx_name):
    """Add one TOC entry to *parent* from a navigation ``<li>`` element.

    The entry is taken from an ``<a>`` or ``<span>`` child of *li*. Its text
    is the element's text content; when that is empty, the ``title``
    attributes found on the element and its descendants are joined with
    spaces instead. When the element has an ``href``, the destination name is
    resolved relative to *ncx_name* and the URL fragment is kept as the
    anchor.

    :param container: Object providing ``href_to_name(href, base=...)``.
    :param li: The ``<li>`` element to read.
    :param parent: The TOC node that receives the new entry via ``add``.
    :param ncx_name: Name of the file that *li* lives in, used as the base
        for resolving ``href``.
    :return: Whatever ``parent.add(text, dest, frag)`` returns.
    """
    dest = frag = text = None
    for x in li.iterchildren(XHTML('a'), XHTML('span')):
        # The fallback must evaluate the XPath on the element. The previous
        # code joined the characters of the XPath string literal itself.
        text = etree.tostring(x, method='text', encoding=unicode, with_tail=False) or ' '.join(
            x.xpath('descendant-or-self::*/@title'))
        href = x.get('href')
        if href:
            dest = container.href_to_name(href, base=ncx_name)
            frag = urlparse(href).fragment or None
        # NOTE(review): the source's indentation was lost; this assumes only
        # the first <a>/<span> child is used -- confirm against the original.
        break
    return parent.add(text or None, dest or None, frag or None)
def first_child(parent, tagname):
    """Return the first child of *parent* yielded for *tagname*, or ``None``.

    :param parent: Object providing ``iterchildren(tagname)``.
    :param tagname: Tag name passed through to ``iterchildren``.
    :return: The first item from the iterator, or ``None`` when it is empty.
    """
    return next(parent.iterchildren(tagname), None)
def process_nav_node(container, node, toc_parent, nav_name):
    """Recursively turn the ``<li>`` children of *node* into TOC entries.

    Each ``<li>`` is added under *toc_parent* through ``add_from_li``. When
    that yields an entry and the ``<li>`` has an ``<ol>`` child, the nested
    list is processed with the new entry as its parent.

    :param container: Passed through to ``add_from_li``.
    :param node: The list element whose ``<li>`` children are read.
    :param toc_parent: TOC node receiving the entries.
    :param nav_name: Name of the navigation file, passed to ``add_from_li``.
    """
    for entry in node.iterchildren(XHTML('li')):
        toc_entry = add_from_li(container, entry, toc_parent, nav_name)
        nested = first_child(entry, XHTML('ol'))
        if toc_entry is None or nested is None:
            continue
        process_nav_node(container, nested, toc_entry, nav_name)
# Build a TOC tree from the navigation document named nav_name.
# Returns the root TOC object; its lang and uid attributes are set to None.
# NOTE(review): indentation is not preserved in this view, so the exact
# nesting of the heading loop and the two break statements below should be
# confirmed against the real file.
def parse_nav(container, nav_name):
root = container.parsed(nav_name)
toc_root = TOC()
toc_root.lang = toc_root.uid = None
# Namespace-qualified name of the "type" attribute in the EPUB_NS namespace.
et = '{%s}type' % EPUB_NS
for nav in root.iterdescendants(XHTML('nav')):
# Only <nav> elements whose type attribute equals 'toc' are considered.
if nav.get(et) == 'toc':
ol = first_child(nav, XHTML('ol'))
if ol is not None:
# The <ol> child holds the entries; they are added under toc_root.
process_nav_node(container, ol, toc_root, nav_name)
# Use the first h1-h6 child with non-empty text (or a title attribute)
# as the title of the table of contents.
for h in nav.iterchildren(*map(XHTML, 'h1 h2 h3 h4 h5 h6'.split())):
text = etree.tostring(h, method='text', encoding=unicode, with_tail=False) or h.get('title')
if text:
toc_root.toc_title = text
break
break
return toc_root
def verify_toc_destinations(container, toc): def verify_toc_destinations(container, toc):
anchor_map = {} anchor_map = {}
@ -181,31 +226,38 @@ def verify_toc_destinations(container, toc):
'The anchor %(a)s does not exist in file %(f)s')%dict( 'The anchor %(a)s does not exist in file %(f)s')%dict(
a=item.frag, f=name) a=item.frag, f=name)
def find_existing_ncx_toc(container):
def find_existing_toc(container):
toc = container.opf_xpath('//opf:spine/@toc') toc = container.opf_xpath('//opf:spine/@toc')
if toc: if toc:
toc = container.manifest_id_map.get(toc[0], None) toc = container.manifest_id_map.get(toc[0], None)
if not toc: if not toc:
ncx = guess_type('a.ncx') ncx = guess_type('a.ncx')
toc = container.manifest_type_map.get(ncx, [None])[0] toc = container.manifest_type_map.get(ncx, [None])[0]
if not toc: return toc or None
return None
return toc
def find_existing_nav_toc(container):
    """Return the first manifest item with the ``nav`` property, if any.

    :param container: Object providing ``manifest_items_with_property``.
    :return: The first name yielded for the ``'nav'`` property, or ``None``
        when there is none.
    """
    return next(iter(container.manifest_items_with_property('nav')), None)
def get_toc(container, verify_destinations=True): def get_x_toc(container, find_toc, parse_toc, verify_destinations=True):
toc = find_existing_toc(container) def empty_toc():
if toc is None or not container.has_name(toc):
ans = TOC() ans = TOC()
ans.lang = ans.uid = ans.toc_file_name = None ans.lang = ans.uid = None
return ans return ans
ans = parse_ncx(container, toc) toc = find_toc(container)
ans.toc_file_name = toc ans = empty_toc() if toc is None or not container.has_name(toc) else parse_toc(container, toc)
ans.toc_file_name = toc if toc and container.has_name(toc) else None
if verify_destinations: if verify_destinations:
verify_toc_destinations(container, ans) verify_toc_destinations(container, ans)
return ans return ans
def get_toc(container, verify_destinations=True):
    """Read the table of contents appropriate for the book's OPF version.

    Books whose OPF major version is below 3 are read from the NCX; all
    others are read from the navigation document.

    :param container: The book container; must expose ``opf_version_parsed``.
    :param verify_destinations: Forwarded unchanged to ``get_x_toc``.
    :return: The TOC object returned by ``get_x_toc``.
    """
    if container.opf_version_parsed.major < 3:
        finder, parser = find_existing_ncx_toc, parse_ncx
    else:
        finder, parser = find_existing_nav_toc, parse_nav
    return get_x_toc(container, finder, parser, verify_destinations=verify_destinations)
def ensure_id(elem): def ensure_id(elem):
if elem.tag == XHTML('a'): if elem.tag == XHTML('a'):
anchor = elem.get('name', None) anchor = elem.get('name', None)
@ -452,12 +504,13 @@ def create_ncx(toc, to_href, btitle, lang, uid):
return ncx return ncx
def commit_toc(container, toc, lang=None, uid=None): def commit_ncx_toc(container, toc, lang=None, uid=None):
tocname = find_existing_toc(container) tocname = find_existing_ncx_toc(container)
if tocname is None: if tocname is None:
item = container.generate_item('toc.ncx', id_prefix='toc') item = container.generate_item('toc.ncx', id_prefix='toc')
tocname = container.href_to_name(item.get('href'), tocname = container.href_to_name(item.get('href'), base=container.opf_name)
base=container.opf_name) ncx_id = item.get('id')
[s.set('toc', ncx_id) for s in container.opf_xpath('//opf:spine')]
if not lang: if not lang:
lang = get_lang() lang = get_lang()
for l in container.opf_xpath('//dc:language'): for l in container.opf_xpath('//dc:language'):
@ -486,21 +539,87 @@ def commit_toc(container, toc, lang=None, uid=None):
container.replace(tocname, root) container.replace(tocname, root)
container.pretty_print.add(tocname) container.pretty_print.add(tocname)
# Write toc into the book's navigation document, creating the manifest item
# and/or the document from a template when they do not exist yet.
# NOTE(review): indentation is not preserved in this view; the nesting of the
# try/except, if/else and inner function bodies should be confirmed against
# the real file.
# NOTE(review): the lang parameter is accepted but not referenced anywhere in
# the body shown here.
def commit_nav_toc(container, toc, lang=None):
from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree
tocname = find_existing_nav_toc(container)
if tocname is None:
# No manifest item carries the nav property yet: generate one.
item = container.generate_item('nav.html', id_prefix='nav')
item.set('properties', 'nav')
tocname = container.href_to_name(item.get('href'), base=container.opf_name)
try:
root = container.parsed(tocname)
except KeyError:
# Fall back to the bundled empty navigation template.
root = container.parse_xhtml(P('templates/new_nav.html', data=True).decode('utf-8'))
et = '{%s}type' % EPUB_NS
# All <nav> elements whose type attribute equals 'toc'.
navs = [n for n in root.iterdescendants(XHTML('nav')) if n.get(et) == 'toc']
# Keep only the first toc nav; any further ones are removed from the tree.
for x in navs[1:]:
x.getparent().remove(x)
if navs:
nav = navs[0]
# Save the tail and attributes before clear() and restore them afterwards,
# so only the element's content is discarded.
tail = nav.tail
attrib = dict(nav.attrib)
nav.clear()
nav.attrib.update(attrib)
nav.tail = tail
else:
# No toc nav exists: create one and append it to <body>.
nav = root.makeelement(XHTML('nav'))
first_child(root, XHTML('body')).append(nav)
nav.set('{%s}type' % EPUB_NS, 'toc')
if toc.toc_title:
# The TOC title is emitted as an <h1> inside the nav.
nav.append(nav.makeelement(XHTML('h1')))
nav[-1].text = toc.toc_title
# Root <ol> that receives the top-level entries.
rnode = nav.makeelement(XHTML('ol'))
nav.append(rnode)
# Hrefs are computed relative to the navigation document itself.
to_href = partial(container.name_to_href, base=tocname)
spat = re.compile(r'\s+')
# Recursively emit one <li> per child of toc_parent under xml_parent.
def process_node(xml_parent, toc_parent):
for child in toc_parent:
li = xml_parent.makeelement(XHTML('li'))
xml_parent.append(li)
title = child.title or ''
# Collapse runs of whitespace in the title to single spaces.
title = spat.sub(' ', title).strip()
# Entries with a destination become <a>, entries without one become <span>.
a = li.makeelement(XHTML('a' if child.dest else 'span'))
a.text = title
li.append(a)
if child.dest:
href = to_href(child.dest)
if child.frag:
href += '#'+child.frag
a.set('href', href)
if len(child):
# Nested entries go into a nested <ol>.
ol = li.makeelement(XHTML('ol'))
li.append(ol)
process_node(ol, child)
process_node(rnode, toc)
pretty_xml_tree(rnode)
container.replace(tocname, root)
def commit_toc(container, toc, lang=None, uid=None):
    """Write *toc* into the book.

    The NCX is always written. When the OPF major version is greater than 2,
    the navigation document is written as well.

    :param container: The book container; must expose ``opf_version_parsed``.
    :param toc: The TOC tree to write.
    :param lang: Forwarded to both ``commit_ncx_toc`` and ``commit_nav_toc``.
    :param uid: Forwarded to ``commit_ncx_toc``.
    """
    commit_ncx_toc(container, toc, lang=lang, uid=uid)
    needs_nav = container.opf_version_parsed.major > 2
    if needs_nav:
        commit_nav_toc(container, toc, lang=lang)
def remove_names_from_toc(container, names): def remove_names_from_toc(container, names):
toc = get_toc(container) changed = []
if len(toc) == 0:
return False
remove = []
names = frozenset(names) names = frozenset(names)
for node in toc.iterdescendants(): for find_toc, parse_toc, commit_toc in (
if node.dest in names: (find_existing_ncx_toc, parse_ncx, commit_ncx_toc),
remove.append(node) (find_existing_nav_toc, parse_nav, commit_nav_toc),
if remove: ):
for node in reversed(remove): toc = get_x_toc(container, find_toc, parse_toc, verify_destinations=False)
node.remove_from_parent() if len(toc) > 0:
commit_toc(container, toc) remove = []
return True for node in toc.iterdescendants():
return False if node.dest in names:
remove.append(node)
if remove:
for node in reversed(remove):
node.remove_from_parent()
commit_toc(container, toc)
changed.append(find_toc(container))
return changed
def find_inline_toc(container): def find_inline_toc(container):
for name, linear in container.spine_names: for name, linear in container.spine_names:

View File

@ -575,6 +575,7 @@ class TOCView(QWidget): # {{{
def __init__(self, parent, prefs): def __init__(self, parent, prefs):
QWidget.__init__(self, parent) QWidget.__init__(self, parent)
self.toc_title = None
self.prefs = prefs self.prefs = prefs
l = self.l = QGridLayout() l = self.l = QGridLayout()
self.setLayout(l) self.setLayout(l)
@ -748,6 +749,7 @@ class TOCView(QWidget): # {{{
self.item_view.hide_azw3_warning() self.item_view.hide_azw3_warning()
self.toc = get_toc(self.ebook) self.toc = get_toc(self.ebook)
self.toc_lang, self.toc_uid = self.toc.lang, self.toc.uid self.toc_lang, self.toc_uid = self.toc.lang, self.toc.uid
self.toc_title = self.toc.toc_title
self.blank = QIcon(I('blank.png')) self.blank = QIcon(I('blank.png'))
self.ok = QIcon(I('ok.png')) self.ok = QIcon(I('ok.png'))
self.err = QIcon(I('dot_red.png')) self.err = QIcon(I('dot_red.png'))
@ -972,6 +974,7 @@ class TOCEditor(QDialog): # {{{
tb = None tb = None
try: try:
toc = self.toc_view.create_toc() toc = self.toc_view.create_toc()
toc.toc_title = getattr(self.toc_view, 'toc_title', None)
commit_toc(self.ebook, toc, lang=self.toc_view.toc_lang, commit_toc(self.ebook, toc, lang=self.toc_view.toc_lang,
uid=self.toc_view.toc_uid) uid=self.toc_view.toc_uid)
self.ebook.commit() self.ebook.commit()

View File

@ -25,7 +25,7 @@ from calibre.ebooks.oeb.polish.css import filter_css
from calibre.ebooks.oeb.polish.pretty import fix_all_html, pretty_all from calibre.ebooks.oeb.polish.pretty import fix_all_html, pretty_all
from calibre.ebooks.oeb.polish.replace import rename_files, replace_file, get_recommended_folders, rationalize_folders from calibre.ebooks.oeb.polish.replace import rename_files, replace_file, get_recommended_folders, rationalize_folders
from calibre.ebooks.oeb.polish.split import split, merge, AbortError, multisplit from calibre.ebooks.oeb.polish.split import split, merge, AbortError, multisplit
from calibre.ebooks.oeb.polish.toc import remove_names_from_toc, find_existing_toc, create_inline_toc from calibre.ebooks.oeb.polish.toc import remove_names_from_toc, create_inline_toc
from calibre.ebooks.oeb.polish.utils import link_stylesheets, setup_cssutils_serialization as scs from calibre.ebooks.oeb.polish.utils import link_stylesheets, setup_cssutils_serialization as scs
from calibre.gui2 import error_dialog, choose_files, question_dialog, info_dialog, choose_save_file, open_url, choose_dir from calibre.gui2 import error_dialog, choose_files, question_dialog, info_dialog, choose_save_file, open_url, choose_dir
from calibre.gui2.dialogs.confirm_delete import confirm from calibre.gui2.dialogs.confirm_delete import confirm
@ -397,11 +397,12 @@ class Boss(QObject):
if not editors: if not editors:
self.gui.preview.clear() self.gui.preview.clear()
self.gui.live_css.clear() self.gui.live_css.clear()
if remove_names_from_toc(current_container(), spine_names + list(other_items)): changed = remove_names_from_toc(current_container(), spine_names + list(other_items))
if changed:
self.gui.toc_view.update_if_visible() self.gui.toc_view.update_if_visible()
toc = find_existing_toc(current_container()) for toc in changed:
if toc and toc in editors: if toc and toc in editors:
editors[toc].replace_data(c.raw_data(toc)) editors[toc].replace_data(c.raw_data(toc))
if c.opf_name in editors: if c.opf_name in editors:
editors[c.opf_name].replace_data(c.raw_data(c.opf_name)) editors[c.opf_name].replace_data(c.raw_data(c.opf_name))
@ -514,6 +515,7 @@ class Boss(QObject):
self.set_modified() self.set_modified()
self.update_editors_from_container() self.update_editors_from_container()
self.gui.toc_view.update_if_visible() self.gui.toc_view.update_if_visible()
self.gui.file_list.build(current_container())
def insert_inline_toc(self): def insert_inline_toc(self):
self.commit_all_editors_to_container() self.commit_all_editors_to_container()

View File

@ -95,6 +95,7 @@ class TOCEditor(QDialog):
def write_toc(self): def write_toc(self):
toc = self.toc_view.create_toc() toc = self.toc_view.create_toc()
toc.toc_title = getattr(self.toc_view, 'toc_title', None)
commit_toc(current_container(), toc, lang=self.toc_view.toc_lang, commit_toc(current_container(), toc, lang=self.toc_view.toc_lang,
uid=self.toc_view.toc_uid) uid=self.toc_view.toc_uid)
@ -115,6 +116,7 @@ class TOCViewer(QWidget):
def __init__(self, parent=None): def __init__(self, parent=None):
QWidget.__init__(self, parent) QWidget.__init__(self, parent)
self.l = l = QGridLayout(self) self.l = l = QGridLayout(self)
self.toc_title = None
self.setLayout(l) self.setLayout(l)
l.setContentsMargins(0, 0, 0, 0) l.setContentsMargins(0, 0, 0, 0)
@ -194,6 +196,7 @@ class TOCViewer(QWidget):
return return
toc = get_toc(c, verify_destinations=False) toc = get_toc(c, verify_destinations=False)
self.toc_name = getattr(toc, 'toc_file_name', None) self.toc_name = getattr(toc, 'toc_file_name', None)
self.toc_title = toc.toc_title
def process_node(toc, parent): def process_node(toc, parent):
for child in toc: for child in toc: