diff --git a/resources/templates/new_nav.html b/resources/templates/new_nav.html
new file mode 100644
index 0000000000..b4af9de5c3
--- /dev/null
+++ b/resources/templates/new_nav.html
@@ -0,0 +1,9 @@
+
+
+
+ Navigation
+
+
+
+
+
diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py
index 1ad151d9fc..3bb1c6c948 100644
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -7,12 +7,28 @@ __docformat__ = 'restructuredtext en'
 Provides abstraction for metadata reading.writing from a variety of ebook formats.
 """
 import os, sys, re
+from collections import namedtuple
+
 from urlparse import urlparse
 
 from calibre import relpath, guess_type, remove_bracketed_text, prints
 
 from calibre.utils.config_base import tweaks
 
+OPFVersion = namedtuple('OPFVersion', 'major minor patch')
+
+def parse_opf_version(raw):
+    ' Parse a dotted version string into an OPFVersion of three integers, using 2.0.0 when raw cannot be parsed '
+    try:
+        v = list(map(int, raw.split('.')))
+    except Exception:
+        # Best effort: anything that is not a dotted list of integers counts as version 2.0.0
+        v = [2, 0, 0]
+    while len(v) < 3:
+        v.append(0)
+    v = v[:3]
+    return OPFVersion(*v)
+
 try:
     _author_pat = re.compile(tweaks['authors_split_regex'])
 except:
diff --git a/src/calibre/ebooks/oeb/polish/container.py b/src/calibre/ebooks/oeb/polish/container.py
index fb85fc7076..d7bf382970 100644
--- a/src/calibre/ebooks/oeb/polish/container.py
+++ b/src/calibre/ebooks/oeb/polish/container.py
@@ -24,6 +24,7 @@ from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.conversion.plugins.epub_input import (
     ADOBE_OBFUSCATION, IDPF_OBFUSCATION, decrypt_font_data)
 from calibre.ebooks.conversion.preprocess import HTMLPreProcessor, CSSPreProcessor as cssp
+from calibre.ebooks.metadata import parse_opf_version
 from calibre.ebooks.mobi import MobiError
 from calibre.ebooks.mobi.reader.headers import MetadataHeader
 from calibre.ebooks.mobi.tweak import set_cover
@@ -585,6 +586,11 @@ class Container(ContainerBase):  # {{{
         except IndexError:
             return ''
 
+    @property
+    def opf_version_parsed(self):
+        ' The value of opf_version parsed by parse_opf_version() into an OPFVersion(major, minor, patch) tuple of integers '
+        return parse_opf_version(self.opf_version)
+
     @property
     def manifest_id_map(self):
         ' Mapping of manifest id to canonical names '
@@ -600,6 +606,14 @@ class Container(ContainerBase):  # {{{
                 item.get('href'), self.opf_name))
         return {mt:tuple(v) for mt, v in ans.iteritems()}
 
+    def manifest_items_with_property(self, property_name):
+        ' All manifest items that have the specified property '
+        q = property_name.lower()
+        for item in self.opf_xpath('//opf:manifest/opf:item[@href and @properties]'):
+            props = (item.get('properties') or '').lower().split()
+            if q in props:
+                yield self.href_to_name(item.get('href'), self.opf_name)
+
     @property
     def guide_type_map(self):
         ' Mapping of guide type to canonical name '
diff --git a/src/calibre/ebooks/oeb/polish/spell.py b/src/calibre/ebooks/oeb/polish/spell.py
index e1bea5ae34..0824cd2302 100644
--- a/src/calibre/ebooks/oeb/polish/spell.py
+++ b/src/calibre/ebooks/oeb/polish/spell.py
@@ -15,7 +15,7 @@ from calibre.spell.dictionary import parse_lang_code
 from calibre.ebooks.oeb.base import barename
 from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, get_container
 from calibre.ebooks.oeb.polish.parsing import parse
-from calibre.ebooks.oeb.polish.toc import find_existing_toc
+from calibre.ebooks.oeb.polish.toc import find_existing_ncx_toc, find_existing_nav_toc
 
 _patterns = None
 
@@ -177,9 +177,13 @@ def group_sort(locations):
 
 def get_checkable_file_names(container):
+    ' Return the checkable file names (spine items, the OPF and any existing NCX/nav TOC files) and the NCX TOC name (may be None) '
     file_names = [name for name, linear in container.spine_names] + [container.opf_name]
-    toc = find_existing_toc(container)
-    if toc is not None and container.exists(toc):
-        file_names.append(toc)
-    return file_names, toc
+    ncx_toc = find_existing_ncx_toc(container)
+    for toc in (ncx_toc, find_existing_nav_toc(container)):
+        if toc is not None and container.exists(toc) and toc not in file_names:
+            file_names.append(toc)
+    # The second return value stays the NCX name (what find_existing_toc
+    # used to supply), not whichever TOC happened to be probed last.
+    return file_names, ncx_toc
 
 def get_all_words(container, book_locale, get_word_count=False):
diff --git a/src/calibre/ebooks/oeb/polish/toc.py
b/src/calibre/ebooks/oeb/polish/toc.py
index 6486a896f9..188e5526ba 100644
--- a/src/calibre/ebooks/oeb/polish/toc.py
+++ b/src/calibre/ebooks/oeb/polish/toc.py
@@ -11,13 +11,15 @@ import re
 from urlparse import urlparse
 from collections import Counter, OrderedDict
 from functools import partial
+from future_builtins import map
 from operator import itemgetter
 
 from lxml import etree
 from lxml.builder import ElementMaker
 
 from calibre import __version__
-from calibre.ebooks.oeb.base import XPath, uuid_id, xml2text, NCX, NCX_NS, XML, XHTML, XHTML_NS, serialize
+from calibre.ebooks.oeb.base import (
+    XPath, uuid_id, xml2text, NCX, NCX_NS, XML, XHTML, XHTML_NS, serialize, EPUB_NS)
 from calibre.ebooks.oeb.polish.errors import MalformedMarkup
 from calibre.ebooks.oeb.polish.utils import guess_type
 from calibre.ebooks.oeb.polish.opf import set_guide_item, get_book_language
@@ -32,6 +34,8 @@ ns['lower-case'] = lambda c, x: x.lower() if hasattr(x, 'lower') else x
 
 class TOC(object):
 
+    toc_title = None
+
     def __init__(self, title=None, dest=None, frag=None):
         self.title, self.dest, self.frag = title, dest, frag
         self.dest_exists = self.dest_error = None
@@ -150,6 +154,53 @@ def parse_ncx(container, ncx_name):
                 break
     return toc_root
 
+def add_from_li(container, li, parent, ncx_name):
+    ' Add a TOC entry to parent from the first <a> or <span> child of the <li> element li '
+    dest = frag = text = None
+    for x in li.iterchildren(XHTML('a'), XHTML('span')):
+        # Fall back to the title attributes of the element and its
+        # descendants when the element has no text content
+        text = etree.tostring(x, method='text', encoding=unicode, with_tail=False) or ' '.join(x.xpath('descendant-or-self::*/@title'))
+        href = x.get('href')
+        if href:
+            dest = container.href_to_name(href, base=ncx_name)
+            frag = urlparse(href).fragment or None
+        break
+    return parent.add(text or None, dest or None, frag or None)
+
+def first_child(parent, tagname):
+    ' Return the first child of parent with the tag tagname, or None if there is no such child '
+    try:
+        return next(parent.iterchildren(tagname))
+    except StopIteration:
+        return None
+
+def process_nav_node(container, node, toc_parent, nav_name):
+    ' Recursively add the <li> children of node (and their nested <ol> lists) to toc_parent '
+    for li in node.iterchildren(XHTML('li')):
+        child = add_from_li(container, li, toc_parent, nav_name)
+        ol = first_child(li, XHTML('ol'))
+        if child is not None and ol is not None:
+            process_nav_node(container, ol, child, nav_name)
+
+def parse_nav(container, nav_name):
+    ' Build a TOC from the first <nav epub:type="toc"> element in the file nav_name '
+    root = container.parsed(nav_name)
+    toc_root = TOC()
+    toc_root.lang = toc_root.uid = None
+    et = '{%s}type' % EPUB_NS
+    for nav in root.iterdescendants(XHTML('nav')):
+        if nav.get(et) == 'toc':
+            ol = first_child(nav, XHTML('ol'))
+            if ol is not None:
+                process_nav_node(container, ol, toc_root, nav_name)
+                for h in nav.iterchildren(*map(XHTML, 'h1 h2 h3 h4 h5 h6'.split())):
+                    text = etree.tostring(h, method='text', encoding=unicode, with_tail=False) or h.get('title')
+                    if text:
+                        toc_root.toc_title = text
+                        break
+            break
+    return toc_root
 
 def verify_toc_destinations(container, toc):
     anchor_map = {}
@@ -181,31 +232,42 @@ def verify_toc_destinations(container, toc):
                     'The anchor %(a)s does not exist in file %(f)s')%dict(
                     a=item.frag, f=name)
 
-
-def find_existing_toc(container):
+def find_existing_ncx_toc(container):
+    ' Name of the file referenced by the spine toc attribute, else the first manifest item with the NCX media type, else None '
     toc = container.opf_xpath('//opf:spine/@toc')
     if toc:
         toc = container.manifest_id_map.get(toc[0], None)
     if not toc:
         ncx = guess_type('a.ncx')
         toc = container.manifest_type_map.get(ncx, [None])[0]
-    if not toc:
-        return None
-    return toc
+    return toc or None
 
+def find_existing_nav_toc(container):
+    ' Name of the first manifest item with the nav property, or None if there is none '
+    for name in container.manifest_items_with_property('nav'):
+        return name
 
-def get_toc(container, verify_destinations=True):
-    toc = find_existing_toc(container)
-    if toc is None or not container.has_name(toc):
+def get_x_toc(container, find_toc, parse_toc, verify_destinations=True):
+    ' Locate the TOC file with find_toc and parse it with parse_toc; an empty TOC is returned when the file is missing '
+    def empty_toc():
         ans = TOC()
-        ans.lang = ans.uid = ans.toc_file_name = None
+        ans.lang = ans.uid = None
         return ans
-    ans = parse_ncx(container, toc)
-    ans.toc_file_name = toc
+    toc = find_toc(container)
+    ans = empty_toc() if toc is None or not container.has_name(toc) else parse_toc(container, toc)
+    ans.toc_file_name = toc if toc and container.has_name(toc) else None
     if verify_destinations:
         verify_toc_destinations(container, ans)
     return ans
 
+def get_toc(container, verify_destinations=True):
+    ' Read the TOC from the NCX for OPF versions below 3 and from the nav document otherwise '
+    ver = container.opf_version_parsed
+    if ver.major < 3:
+        return get_x_toc(container, find_existing_ncx_toc, parse_ncx, verify_destinations=verify_destinations)
+    else:
+        return get_x_toc(container, find_existing_nav_toc, parse_nav, verify_destinations=verify_destinations)
+
 def ensure_id(elem):
     if elem.tag == XHTML('a'):
         anchor = elem.get('name', None)
@@ -452,12 +514,14 @@ def create_ncx(toc, to_href, btitle, lang, uid):
     return ncx
 
 
-def commit_toc(container, toc, lang=None, uid=None):
-    tocname = find_existing_toc(container)
+def commit_ncx_toc(container, toc, lang=None, uid=None):
+    tocname = find_existing_ncx_toc(container)
     if tocname is None:
         item = container.generate_item('toc.ncx', id_prefix='toc')
-        tocname = container.href_to_name(item.get('href'),
-                                         base=container.opf_name)
+        tocname = container.href_to_name(item.get('href'), base=container.opf_name)
+        ncx_id = item.get('id')
+        for s in container.opf_xpath('//opf:spine'):
+            s.set('toc', ncx_id)
     if not lang:
         lang = get_lang()
         for l in container.opf_xpath('//dc:language'):
@@ -486,21 +550,90 @@ def commit_toc(container, toc, lang=None, uid=None):
     container.replace(tocname, root)
     container.pretty_print.add(tocname)
 
+def commit_nav_toc(container, toc, lang=None):
+    ' Write toc into the toc <nav> element of the nav document, starting from the new_nav.html template when the file cannot be parsed; lang is accepted but not used '
+    from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree
+    tocname = find_existing_nav_toc(container)
+    if tocname is None:
+        item = container.generate_item('nav.html', id_prefix='nav')
+        item.set('properties', 'nav')
+        tocname = container.href_to_name(item.get('href'), base=container.opf_name)
+    try:
+        root = container.parsed(tocname)
+    except KeyError:
+        root = container.parse_xhtml(P('templates/new_nav.html', data=True).decode('utf-8'))
+    et = '{%s}type' % EPUB_NS
+    navs = [n for n in root.iterdescendants(XHTML('nav')) if n.get(et) == 'toc']
+    for x in navs[1:]:
+        x.getparent().remove(x)
+    if navs:
+        nav = navs[0]
+        tail = nav.tail
+        attrib = dict(nav.attrib)
+        nav.clear()
+        nav.attrib.update(attrib)
+        nav.tail = tail
+    else:
+        nav = root.makeelement(XHTML('nav'))
+        first_child(root, XHTML('body')).append(nav)
+    nav.set(et, 'toc')
+    if toc.toc_title:
+        nav.append(nav.makeelement(XHTML('h1')))
+        nav[-1].text = toc.toc_title
+
+    rnode = nav.makeelement(XHTML('ol'))
+    nav.append(rnode)
+    to_href = partial(container.name_to_href, base=tocname)
+    spat = re.compile(r'\s+')
+
+    def process_node(xml_parent, toc_parent):
+        for child in toc_parent:
+            li = xml_parent.makeelement(XHTML('li'))
+            xml_parent.append(li)
+            title = child.title or ''
+            title = spat.sub(' ', title).strip()
+            a = li.makeelement(XHTML('a' if child.dest else 'span'))
+            a.text = title
+            li.append(a)
+            if child.dest:
+                href = to_href(child.dest)
+                if child.frag:
+                    href += '#'+child.frag
+                a.set('href', href)
+            if len(child):
+                ol = li.makeelement(XHTML('ol'))
+                li.append(ol)
+                process_node(ol, child)
+    process_node(rnode, toc)
+    pretty_xml_tree(rnode)
+    container.replace(tocname, root)
+
+def commit_toc(container, toc, lang=None, uid=None):
+    ' Write toc to the NCX and, for OPF version 3 and above, also to the nav document '
+    commit_ncx_toc(container, toc, lang=lang, uid=uid)
+    if container.opf_version_parsed.major > 2:
+        commit_nav_toc(container, toc, lang=lang)
+
 def remove_names_from_toc(container, names):
-    toc = get_toc(container)
-    if len(toc) == 0:
-        return False
-    remove = []
+    ' Remove the entries pointing to the files in names from both the NCX and nav TOCs. Returns the list of TOC file names that were changed '
+    changed = []
     names = frozenset(names)
-    for node in toc.iterdescendants():
-        if node.dest in names:
-            remove.append(node)
-    if remove:
-        for node in reversed(remove):
-            node.remove_from_parent()
-        commit_toc(container, toc)
-        return True
-    return False
+    for find_toc, parse_toc, commit in (
+            (find_existing_ncx_toc, parse_ncx, commit_ncx_toc),
+            (find_existing_nav_toc, parse_nav, commit_nav_toc),
+    ):
+        toc = get_x_toc(container, find_toc, parse_toc, verify_destinations=False)
+        if len(toc) > 0:
+            remove = []
+            for node in toc.iterdescendants():
+                if node.dest in names:
+                    remove.append(node)
+            if remove:
+                for node in reversed(remove):
+                    node.remove_from_parent()
+                commit(container, toc)
+                changed.append(find_toc(container))
+    return changed
 
 def find_inline_toc(container):
     for name, linear in container.spine_names:
diff --git a/src/calibre/gui2/toc/main.py b/src/calibre/gui2/toc/main.py
index 6ed5f5b88a..40ccb96588 100644
--- a/src/calibre/gui2/toc/main.py
+++ b/src/calibre/gui2/toc/main.py
@@ -575,6 +575,7 @@ class TOCView(QWidget):  # {{{
 
     def __init__(self, parent, prefs):
         QWidget.__init__(self, parent)
+        self.toc_title = None
         self.prefs = prefs
         l = self.l = QGridLayout()
         self.setLayout(l)
@@ -748,6 +749,7 @@ class TOCView(QWidget):  # {{{
         self.item_view.hide_azw3_warning()
         self.toc = get_toc(self.ebook)
         self.toc_lang, self.toc_uid = self.toc.lang, self.toc.uid
+        self.toc_title = self.toc.toc_title
         self.blank = QIcon(I('blank.png'))
         self.ok = QIcon(I('ok.png'))
         self.err = QIcon(I('dot_red.png'))
@@ -972,6 +974,7 @@ class TOCEditor(QDialog):  # {{{
         tb = None
         try:
             toc = self.toc_view.create_toc()
+            toc.toc_title = getattr(self.toc_view, 'toc_title', None)
             commit_toc(self.ebook, toc, lang=self.toc_view.toc_lang,
                     uid=self.toc_view.toc_uid)
             self.ebook.commit()
diff --git a/src/calibre/gui2/tweak_book/boss.py b/src/calibre/gui2/tweak_book/boss.py
index 81fdb2e28c..aaa4d2488d 100644
--- a/src/calibre/gui2/tweak_book/boss.py
+++ b/src/calibre/gui2/tweak_book/boss.py
@@ -25,7 +25,7 @@ from calibre.ebooks.oeb.polish.css import filter_css
 from calibre.ebooks.oeb.polish.pretty import fix_all_html, pretty_all
 from calibre.ebooks.oeb.polish.replace import rename_files, replace_file, get_recommended_folders, rationalize_folders
 from calibre.ebooks.oeb.polish.split import split, merge, AbortError, multisplit
-from calibre.ebooks.oeb.polish.toc import remove_names_from_toc, find_existing_toc, create_inline_toc
+from calibre.ebooks.oeb.polish.toc import remove_names_from_toc, create_inline_toc
 from calibre.ebooks.oeb.polish.utils import link_stylesheets, setup_cssutils_serialization as scs
from calibre.gui2 import error_dialog, choose_files, question_dialog, info_dialog, choose_save_file, open_url, choose_dir from calibre.gui2.dialogs.confirm_delete import confirm @@ -397,11 +397,12 @@ class Boss(QObject): if not editors: self.gui.preview.clear() self.gui.live_css.clear() - if remove_names_from_toc(current_container(), spine_names + list(other_items)): + changed = remove_names_from_toc(current_container(), spine_names + list(other_items)) + if changed: self.gui.toc_view.update_if_visible() - toc = find_existing_toc(current_container()) - if toc and toc in editors: - editors[toc].replace_data(c.raw_data(toc)) + for toc in changed: + if toc and toc in editors: + editors[toc].replace_data(c.raw_data(toc)) if c.opf_name in editors: editors[c.opf_name].replace_data(c.raw_data(c.opf_name)) @@ -514,6 +515,7 @@ class Boss(QObject): self.set_modified() self.update_editors_from_container() self.gui.toc_view.update_if_visible() + self.gui.file_list.build(current_container()) def insert_inline_toc(self): self.commit_all_editors_to_container() diff --git a/src/calibre/gui2/tweak_book/toc.py b/src/calibre/gui2/tweak_book/toc.py index 4133786595..5b8cbfae91 100644 --- a/src/calibre/gui2/tweak_book/toc.py +++ b/src/calibre/gui2/tweak_book/toc.py @@ -95,6 +95,7 @@ class TOCEditor(QDialog): def write_toc(self): toc = self.toc_view.create_toc() + toc.toc_title = getattr(self.toc_view, 'toc_title', None) commit_toc(current_container(), toc, lang=self.toc_view.toc_lang, uid=self.toc_view.toc_uid) @@ -115,6 +116,7 @@ class TOCViewer(QWidget): def __init__(self, parent=None): QWidget.__init__(self, parent) self.l = l = QGridLayout(self) + self.toc_title = None self.setLayout(l) l.setContentsMargins(0, 0, 0, 0) @@ -194,6 +196,7 @@ class TOCViewer(QWidget): return toc = get_toc(c, verify_destinations=False) self.toc_name = getattr(toc, 'toc_file_name', None) + self.toc_title = toc.toc_title def process_node(toc, parent): for child in toc: