diff --git a/src/calibre/ebooks/oeb/polish/toc.py b/src/calibre/ebooks/oeb/polish/toc.py
index d5b013fb72..abac6bd7d1 100644
--- a/src/calibre/ebooks/oeb/polish/toc.py
+++ b/src/calibre/ebooks/oeb/polish/toc.py
@@ -15,7 +15,7 @@ from functools import partial
 from lxml import etree
 
 from calibre import __version__
-from calibre.ebooks.oeb.base import XPath, uuid_id, xml2text, NCX, NCX_NS, XML
+from calibre.ebooks.oeb.base import XPath, uuid_id, xml2text, NCX, NCX_NS, XML, XHTML
 from calibre.ebooks.oeb.polish.container import guess_type
 from calibre.utils.localization import get_lang, canonicalize_lang, lang_as_iso639_1
 
@@ -43,6 +43,10 @@ class TOC(object):
         for c in self.children:
             yield c
 
+    def __len__(self):
+        # Number of direct children (not of all descendants)
+        return len(self.children)
+
     def iterdescendants(self):
         for child in self:
             yield child
@@ -169,6 +173,72 @@ def get_toc(container, verify_destinations=True):
         verify_toc_destinations(container, ans)
     return ans
 
+def ensure_id(elem):
+    ''' Return (dirtied, anchor) for elem. An existing name (on <a> tags) or
+    id is reused; otherwise a new id is set and dirtied is True. '''
+    if elem.tag == XHTML('a'):
+        anchor = elem.get('name', None)
+        if anchor:
+            return False, anchor
+    elem_id = elem.get('id', None)
+    if elem_id:
+        return False, elem_id
+    elem.set('id', uuid_id())
+    return True, elem.get('id')
+
+def elem_to_toc_text(elem):
+    ''' Return whitespace-normalized text for a ToC entry: the element text,
+    else its title, else its alt attribute, cut to 1000 characters. '''
+    text = xml2text(elem).strip()
+    if not text:
+        text = elem.get('title', '')
+    if not text:
+        text = elem.get('alt', '')
+    text = re.sub(r'\s+', ' ', text.strip())
+    text = text[:1000].strip()
+    return text
+
+def from_xpaths(container, xpaths):
+    ''' Build a TOC from the spine items of container. xpaths[0] selects
+    level 1 entries, xpaths[1] level 2, and so on. Elements lacking an
+    anchor get an id and container.commit_item() is called for their file. '''
+    tocroot = TOC()
+    xpaths = [XPath(xp) for xp in xpaths]
+    level_prev = {i+1:None for i in xrange(len(xpaths))}
+    level_prev[0] = tocroot
+
+    for spinepath in container.spine_items:
+        name = container.abspath_to_name(spinepath)
+        root = container.parsed(name)
+        level_item_map = {i+1:frozenset(xp(root)) for i, xp in enumerate(xpaths)}
+        item_level_map = {e:i for i, elems in level_item_map.iteritems() for e in elems}
+        item_dirtied = False
+
+        for item in root.iterdescendants(etree.Element):
+            lvl = plvl = item_level_map.get(item, None)
+            if lvl is None:
+                continue
+            # Attach to the nearest shallower level that has an entry;
+            # level 0 is tocroot, so this loop always terminates
+            parent = None
+            while parent is None:
+                plvl -= 1
+                parent = level_prev[plvl]
+            lvl = plvl + 1
+            dirtied, elem_id = ensure_id(item)
+            text = elem_to_toc_text(item)
+            item_dirtied = dirtied or item_dirtied
+            toc = parent.add(text, name, elem_id)
+            toc.dest_exists = True
+            level_prev[lvl] = toc
+            # Deeper levels must not inherit parents from before this entry
+            for i in xrange(lvl+1, len(xpaths)+1):
+                level_prev[i] = None
+
+        if item_dirtied:
+            container.commit_item(name, keep_parsed=True)
+
+    return tocroot
 
 def add_id(container, name, loc):
     root = container.parsed(name)
diff --git a/src/calibre/gui2/toc/main.py b/src/calibre/gui2/toc/main.py
index 63aad654ad..d0e2a8e0f6 100644
--- a/src/calibre/gui2/toc/main.py
+++ b/src/calibre/gui2/toc/main.py
@@ -7,7 +7,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal '
 __docformat__ = 'restructuredtext en'
 
-import sys, os
+import sys, os, textwrap
 from threading import Thread
 from functools import partial
 
@@ -17,7 +17,8 @@ from PyQt4.Qt import (QPushButton, QFrame, QVariant,
     QToolButton, QItemSelectionModel)
 
 from calibre.ebooks.oeb.polish.container import get_container, AZW3Container
-from calibre.ebooks.oeb.polish.toc import get_toc, add_id, TOC, commit_toc
+from calibre.ebooks.oeb.polish.toc import (
+    get_toc, add_id, TOC, commit_toc, from_xpaths)
 from calibre.gui2 import Application, error_dialog, gprefs
 from calibre.gui2.progress_indicator import ProgressIndicator
 from calibre.gui2.toc.location import ItemEdit
@@ -31,6 +32,7 @@ class ItemView(QFrame): # {{{
     delete_item = pyqtSignal()
     flatten_item = pyqtSignal()
     go_to_root = pyqtSignal()
+    create_from_xpath = pyqtSignal(object)
 
     def __init__(self, parent):
         QFrame.__init__(self, parent)
@@ -60,6 +62,25 @@ class ItemView(QFrame): # {{{
         self.add_new_to_root_button = b = QPushButton(_('Create a &new entry'))
         b.clicked.connect(self.add_new_to_root)
         l.addWidget(b)
+        l.addStretch()
+
+        self.cfmhb = b = QPushButton(_('Generate ToC from &major headings'))
+        b.clicked.connect(self.create_from_major_headings)
+        b.setToolTip(textwrap.fill(_(
+            'Generate a Table of Contents from the major headings in the book.'
+            ' This will work if the book identifies its headings using HTML'
+            ' heading tags. Uses the <h1>, <h2> and <h3> tags.')))
+        l.addWidget(b)
+        self.cfmab = b = QPushButton(_('Generate ToC from &all headings'))
+        b.clicked.connect(self.create_from_all_headings)
+        b.setToolTip(textwrap.fill(_(
+            'Generate a Table of Contents from all the headings in the book.'
+            ' This will work if the book identifies its headings using HTML'
+            ' heading tags. Uses the <h1-6> tags.')))
+        l.addWidget(b)
+
+
+        l.addStretch()
 
         self.w1 = la = QLabel(_('WARNING: calibre only supports the '
                                 'creation of linear ToCs in AZW3 files. In a '
@@ -133,7 +154,7 @@ class ItemView(QFrame): # {{{
         b.setToolTip(_('All children of this entry are brought to the same '
                        'level as this entry.'))
         l.addWidget(b, l.rowCount()+1, 0, 1, 2)
-        ip.b4 = b = QPushButton(QIcon(I('back.png')), _('&Return to root'))
+        ip.b4 = b = QPushButton(QIcon(I('back.png')), _('&Return to welcome screen'))
         b.clicked.connect(self.go_to_root)
         b.setToolTip(_('Go back to the top level view'))
         l.addWidget(b, l.rowCount()+1, 0, 1, 2)
@@ -147,6 +168,14 @@ class ItemView(QFrame): # {{{
         self.w2.setWordWrap(True)
         l.addWidget(la, l.rowCount(), 0, 1, 2)
 
+    def create_from_major_headings(self):
+        # One XPath per ToC level: h1, h2, h3
+        self.create_from_xpath.emit(['//h:h%d'%i for i in xrange(1, 4)])
+
+    def create_from_all_headings(self):
+        # One XPath per ToC level: h1 through h6
+        self.create_from_xpath.emit(['//h:h%d'%i for i in xrange(1, 7)])
+
     def hide_azw3_warning(self):
         self.w1.setVisible(False), self.w2.setVisible(False)
 
@@ -242,6 +271,7 @@ class TOCView(QWidget): # {{{
         self.item_view = i = ItemView(self)
         self.item_view.delete_item.connect(self.delete_current_item)
         i.add_new_item.connect(self.add_new_item)
+        i.create_from_xpath.connect(self.create_from_xpath)
         i.flatten_item.connect(self.flatten_item)
         i.go_to_root.connect(self.go_to_root)
         l.addWidget(i, 0, 4, col, 1)
@@ -443,6 +473,30 @@ class TOCView(QWidget): # {{{
         process_node(self.tocw.invisibleRootItem(), root)
         return root
 
+    def insert_toc_fragment(self, toc):
+        ''' Create items under self.root for every entry in toc and
+        highlight the first item created. '''
+
+        def process_node(root, tocparent, added):
+            for child in tocparent:
+                item = self.create_item(root, child)
+                added.append(item)
+                process_node(item, child, added)
+
+        nodes = []
+        process_node(self.root, toc, nodes)
+        if nodes:  # nothing to highlight for an empty fragment
+            self.highlight_item(nodes[0])
+
+    def create_from_xpath(self, xpaths):
+        ''' Generate entries from the elements matching xpaths (one XPath
+        expression per level) or show an error if none match. '''
+        toc = from_xpaths(self.ebook, xpaths)
+        if len(toc) == 0:
+            return error_dialog(self, _('No items found'),
+                _('No items were found that could be added to the Table of Contents.'), show=True)
+        self.insert_toc_fragment(toc)
+
 # }}}
 
 class TOCEditor(QDialog): # {{{