DOCX input: move TOC generation out of the Convert class

The heading-based TOC builder no longer needs anything from the Convert
instance except the document body, so it becomes a module-level function,
create_toc(body), in the new module calibre/ebooks/docx/toc.py.

 * Convert.create_toc() is removed; Convert.write() now calls
   create_toc(self.body).
 * The TOC and elem_to_toc_text imports move from to_html.py to toc.py.
 * The running counter used to mint 'toc_id_%d' ids was stored on
   self.idcount; it is now held in a small Count object (single 'val'
   slot) that the nested ensure_id() helper increments.
 * The TOC is still built from h1/h2/h3 elements and is only returned when
   tocroot.flat() yields more than one item.

diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py
index bc8336d768..23191864ff 100644
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@@ -25,9 +25,8 @@ from calibre.ebooks.docx.tables import Tables
 from calibre.ebooks.docx.footnotes import Footnotes
 from calibre.ebooks.docx.cleanup import cleanup_markup
 from calibre.ebooks.docx.theme import Theme
+from calibre.ebooks.docx.toc import create_toc
 from calibre.ebooks.metadata.opf2 import OPFCreator
-from calibre.ebooks.metadata.toc import TOC
-from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
 from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
 
 class Text:
@@ -267,48 +266,8 @@ class Convert(object):
 
         self.styles.resolve_numbering(numbering)
 
-    def create_toc(self):
-        ' Create a TOC from headings in the document '
-        root = self.body
-        headings = ('h1', 'h2', 'h3')
-        tocroot = TOC()
-        xpaths = [XPath('//%s' % x) for x in headings]
-        level_prev = {i+1:None for i in xrange(len(xpaths))}
-        level_prev[0] = tocroot
-        level_item_map = {i+1:frozenset(xp(root)) for i, xp in enumerate(xpaths)}
-        item_level_map = {e:i for i, elems in level_item_map.iteritems() for e in elems}
-
-        self.idcount = 0
-
-        def ensure_id(elem):
-            ans = elem.get('id', None)
-            if not ans:
-                self.idcount += 1
-                ans = 'toc_id_%d' % self.idcount
-                elem.set('id', ans)
-            return ans
-
-        for item in descendants(root, *headings):
-            lvl = plvl = item_level_map.get(item, None)
-            if lvl is None:
-                continue
-            parent = None
-            while parent is None:
-                plvl -= 1
-                parent = level_prev[plvl]
-            lvl = plvl + 1
-            elem_id = ensure_id(item)
-            text = elem_to_toc_text(item)
-            toc = parent.add_item('index.html', elem_id, text)
-            level_prev[lvl] = toc
-            for i in xrange(lvl+1, len(xpaths)+1):
-                level_prev[i] = None
-
-        if len(tuple(tocroot.flat())) > 1:
-            return tocroot
-
     def write(self):
-        toc = self.create_toc()
+        toc = create_toc(self.body)
         raw = html.tostring(self.html, encoding='utf-8', doctype='')
         with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
             f.write(raw)
diff --git a/src/calibre/ebooks/docx/toc.py b/src/calibre/ebooks/docx/toc.py
new file mode 100644
index 0000000000..8036808701
--- /dev/null
+++ b/src/calibre/ebooks/docx/toc.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal '
+
+from calibre.ebooks.docx.names import XPath, descendants
+from calibre.ebooks.metadata.toc import TOC
+from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
+
+class Count(object):
+
+    __slots__ = ('val',)
+
+    def __init__(self):
+        self.val = 0
+
+def create_toc(body):
+    ' Create a TOC from headings in the document '
+    headings = ('h1', 'h2', 'h3')
+    tocroot = TOC()
+    xpaths = [XPath('//%s' % x) for x in headings]
+    level_prev = {i+1:None for i in xrange(len(xpaths))}
+    level_prev[0] = tocroot
+    level_item_map = {i+1:frozenset(xp(body)) for i, xp in enumerate(xpaths)}
+    item_level_map = {e:i for i, elems in level_item_map.iteritems() for e in elems}
+
+    idcount = Count()
+
+    def ensure_id(elem):
+        ans = elem.get('id', None)
+        if not ans:
+            idcount.val += 1
+            ans = 'toc_id_%d' % idcount.val
+            elem.set('id', ans)
+        return ans
+
+    for item in descendants(body, *headings):
+        lvl = plvl = item_level_map.get(item, None)
+        if lvl is None:
+            continue
+        parent = None
+        while parent is None:
+            plvl -= 1
+            parent = level_prev[plvl]
+        lvl = plvl + 1
+        elem_id = ensure_id(item)
+        text = elem_to_toc_text(item)
+        toc = parent.add_item('index.html', elem_id, text)
+        level_prev[lvl] = toc
+        for i in xrange(lvl+1, len(xpaths)+1):
+            level_prev[i] = None
+
+    if len(tuple(tocroot.flat())) > 1:
+        return tocroot
+
+
+