DOCX input: build a Table of Contents from document headings.

Summary of the changes carried by this patch:

* styles.py: add the CSS rule
  `dl.notes dd:last-of-type { page-break-after: avoid }` after the existing
  `dl.notes dd { page-break-after: always }` rule.
* to_html.py: add Convert.create_toc(), which walks the h1/h2/h3 elements
  under self.body in document order, gives each heading that lacks an `id`
  a generated `toc_id_<n>` id, and nests the entries under the nearest
  shallower heading seen so far. It returns the TOC root only when
  tocroot.flat() yields more than one node; otherwise it falls through and
  returns None.
* to_html.py: Convert.write() calls create_toc() before serialising the
  HTML and assigns the result to `opf.toc`.
* to_html.py: fix the heading level clamp. `min(1, max(6, n))` always
  evaluates to 1; `min(6, max(1, n))` clamps n into the range 1..6.

NOTE(review): this patch was recovered from a copy whose newlines had been
collapsed. Indentation of the context lines was reconstructed from the
Python syntax; if it does not apply cleanly, retry with whitespace-tolerant
options and confirm against the target files.

diff --git a/src/calibre/ebooks/docx/styles.py b/src/calibre/ebooks/docx/styles.py
index c96f1260e0..ed95aa6c2f 100644
--- a/src/calibre/ebooks/docx/styles.py
+++ b/src/calibre/ebooks/docx/styles.py
@@ -394,6 +394,9 @@ class Styles(object):
             dl.notes dt a { text-decoration: none }
 
             dl.notes dd { page-break-after: always }
+
+            dl.notes dd:last-of-type { page-break-after: avoid }
+
             ''') % (self.body_font_family, self.body_font_size)
         if ef:
             prefix = ef + '\n' + prefix
diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py
index 3a9cd51580..a46687d6c3 100644
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@@ -23,6 +23,8 @@ from calibre.ebooks.docx.fonts import Fonts
 from calibre.ebooks.docx.images import Images
 from calibre.ebooks.docx.footnotes import Footnotes
 from calibre.ebooks.metadata.opf2 import OPFCreator
+from calibre.ebooks.metadata.toc import TOC
+from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
 from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
 
 class Text:
@@ -227,7 +229,48 @@ class Convert(object):
 
         self.styles.resolve_numbering(numbering)
 
+    def create_toc(self):
+        ' Create a TOC from headings in the document '
+        root = self.body
+        headings = ('h1', 'h2', 'h3')
+        tocroot = TOC()
+        xpaths = [XPath('//%s' % x) for x in headings]
+        level_prev = {i+1:None for i in xrange(len(xpaths))}
+        level_prev[0] = tocroot
+        level_item_map = {i+1:frozenset(xp(root)) for i, xp in enumerate(xpaths)}
+        item_level_map = {e:i for i, elems in level_item_map.iteritems() for e in elems}
+
+        self.idcount = 0
+
+        def ensure_id(elem):
+            ans = elem.get('id', None)
+            if not ans:
+                self.idcount += 1
+                ans = 'toc_id_%d' % self.idcount
+                elem.set('id', ans)
+            return ans
+
+        for item in root.iterdescendants(*headings):
+            lvl = plvl = item_level_map.get(item, None)
+            if lvl is None:
+                continue
+            parent = None
+            while parent is None:
+                plvl -= 1
+                parent = level_prev[plvl]
+            lvl = plvl + 1
+            elem_id = ensure_id(item)
+            text = elem_to_toc_text(item)
+            toc = parent.add_item('index.html', elem_id, text)
+            level_prev[lvl] = toc
+            for i in xrange(lvl+1, len(xpaths)+1):
+                level_prev[i] = None
+
+        if len(tuple(tocroot.flat())) > 1:
+            return tocroot
+
     def write(self):
+        toc = self.create_toc()
         raw = html.tostring(self.html, encoding='utf-8', doctype='')
         with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
             f.write(raw)
@@ -237,6 +280,7 @@ class Convert(object):
             f.write(css.encode('utf-8'))
 
         opf = OPFCreator(self.dest_dir, self.mi)
+        opf.toc = toc
         opf.create_manifest_from_files_in([self.dest_dir])
         opf.create_spine(['index.html'])
         with open(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, open(os.path.join(self.dest_dir, 'toc.ncx'), 'wb') as ncx:
@@ -276,7 +320,7 @@ class Convert(object):
 
         m = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE)
         if m is not None:
-            n = min(1, max(6, int(m.group(1))))
+            n = min(6, max(1, int(m.group(1))))
             dest.tag = 'h%d' % n
 
         if style.direction == 'rtl':