From 741bf9577e6484c086e637f3186b3166759979e2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 24 Dec 2013 16:20:32 +0530 Subject: [PATCH] ToC Editor: When auto-generating the table of contents from headings or XPath, if an element is at the top of the file, link only to the file instead of to the element. Fixes #1261099 [[Enhancement]ToC Editor senses when link is to top of file](https://bugs.launchpad.net/calibre/+bug/1261099) --- .../ebooks/conversion/plugins/epub_output.py | 20 ++------------ src/calibre/ebooks/oeb/polish/toc.py | 27 ++++++++++++++++++- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/src/calibre/ebooks/conversion/plugins/epub_output.py b/src/calibre/ebooks/conversion/plugins/epub_output.py index 90af27d4d2..bb59ee71dc 100644 --- a/src/calibre/ebooks/conversion/plugins/epub_output.py +++ b/src/calibre/ebooks/conversion/plugins/epub_output.py @@ -484,31 +484,15 @@ class EPUBOutput(OutputFormatPlugin): Perform toc link transforms to alleviate slow loading. 
''' from calibre.ebooks.oeb.base import urldefrag, XPath + from calibre.ebooks.oeb.polish.toc import item_at_top def frag_is_at_top(root, frag): - body = XPath('//h:body')(root) - if body: - body = body[0] - else: - return False - tree = body.getroottree() elem = XPath('//*[@id="%s" or @name="%s"]'%(frag, frag))(root) if elem: elem = elem[0] else: return False - path = tree.getpath(elem) - for el in body.iterdescendants(): - epath = tree.getpath(el) - if epath == path: - break - if el.text and el.text.strip(): - return False - if not path.startswith(epath): - # Only check tail of non-parent elements - if el.tail and el.tail.strip(): - return False - return True + return item_at_top(elem) def simplify_toc_entry(toc): if toc.href: diff --git a/src/calibre/ebooks/oeb/polish/toc.py b/src/calibre/ebooks/oeb/polish/toc.py index 7f119b2556..cb63a82afc 100644 --- a/src/calibre/ebooks/oeb/polish/toc.py +++ b/src/calibre/ebooks/oeb/polish/toc.py @@ -211,6 +211,28 @@ def elem_to_toc_text(elem): text = _('(Untitled)') return text +def item_at_top(elem): + try: + body = XPath('//h:body')(elem.getroottree().getroot())[0] + except (TypeError, IndexError, KeyError, AttributeError): + return False + tree = body.getroottree() + path = tree.getpath(elem) + for el in body.iterdescendants(etree.Element): + epath = tree.getpath(el) + if epath == path: + break + try: + if el.tag.endswith('}img') or (el.text and el.text.strip()): + return False + except: + return False + if not path.startswith(epath): + # Only check tail of non-parent elements + if el.tail and el.tail.strip(): + return False + return True + def from_xpaths(container, xpaths): tocroot = TOC() xpaths = [XPath(xp) for xp in xpaths] @@ -249,7 +271,10 @@ def from_xpaths(container, xpaths): plvl -= 1 parent = level_prev[plvl] lvl = plvl + 1 - dirtied, elem_id = ensure_id(item) + if item_at_top(item): + dirtied, elem_id = False, None + else: + dirtied, elem_id = ensure_id(item) text = elem_to_toc_text(item) item_dirtied = 
dirtied or item_dirtied toc = parent.add(text, name, elem_id)