ToC wizard: Fix generating toc from headings/xpath yielding unexpected results when tags are present out of sequence

This commit is contained in:
Kovid Goyal 2016-08-25 19:56:52 +05:30
parent 7152a09ee2
commit 3347f5b011
2 changed files with 46 additions and 13 deletions

View File

@ -15,7 +15,7 @@ from calibre.ebooks.oeb.polish.create import create_book
from calibre.ebooks.oeb.polish.cover import (
find_cover_image, mark_as_cover, find_cover_page, mark_as_titlepage, clean_opf
)
from calibre.ebooks.oeb.polish.toc import get_toc
from calibre.ebooks.oeb.polish.toc import get_toc, from_xpaths as toc_from_xpaths
from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.ebooks.oeb.base import OEB_DOCS
from calibre.ebooks.metadata.book.base import Metadata
@ -95,6 +95,29 @@ class Structure(BaseTest):
self.assertTrue(len(toc))
self.assertEqual(toc.as_dict['children'][0]['title'], 'EPUB 3 nav')
def tfx(linear, expected):
    ''' Write a document containing one <tN>N</tN> element per character
    of `linear` (in that order) to nav.html in the container `c`, build a
    ToC from it with one XPath per distinct tag, and assert that the
    bracketed rendering of the resulting tree equals `expected`. '''
    markup = '\n'.join('<t{0}>{0}</t{0}>'.format(tag) for tag in linear)
    html = '<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">'
    html += '<body>%s</body></html>' % markup
    with c.open('nav.html', 'wb') as f:
        f.write(html.encode('utf-8'))
    # One expression per distinct tag, sorted so that the position of an
    # expression in the list follows the tag's sort order
    toc = toc_from_xpaths(c, ['//h:t'+x for x in sorted(set(linear))])

    def p(node):
        # Render the children of node as [title<children>title<children>...],
        # or as the empty string when node has no children
        if not node.children:
            return ''
        return '[' + ''.join(child.title + p(child) for child in node.children) + ']'
    self.assertEqual('[%s]'%expected, p(toc))
tfx('121333', '1[2]1[333]')
tfx('1223424', '1[22[3[4]]2[4]]')
tfx('32123', '321[2[3]]')
tfx('123123', '1[2[3]]1[2[3]]')
def test_epub3_covers(self):
# cover image
ce = partial(self.create_epub, ver=3)

View File

@ -85,6 +85,10 @@ class TOC(object):
except ValueError:
return 1
@property
def last_child(self):
    ''' The final entry of self.children, or None when self.children is empty '''
    if self.children:
        return self.children[-1]
    return None
def get_lines(self, lvl=0):
frag = ('#'+self.frag) if self.frag else ''
ans = [(u'\t'*lvl) + u'TOC: %s --> %s%s'%(self.title, self.dest, frag)]
@ -315,8 +319,6 @@ def from_xpaths(container, xpaths):
'''
tocroot = TOC()
xpaths = [XPath(xp) for xp in xpaths]
level_prev = {i+1:None for i in xrange(len(xpaths))}
level_prev[0] = tocroot
# Find those levels that have no elements in all spine items
maps = OrderedDict()
@ -336,31 +338,39 @@ def from_xpaths(container, xpaths):
lmap = {i+1:items for i, (l, items) in enumerate(lmap)}
maps[name] = lmap
node_level_map = {tocroot: 0}
def parent_for_level(child_level):
    ''' Return the ToC node that an entry of level `child_level` should be
    added to. Starting at tocroot, follow the chain of last children
    downwards until a node at level child_level - 1 is found, or until
    going further would skip past that level. '''
    limit = child_level - 1
    node = tocroot
    while True:
        child = node.last_child
        if child is None:
            # Nothing below this node yet, so it is the deepest candidate
            return node
        lvl = node_level_map[child]
        if lvl > limit:
            # The last child is already at or below the new entry's level,
            # so the new entry belongs next to it rather than under it
            return node
        if lvl == limit:
            return child
        # The last child is still too shallow, keep descending
        node = child
for name, level_item_map in maps.iteritems():
root = container.parsed(name)
item_level_map = {e:i for i, elems in level_item_map.iteritems() for e in elems}
item_dirtied = False
for item in root.iterdescendants(etree.Element):
lvl = plvl = item_level_map.get(item, None)
lvl = item_level_map.get(item, None)
if lvl is None:
continue
parent = None
while parent is None:
plvl -= 1
parent = level_prev[plvl]
lvl = plvl + 1
text = elem_to_toc_text(item)
parent = parent_for_level(lvl)
if item_at_top(item):
dirtied, elem_id = False, None
else:
dirtied, elem_id = ensure_id(item)
text = elem_to_toc_text(item)
item_dirtied = dirtied or item_dirtied
toc = parent.add(text, name, elem_id)
node_level_map[toc] = lvl
toc.dest_exists = True
level_prev[lvl] = toc
for i in xrange(lvl+1, len(xpaths)+1):
level_prev[i] = None
if item_dirtied:
container.commit_item(name, keep_parsed=True)