diff --git a/src/calibre/ebooks/oeb/polish/tests/structure.py b/src/calibre/ebooks/oeb/polish/tests/structure.py index c26b237efd..59ddf97ab6 100644 --- a/src/calibre/ebooks/oeb/polish/tests/structure.py +++ b/src/calibre/ebooks/oeb/polish/tests/structure.py @@ -15,7 +15,7 @@ from calibre.ebooks.oeb.polish.create import create_book from calibre.ebooks.oeb.polish.cover import ( find_cover_image, mark_as_cover, find_cover_page, mark_as_titlepage, clean_opf ) -from calibre.ebooks.oeb.polish.toc import get_toc +from calibre.ebooks.oeb.polish.toc import get_toc, from_xpaths as toc_from_xpaths from calibre.ebooks.oeb.polish.utils import guess_type from calibre.ebooks.oeb.base import OEB_DOCS from calibre.ebooks.metadata.book.base import Metadata @@ -95,6 +95,29 @@ class Structure(BaseTest): self.assertTrue(len(toc)) self.assertEqual(toc.as_dict['children'][0]['title'], 'EPUB 3 nav') + def tfx(linear, expected): + items = ['{0}'.format(x) for x in linear] + html = '' + html += '%s' % '\n'.join(items) + with c.open('nav.html', 'wb') as f: + f.write(html.encode('utf-8')) + toc = toc_from_xpaths(c, ['//h:t'+x for x in sorted(set(linear))]) + + def p(node): + ans = '' + if node.children: + ans += '[' + for c in node.children: + ans += c.title + p(c) + ans += ']' + return ans + self.assertEqual('[%s]'%expected, p(toc)) + + tfx('121333', '1[2]1[333]') + tfx('1223424', '1[22[3[4]]2[4]]') + tfx('32123', '321[2[3]]') + tfx('123123', '1[2[3]]1[2[3]]') + def test_epub3_covers(self): # cover image ce = partial(self.create_epub, ver=3) diff --git a/src/calibre/ebooks/oeb/polish/toc.py b/src/calibre/ebooks/oeb/polish/toc.py index 1b6c216a67..6abdab6669 100644 --- a/src/calibre/ebooks/oeb/polish/toc.py +++ b/src/calibre/ebooks/oeb/polish/toc.py @@ -85,6 +85,10 @@ class TOC(object): except ValueError: return 1 + @property + def last_child(self): + return self.children[-1] if self.children else None + def get_lines(self, lvl=0): frag = ('#'+self.frag) if self.frag else '' ans = [(u'\t'*lvl) + u'TOC: %s --> %s%s'%(self.title, self.dest, frag)] @@ -315,8 +319,6 @@ def from_xpaths(container, xpaths): ''' tocroot = TOC() xpaths = [XPath(xp) for xp in xpaths] - level_prev = {i+1:None for i in xrange(len(xpaths))} - level_prev[0] = tocroot # Find those levels that have no elements in all spine items maps = OrderedDict() @@ -336,31 +338,39 @@ def from_xpaths(container, xpaths): lmap = {i+1:items for i, (l, items) in enumerate(lmap)} maps[name] = lmap + node_level_map = {tocroot: 0} + + def parent_for_level(child_level): + limit = child_level - 1 + + def process_node(node): + child = node.last_child + if child is None: + return node + lvl = node_level_map[child] + return node if lvl > limit else child if lvl == limit else process_node(child) + + return process_node(tocroot) + for name, level_item_map in maps.iteritems(): root = container.parsed(name) item_level_map = {e:i for i, elems in level_item_map.iteritems() for e in elems} item_dirtied = False for item in root.iterdescendants(etree.Element): - lvl = plvl = item_level_map.get(item, None) + lvl = item_level_map.get(item, None) if lvl is None: continue - parent = None - while parent is None: - plvl -= 1 - parent = level_prev[plvl] - lvl = plvl + 1 + text = elem_to_toc_text(item) + parent = parent_for_level(lvl) if item_at_top(item): dirtied, elem_id = False, None else: dirtied, elem_id = ensure_id(item) - text = elem_to_toc_text(item) item_dirtied = dirtied or item_dirtied toc = parent.add(text, name, elem_id) + node_level_map[toc] = lvl toc.dest_exists = True - level_prev[lvl] = toc - for i in xrange(lvl+1, len(xpaths)+1): - level_prev[i] = None if item_dirtied: container.commit_item(name, keep_parsed=True)