mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
ToC wizard: Fix generating toc from headings/xpath yielding unexpected results when tags are present out of sequence
This commit is contained in:
parent
7152a09ee2
commit
3347f5b011
@ -15,7 +15,7 @@ from calibre.ebooks.oeb.polish.create import create_book
|
|||||||
from calibre.ebooks.oeb.polish.cover import (
|
from calibre.ebooks.oeb.polish.cover import (
|
||||||
find_cover_image, mark_as_cover, find_cover_page, mark_as_titlepage, clean_opf
|
find_cover_image, mark_as_cover, find_cover_page, mark_as_titlepage, clean_opf
|
||||||
)
|
)
|
||||||
from calibre.ebooks.oeb.polish.toc import get_toc
|
from calibre.ebooks.oeb.polish.toc import get_toc, from_xpaths as toc_from_xpaths
|
||||||
from calibre.ebooks.oeb.polish.utils import guess_type
|
from calibre.ebooks.oeb.polish.utils import guess_type
|
||||||
from calibre.ebooks.oeb.base import OEB_DOCS
|
from calibre.ebooks.oeb.base import OEB_DOCS
|
||||||
from calibre.ebooks.metadata.book.base import Metadata
|
from calibre.ebooks.metadata.book.base import Metadata
|
||||||
@ -95,6 +95,29 @@ class Structure(BaseTest):
|
|||||||
self.assertTrue(len(toc))
|
self.assertTrue(len(toc))
|
||||||
self.assertEqual(toc.as_dict['children'][0]['title'], 'EPUB 3 nav')
|
self.assertEqual(toc.as_dict['children'][0]['title'], 'EPUB 3 nav')
|
||||||
|
|
||||||
|
def tfx(linear, expected):
    # Each character N of `linear` becomes one <tN>N</tN> element, in order.
    # The resulting page is written to nav.html in the container `c`, a ToC
    # is built from one XPath per distinct tag, and its nesting is compared
    # to `expected`.
    markup = '\n'.join('<t{0}>{0}</t{0}>'.format(ch) for ch in linear)
    html = (
        '<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">' +
        '<body>%s</body></html>' % markup
    )
    with c.open('nav.html', 'wb') as f:
        f.write(html.encode('utf-8'))
    # One XPath per distinct tag, in sorted order
    toc = toc_from_xpaths(c, ['//h:t'+x for x in sorted(set(linear))])

    def p(node):
        # Serialize the subtree below node: the children are wrapped in
        # [...], each rendered as its title followed by its own subtree.
        # A node without children contributes nothing.
        if not node.children:
            return ''
        return '[' + ''.join(kid.title + p(kid) for kid in node.children) + ']'

    self.assertEqual('[%s]'%expected, p(toc))
|
||||||
|
|
||||||
|
tfx('121333', '1[2]1[333]')
|
||||||
|
tfx('1223424', '1[22[3[4]]2[4]]')
|
||||||
|
tfx('32123', '321[2[3]]')
|
||||||
|
tfx('123123', '1[2[3]]1[2[3]]')
|
||||||
|
|
||||||
def test_epub3_covers(self):
|
def test_epub3_covers(self):
|
||||||
# cover image
|
# cover image
|
||||||
ce = partial(self.create_epub, ver=3)
|
ce = partial(self.create_epub, ver=3)
|
||||||
|
@ -85,6 +85,10 @@ class TOC(object):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
@property
def last_child(self):
    ''' The last entry in self.children, or None if there are no children '''
    kids = self.children
    if not kids:
        return None
    return kids[-1]
|
||||||
|
|
||||||
def get_lines(self, lvl=0):
|
def get_lines(self, lvl=0):
|
||||||
frag = ('#'+self.frag) if self.frag else ''
|
frag = ('#'+self.frag) if self.frag else ''
|
||||||
ans = [(u'\t'*lvl) + u'TOC: %s --> %s%s'%(self.title, self.dest, frag)]
|
ans = [(u'\t'*lvl) + u'TOC: %s --> %s%s'%(self.title, self.dest, frag)]
|
||||||
@ -315,8 +319,6 @@ def from_xpaths(container, xpaths):
|
|||||||
'''
|
'''
|
||||||
tocroot = TOC()
|
tocroot = TOC()
|
||||||
xpaths = [XPath(xp) for xp in xpaths]
|
xpaths = [XPath(xp) for xp in xpaths]
|
||||||
level_prev = {i+1:None for i in xrange(len(xpaths))}
|
|
||||||
level_prev[0] = tocroot
|
|
||||||
|
|
||||||
# Find those levels that have no elements in all spine items
|
# Find those levels that have no elements in all spine items
|
||||||
maps = OrderedDict()
|
maps = OrderedDict()
|
||||||
@ -336,31 +338,39 @@ def from_xpaths(container, xpaths):
|
|||||||
lmap = {i+1:items for i, (l, items) in enumerate(lmap)}
|
lmap = {i+1:items for i, (l, items) in enumerate(lmap)}
|
||||||
maps[name] = lmap
|
maps[name] = lmap
|
||||||
|
|
||||||
|
node_level_map = {tocroot: 0}
|
||||||
|
|
||||||
|
def parent_for_level(child_level):
    ''' Find the node to which a new entry of level child_level should be
    added. Starting at tocroot, follow the chain of last children and stop
    at the first node that has no children, whose last child is deeper
    than child_level - 1, or whose last child is exactly at
    child_level - 1 (in which case that last child is returned). '''
    wanted = child_level - 1
    node = tocroot
    while True:
        last = node.last_child
        if last is None:
            # Nothing below this node yet, so it becomes the parent
            return node
        last_level = node_level_map[last]
        if last_level > wanted:
            # The most recent child is too deep to be the parent
            return node
        if last_level == wanted:
            return last
        # The most recent child is shallower than wanted, keep descending
        node = last
|
||||||
|
|
||||||
for name, level_item_map in maps.iteritems():
|
for name, level_item_map in maps.iteritems():
|
||||||
root = container.parsed(name)
|
root = container.parsed(name)
|
||||||
item_level_map = {e:i for i, elems in level_item_map.iteritems() for e in elems}
|
item_level_map = {e:i for i, elems in level_item_map.iteritems() for e in elems}
|
||||||
item_dirtied = False
|
item_dirtied = False
|
||||||
|
|
||||||
for item in root.iterdescendants(etree.Element):
|
for item in root.iterdescendants(etree.Element):
|
||||||
lvl = plvl = item_level_map.get(item, None)
|
lvl = item_level_map.get(item, None)
|
||||||
if lvl is None:
|
if lvl is None:
|
||||||
continue
|
continue
|
||||||
parent = None
|
text = elem_to_toc_text(item)
|
||||||
while parent is None:
|
parent = parent_for_level(lvl)
|
||||||
plvl -= 1
|
|
||||||
parent = level_prev[plvl]
|
|
||||||
lvl = plvl + 1
|
|
||||||
if item_at_top(item):
|
if item_at_top(item):
|
||||||
dirtied, elem_id = False, None
|
dirtied, elem_id = False, None
|
||||||
else:
|
else:
|
||||||
dirtied, elem_id = ensure_id(item)
|
dirtied, elem_id = ensure_id(item)
|
||||||
text = elem_to_toc_text(item)
|
|
||||||
item_dirtied = dirtied or item_dirtied
|
item_dirtied = dirtied or item_dirtied
|
||||||
toc = parent.add(text, name, elem_id)
|
toc = parent.add(text, name, elem_id)
|
||||||
|
node_level_map[toc] = lvl
|
||||||
toc.dest_exists = True
|
toc.dest_exists = True
|
||||||
level_prev[lvl] = toc
|
|
||||||
for i in xrange(lvl+1, len(xpaths)+1):
|
|
||||||
level_prev[i] = None
|
|
||||||
|
|
||||||
if item_dirtied:
|
if item_dirtied:
|
||||||
container.commit_item(name, keep_parsed=True)
|
container.commit_item(name, keep_parsed=True)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user