mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Make parsing of HTML ToC in KF8 more robust
This commit is contained in:
parent
3b72f160e0
commit
fa196cf1af
@ -446,6 +446,7 @@ class Mobi8Reader(object):
|
|||||||
current_depth = None
|
current_depth = None
|
||||||
parent = ans
|
parent = ans
|
||||||
seen = set()
|
seen = set()
|
||||||
|
links = []
|
||||||
for elem in root.iterdescendants(etree.Element):
|
for elem in root.iterdescendants(etree.Element):
|
||||||
if reached and elem.tag == XHTML('a') and elem.get('href',
|
if reached and elem.tag == XHTML('a') and elem.get('href',
|
||||||
False):
|
False):
|
||||||
@ -453,24 +454,32 @@ class Mobi8Reader(object):
|
|||||||
href, frag = urldefrag(href)
|
href, frag = urldefrag(href)
|
||||||
href = base_href + '/' + href
|
href = base_href + '/' + href
|
||||||
text = xml2text(elem).strip()
|
text = xml2text(elem).strip()
|
||||||
if text in seen:
|
if (text, href, frag) in seen:
|
||||||
continue
|
continue
|
||||||
seen.add(text)
|
seen.add((text, href, frag))
|
||||||
depth = node_depth(elem)
|
links.append((text, href, frag, node_depth(elem)))
|
||||||
if current_depth is None:
|
elif elem is start:
|
||||||
current_depth = depth
|
reached = True
|
||||||
if current_depth == depth:
|
|
||||||
parent.add_item(href, frag, text)
|
depths = sorted(set(x[-1] for x in links))
|
||||||
elif current_depth < depth:
|
depth_map = {x:i for i, x in enumerate(depths)}
|
||||||
parent = parent[-1]
|
for text, href, frag, depth in links:
|
||||||
parent.add_item(href, frag, text)
|
depth = depth_map[depth]
|
||||||
current_depth = depth
|
if current_depth is None:
|
||||||
else:
|
current_depth = 0
|
||||||
parent = parent.parent
|
parent.add_item(href, frag, text)
|
||||||
parent.add_item(href, frag, text)
|
elif current_depth == depth:
|
||||||
current_depth = depth
|
parent.add_item(href, frag, text)
|
||||||
|
elif current_depth < depth:
|
||||||
|
parent = parent[-1] if len(parent) > 0 else parent
|
||||||
|
parent.add_item(href, frag, text)
|
||||||
|
current_depth += 1
|
||||||
else:
|
else:
|
||||||
if elem is start:
|
delta = current_depth - depth
|
||||||
reached = True
|
while delta > 0 and parent.parent is not None:
|
||||||
|
parent = parent.parent
|
||||||
|
delta -= 1
|
||||||
|
parent.add_item(href, frag, text)
|
||||||
|
current_depth = depth
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user