mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
DOCX Input: When autodetecting the Table of Contents from headings, work even if the headings use numbering. Fixes #1834661 [Private bug](https://bugs.launchpad.net/calibre/+bug/1834661)
This commit is contained in:
parent
ce01b4c571
commit
0d9047cfa3
@@ -497,6 +497,7 @@ class Convert(object):
|
|||||||
if m is not None:
|
if m is not None:
|
||||||
n = min(6, max(1, int(m.group(1))))
|
n = min(6, max(1, int(m.group(1))))
|
||||||
dest.tag = 'h%d' % n
|
dest.tag = 'h%d' % n
|
||||||
|
dest.set('data-heading-level', unicode_type(n))
|
||||||
|
|
||||||
if style.bidi is True:
|
if style.bidi is True:
|
||||||
dest.set('dir', 'rtl')
|
dest.set('dir', 'rtl')
|
||||||
|
@@ -15,15 +15,15 @@ from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
|
|||||||
from polyglot.builtins import iteritems, range
|
from polyglot.builtins import iteritems, range
|
||||||
|
|
||||||
|
|
||||||
def from_headings(body, log, namespace):
|
def from_headings(body, log, namespace, num_levels=3):
|
||||||
' Create a TOC from headings in the document '
|
' Create a TOC from headings in the document '
|
||||||
XPath, descendants = namespace.XPath, namespace.descendants
|
|
||||||
headings = ('h1', 'h2', 'h3')
|
|
||||||
tocroot = TOC()
|
tocroot = TOC()
|
||||||
xpaths = [XPath('//%s' % x) for x in headings]
|
all_heading_nodes = body.xpath('//*[@data-heading-level]')
|
||||||
level_prev = {i+1:None for i in range(len(xpaths))}
|
level_prev = {i+1:None for i in range(num_levels)}
|
||||||
level_prev[0] = tocroot
|
level_prev[0] = tocroot
|
||||||
level_item_map = {i+1:frozenset(xp(body)) for i, xp in enumerate(xpaths)}
|
level_item_map = {i:frozenset(
|
||||||
|
x for x in all_heading_nodes if int(x.get('data-heading-level')) == i)
|
||||||
|
for i in range(1, num_levels+1)}
|
||||||
item_level_map = {e:i for i, elems in iteritems(level_item_map) for e in elems}
|
item_level_map = {e:i for i, elems in iteritems(level_item_map) for e in elems}
|
||||||
|
|
||||||
idcount = count()
|
idcount = count()
|
||||||
@@ -35,7 +35,7 @@ def from_headings(body, log, namespace):
|
|||||||
elem.set('id', ans)
|
elem.set('id', ans)
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
for item in descendants(body, *headings):
|
for item in all_heading_nodes:
|
||||||
lvl = plvl = item_level_map.get(item, None)
|
lvl = plvl = item_level_map.get(item, None)
|
||||||
if lvl is None:
|
if lvl is None:
|
||||||
continue
|
continue
|
||||||
@@ -48,7 +48,7 @@ def from_headings(body, log, namespace):
|
|||||||
text = elem_to_toc_text(item)
|
text = elem_to_toc_text(item)
|
||||||
toc = parent.add_item('index.html', elem_id, text)
|
toc = parent.add_item('index.html', elem_id, text)
|
||||||
level_prev[lvl] = toc
|
level_prev[lvl] = toc
|
||||||
for i in range(lvl+1, len(xpaths)+1):
|
for i in range(lvl+1, num_levels+1):
|
||||||
level_prev[i] = None
|
level_prev[i] = None
|
||||||
|
|
||||||
if len(tuple(tocroot.flat())) > 1:
|
if len(tuple(tocroot.flat())) > 1:
|
||||||
@@ -136,4 +136,8 @@ def from_toc(docx, link_map, styles, object_map, log, namespace):
|
|||||||
|
|
||||||
|
|
||||||
def create_toc(docx, body, link_map, styles, object_map, log, namespace):
|
def create_toc(docx, body, link_map, styles, object_map, log, namespace):
|
||||||
return from_toc(docx, link_map, styles, object_map, log, namespace) or from_headings(body, log, namespace)
|
ans = from_toc(docx, link_map, styles, object_map, log, namespace) or from_headings(body, log, namespace)
|
||||||
|
# Remove heading level attributes
|
||||||
|
for h in body.xpath('//*[@data-heading-level]'):
|
||||||
|
del h.attrib['data-heading-level']
|
||||||
|
return ans
|
||||||
|
Loading…
x
Reference in New Issue
Block a user