DOCX Input: When some text has multiple footnotes, insert a non-breaking space between consecutive footnote numbers so that they are distinct. Fixes #2089433 [Separate several footnote/endnote references](https://bugs.launchpad.net/calibre/+bug/2089433)

This commit is contained in:
Kovid Goyal 2024-11-24 13:24:46 +05:30
parent 90b33c9648
commit 2dacaf7da2
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 12 additions and 2 deletions

View File

@ -113,11 +113,19 @@ def wrap_contents(tag_name, elem):
elem.append(wrapper)
def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath):
def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath, uuid):
# Apply vertical-align
for span in root.xpath('//span[@data-docx-vert]'):
wrap_contents(span.attrib.pop('data-docx-vert'), span)
for span in root.xpath(f'//*[@data-noteref-container="{uuid}"]'):
span.attrib.pop('data-noteref-container')
parent = span.getparent()
idx = parent.index(span)
if idx + 1 < len(parent) and (ns := parent[idx+1]) and hasattr(ns, 'get') and ns.get('data-noteref-container'):
if len(span) and not span[-1].tail:
span[-1].tail = '\xa0'
# Move <hr>s outside paragraphs, if possible.
pancestor = XPath('|'.join('ancestor::%s[1]' % x for x in ('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6')))
for hr in root.xpath('//span/hr'):

View File

@ -75,6 +75,7 @@ class Convert:
self.dest_dir = dest_dir or os.getcwd()
self.mi = self.docx.metadata
self.body = BODY()
self.uuid = uuid.uuid4().hex
self.theme = Theme(self.namespace)
self.settings = Settings(self.namespace)
self.tables = Tables(self.namespace)
@ -241,7 +242,7 @@ class Convert:
self.fields.polish_markup(self.object_map)
self.log.debug('Cleaning up redundant markup generated by Word')
self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover, self.namespace.XPath)
self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover, self.namespace.XPath, self.uuid)
return self.write(doc)
@ -713,6 +714,7 @@ class Convert:
l.set('role', 'doc-noteref')
text.add_elem(l)
ans.append(text.elem)
ans.set('data-noteref-container', self.uuid)
elif self.namespace.is_tag(child, 'w:tab'):
spaces = int(math.ceil((self.settings.default_tab_stop / 36) * 6))
text.add_elem(SPAN(NBSP * spaces))