diff --git a/src/calibre/ebooks/docx/cleanup.py b/src/calibre/ebooks/docx/cleanup.py
index 322e44ba9a..5507a20772 100644
--- a/src/calibre/ebooks/docx/cleanup.py
+++ b/src/calibre/ebooks/docx/cleanup.py
@@ -113,11 +113,19 @@ def wrap_contents(tag_name, elem):
elem.append(wrapper)
-def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath):
+def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath, uuid):
# Apply vertical-align
for span in root.xpath('//span[@data-docx-vert]'):
wrap_contents(span.attrib.pop('data-docx-vert'), span)
+ for span in root.xpath(f'//*[@data-noteref-container="{uuid}"]'):
+ span.attrib.pop('data-noteref-container')
+ parent = span.getparent()
+ idx = parent.index(span)
+ if idx + 1 < len(parent) and (ns := parent[idx+1]) and hasattr(ns, 'get') and ns.get('data-noteref-container'):
+ if len(span) and not span[-1].tail:
+ span[-1].tail = '\xa0'
+
# Move <hr>s outside paragraphs, if possible.
pancestor = XPath('|'.join('ancestor::%s[1]' % x for x in ('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6')))
for hr in root.xpath('//span/hr'):
diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py
index a41daae32e..54b9578573 100644
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@@ -75,6 +75,7 @@ class Convert:
self.dest_dir = dest_dir or os.getcwd()
self.mi = self.docx.metadata
self.body = BODY()
+ self.uuid = uuid.uuid4().hex
self.theme = Theme(self.namespace)
self.settings = Settings(self.namespace)
self.tables = Tables(self.namespace)
@@ -241,7 +242,7 @@ class Convert:
self.fields.polish_markup(self.object_map)
self.log.debug('Cleaning up redundant markup generated by Word')
- self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover, self.namespace.XPath)
+ self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover, self.namespace.XPath, self.uuid)
return self.write(doc)
@@ -713,6 +714,7 @@ class Convert:
l.set('role', 'doc-noteref')
text.add_elem(l)
ans.append(text.elem)
+ ans.set('data-noteref-container', self.uuid)
elif self.namespace.is_tag(child, 'w:tab'):
spaces = int(math.ceil((self.settings.default_tab_stop / 36) * 6))
text.add_elem(SPAN(NBSP * spaces))