diff --git a/src/calibre/ebooks/docx/cleanup.py b/src/calibre/ebooks/docx/cleanup.py index 322e44ba9a..5507a20772 100644 --- a/src/calibre/ebooks/docx/cleanup.py +++ b/src/calibre/ebooks/docx/cleanup.py @@ -113,11 +113,19 @@ def wrap_contents(tag_name, elem): elem.append(wrapper) -def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath): +def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath, uuid): # Apply vertical-align for span in root.xpath('//span[@data-docx-vert]'): wrap_contents(span.attrib.pop('data-docx-vert'), span) + for span in root.xpath(f'//*[@data-noteref-container="{uuid}"]'): + span.attrib.pop('data-noteref-container') + parent = span.getparent() + idx = parent.index(span) + if idx + 1 < len(parent) and (ns := parent[idx+1]) and hasattr(ns, 'get') and ns.get('data-noteref-container'): + if len(span) and not span[-1].tail: + span[-1].tail = '\xa0' + # Move
<hr>s outside paragraphs, if possible. pancestor = XPath('|'.join('ancestor::%s[1]' % x for x in ('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) for hr in root.xpath('//span/hr'): diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index a41daae32e..54b9578573 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -75,6 +75,7 @@ class Convert: self.dest_dir = dest_dir or os.getcwd() self.mi = self.docx.metadata self.body = BODY() + self.uuid = uuid.uuid4().hex self.theme = Theme(self.namespace) self.settings = Settings(self.namespace) self.tables = Tables(self.namespace) @@ -241,7 +242,7 @@ class Convert: self.fields.polish_markup(self.object_map) self.log.debug('Cleaning up redundant markup generated by Word') - self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover, self.namespace.XPath) + self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover, self.namespace.XPath, self.uuid) return self.write(doc) @@ -713,6 +714,7 @@ class Convert: l.set('role', 'doc-noteref') text.add_elem(l) ans.append(text.elem) + ans.set('data-noteref-container', self.uuid) elif self.namespace.is_tag(child, 'w:tab'): spaces = int(math.ceil((self.settings.default_tab_stop / 36) * 6)) text.add_elem(SPAN(NBSP * spaces))