From 9d8ad144ffee0a713ea7b7dc125a94f3898111ba Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 8 Dec 2019 07:45:31 +0530 Subject: [PATCH] DOCX Input: Fix incorrect font sizes for footnote references in paragraphs that have text with multiple font sizes. Fixes #1855403 [Private bug](https://bugs.launchpad.net/calibre/+bug/1855403) --- src/calibre/ebooks/docx/cleanup.py | 13 +++++++++++++ src/calibre/ebooks/docx/to_html.py | 11 +++-------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/calibre/ebooks/docx/cleanup.py b/src/calibre/ebooks/docx/cleanup.py index 0ff083c40c..8f79458f4c 100644 --- a/src/calibre/ebooks/docx/cleanup.py +++ b/src/calibre/ebooks/docx/cleanup.py @@ -104,7 +104,20 @@ def before_count(root, tag, limit=10): return limit +def wrap_contents(tag_name, elem): + wrapper = elem.makeelement(tag_name) + wrapper.text, elem.text = elem.text, '' + for child in elem: + elem.remove(child) + wrapper.append(child) + elem.append(wrapper) + + def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath): + # Apply vertical-align + for span in root.xpath('//span[@data-docx-vert]'): + wrap_contents(span.attrib.pop('data-docx-vert'), span) + # Move
<hr>s outside paragraphs, if possible. pancestor = XPath('|'.join('ancestor::%s[1]' % x for x in ('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) for hr in root.xpath('//span/hr'): diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index 59f40d7745..081095f2ad 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -10,7 +10,7 @@ from collections import OrderedDict, defaultdict from lxml import html from lxml.html.builder import ( - HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR, DIV, SUP, A, DT, DL, DD, H1) + HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR, DIV, A, DT, DL, DD, H1) from calibre import guess_type from calibre.ebooks.docx.container import DOCX, fromstring @@ -684,7 +684,7 @@ class Convert(object): elif self.namespace.is_tag(child, 'w:footnoteReference') or self.namespace.is_tag(child, 'w:endnoteReference'): anchor, name = self.footnotes.get_ref(child) if anchor and name: - l = A(SUP(name, id='back_%s' % anchor), href='#' + anchor, title=name) + l = A(name, id='back_%s' % anchor, href='#' + anchor, title=name) l.set('class', 'noteref') text.add_elem(l) ans.append(text.elem) @@ -703,12 +703,7 @@ class Convert(object): style = self.styles.resolve_run(run) if style.vert_align in {'superscript', 'subscript'}: if ans.text or len(ans): - ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup' - try: - if ans[0].tag == 'a' and ans[0].get('class', 'noteref') and ans[0][0].tag == 'sup' and ans.tag == 'sup': - ans[0][0].tag = 'span' - except Exception: - pass + ans.set('data-docx-vert', 'sup' if style.vert_align == 'superscript' else 'sub') if style.lang is not inherit: lang = html_lang(style.lang) if lang is not None and lang != self.doc_lang: