diff --git a/src/calibre/ebooks/docx/cleanup.py b/src/calibre/ebooks/docx/cleanup.py index 33f869faca..60db64b621 100644 --- a/src/calibre/ebooks/docx/cleanup.py +++ b/src/calibre/ebooks/docx/cleanup.py @@ -8,8 +8,12 @@ __copyright__ = '2013, Kovid Goyal ' import os +from lxml.html.builder import P + from calibre.ebooks.docx.names import XPath +NBSP = '\xa0' + def mergeable(previous, current): if previous.tail or current.tail: return False @@ -161,6 +165,16 @@ def cleanup_markup(log, root, styles, dest_dir, detect_cover): for span in root.xpath('//span[not(@class) and not(@id)]'): lift(span) + # If a paragraph ends with a
<br>, that <br>
is not rendered in HTML, but + # it is in Word, so move it out + for br in root.xpath('//p/node()[position()=last()]/self::br'): + if not br.tail: + p = br.getparent() + p.remove(br) + gp = p.getparent() + blank = P(NBSP) + gp.insert(gp.index(p)+1, blank) + if detect_cover: # Check if the first image in the document is possibly a cover img = root.xpath('//img[@src][1]')