From 604530e569efb3a7899f626fd6fb630630486757 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 8 Sep 2013 12:40:52 +0530 Subject: [PATCH] DOCX Input: Line breaks at the end of a paragraph DOCX Input: Fix a single line break at the end of a paragraph not being rendered as a blank line. --- src/calibre/ebooks/docx/cleanup.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/calibre/ebooks/docx/cleanup.py b/src/calibre/ebooks/docx/cleanup.py index 33f869faca..60db64b621 100644 --- a/src/calibre/ebooks/docx/cleanup.py +++ b/src/calibre/ebooks/docx/cleanup.py @@ -8,8 +8,12 @@ __copyright__ = '2013, Kovid Goyal ' import os +from lxml.html.builder import P + from calibre.ebooks.docx.names import XPath +NBSP = '\xa0' + def mergeable(previous, current): if previous.tail or current.tail: return False @@ -161,6 +165,16 @@ def cleanup_markup(log, root, styles, dest_dir, detect_cover): for span in root.xpath('//span[not(@class) and not(@id)]'): lift(span) + # If a paragraph ends with a
<br>, that <br> is not rendered in HTML, but + # it is in Word, so move it out + for br in root.xpath('//p/node()[position()=last()]/self::br'): + if not br.tail: + p = br.getparent() + p.remove(br) + gp = p.getparent() + blank = P(NBSP) + gp.insert(gp.index(p)+1, blank) + if detect_cover: # Check if the first image in the document is possibly a cover img = root.xpath('//img[@src][1]')