DOCX Input: Line breaks at the end of a paragraph

DOCX Input: Fix a single line break at the end of a paragraph not being
rendered as a blank line.
This commit is contained in:
Kovid Goyal 2013-09-08 12:40:52 +05:30
parent 7a4f87b992
commit 604530e569

View File

@ -8,8 +8,12 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os
from lxml.html.builder import P
from calibre.ebooks.docx.names import XPath
NBSP = '\xa0'
def mergeable(previous, current):
if previous.tail or current.tail:
return False
@ -161,6 +165,16 @@ def cleanup_markup(log, root, styles, dest_dir, detect_cover):
for span in root.xpath('//span[not(@class) and not(@id)]'):
lift(span)
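# If a paragraph ends with a <br>, that <br> is not rendered in HTML, but it
# is rendered in Word, so remove it and insert a blank paragraph (containing
# only a non-breaking space) right after the paragraph instead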
for br in root.xpath('//p/node()[position()=last()]/self::br'):
if not br.tail:
p = br.getparent()
p.remove(br)
gp = p.getparent()
blank = P(NBSP)
gp.insert(gp.index(p)+1, blank)
if detect_cover:
# Check if the first image in the document is possibly a cover
img = root.xpath('//img[@src][1]')