From 229232f6cf949a4437fbd7c54e3396c36a18ee2c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 14 Nov 2013 13:32:11 +0530 Subject: [PATCH] DOCX Input: Nicer markup for paragraphs containing only a page break DOCX Input: Fix page-breaks created as a page break inside an otherwise empty paragraph not being rendered on conversion to PDF. Fixes #1249502 [Can't apply page breaks on word to pdf](https://bugs.launchpad.net/calibre/+bug/1249502) --- src/calibre/ebooks/docx/cleanup.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/calibre/ebooks/docx/cleanup.py b/src/calibre/ebooks/docx/cleanup.py index cea90f137f..941893ab4f 100644 --- a/src/calibre/ebooks/docx/cleanup.py +++ b/src/calibre/ebooks/docx/cleanup.py @@ -9,6 +9,7 @@ __copyright__ = '2013, Kovid Goyal ' import os from calibre.ebooks.docx.names import XPath +NBSP = '\xa0' def mergeable(previous, current): if previous.tail or current.tail: @@ -161,6 +162,17 @@ def cleanup_markup(log, root, styles, dest_dir, detect_cover): for span in root.xpath('//span[not(@class) and not(@id) and not(@style)]'): lift(span) + # Convert <p><br style="page-break-after:always"> </p> style page breaks + # into something the viewer will render as a page break + for p in root.xpath('//p[br[@style="page-break-after:always"]]'): + if len(p) == 1 and (not p[0].tail or not p[0].tail.strip()): + p.remove(p[0]) + prefix = p.get('style', '') + if prefix: + prefix += '; ' + p.set('style', prefix + 'page-break-after:always') + p.text = NBSP + if detect_cover: # Check if the first image in the document is possibly a cover img = root.xpath('//img[@src][1]')