From 229232f6cf949a4437fbd7c54e3396c36a18ee2c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 14 Nov 2013 13:32:11 +0530
Subject: [PATCH] DOCX Input: Nicer markup for paragraphs containing only a
 page break

DOCX Input: Fix a page break that is placed inside an otherwise empty
paragraph not being rendered on conversion to PDF. Fixes #1249502 [Can't apply page breaks on word to pdf](https://bugs.launchpad.net/calibre/+bug/1249502)
---
 src/calibre/ebooks/docx/cleanup.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)
diff --git a/src/calibre/ebooks/docx/cleanup.py b/src/calibre/ebooks/docx/cleanup.py
index cea90f137f..941893ab4f 100644
--- a/src/calibre/ebooks/docx/cleanup.py
+++ b/src/calibre/ebooks/docx/cleanup.py
@@ -9,6 +9,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 import os
 
 from calibre.ebooks.docx.names import XPath
+NBSP = '\xa0'
 
 def mergeable(previous, current):
     if previous.tail or current.tail:
@@ -161,6 +162,17 @@ def cleanup_markup(log, root, styles, dest_dir, detect_cover):
     for span in root.xpath('//span[not(@class) and not(@id) and not(@style)]'):
         lift(span)
 
+    # Convert page breaks of the form <p><br style="page-break-after:always"> </p>
+    # into a page-break-after style on the <p> itself, which the viewer renders
+    for p in root.xpath('//p[br[@style="page-break-after:always"]]'):
+        if len(p) == 1 and (not p[0].tail or not p[0].tail.strip()):
+            p.remove(p[0])
+            prefix = p.get('style', '')
+            if prefix:
+                prefix += '; '
+            p.set('style', prefix + 'page-break-after:always')
+            p.text = NBSP
+
     if detect_cover:
         # Check if the first image in the document is possibly a cover
         img = root.xpath('//img[@src][1]')