Nicer fix for trailing <br>

Also handles headings and list items with trailing <br>s
2025-07-09 03:04:10 -04:00 · 2013-09-08 13:11:22 +05:30 · 2013-09-08 13:11:22 +05:30 · e9b531584f
commit e9b531584f
parent 604530e569
1 changed file with 3 additions and 9 deletions
--- a/src/calibre/ebooks/docx/cleanup.py
+++ b/src/calibre/ebooks/docx/cleanup.py
@@ -8,8 +8,6 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 import os
 from lxml.html.builder import P
 from calibre.ebooks.docx.names import XPath
 NBSP = '\xa0'
@@ -166,14 +164,10 @@ def cleanup_markup(log, root, styles, dest_dir, detect_cover):
        lift(span)
    # If a paragraph ends with a <br>, that <br> is not rendered in HTML, but
-    # it is in Word, so move it out
+    # it is in Word, so add a trailing space to ensure it is rendered.
-    for br in root.xpath('//p/node()[position()=last()]/self::br'):
+    for br in root.xpath('//*[contains("p,h1,h2,h3,h4,h5,h6,li", name())]/node()[position()=last()]/self::br'):
        if not br.tail:
-            p = br.getparent()
+            br.tail = NBSP
-            p.remove(br)
-            gp = p.getparent()
-            blank = P(NBSP)
-            gp.insert(gp.index(p)+1, blank)
    if detect_cover:
        # Check if the first image in the document is possibly a cover