pdftohtml preprocess rules work

2025-08-30 23:00:21 -04:00 · 2009-04-15 20:11:00 -04:00 · 2009-04-15 20:11:00 -04:00 · 575b021f48
commit 575b021f48
parent aafc6d9764
1 changed files with 1 additions and 1 deletions
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -73,7 +73,7 @@ class HTMLPreProcessor(object):
                  (re.compile(r'<br.*?>'), lambda match : '<p>'),
                  
                  # Un wrap lines
-                  (re.compile(r'(?<=\w)\s*</i>\s*<p.*?>\s*<i>\s*(?=\w)'), lambda match: ' '),
+                  (re.compile(r'(?<=\w)\s*</(i|b|u)>\s*<p.*?>\s*<(i|b|u)>\s*(?=\w)'), lambda match: ' '),
                  (re.compile(r'(?<=\w)\s*<p.*?>\s*(?=\w)', re.UNICODE), lambda match: ' '),
                  # Clean up spaces
                  (re.compile(u'(?<=\.|,|:|;|\?|!|”|"|\')[\s^ ]*(?=<)'), lambda match: ' '),