PDF preprocessing rule additions.

2025-07-07 18:24:30 -04:00 · 2009-06-14 20:33:55 -04:00 · 2009-06-14 20:33:55 -04:00 · ab9c4b39f0
commit ab9c4b39f0
parent 0c3e217e41
1 changed file with 5 additions and 1 deletion
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -130,7 +130,11 @@ class HTMLPreProcessor(object):
                  # Have paragraphs show better
                  (re.compile(r'<br.*?>'), lambda match : '<p>'),
                  # Clean up spaces
-                  (re.compile(u'(?<=[\.,:;\?!”"\'])[\s^ ]*(?=<)'), lambda match: ' '),
+                  (re.compile(u'(?<=[\.,;\?!”"\'])[\s^ ]*(?=<)'), lambda match: ' '),
                  # Connect paragraphs split by -
                  (re.compile(u'(?<=[^\s][-–])[\s]*(</p>)*[\s]*(<p>)*\s*(?=[^\s])'), lambda match: ''),
                  # Remove - that splits words
                  (re.compile(u'(?<=[^\s])[-–]+(?=[^\s])'), lambda match: ''),
                  # Add space before and after italics
                  (re.compile(u'(?<!“)<i>'), lambda match: ' <i>'),
                  (re.compile(r'</i>(?=\w)'), lambda match: '</i> '),