diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 2dc404e586..1cbec251e3 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -130,7 +130,11 @@ class HTMLPreProcessor(object): # Have paragraphs show better (re.compile(r''), lambda match : '

'), # Clean up spaces - (re.compile(u'(?<=[\.,:;\?!”"\'])[\s^ ]*(?=<)'), lambda match: ' '), + (re.compile(u'(?<=[\.,;\?!”"\'])[\s^ ]*(?=<)'), lambda match: ' '), + # Connect paragraphs split by - + (re.compile(u'(?<=[^\s][-–])[\s]*(

)*[\s]*(

)*\s*(?=[^\s])'), lambda match: ''), + # Remove - that splits words + (re.compile(u'(?<=[^\s])[-–]+(?=[^\s])'), lambda match: ''), # Add space before and after italics (re.compile(u'(?'), lambda match: ' '), (re.compile(r'(?=\w)'), lambda match: ' '),