From 575b021f48ea9cab351648999bc69737ea2aafa0 Mon Sep 17 00:00:00 2001 From: John Schember Date: Wed, 15 Apr 2009 20:11:00 -0400 Subject: [PATCH] pdftohtml preprocess rules work --- src/calibre/ebooks/conversion/preprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 6b58d2d18d..632a7a3291 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -73,7 +73,7 @@ class HTMLPreProcessor(object): (re.compile(r''), lambda match : '

'), # Un wrap lines - (re.compile(r'(?<=\w)\s*\s*\s*\s*(?=\w)'), lambda match: ' '), + (re.compile(r'(?<=\w)\s*\s*\s*<(i|b|u)>\s*(?=\w)'), lambda match: ' '), (re.compile(r'(?<=\w)\s*\s*(?=\w)', re.UNICODE), lambda match: ' '), # Clean up spaces (re.compile(u'(?<=\.|,|:|;|\?|!|”|"|\')[\s^ ]*(?=<)'), lambda match: ' '),