From d5bf14f1d88b1e3f7cb57bc0a84f1c146636a09d Mon Sep 17 00:00:00 2001
From: John Schember
Date: Mon, 15 Jun 2009 20:27:17 -0400
Subject: [PATCH] Fix bug 2587: Use WayneD solution because it's cleaner.

---
 src/calibre/ebooks/conversion/preprocess.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 8081dce325..816dd54ade 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -21,9 +21,7 @@ _span_pat = re.compile('', re.DOTALL|re.IGNORECASE)
 def sanitize_head(match):
     x = match.group(1)
     x = _span_pat.sub('', x)
-    x = ('\n%s' % x) if not x.startswith('\n') else x
-    x += '\n' if not x.endswith('\n') else ''
-    return '<head>%s</head>' % x
+    return '<head>\n%s\n</head>' % x
 
 def chap_head(match):
     chap = match.group('chap')
@@ -86,7 +84,7 @@ class HTMLPreProcessor(object):
     PREPROCESS = [
                   # Some idiotic HTML generators (Frontpage I'm looking at you)
                   # Put all sorts of crap into <head>. This messes up lxml
-                  (re.compile(r'<head[^>]*>(.*?)</head>', re.IGNORECASE|re.DOTALL),
+                  (re.compile(r'<head[^>]*>\n*(.*?)\n*</head>', re.IGNORECASE|re.DOTALL),
                    sanitize_head),
                   # Convert all entities, since lxml doesn't handle them well
                   (re.compile(r'&(\S+?);'), convert_entities),