diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 885d0621e0..751d4f8cd6 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -303,6 +303,9 @@ class CSSPreProcessor(object): class HTMLPreProcessor(object): PREPROCESS = [ + # Remove huge blocks of contiguous spaces as they slow down + # the following regexes pretty badly + (re.compile(r'\s{10000,}'), lambda m: ''), # Some idiotic HTML generators (Frontpage I'm looking at you) # Put all sorts of crap into <head>. This messes up lxml (re.compile(r']*>\n*(.*?)\n*', re.IGNORECASE|re.DOTALL),