From 70b99872c670aeb11f2cfa11ed8ff82a79f129c2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 20 Feb 2009 13:07:32 -0800 Subject: [PATCH] EPUB Output: Be less aggressive when stripping invalid HTML constructs inserted by MS Word. Fixes regression in the Time recipe. --- src/calibre/ebooks/html.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py index 1c15973d3b..1f1e6b94b1 100644 --- a/src/calibre/ebooks/html.py +++ b/src/calibre/ebooks/html.py @@ -331,7 +331,8 @@ class PreProcessor(object): # Convert all entities, since lxml doesn't handle them well (re.compile(r'&(\S+?);'), convert_entities), # Remove the ]*>'), lambda match: ''), + (re.compile(r'', re.IGNORECASE), + lambda match: ''), ] # Fix pdftohtml markup