EPUB Output:Be less aggressive when stripping invalid HTML constructs inserted by MS Word. Fixes regression in the Time recipe.

This commit is contained in:
Kovid Goyal 2009-02-20 13:07:32 -08:00
parent e6473047fe
commit 70b99872c6

View File

@ -331,7 +331,8 @@ class PreProcessor(object):
# Convert all entities, since lxml doesn't handle them well
(re.compile(r'&(\S+?);'), convert_entities),
# Remove the <![if/endif tags inserted by everybody's darling, MS Word
(re.compile(r'(?i)<{0,1}!\[(end){0,1}if[^>]*>'), lambda match: ''),
(re.compile(r'</{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE),
lambda match: ''),
]
# Fix pdftohtml markup