MOBI Input: Remove invalid tags of the form <xyz: ...>. Fixes #872883 (fatal error when converting MOBI to RTF).

This commit is contained in:
Kovid Goyal 2011-10-13 10:05:17 +05:30
parent 12c8b70336
commit 68aa6195e0

View File

@@ -325,6 +325,10 @@ class MobiReader(object):
self.processed_html = self.processed_html.replace('</</', '</') self.processed_html = self.processed_html.replace('</</', '</')
self.processed_html = re.sub(r'</([a-zA-Z]+)<', r'</\1><', self.processed_html = re.sub(r'</([a-zA-Z]+)<', r'</\1><',
self.processed_html) self.processed_html)
# Remove tags of the form <xyz: ...> as they can cause issues further
# along the pipeline
self.processed_html = re.sub(r'</{0,1}[a-zA-Z]+:\s+[^>]*>', '',
self.processed_html)
for pat in ENCODING_PATS: for pat in ENCODING_PATS:
self.processed_html = pat.sub('', self.processed_html) self.processed_html = pat.sub('', self.processed_html)