From ffb5b6f8b33ba246b4aeacd7b46e0d834c266f51 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 24 Nov 2010 23:19:06 -0700 Subject: [PATCH] MOBI Input: Workaround MOBI files that have invalid <p><blockquote> constructs (<blockquote> cannot be nested inside <p>
in XHTML) --- src/calibre/ebooks/mobi/reader.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index c4845f9443..309023ede9 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -480,6 +480,9 @@ class MobiReader(object): # - lxml and beautifulsoup expect/assume a specific order based on xhtml spec self.processed_html = re.sub(r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\g<para>'+'\g<styletags>', self.processed_html) self.processed_html = re.sub(r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})', '\g<styletags>'+'\g<para>', self.processed_html) + self.processed_html = re.sub(r'(?i)(?P<blockquote>(</blockquote[^>]*>\s*){1,})(?P<para></p[^>]*>)', '\g<para>'+'\g<blockquote>', self.processed_html) + self.processed_html = re.sub(r'(?i)(?P<para><p[^>]*>)\s*(?P<blockquote>(<blockquote[^>]*>\s*){1,})', '\g<blockquote>'+'\g<para>', self.processed_html) + def remove_random_bytes(self, html): return re.sub('\x14|\x15|\x19|\x1c|\x1d|\xef|\x12|\x13|\xec|\x08',