From b56891cdaecd71deeb159ffbb5ca03e3827ed441 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 7 Feb 2012 14:10:18 +0530 Subject: [PATCH] MOBI Input: Handle files that have spurious closing </body> and/or </html> tags in their markup. Fixes #925833 (prc file fails to read or convert) --- src/calibre/ebooks/mobi/reader.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 347e4fda3a..7e69fc89d0 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -516,6 +516,17 @@ class MobiReader(object): self.processed_html = re.sub(r'(?i)(?P]*>)\s*(?P(\s*){1,})', '\g'+'\g', self.processed_html) self.processed_html = re.sub(r'(?i)(?P
(]*>\s*){1,})(?P]*>)', '\g'+'\g
', self.processed_html) self.processed_html = re.sub(r'(?i)(?P]*>)\s*(?P
(<(blockquote|div)[^>]*>\s*){1,})', '\g
'+'\g', self.processed_html) + bods = htmls = 0 + for x in re.finditer(ur'|', self.processed_html): + if x == '': bods +=1 + else: htmls += 1 + if bods > 1 and htmls > 1: + break + if bods > 1: + self.processed_html = self.processed_html.replace('', '') + if htmls > 1: + self.processed_html = self.processed_html.replace('', '') + def remove_random_bytes(self, html):