From f3c10853850ca274c9e4a06168f2b96431177a60 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 26 Nov 2010 10:01:37 -0700 Subject: [PATCH] MOBI Input: Fix bug in cleanup regex that broke parsing of escaped XML declarations. Fixes #7585 (eBook Viewer XML incompatibility) --- src/calibre/ebooks/mobi/reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 309023ede9..f80d15359c 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -475,8 +475,8 @@ class MobiReader(object): self.processed_html = self.processed_html.replace('\r\n', '\n') self.processed_html = self.processed_html.replace('> <', '>\n<') self.processed_html = self.processed_html.replace(']*>', '', self.processed_html) - # Swap inline and block level elements, and order block level elements according to priority + self.processed_html = re.sub(r'<\?xml[^>]*>', '', self.processed_html) + # Swap inline and block level elements, and order block level elements according to priority # - lxml and beautifulsoup expect/assume a specific order based on xhtml spec self.processed_html = re.sub(r'(?i)(?P(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P]*>)', '\g'+'\g', self.processed_html) self.processed_html = re.sub(r'(?i)(?P]*>)\s*(?P(\s*){1,})', '\g'+'\g', self.processed_html)