diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index c55b94cac5..c4845f9443 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -476,6 +476,10 @@ class MobiReader(object): self.processed_html = self.processed_html.replace('> <', '>\n<') self.processed_html = self.processed_html.replace(']*>', '', self.processed_html) + # Swap inline and block level elements, and order block level elements according to priority + # - lxml and beautifulsoup expect/assume a specific order based on xhtml spec + self.processed_html = re.sub(r'(?i)(?P(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P]*>)', '\g'+'\g', self.processed_html) + self.processed_html = re.sub(r'(?i)(?P]*>)\s*(?P(\s*){1,})', '\g'+'\g', self.processed_html) def remove_random_bytes(self, html): return re.sub('\x14|\x15|\x19|\x1c|\x1d|\xef|\x12|\x13|\xec|\x08',