From 2a6e7ea82d6ad80868e9ba1b51359b7a12f0d7b7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 15 Jul 2019 22:34:20 +0530 Subject: [PATCH] MOBI Input: Fix conversion of MOBI files with malformed markup and embedded <guide> tags not working. Fixes #1836548 [Private bug](https://bugs.launchpad.net/calibre/+bug/1836548) --- src/calibre/ebooks/mobi/reader/mobi6.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/calibre/ebooks/mobi/reader/mobi6.py b/src/calibre/ebooks/mobi/reader/mobi6.py index a5357c842c..6400295a39 100644 --- a/src/calibre/ebooks/mobi/reader/mobi6.py +++ b/src/calibre/ebooks/mobi/reader/mobi6.py @@ -197,6 +197,10 @@ class MobiReader(object): from html5_parser import parse self.log.warning('Malformed markup, parsing using html5-parser') self.processed_html = strip_encoding_declarations(self.processed_html) + # These trip up the html5 parser causing all content to be placed + # under the <guide> tag + self.processed_html = re.sub(r'<metadata>.+?</metadata>', '', self.processed_html, flags=re.I) + self.processed_html = re.sub(r'<guide>.+?</guide>', '', self.processed_html, flags=re.I) try: root = parse(self.processed_html, maybe_xhtml=False, keep_doctype=False, sanitize_names=True) except Exception: