From c2d85e81b772e54c7f946392ead78b4ed1c0b607 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 28 Feb 2011 08:45:31 -0700 Subject: [PATCH] MOBI Input: Ignore all ASCII control codes except CR, NL and Tab. Fixes #9219 (Instapaper magazine can't be shown in ebook-viewer) --- src/calibre/ebooks/mobi/reader.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 9c52a18691..f1b1b1ef63 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -18,6 +18,7 @@ from calibre import xml_entity_to_unicode, CurrentDir, entity_to_unicode, \ replace_entities from calibre.utils.filenames import ascii_filename from calibre.utils.date import parse_date +from calibre.utils.cleantext import clean_ascii_chars from calibre.ptempfile import TemporaryDirectory from calibre.ebooks import DRMError from calibre.ebooks.chardet import ENCODING_PATS @@ -323,6 +324,7 @@ class MobiReader(object): self.cleanup_html() self.log.debug('Parsing HTML...') + self.processed_html = clean_ascii_chars(self.processed_html) try: root = html.fromstring(self.processed_html) if len(root.xpath('//html')) > 5: