From efee7be50d5475822406f239c1f7ef0a7dd1a0c0 Mon Sep 17 00:00:00 2001 From: Wolfgang Maier Date: Wed, 29 Apr 2020 00:44:15 +0200 Subject: [PATCH] Bypass decoding errors during html detection Decoding may fail on the header chunk if the file is utf-8 encoded and the chunk ends in the middle of a multi-byte sequence. --- src/calibre/ebooks/html/input.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index c87af5dc7a..011b53dd35 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -115,7 +115,7 @@ class HTMLFile(object): encoding = detect_xml_encoding(src)[1] if encoding: try: - header = header.decode(encoding) + header = header.decode(encoding, errors='ignore') except ValueError: pass self.is_binary = level > 0 and not bool(self.HTML_PAT.search(header))