From efee7be50d5475822406f239c1f7ef0a7dd1a0c0 Mon Sep 17 00:00:00 2001
From: Wolfgang Maier <maierw@posteo.de>
Date: Wed, 29 Apr 2020 00:44:15 +0200
Subject: [PATCH] Bypass decoding errors during html detection

Decoding may fail on the header chunk if the file is UTF-8 encoded and the chunk boundary falls in the middle of a multi-byte sequence, leaving a truncated character at the end of the chunk.
---
 src/calibre/ebooks/html/input.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py
index c87af5dc7a..011b53dd35 100644
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@@ -115,7 +115,7 @@ class HTMLFile(object):
                 encoding = detect_xml_encoding(src)[1]
                 if encoding:
                     try:
-                        header = header.decode(encoding)
+                        header = header.decode(encoding, errors='ignore')
                     except ValueError:
                         pass
                 self.is_binary = level > 0 and not bool(self.HTML_PAT.search(header))