Bypass decoding errors during HTML detection

Strict decoding of the header chunk may fail if the file is UTF-8 encoded and the chunk boundary falls in the middle of a multi-byte character, leaving an incomplete byte sequence at the end of the chunk.
This commit is contained in:
Wolfgang Maier 2020-04-29 00:44:15 +02:00 committed by GitHub
parent 529119caad
commit efee7be50d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -115,7 +115,7 @@ class HTMLFile(object):
encoding = detect_xml_encoding(src)[1]
if encoding:
try:
-header = header.decode(encoding)
+header = header.decode(encoding, errors='ignore')
except ValueError:
pass
self.is_binary = level > 0 and not bool(self.HTML_PAT.search(header))