Handle spurious encoding match when detecting encoding for diff

This commit is contained in:
Kovid Goyal 2014-12-14 12:35:53 +05:30
parent 2c9ca9ea7e
commit cf01a5b969

View File

@@ -108,9 +108,11 @@ def get_decoded_raw(name):
if syntax in {'html', 'xml'}:
raw = xml_to_unicode(raw, verbose=True)[0]
else:
-m = re.search(r"coding[:=]\s*([-\w.]+)", raw[:1024], flags=re.I)
+m = re.search(br"coding[:=]\s*([-\w.]+)", raw[:1024], flags=re.I)
if m is not None and m.group(1) != '8bit':
enc = m.group(1)
+if enc == b'unicode':
+enc = 'utf-8'
else:
enc = force_encoding(raw, verbose=True)
try: