Improve encoding detection.

This commit is contained in:
Kovid Goyal 2008-02-25 08:59:34 +00:00
parent 2ad3dcda4d
commit 1f710c1fd2

View File

@@ -1812,7 +1812,7 @@ class UnicodeDammit:
('^<\?.*encoding=[\'"](.*?)[\'"].*\?>')\
.match(xml_data)
if xml_encoding_match is None: # By Kovid to use the content-type header in HTML files
-xml_encoding_match = re.compile(r'<meta.*?http-equiv=[\'"]Content-type[\'"].*?content=[\'"].*?charset=(\S+).*?[\'"]', re.IGNORECASE).search(xml_data)
+xml_encoding_match = re.compile(r'<meta.*?content=[\'"].*?charset=(\S+).*?[\'"]', re.IGNORECASE).search(xml_data)
except:
xml_encoding_match = None
if xml_encoding_match: