Improve encoding detection.

This commit is contained in:
Kovid Goyal 2008-02-25 08:59:34 +00:00
parent 2ad3dcda4d
commit 1f710c1fd2

View File

@@ -1812,7 +1812,7 @@ class UnicodeDammit:
('^<\?.*encoding=[\'"](.*?)[\'"].*\?>')\ ('^<\?.*encoding=[\'"](.*?)[\'"].*\?>')\
.match(xml_data) .match(xml_data)
if xml_encoding_match is None: # By Kovid to use the content-type header in HTML files if xml_encoding_match is None: # By Kovid to use the content-type header in HTML files
xml_encoding_match = re.compile(r'<meta.*?http-equiv=[\'"]Content-type[\'"].*?content=[\'"].*?charset=(\S+).*?[\'"]', re.IGNORECASE).search(xml_data) xml_encoding_match = re.compile(r'<meta.*?content=[\'"].*?charset=(\S+).*?[\'"]', re.IGNORECASE).search(xml_data)
except: except:
xml_encoding_match = None xml_encoding_match = None
if xml_encoding_match: if xml_encoding_match: