Fix #5567 (charset in meta http-equiv Content-Type not parsed if appearing on next line)

This commit is contained in:
Kovid Goyal 2010-05-20 21:32:59 -06:00
parent ae3f6ea4d3
commit b2d4f0d7be

View File

@@ -32,7 +32,7 @@ def detect(aBuf):
 ENCODING_PATS = [
 re.compile(r'<\?[^<>]+encoding=[\'"](.*?)[\'"][^<>]*>',
 re.IGNORECASE),
-re.compile(r'<meta.*?content=[\'"].*?charset=([^\s\'"]+).*?[\'"].*?>',
+re.compile(r'''<meta\s+?[^<>]+?content=['"][^'"]*?charset=([-a-z0-9]+)[^'"]*?['"][^<>]*>''',
 re.IGNORECASE)
 ]
 ENTITY_PATTERN = re.compile(r'&(\S+?);')