diff --git a/src/calibre/ebooks/chardet.py b/src/calibre/ebooks/chardet.py
index 61aefafdac..a550b4265d 100644
--- a/src/calibre/ebooks/chardet.py
+++ b/src/calibre/ebooks/chardet.py
@@ -12,10 +12,10 @@ import re, codecs
 ENCODING_PATS = [
     # XML declaration
     re.compile(r'<\?[^<>]+encoding\s*=\s*[\'"](.*?)[\'"][^<>]*>', re.IGNORECASE),
-    # HTML 4 Pragma directive
-    re.compile(r'''<meta\s+?[^<>]*?content\s*=\s*['"][^'"]*?charset=([-_a-z0-9]+)[^'"]*?['"][^<>]*>(?:\s*</meta>){0,1}''', re.IGNORECASE),
     # HTML 5 charset
     re.compile(r'''<meta\s+charset=['"]([-_a-z0-9]+)['"][^<>]*>(?:\s*</meta>){0,1}''', re.IGNORECASE),
+    # HTML 4 Pragma directive
+    re.compile(r'''<meta\s+?[^<>]*?content\s*=\s*['"][^'"]*?charset=([-_a-z0-9]+)[^'"]*?['"][^<>]*>(?:\s*</meta>){0,1}''', re.IGNORECASE),
 ]
 
 ENTITY_PATTERN = re.compile(r'&(\S+?);')