From 5f6ff5609db2743f4fc2b218cfa36941c13e5079 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 5 Nov 2010 18:59:20 -0600 Subject: [PATCH] Fix bug in regex to extract charset from <meta> tags --- src/calibre/ebooks/chardet/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/chardet/__init__.py b/src/calibre/ebooks/chardet/__init__.py index 8c101f9de5..dd279c6559 100644 --- a/src/calibre/ebooks/chardet/__init__.py +++ b/src/calibre/ebooks/chardet/__init__.py @@ -32,7 +32,7 @@ def detect(aBuf): ENCODING_PATS = [ re.compile(r'<\?[^<>]+encoding\s*=\s*[\'"](.*?)[\'"][^<>]*>', re.IGNORECASE), - re.compile(r'''<meta\s+?[^<>]+?content\s*=\s*['"][^'"]*?charset=([-a-z0-9]+)[^'"]*?['"][^<>]*>''', + re.compile(r'''<meta\s+?[^<>]*?content\s*=\s*['"][^'"]*?charset=([-a-z0-9]+)[^'"]*?['"][^<>]*>''', re.IGNORECASE) ] ENTITY_PATTERN = re.compile(r'&(\S+?);')