diff --git a/src/libprs500/ebooks/BeautifulSoup.py b/src/libprs500/ebooks/BeautifulSoup.py index 5f23ae5935..628e4f148b 100644 --- a/src/libprs500/ebooks/BeautifulSoup.py +++ b/src/libprs500/ebooks/BeautifulSoup.py @@ -1676,7 +1676,7 @@ class UnicodeDammit: for proposedEncoding in (documentEncoding, sniffedEncoding): u = self._convertFrom(proposedEncoding) if u: break - + # If no luck and we have auto-detection library, try that: if not u and chardet and not isinstance(self.markup, unicode): u = self._convertFrom(chardet.detect(self.markup)['encoding']) @@ -1804,6 +1804,8 @@ class UnicodeDammit: xml_encoding_match = re.compile \ ('^<\?.*encoding=[\'"](.*?)[\'"].*\?>')\ .match(xml_data) + if xml_encoding_match is None: # By Kovid to use the content-type header in HTML files + xml_encoding_match = re.compile(r'