mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
We can no longer rely on the confidence value from chardet, since it is always 1 after the move to the C-based chardet library.
So, for files where we assume UTF-8, use UTF-8 if no explicit encoding is found and the data decodes cleanly as UTF-8. Fixes #1993029 [Apostrophe in book title turns into "à€™" upon import](https://bugs.launchpad.net/calibre/+bug/1993029)
This commit is contained in:
parent
74208b5330
commit
ad34b0ea3b
@ -154,6 +154,11 @@ def detect_xml_encoding(raw, verbose=False, assume_utf8=False):
|
||||
encoding = encoding.decode('ascii', 'replace')
|
||||
break
|
||||
if encoding is None:
|
||||
if assume_utf8:
|
||||
try:
|
||||
return raw.decode('utf-8'), 'utf-8'
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
encoding = force_encoding(raw, verbose, assume_utf8=assume_utf8)
|
||||
if encoding.lower().strip() == 'macintosh':
|
||||
encoding = 'mac-roman'
|
||||
|
Loading…
x
Reference in New Issue
Block a user