mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #745428 (Calibre doesn't convert encoding correctly.)
This commit is contained in:
parent
777c5ec251
commit
672d91d454
@ -100,6 +100,12 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
|
||||
try:
|
||||
if encoding.lower().strip() == 'macintosh':
|
||||
encoding = 'mac-roman'
|
||||
if encoding.lower().replace('_', '-').strip() in (
|
||||
'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
|
||||
'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
|
||||
# Microsoft Word exports to HTML with encoding incorrectly set to
|
||||
# gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
|
||||
encoding = 'gbk'
|
||||
raw = raw.decode(encoding, 'replace')
|
||||
except LookupError:
|
||||
encoding = 'utf-8'
|
||||
@ -110,11 +116,6 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
|
||||
if resolve_entities:
|
||||
raw = substitute_entites(raw)
|
||||
|
||||
if encoding and encoding.lower().replace('_', '-').strip() in (
|
||||
'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
|
||||
'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
|
||||
# Microsoft Word exports to HTML with encoding incorrectly set to
|
||||
# gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
|
||||
encoding = 'gbk'
|
||||
|
||||
|
||||
return raw, encoding
|
||||
|
Loading…
x
Reference in New Issue
Block a user