Fix #745428 (Calibre doesn't convert encoding correctly.)

This commit is contained in:
Kovid Goyal 2011-03-31 10:25:18 -06:00
parent a34e318107
commit 31e09338f6

View File

@@ -110,4 +110,11 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
if resolve_entities:
raw = substitute_entites(raw)
if encoding and encoding.lower().replace('_', '-').strip() in (
'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
# Microsoft Word exports to HTML with encoding incorrectly set to
# gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
encoding = 'gbk'
return raw, encoding