From 672d91d454b26f4cbd3c265b6dff15879a4be5da Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 2 Apr 2011 10:40:58 -0600
Subject: [PATCH] Fix #745428 (Calibre doesn't convert encoding correctly.)

---
 src/calibre/ebooks/chardet/__init__.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/calibre/ebooks/chardet/__init__.py b/src/calibre/ebooks/chardet/__init__.py
index c562176ef2..604cbdd360 100644
--- a/src/calibre/ebooks/chardet/__init__.py
+++ b/src/calibre/ebooks/chardet/__init__.py
@@ -100,6 +100,12 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
         try:
             if encoding.lower().strip() == 'macintosh':
                 encoding = 'mac-roman'
+            if encoding.lower().replace('_', '-').strip() in (
+                    'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
+                    'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
+                # Microsoft Word exports to HTML with encoding incorrectly set to
+                # gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
+                encoding = 'gbk'
             raw = raw.decode(encoding, 'replace')
         except LookupError:
             encoding = 'utf-8'
@@ -110,11 +116,6 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
     if resolve_entities:
         raw = substitute_entites(raw)
 
-    if encoding and encoding.lower().replace('_', '-').strip() in (
-            'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
-            'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
-        # Microsoft Word exports to HTML with encoding incorrectly set to
-        # gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
-        encoding = 'gbk'
+
 
     return raw, encoding