From ad34b0ea3b2d0ba514e93d17b060a3d9af1247eb Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 15 Oct 2022 18:02:11 +0530
Subject: [PATCH] We can no longer rely on confidence from chardet since it's
 always 1 with the move to the C-based chardet library
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

So for files where we assume utf-8, use utf-8 if no explicit encoding is
found. Fixes #1993029 [Apostrophe in book title turns into "à€™" upon import](https://bugs.launchpad.net/calibre/+bug/1993029)
---
 src/calibre/ebooks/chardet.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/calibre/ebooks/chardet.py b/src/calibre/ebooks/chardet.py
index 4aeceea0fd..afa0d9b4d8 100644
--- a/src/calibre/ebooks/chardet.py
+++ b/src/calibre/ebooks/chardet.py
@@ -154,6 +154,11 @@ def detect_xml_encoding(raw, verbose=False, assume_utf8=False):
             encoding = encoding.decode('ascii', 'replace')
             break
     if encoding is None:
+        if assume_utf8:
+            try:
+                return raw.decode('utf-8'), 'utf-8'
+            except UnicodeDecodeError:
+                pass
         encoding = force_encoding(raw, verbose, assume_utf8=assume_utf8)
     if encoding.lower().strip() == 'macintosh':
         encoding = 'mac-roman'