diff --git a/src/calibre/ebooks/html_entities.c b/src/calibre/ebooks/html_entities.c
index 00181fb117..8b1b9ac9bd 100644
--- a/src/calibre/ebooks/html_entities.c
+++ b/src/calibre/ebooks/html_entities.c
@@ -65,60 +65,56 @@ parse_base16_integer(const char *input, size_t sz, bool *ok) {
             if (ch < '0' || ch > '9') { *ok = false; return 0; }
             digit = ch - '0';
         }
-        ans = ans * 10 + digit;
+        ans = ans * 16 + digit;
     }
     return ans;
 }
 
-static size_t
-add_entity(const char *entity, const size_t elen, char *output) {
-    size_t ans = 0;
-    if (elen > 64) {
-bad_entity:
-        output[ans++] = '&';
-        memcpy(output + ans, entity, elen);
-        ans += elen;
-        output[ans++] = ';';
-        return ans;
-    }
-    if (!elen) {
-        output[ans++] = '&';
-        output[ans++] = ';';
-        return ans;
-    }
+static bool  // helper used by convert_entity to decide which replacements to suppress
+is_xml_unsafe(uint32_t codepoint) {  // true only for <, >, ampersand, double quote and single quote
+    return codepoint == '<' || codepoint == '>' || codepoint == '&' || codepoint == '"' || codepoint == '\'';
+}
+
+static ssize_t  // returns the number of bytes placed in output, or -1 when the entity cannot or must not be converted
+convert_entity(const char *entity, const size_t elen, char *output, bool keep_xml_entities) {  // entity/elen: the text between the ampersand and the semicolon, as passed by add_entity
     if (entity[0] == '#') {
-        if (elen < 2) goto bad_entity;
+        if (elen < 2) return -1;  // a lone # with nothing after it
         uint32_t codepoint = 0;
-        bool ok;
+        bool ok = false;  // stays false when the hex branch below is skipped, so the check after the branches rejects the entity
         if (entity[1] == 'x' || entity[1] == 'X') {
-            if (elen < 3) goto bad_entity;
-            codepoint = parse_base16_integer(entity + 2, elen - 2, &ok);
-            if (!ok || !codepoint) goto bad_entity;
+            if (elen > 2) codepoint = parse_base16_integer(entity + 2, elen - 2, &ok);  // only parse when something follows the x/X marker
         } else {
             codepoint = parse_base10_integer(entity + 1, elen - 1, &ok);
-            if (!ok || !codepoint) goto bad_entity;
         }
-        unsigned num = encode_utf8(codepoint, output);
-        if (!num) goto bad_entity;
-        return num;
-    } else {
-        const struct html_entity *s = in_word_set(entity, elen);
-        if (!s) goto bad_entity;
-        ans = strlen(s->val);
-        memcpy(output, s->val, ans);
-        return ans;
+        if (!ok || (keep_xml_entities && is_xml_unsafe(codepoint))) return -1;  // parse failure, or a character that must remain an entity when keep_xml_entities is set
+        return codepoint ? encode_utf8(codepoint, output) : 0;  // zero codepoint writes nothing; NOTE(review): a 0 result from encode_utf8 would also drop the entity - confirm intended
     }
-    goto bad_entity;
+    const struct html_entity *s = in_word_set(entity, elen);  // named entity: look the name up with in_word_set
+    if (!s) return -1;  // name not found
+    size_t ans = strlen(s->val);
+    if (keep_xml_entities && ans == 1 && is_xml_unsafe(s->val[0])) return -1;  // one-byte replacement that is_xml_unsafe flags: leave the entity as written
+    memcpy(output, s->val, ans);  // copies the replacement bytes without a terminating NUL
+    return ans;
+}
+
+static size_t  // writes the output for one entity and returns the number of bytes written
+add_entity(const char *entity, const size_t elen, char *output, bool keep_xml_entities) {  // entity/elen cover the whole text from the ampersand through the semicolon (see the call in process_entity)
+    ssize_t ans;  // assigned only inside the condition below; short-circuit evaluation guarantees it is set before the final return
+    if (elen > 64 || elen < 3 || (ans = convert_entity(entity + 1, elen - 2, output, keep_xml_entities)) < 0) {  // too long, nothing between the delimiters, or conversion refused
+        memcpy(output, entity, elen);  // fall back to copying the entity text through unchanged
+        return elen;
+    }
+    return ans;  // may be 0, in which case nothing was written for this entity
 }
 
 
 static size_t
-process_entity(const char *input, size_t input_sz, char *output, size_t *output_pos) {
+process_entity(const char *input, size_t input_sz, char *output, size_t *output_pos, bool keep_xml_entities) {
     size_t input_pos = 1;  // ignore leading &
     while (input_pos < input_sz) {
         char ch = input[input_pos++];
-        if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('0' <= ch && ch <= '9') || (ch == '#' && input_pos == 1));
-        else if (ch == ';') { *output_pos += add_entity(input, input_pos-1, output + *output_pos); return input_pos; }
+        if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('0' <= ch && ch <= '9') || (ch == '#' && input_pos == 2));
+        else if (ch == ';') { *output_pos += add_entity(input, input_pos, output + *output_pos, keep_xml_entities); return input_pos; }
         else break;
     }
     memcpy(output + *output_pos, input, input_pos);
@@ -132,7 +128,10 @@ replace(const char *input, size_t input_sz, char *output, int keep_xml_entities)
     while (input_pos < input_sz) {
         const char *p = (const char*)memchr(input + input_pos, '&', input_sz - input_pos);
         if (p) {
-            input_pos += process_entity(p, input_sz - (p - input), output, &output_pos);
+            size_t before_amp = p - (input + input_pos);
+            memcpy(output + output_pos, input + input_pos, before_amp);
+            output_pos += before_amp; input_pos += before_amp;
+            input_pos += process_entity(p, input_sz - (p - input), output, &output_pos, keep_xml_entities);
         } else {
             memcpy(output + output_pos, input + input_pos, input_sz - input_pos);
             output_pos += input_sz - input_pos;
diff --git a/src/calibre/ebooks/html_entities.h b/src/calibre/ebooks/html_entities.h
index 073326ab4c..42c559732a 100644
--- a/src/calibre/ebooks/html_entities.h
+++ b/src/calibre/ebooks/html_entities.h
@@ -1,5 +1,5 @@
 /* ANSI-C code produced by gperf version 3.1 */
-/* Command-line: gperf --struct-type --readonly --includes  */
+/* Command-line: gperf --struct-type --readonly --includes --compare-strncmp  */
 /* Computed positions: -k'1-7,10,12,$' */
 
 #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
@@ -5344,7 +5344,7 @@ in_word_set (register const char *str, register size_t len)
         {
           register const char *s = wordlist[key].name;
 
-          if (*str == *s && !strcmp (str + 1, s + 1))
+          if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
             return &wordlist[key];
         }
     }
diff --git a/src/calibre/ebooks/html_entities.py b/src/calibre/ebooks/html_entities.py
index 2a1b88859d..8e60cac80b 100644
--- a/src/calibre/ebooks/html_entities.py
+++ b/src/calibre/ebooks/html_entities.py
@@ -2142,9 +2142,24 @@ def find_tests():
             from calibre_extensions.fast_html_entities import replace_entities
             def t(inp, exp):
                 self.assertEqual(exp, replace_entities(inp), f'Failed for input: {inp!r}')
-            t('&amp', '&amp')
+            def x(inp, exp):
+                self.assertEqual(exp, replace_entities(inp, True), f'Failed for input: {inp!r}')
+            t('a&#1234;b', 'aӒb')
             t('', '')
             t('a', 'a')
+            t('&', '&')
+            t('&amp', '&amp')
+            t('&amp;', '&')
+            t('a&;b &#;c', 'a&;b &#;c')
+            t('&lt;', '<')
+            t('&amp;&lt;', '&<')
+            t('a&amp;b&lt;c', 'a&b<c')
+            t('a&acE;b', 'a∾̳b')
+            t('a&#1234;b', 'aӒb')
+            t('a&#X1234;b', 'a\u1234b')
+            t('a&#x1034fA;b', 'a\U001034fAb')
+            t('a&#0;b&#x000;c', 'abc')
+            x('&amp;&lt;&gt;&apos;&quot;', '&amp;&lt;&gt;&apos;&quot;')
 
     return unittest.defaultTestLoader.loadTestsFromTestCase(TestHTMLEntityReplacement)
 
@@ -2184,6 +2199,6 @@ struct html_entity { const char *name, *val; }
 
     import subprocess
     with open(__file__.replace('.py', '.h'), 'wb') as f:
-        cp = subprocess.run(['gperf', '--struct-type', '--readonly', '--includes'], input='\n'.join(native_lines).encode(), stdout=f)
+        cp = subprocess.run(['gperf', '--struct-type', '--readonly', '--includes', '--compare-strncmp'], input='\n'.join(native_lines).encode(), stdout=f)
         if cp.returncode != 0:
             raise SystemExit(cp.returncode)