PDF Input: Fix < and > in the text not being correctly handled

2025-07-09 03:04:10 -04:00 · 2019-08-30 08:03:08 +05:30 · 2019-08-30 08:03:08 +05:30 · 05834f0b42
commit 05834f0b42
parent b0fe64571e
1 changed file with 2 additions and 2 deletions
--- a/src/calibre/ebooks/pdf/pdftohtml.py
+++ b/src/calibre/ebooks/pdf/pdftohtml.py
@@ -11,7 +11,7 @@ import shutil
 import subprocess
 import sys
-from calibre import CurrentDir, replace_entities, prints
+from calibre import CurrentDir, xml_replace_entities, prints
 from calibre.constants import (
    filesystem_encoding, isbsd, islinux, isosx, ispy3, iswindows
 )
@@ -106,7 +106,7 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
                raw = re.sub(r'<a\s+name=(\d+)', r'<a id="\1"', raw, flags=re.I)
                raw = re.sub(r'<a id="(\d+)"', r'<a id="p\1"', raw, flags=re.I)
                raw = re.sub(r'<a href="index.html#(\d+)"', r'<a href="#p\1"', raw, flags=re.I)
-                raw = replace_entities(raw)
+                raw = xml_replace_entities(raw)
                raw = raw.replace('\u00a0', ' ')
                i.write(raw.encode('utf-8'))