Report the lxml bug upstream

This commit is contained in:
Kovid Goyal
2025-09-26 17:04:16 +05:30
parent 8bb1f251b5
commit cc970340cd
2 changed files with 2 additions and 1 deletions
+1 -1
View File
@@ -167,7 +167,7 @@ class BuildTest(unittest.TestCase):
html.fromstring('<p>\U0001f63a')
from calibre.utils.xml_parse import safe_xml_fromstring
bad = '\U0001f468' * 8192
safe_xml_fromstring(f'<p x="\U0001f600">\U0001f63a&#x1f63a;{bad}</p>')
safe_xml_fromstring(f'<p>{bad}</p>')
def test_certgen(self):
from calibre.utils.certgen import create_key_pair
+1
View File
@@ -30,6 +30,7 @@ def safe_xml_fromstring(string_or_bytes, recover=True):
except etree.XMLSyntaxError:
# This happens on Windows: if string_or_bytes is a unicode string that
# contains non-BMP characters, lxml chokes on it.
# https://bugs.launchpad.net/lxml/+bug/2125756
if sys.platform != 'win32' or not isinstance(string_or_bytes, str):
raise
ans = fs(string_or_bytes.encode('utf-8'), parser=create_parser(True, encoding='utf-8'))