Content server: Fix some OPDS feeds failing with non-ASCII content. Fixes #1947879 [IOS client access, the library does not display content.](https://bugs.launchpad.net/calibre/+bug/1947879)

This was caused by an underlying libxml2 bug.
https://bugs.launchpad.net/lxml/+bug/1873306
a697ed1e24

The workaround is to pass an explicit encoding (`encoding='unicode'`) to `etree.tostring()` rather than using `encoding=None`.
This commit is contained in:
Kovid Goyal 2021-10-26 08:28:00 +05:30
parent 1a334d7577
commit c70e064c23
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -120,7 +120,7 @@ def html_to_lxml(raw):
root = parse(raw, keep_doctype=False, namespace_elements=False, maybe_xhtml=False, sanitize_names=True) root = parse(raw, keep_doctype=False, namespace_elements=False, maybe_xhtml=False, sanitize_names=True)
root = next(root.iterdescendants('div')) root = next(root.iterdescendants('div'))
root.set('xmlns', "http://www.w3.org/1999/xhtml") root.set('xmlns', "http://www.w3.org/1999/xhtml")
raw = etree.tostring(root, encoding=None) raw = etree.tostring(root, encoding='unicode')
try: try:
return safe_xml_fromstring(raw, recover=False) return safe_xml_fromstring(raw, recover=False)
except: except:
@@ -131,7 +131,7 @@ def html_to_lxml(raw):
remove.append(attr) remove.append(attr)
for a in remove: for a in remove:
del x.attrib[a] del x.attrib[a]
raw = etree.tostring(root, encoding=None) raw = etree.tostring(root, encoding='unicode')
try: try:
return safe_xml_fromstring(raw, recover=False) return safe_xml_fromstring(raw, recover=False)
except: except: