From c70e064c2399f7f1d4c3d71ec28fe7fcf763cf5c Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 26 Oct 2021 08:28:00 +0530
Subject: [PATCH] Content server: Fix some OPDS feeds failing with non-ASCII content. Fixes #1947879 [IOS client access, the library does not display content.](https://bugs.launchpad.net/calibre/+bug/1947879)

This was caused by an underlying libxml2 bug.
https://bugs.launchpad.net/lxml/+bug/1873306
https://gitlab.gnome.org/GNOME/libxml2/-/commit/a697ed1e24234a9e6a4a4639555dcca230f752c1

The workaround is to specify the encoding rather than using None.
---
 src/calibre/srv/opds.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/srv/opds.py b/src/calibre/srv/opds.py
index f19dd164cf..e7fae5e851 100644
--- a/src/calibre/srv/opds.py
+++ b/src/calibre/srv/opds.py
@@ -120,7 +120,7 @@ def html_to_lxml(raw):
     root = parse(raw, keep_doctype=False, namespace_elements=False, maybe_xhtml=False, sanitize_names=True)
     root = next(root.iterdescendants('div'))
     root.set('xmlns', "http://www.w3.org/1999/xhtml")
-    raw = etree.tostring(root, encoding=None)
+    raw = etree.tostring(root, encoding='unicode')
     try:
         return safe_xml_fromstring(raw, recover=False)
     except:
@@ -131,7 +131,7 @@ def html_to_lxml(raw):
                     remove.append(attr)
             for a in remove:
                 del x.attrib[a]
-        raw = etree.tostring(root, encoding=None)
+        raw = etree.tostring(root, encoding='unicode')
         try:
             return safe_xml_fromstring(raw, recover=False)
         except: