From 99e86bbe29c03e3d52ac49e714c950b24e1f8d6f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 9 Oct 2020 09:29:07 +0530 Subject: [PATCH] Windows: Fix some MOBI files with non-BMP characters not being processed correctly. Fixes #1898894 [Conversion from epub to mobi en v5.2](https://bugs.launchpad.net/calibre/+bug/1898894) lxml passes Unicode to libxml2 as UCS-4 when non-BMP chars are present, and libxml2 needs iconv to process UCS-4 (god knows why), which was not being built on Windows. --- bypy/sources.json | 6 +++++- src/calibre/test_build.py | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/bypy/sources.json b/bypy/sources.json index 5fd5e7ae3e..f9cca7cf3f 100644 --- a/bypy/sources.json +++ b/bypy/sources.json @@ -246,11 +246,15 @@ { "name": "iconv", - "os": "macos, linux", "unix": { "filename": "libiconv-1.16.tar.gz", "hash": "sha256:e6a1b1b589654277ee790cce3734f07876ac4ccfaecbee8afa0b649cf529cc04", "urls": ["https://ftp.gnu.org/pub/gnu/libiconv/{filename}"] + }, + "windows": { + "filename": "libiconv-for-Windows-1.16.zip", + "hash": "sha256:d542e635cad954a62a2b451b5644e855b848398917e93adf46d8da4c9cc88b6d", + "urls": ["https://github.com/pffang/libiconv-for-Windows/archive/v1.16.zip"] } }, diff --git a/src/calibre/test_build.py b/src/calibre/test_build.py index 8f1dcde6ff..05da265348 100644 --- a/src/calibre/test_build.py +++ b/src/calibre/test_build.py @@ -124,6 +124,8 @@ class BuildTest(unittest.TestCase): raw = b'' root = etree.fromstring(raw, parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False)) self.assertEqual(etree.tostring(root), raw) + from lxml import html + html.fromstring("

\U0001f63a") def test_certgen(self): from calibre.utils.certgen import create_key_pair