From dbf8c10e4c08872e149a59265f800c7d6d3ba8b7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 10 Jan 2019 16:06:35 +0530 Subject: [PATCH] Subset fonts: Fix error when trying to subset Unicode characters that require two UTF-16 code units on Windows. Fixes #1811224 [Can not subset embed fonts for a epub with "wide" Unicode char](https://bugs.launchpad.net/calibre/+bug/1811224) --- src/calibre/utils/fonts/sfnt/subset.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/calibre/utils/fonts/sfnt/subset.py b/src/calibre/utils/fonts/sfnt/subset.py index 69f0e7af0e..8c7b5901a5 100644 --- a/src/calibre/utils/fonts/sfnt/subset.py +++ b/src/calibre/utils/fonts/sfnt/subset.py @@ -12,7 +12,7 @@ from collections import OrderedDict from operator import itemgetter from functools import partial -from calibre.utils.icu import safe_chr +from calibre.utils.icu import safe_chr, ord_string from calibre.utils.fonts.sfnt.container import Sfnt from calibre.utils.fonts.sfnt.errors import UnsupportedFont, NoGlyphs @@ -105,16 +105,20 @@ def pdf_subset(sfnt, glyphs): 'or PostScript outlines') +def safe_ord(x): + return ord_string(unicode(x))[0] + + def subset(raw, individual_chars, ranges=(), warnings=None): warn = partial(do_warn, warnings) - chars = set(map(ord, individual_chars)) + chars = set(map(safe_ord, individual_chars)) for r in ranges: - chars |= set(xrange(ord(r[0]), ord(r[1])+1)) + chars |= set(xrange(safe_ord(r[0]), safe_ord(r[1])+1)) # Always add the space character for ease of use from the command line - if ord(' ') not in chars: - chars.add(ord(' ')) + if safe_ord(' ') not in chars: + chars.add(safe_ord(' ')) sfnt = Sfnt(raw) old_sizes = sfnt.sizes()