mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Much faster custom implementation for checking if a font supports some unicode text
This commit is contained in:
parent
9977bafa67
commit
d69b24371d
@ -7,7 +7,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import threading, unicodedata
|
import threading
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
from future_builtins import map
|
from future_builtins import map
|
||||||
|
|
||||||
@ -20,10 +20,6 @@ class ThreadingViolation(Exception):
|
|||||||
'You cannot use the MTP driver from a thread other than the '
|
'You cannot use the MTP driver from a thread other than the '
|
||||||
' thread in which startup() was called')
|
' thread in which startup() was called')
|
||||||
|
|
||||||
def get_printable_characters(text):
|
|
||||||
return u''.join(x for x in unicodedata.normalize('NFC', text)
|
|
||||||
if unicodedata.category(x)[0] not in {'C', 'Z', 'M'})
|
|
||||||
|
|
||||||
def same_thread(func):
|
def same_thread(func):
|
||||||
@wraps(func)
|
@wraps(func)
|
||||||
def check_thread(self, *args, **kwargs):
|
def check_thread(self, *args, **kwargs):
|
||||||
@ -55,10 +51,18 @@ class Face(object):
|
|||||||
if not isinstance(text, unicode):
|
if not isinstance(text, unicode):
|
||||||
raise TypeError('%r is not a unicode object'%text)
|
raise TypeError('%r is not a unicode object'%text)
|
||||||
if has_non_printable_chars:
|
if has_non_printable_chars:
|
||||||
|
from calibre.utils.fonts.utils import get_printable_characters
|
||||||
text = get_printable_characters(text)
|
text = get_printable_characters(text)
|
||||||
chars = tuple(frozenset(map(ord, text)))
|
chars = tuple(frozenset(map(ord, text)))
|
||||||
return self.face.supports_text(chars)
|
return self.face.supports_text(chars)
|
||||||
|
|
||||||
|
@same_thread
def glyph_ids(self, text):
    '''
    Generator yielding, for every character of ``text`` in order, the value
    returned by ``self.face.glyph_id()`` for that character's code point.

    :param text: A unicode string.
    :raises TypeError: If ``text`` is not a unicode object.
    '''
    # NOTE(review): this is a generator, so the body below (including the
    # type check) only runs once the caller starts iterating -- confirm this
    # is compatible with what the same_thread decorator verifies.
    if isinstance(text, unicode):
        for codepoint in map(ord, text):
            yield self.face.glyph_id(codepoint)
    else:
        raise TypeError('%r is not a unicode object'%text)
|
||||||
|
|
||||||
class FreeType(object):
|
class FreeType(object):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@ -73,26 +77,4 @@ class FreeType(object):
|
|||||||
def load_font(self, data):
|
def load_font(self, data):
|
||||||
return Face(self.ft.load_font(data))
|
return Face(self.ft.load_font(data))
|
||||||
|
|
||||||
def test():
|
|
||||||
data = P('fonts/calibreSymbols.otf', data=True)
|
|
||||||
ft = FreeType()
|
|
||||||
font = ft.load_font(data)
|
|
||||||
if not font.supports_text('.\u2605★'):
|
|
||||||
raise RuntimeError('Incorrectly returning that text is not supported')
|
|
||||||
if font.supports_text('abc'):
|
|
||||||
raise RuntimeError('Incorrectly claiming that text is supported')
|
|
||||||
|
|
||||||
def test_find_font():
|
|
||||||
from calibre.utils.fonts.scanner import font_scanner
|
|
||||||
abcd = '诶比西迪'
|
|
||||||
family = font_scanner.find_font_for_text(abcd)[0]
|
|
||||||
print ('Family for Chinese text:', family)
|
|
||||||
family = font_scanner.find_font_for_text(abcd)[0]
|
|
||||||
abcd = 'لوحة المفاتيح العربية'
|
|
||||||
print ('Family for Arabic text:', family)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
test()
|
|
||||||
test_find_font()
|
|
||||||
|
|
||||||
|
@ -115,6 +115,14 @@ supports_text(Face *self, PyObject *args) {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static PyObject*
|
||||||
|
glyph_id(Face *self, PyObject *args) {
|
||||||
|
unsigned long code;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTuple(args, "k", &code)) return NULL;
|
||||||
|
return Py_BuildValue("k", (unsigned long)FT_Get_Char_Index(self->face, (FT_ULong)code));
|
||||||
|
}
|
||||||
|
|
||||||
static PyGetSetDef Face_getsetters[] = {
|
static PyGetSetDef Face_getsetters[] = {
|
||||||
{(char *)"family_name",
|
{(char *)"family_name",
|
||||||
(getter)family_name, NULL,
|
(getter)family_name, NULL,
|
||||||
@ -134,6 +142,10 @@ static PyMethodDef Face_methods[] = {
|
|||||||
"supports_text(sequence of unicode character codes) -> Return True iff this font has glyphs for all the specified characters."
|
"supports_text(sequence of unicode character codes) -> Return True iff this font has glyphs for all the specified characters."
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{"glyph_id", (PyCFunction)glyph_id, METH_VARARGS,
|
||||||
|
"glyph_id(character code) -> Returns the glyph id for the specified character code."
|
||||||
|
},
|
||||||
|
|
||||||
{NULL} /* Sentinel */
|
{NULL} /* Sentinel */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -15,7 +15,6 @@ from calibre import walk, prints, as_unicode
|
|||||||
from calibre.constants import (config_dir, iswindows, isosx, plugins, DEBUG,
|
from calibre.constants import (config_dir, iswindows, isosx, plugins, DEBUG,
|
||||||
isworker)
|
isworker)
|
||||||
from calibre.utils.fonts.metadata import FontMetadata, UnsupportedFont
|
from calibre.utils.fonts.metadata import FontMetadata, UnsupportedFont
|
||||||
from calibre.utils.fonts.utils import panose_to_css_generic_family
|
|
||||||
from calibre.utils.icu import sort_key
|
from calibre.utils.icu import sort_key
|
||||||
|
|
||||||
class NoFonts(ValueError):
|
class NoFonts(ValueError):
|
||||||
@ -117,17 +116,17 @@ class Scanner(Thread):
|
|||||||
|
|
||||||
:return: (family name, faces) or None, None
|
:return: (family name, faces) or None, None
|
||||||
'''
|
'''
|
||||||
from calibre.utils.fonts.free_type import FreeType, get_printable_characters
|
from calibre.utils.fonts.utils import (supports_text,
|
||||||
ft = FreeType()
|
panose_to_css_generic_family, get_printable_characters)
|
||||||
found = {}
|
|
||||||
if not isinstance(text, unicode):
|
if not isinstance(text, unicode):
|
||||||
raise TypeError(u'%r is not unicode'%text)
|
raise TypeError(u'%r is not unicode'%text)
|
||||||
text = get_printable_characters(text)
|
text = get_printable_characters(text)
|
||||||
|
found = {}
|
||||||
|
|
||||||
def filter_faces(font):
|
def filter_faces(font):
|
||||||
try:
|
try:
|
||||||
ftface = ft.load_font(self.get_font_data(font))
|
raw = self.get_font_data(font)
|
||||||
return ftface.supports_text(text, has_non_printable_chars=False)
|
return supports_text(raw, text)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
return False
|
return False
|
||||||
|
@ -14,6 +14,11 @@ from collections import defaultdict
|
|||||||
class UnsupportedFont(ValueError):
|
class UnsupportedFont(ValueError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def get_printable_characters(text):
    '''
    Return ``text`` normalized to NFC with every character removed whose
    Unicode general category starts with C (Other), Z (Separator) or
    M (Mark).

    :param text: The unicode string to filter.
    :return: A unicode string with the remaining characters in their
             original order.
    '''
    import unicodedata
    # Hoisted out of the per-character loop: the set of rejected major
    # categories and the category lookup function never change.
    rejected = frozenset('CZM')
    category = unicodedata.category
    return u''.join(x for x in unicodedata.normalize('NFC', text)
            if category(x)[0] not in rejected)
|
||||||
|
|
||||||
def is_truetype_font(raw):
|
def is_truetype_font(raw):
|
||||||
sfnt_version = raw[:4]
|
sfnt_version = raw[:4]
|
||||||
return (sfnt_version in {b'\x00\x01\x00\x00', b'OTTO'}, sfnt_version)
|
return (sfnt_version in {b'\x00\x01\x00\x00', b'OTTO'}, sfnt_version)
|
||||||
@ -267,16 +272,87 @@ def remove_embed_restriction(raw):
|
|||||||
verify_checksums(raw)
|
verify_checksums(raw)
|
||||||
return raw
|
return raw
|
||||||
|
|
||||||
|
def get_bmp_glyph_ids(table, bmp, codes):
    '''
    Look up character codes in a format 4 (segment mapping to delta values)
    cmap subtable.

    :param table: The raw bytes of the cmap table.
    :param bmp: Offset in ``table`` at which the format 4 subtable starts.
    :param codes: Iterable of integer character codes.
    :return: Generator yielding one glyph id per code, in order. 0 (the
             missing glyph) is yielded for codes not covered by any segment.
    :raises ValueError: If the subtable's length field is too small to hold
             its segment arrays. As this is a generator, the error surfaces
             when iteration starts.
    '''
    # Header fields following the 2 byte format field: length, language and
    # segCountX2 (twice the number of segments).
    length, language, segcount = struct.unpack_from(b'>3H', table, bmp+2)
    array_len = segcount //2
    # The segment arrays start after the seven 16 bit header fields
    offset = bmp + 7*2
    array_sz = 2*array_len
    array = b'>%dH'%array_len
    end_count = struct.unpack_from(array, table, offset)
    offset += array_sz + 2  # the extra 2 bytes skip the reserved pad field
    start_count = struct.unpack_from(array, table, offset)
    offset += array_sz
    # idDelta is the only signed array
    id_delta = struct.unpack_from(array.replace(b'H', b'h'), table, offset)
    offset += array_sz
    range_offset = struct.unpack_from(array, table, offset)
    if length + bmp < offset + array_sz:
        raise ValueError('cmap subtable length is too small')
    # Whatever remains of the subtable is the glyph id array
    glyph_id_len = (length + bmp - (offset + array_sz))//2
    glyph_id_map = struct.unpack_from(b'>%dH'%glyph_id_len, table, offset +
            array_sz)

    for code in codes:
        found = False
        for i, ec in enumerate(end_count):
            if ec >= code:
                sc = start_count[i]
                if sc <= code:
                    found = True
                    ro = range_offset[i]
                    if ro == 0:
                        glyph_id = id_delta[i] + code
                    else:
                        # The range offset is a byte offset measured from
                        # its own slot in the idRangeOffset array. Convert
                        # it to words and rebase it onto the start of the
                        # glyph id array, which directly follows the
                        # array_len entries of idRangeOffset.
                        idx = ro//2 + (code - sc) + i - array_len
                        glyph_id = glyph_id_map[idx]
                        if glyph_id != 0:
                            glyph_id += id_delta[i]
                    # Glyph ids are 16 bit: delta arithmetic is modulo 65536
                    yield glyph_id % 0x10000
                    break
        if not found:
            yield 0
|
||||||
|
|
||||||
|
def get_glyph_ids(raw, text, raw_is_table=False):
    '''
    Return an iterator over the glyph ids for the characters in ``text``,
    using the Windows Unicode BMP (platform 3, encoding 1) format 4 subtable
    of the font's cmap table.

    The arguments and the cmap header are validated immediately, when this
    function is called; only the per-character lookup is lazy.

    :param raw: The raw font data, or the raw cmap table itself if
                ``raw_is_table`` is True.
    :param text: A unicode string.
    :param raw_is_table: If True, ``raw`` is used directly as the cmap table.
    :return: An iterator yielding one glyph id per character of ``text``
             (0 means the font has no glyph for that character).
    :raises TypeError: If ``text`` is not a unicode object.
    :raises UnsupportedFont: If there is no cmap table, or it has no
             platform 3/encoding 1 subtable in format 4.
    '''
    if not isinstance(text, unicode):
        raise TypeError('%r is not a unicode object'%text)
    if raw_is_table:
        table = raw
    else:
        table = get_table(raw, 'cmap')[0]
    if table is None:
        raise UnsupportedFont('Not a supported font, has no cmap table')
    version, num_tables = struct.unpack_from(b'>HH', table)
    bmp_table = None
    for i in xrange(num_tables):
        # Encoding records are 8 bytes each and follow the 4 byte header
        platform_id, encoding_id, offset = struct.unpack_from(b'>HHL', table,
                4 + (i*8))
        if platform_id == 3 and encoding_id == 1:
            table_format = struct.unpack_from(b'>H', table, offset)[0]
            if table_format == 4:
                bmp_table = offset
                break
    if bmp_table is None:
        raise UnsupportedFont('Not a supported font, has no format 4 cmap table')

    # Hand back the lazy generator rather than yielding from here, so that
    # all of the checks above run at call time instead of being deferred to
    # the first iteration.
    return get_bmp_glyph_ids(table, bmp_table, map(ord, text))
|
||||||
|
|
||||||
|
def supports_text(raw, text, has_only_printable_chars=False):
    '''
    Check whether the font has a glyph for every character in ``text``.

    :param raw: The raw font data.
    :param text: A unicode string.
    :param has_only_printable_chars: If False (the default), ``text`` is
            first passed through get_printable_characters(). Pass True when
            the caller has already done that filtering.
    :return: True iff get_glyph_ids() reports a non-zero glyph id for every
             character. Any error while reading the font is treated as the
             text not being supported.
    :raises TypeError: If ``text`` is not a unicode object.
    '''
    if not isinstance(text, unicode):
        raise TypeError('%r is not a unicode object'%text)
    if not has_only_printable_chars:
        text = get_printable_characters(text)
    try:
        # A glyph id of 0 is the missing glyph, all() stops at the first one
        return all(get_glyph_ids(raw, text))
    except Exception:
        # Deliberately best effort: a font that cannot be parsed is simply
        # reported as not supporting the text. KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return False
|
||||||
|
|
||||||
def get_font_for_text(text, candidate_font_data=None):
|
def get_font_for_text(text, candidate_font_data=None):
|
||||||
ok = False
|
ok = False
|
||||||
if candidate_font_data is not None:
|
if candidate_font_data is not None:
|
||||||
from calibre.utils.fonts.free_type import FreeType, FreeTypeError
|
ok = supports_text(candidate_font_data, text)
|
||||||
ft = FreeType()
|
|
||||||
try:
|
|
||||||
font = ft.load_font(candidate_font_data)
|
|
||||||
ok = font.supports_text(text)
|
|
||||||
except FreeTypeError:
|
|
||||||
ok = True
|
|
||||||
if not ok:
|
if not ok:
|
||||||
from calibre.utils.fonts.scanner import font_scanner
|
from calibre.utils.fonts.scanner import font_scanner
|
||||||
family, faces = font_scanner.find_font_for_text(text)
|
family, faces = font_scanner.find_font_for_text(text)
|
||||||
@ -285,7 +361,40 @@ def get_font_for_text(text, candidate_font_data=None):
|
|||||||
candidate_font_data = f.read()
|
candidate_font_data = f.read()
|
||||||
return candidate_font_data
|
return candidate_font_data
|
||||||
|
|
||||||
|
def test_glyph_ids():
    '''
    Compare the glyph ids computed by get_glyph_ids() with those reported by
    FreeType for the bundled Liberation Serif font, raising an Exception if
    they differ.
    '''
    from calibre.utils.fonts.free_type import FreeType
    font_data = P('fonts/liberation/LiberationSerif-Regular.ttf', data=True)
    # Keep a reference to the FreeType object for as long as the face is used
    freetype = FreeType()
    face = freetype.load_font(font_data)
    sample = u'诶йab'
    expected = tuple(face.glyph_ids(sample))
    actual = tuple(get_glyph_ids(font_data, sample))
    if expected != actual:
        raise Exception('My code and FreeType differ on the glyph ids')
|
||||||
|
|
||||||
|
def test_supports_text():
    '''
    Check supports_text() against the bundled calibreSymbols font: the star
    sample must be reported as supported and plain latin letters as
    unsupported. Raises RuntimeError otherwise.
    '''
    symbols_font = P('fonts/calibreSymbols.otf', data=True)
    has_symbols = supports_text(symbols_font, '.\u2605★')
    if not has_symbols:
        raise RuntimeError('Incorrectly returning that text is not supported')
    has_latin = supports_text(symbols_font, 'abc')
    if has_latin:
        raise RuntimeError('Incorrectly claiming that text is supported')
|
||||||
|
|
||||||
|
def test_find_font():
    '''
    Print the font family that the font scanner picks for a sample of
    Chinese text and for a sample of Arabic text.
    '''
    from calibre.utils.fonts.scanner import font_scanner
    abcd = '诶比西迪'
    family = font_scanner.find_font_for_text(abcd)[0]
    print ('Family for Chinese text:', family)
    # The sample must be switched to Arabic *before* the second lookup,
    # otherwise the family found for the Chinese text is reported again.
    abcd = 'لوحة المفاتيح العربية'
    family = font_scanner.find_font_for_text(abcd)[0]
    print ('Family for Arabic text:', family)
|
||||||
|
|
||||||
|
|
||||||
def test():
|
def test():
|
||||||
|
test_glyph_ids()
|
||||||
|
test_supports_text()
|
||||||
|
test_find_font()
|
||||||
|
|
||||||
|
def main():
|
||||||
import sys, os
|
import sys, os
|
||||||
for f in sys.argv[1:]:
|
for f in sys.argv[1:]:
|
||||||
print (os.path.basename(f))
|
print (os.path.basename(f))
|
||||||
@ -299,5 +408,5 @@ def test():
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
test()
|
main()
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user