mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Much faster custom implementation for checking if a font supports some unicode text
This commit is contained in:
parent
9977bafa67
commit
d69b24371d
@ -7,7 +7,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import threading, unicodedata
|
||||
import threading
|
||||
from functools import wraps
|
||||
from future_builtins import map
|
||||
|
||||
@ -20,10 +20,6 @@ class ThreadingViolation(Exception):
|
||||
'You cannot use the MTP driver from a thread other than the '
|
||||
' thread in which startup() was called')
|
||||
|
||||
def get_printable_characters(text):
    '''
    Return ``text`` normalized to NFC with every character whose Unicode
    general category starts with ``C``, ``Z`` or ``M`` removed.

    :param text: The unicode text to filter.
    :return: A unicode string containing only the characters that were kept.
    '''
    kept = []
    for char in unicodedata.normalize('NFC', text):
        major_class = unicodedata.category(char)[0]
        if major_class in {'C', 'Z', 'M'}:
            continue
        kept.append(char)
    return u''.join(kept)
|
||||
|
||||
def same_thread(func):
|
||||
@wraps(func)
|
||||
def check_thread(self, *args, **kwargs):
|
||||
@ -55,10 +51,18 @@ class Face(object):
|
||||
if not isinstance(text, unicode):
|
||||
raise TypeError('%r is not a unicode object'%text)
|
||||
if has_non_printable_chars:
|
||||
from calibre.utils.fonts.utils import get_printable_characters
|
||||
text = get_printable_characters(text)
|
||||
chars = tuple(frozenset(map(ord, text)))
|
||||
return self.face.supports_text(chars)
|
||||
|
||||
@same_thread
def glyph_ids(self, text):
    '''
    Generate, in order, the glyph id that ``self.face.glyph_id()`` reports
    for the code point of each character in ``text``.

    :param text: A unicode object.
    :raises TypeError: If ``text`` is not a unicode object. As this is a
        generator function, the check runs when iteration starts.
    '''
    is_unicode = isinstance(text, unicode)
    if not is_unicode:
        raise TypeError('%r is not a unicode object'%text)
    for character in text:
        codepoint = ord(character)
        yield self.face.glyph_id(codepoint)
|
||||
|
||||
class FreeType(object):
|
||||
|
||||
def __init__(self):
|
||||
@ -73,26 +77,4 @@ class FreeType(object):
|
||||
def load_font(self, data):
    '''
    Load a font from ``data`` via ``self.ft.load_font()`` and return the
    result wrapped in a :class:`Face`.
    '''
    ft_face = self.ft.load_font(data)
    return Face(ft_face)
|
||||
|
||||
def test():
    '''
    Self-test: load the bundled calibreSymbols font and check that
    supports_text() accepts text the font is expected to cover and rejects
    plain ASCII letters.

    :raises RuntimeError: If either check gives the wrong answer.
    '''
    raw = P('fonts/calibreSymbols.otf', data=True)
    face = FreeType().load_font(raw)
    # (sample text, expected answer, error to report on a mismatch)
    expectations = (
        ('.\u2605★', True,
            'Incorrectly returning that text is not supported'),
        ('abc', False,
            'Incorrectly claiming that text is supported'),
    )
    for sample, should_support, message in expectations:
        if bool(face.supports_text(sample)) != should_support:
            raise RuntimeError(message)
|
||||
|
||||
def test_find_font():
    '''
    Print the font family that the font scanner selects for a sample of
    Chinese text and for a sample of Arabic text.
    '''
    from calibre.utils.fonts.scanner import font_scanner
    abcd = '诶比西迪'
    family = font_scanner.find_font_for_text(abcd)[0]
    print ('Family for Chinese text:', family)
    # The sample must be switched *before* the second lookup, otherwise the
    # Chinese text is looked up twice and reported as the Arabic result.
    abcd = 'لوحة المفاتيح العربية'
    family = font_scanner.find_font_for_text(abcd)[0]
    print ('Family for Arabic text:', family)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test()
|
||||
test_find_font()
|
||||
|
||||
|
@ -115,6 +115,14 @@ supports_text(Face *self, PyObject *args) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
glyph_id(Face *self, PyObject *args) {
|
||||
unsigned long code;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "k", &code)) return NULL;
|
||||
return Py_BuildValue("k", (unsigned long)FT_Get_Char_Index(self->face, (FT_ULong)code));
|
||||
}
|
||||
|
||||
static PyGetSetDef Face_getsetters[] = {
|
||||
{(char *)"family_name",
|
||||
(getter)family_name, NULL,
|
||||
@ -134,6 +142,10 @@ static PyMethodDef Face_methods[] = {
|
||||
"supports_text(sequence of unicode character codes) -> Return True iff this font has glyphs for all the specified characters."
|
||||
},
|
||||
|
||||
{"glyph_id", (PyCFunction)glyph_id, METH_VARARGS,
|
||||
"glyph_id(character code) -> Returns the glyph id for the specified character code."
|
||||
},
|
||||
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
|
@ -15,7 +15,6 @@ from calibre import walk, prints, as_unicode
|
||||
from calibre.constants import (config_dir, iswindows, isosx, plugins, DEBUG,
|
||||
isworker)
|
||||
from calibre.utils.fonts.metadata import FontMetadata, UnsupportedFont
|
||||
from calibre.utils.fonts.utils import panose_to_css_generic_family
|
||||
from calibre.utils.icu import sort_key
|
||||
|
||||
class NoFonts(ValueError):
|
||||
@ -117,17 +116,17 @@ class Scanner(Thread):
|
||||
|
||||
:return: (family name, faces) or None, None
|
||||
'''
|
||||
from calibre.utils.fonts.free_type import FreeType, get_printable_characters
|
||||
ft = FreeType()
|
||||
found = {}
|
||||
from calibre.utils.fonts.utils import (supports_text,
|
||||
panose_to_css_generic_family, get_printable_characters)
|
||||
if not isinstance(text, unicode):
|
||||
raise TypeError(u'%r is not unicode'%text)
|
||||
text = get_printable_characters(text)
|
||||
found = {}
|
||||
|
||||
def filter_faces(font):
|
||||
try:
|
||||
ftface = ft.load_font(self.get_font_data(font))
|
||||
return ftface.supports_text(text, has_non_printable_chars=False)
|
||||
raw = self.get_font_data(font)
|
||||
return supports_text(raw, text)
|
||||
except:
|
||||
pass
|
||||
return False
|
||||
|
@ -14,6 +14,11 @@ from collections import defaultdict
|
||||
class UnsupportedFont(ValueError):
    '''
    Raised when font data cannot be handled, for example when it has no
    cmap table or no format 4 cmap subtable.
    '''
|
||||
|
||||
def get_printable_characters(text):
    '''
    Normalize ``text`` to NFC and drop every character whose Unicode general
    category starts with ``C``, ``Z`` or ``M``.

    :param text: The unicode text to filter.
    :return: A unicode string made of the remaining characters, in order.
    '''
    from unicodedata import category, normalize
    excluded_classes = {'C', 'Z', 'M'}
    normalized = normalize('NFC', text)
    return u''.join(
        char for char in normalized if category(char)[0] not in excluded_classes)
|
||||
|
||||
def is_truetype_font(raw):
    '''
    Check whether raw font data starts with a recognized sfnt version tag.

    :param raw: The raw font data.
    :return: A tuple ``(recognized, sfnt_version)`` where ``sfnt_version`` is
        the first four bytes of ``raw`` and ``recognized`` is True iff those
        bytes are ``b'\\x00\\x01\\x00\\x00'`` or ``b'OTTO'``.
    '''
    known_versions = {b'\x00\x01\x00\x00', b'OTTO'}
    signature = raw[:4]
    recognized = signature in known_versions
    return (recognized, signature)
|
||||
@ -267,16 +272,87 @@ def remove_embed_restriction(raw):
|
||||
verify_checksums(raw)
|
||||
return raw
|
||||
|
||||
def get_bmp_glyph_ids(table, bmp, codes):
    '''
    Look up glyph ids for character codes in a format 4 cmap subtable.

    :param table: The raw bytes of the cmap table.
    :param bmp: Offset into ``table`` at which the format 4 subtable starts.
    :param codes: An iterable of integer character codes.
    :return: A generator yielding one glyph id per code, in order. 0 is
        yielded for codes that are not covered by any segment.
    :raises ValueError: If the length declared by the subtable is too small
        to hold its four per-segment arrays.
    '''
    # The three fields after the 2 byte format field. The third one is twice
    # the number of segments, hence the division below.
    length, language, segcount = struct.unpack_from(b'>3H', table, bmp+2)
    array_len = segcount // 2
    array_sz = 2*array_len
    array = b'>%dH'%array_len

    # The per-segment arrays start after seven 16 bit header fields
    offset = bmp + 7*2
    end_count = struct.unpack_from(array, table, offset)
    offset += array_sz + 2  # the extra 2 bytes skip the pad after end codes
    start_count = struct.unpack_from(array, table, offset)
    offset += array_sz
    # Deltas are read as signed 16 bit values
    id_delta = struct.unpack_from(array.replace(b'H', b'h'), table, offset)
    offset += array_sz
    range_offset = struct.unpack_from(array, table, offset)
    if length + bmp < offset + array_sz:
        raise ValueError('cmap subtable length is too small')
    # Whatever remains of the subtable is the glyph id array
    glyph_id_len = (length + bmp - (offset + array_sz))//2
    glyph_id_map = struct.unpack_from(b'>%dH'%glyph_id_len, table, offset +
            array_sz)

    for code in codes:
        found = False
        for i, ec in enumerate(end_count):
            if ec >= code:
                sc = start_count[i]
                if sc <= code:
                    found = True
                    ro = range_offset[i]
                    if ro == 0:
                        glyph_id = id_delta[i] + code
                    else:
                        # ro is a byte offset relative to the position of
                        # range_offset[i] itself; convert it to an index
                        # into glyph_id_map
                        idx = ro//2 + (code - sc) + i - array_len
                        glyph_id = glyph_id_map[idx]
                        if glyph_id != 0:
                            glyph_id += id_delta[i]
                    # Glyph ids are 16 bit values, so the arithmetic is
                    # modulo 65536. (Reducing modulo 0x1000 corrupted every
                    # glyph id >= 4096.)
                    yield glyph_id % 0x10000
                    break
        if not found:
            yield 0
|
||||
|
||||
def get_glyph_ids(raw, text, raw_is_table=False):
    '''
    Generate the glyph id of every character in ``text`` using the font's
    format 4 cmap subtable (platform id 3, encoding id 1).

    :param raw: The raw font data, or the raw cmap table itself when
        ``raw_is_table`` is True.
    :param text: A unicode object.
    :param raw_is_table: Whether ``raw`` already is the cmap table.
    :return: A generator yielding one glyph id per character of ``text``.
    :raises TypeError: If ``text`` is not a unicode object.
    :raises UnsupportedFont: If there is no cmap table or no format 4
        subtable for platform id 3, encoding id 1.
    '''
    if not isinstance(text, unicode):
        raise TypeError('%r is not a unicode object'%text)

    table = raw if raw_is_table else get_table(raw, 'cmap')[0]
    if table is None:
        raise UnsupportedFont('Not a supported font, has no cmap table')

    version, num_tables = struct.unpack_from(b'>HH', table)
    bmp_table = None
    # Encoding records are 8 bytes each and follow the 4 byte cmap header
    for record in xrange(num_tables):
        platform_id, encoding_id, offset = struct.unpack_from(
            b'>HHL', table, 4 + (record*8))
        if (platform_id, encoding_id) != (3, 1):
            continue
        table_format = struct.unpack_from(b'>H', table, offset)[0]
        if table_format == 4:
            bmp_table = offset
            break

    if bmp_table is None:
        raise UnsupportedFont('Not a supported font, has no format 4 cmap table')

    codes = map(ord, text)
    for glyph_id in get_bmp_glyph_ids(table, bmp_table, codes):
        yield glyph_id
|
||||
|
||||
def supports_text(raw, text, has_only_printable_chars=False):
    '''
    Check whether the font in ``raw`` has a glyph for every character of
    ``text``.

    :param raw: The raw font data.
    :param text: A unicode object.
    :param has_only_printable_chars: If False (the default), ``text`` is
        first filtered through get_printable_characters(). Pass True when the
        caller has already done that.
    :return: True iff no character of ``text`` maps to glyph id 0. False is
        also returned if the glyph lookup fails for any reason (for example
        an unsupported or malformed font).
    :raises TypeError: If ``text`` is not a unicode object.
    '''
    if not isinstance(text, unicode):
        raise TypeError('%r is not a unicode object'%text)
    if not has_only_printable_chars:
        text = get_printable_characters(text)
    try:
        return all(glyph_id != 0 for glyph_id in get_glyph_ids(raw, text))
    except Exception:
        # Best effort: a font that cannot be parsed is reported as not
        # supporting the text. Only Exception is caught so that
        # KeyboardInterrupt and SystemExit still propagate.
        return False
|
||||
|
||||
def get_font_for_text(text, candidate_font_data=None):
|
||||
ok = False
|
||||
if candidate_font_data is not None:
|
||||
from calibre.utils.fonts.free_type import FreeType, FreeTypeError
|
||||
ft = FreeType()
|
||||
try:
|
||||
font = ft.load_font(candidate_font_data)
|
||||
ok = font.supports_text(text)
|
||||
except FreeTypeError:
|
||||
ok = True
|
||||
ok = supports_text(candidate_font_data, text)
|
||||
if not ok:
|
||||
from calibre.utils.fonts.scanner import font_scanner
|
||||
family, faces = font_scanner.find_font_for_text(text)
|
||||
@ -285,7 +361,40 @@ def get_font_for_text(text, candidate_font_data=None):
|
||||
candidate_font_data = f.read()
|
||||
return candidate_font_data
|
||||
|
||||
def test_glyph_ids():
    '''
    Self-test: check that get_glyph_ids() yields the same glyph ids as
    FreeType for a sample string, using the bundled Liberation Serif font.

    :raises Exception: If the two sequences of glyph ids differ.
    '''
    from calibre.utils.fonts.free_type import FreeType
    font_data = P('fonts/liberation/LiberationSerif-Regular.ttf', data=True)
    face = FreeType().load_font(font_data)
    sample = u'诶йab'
    expected = tuple(face.glyph_ids(sample))
    actual = tuple(get_glyph_ids(font_data, sample))
    if expected != actual:
        raise Exception('My code and FreeType differ on the glyph ids')
|
||||
|
||||
def test_supports_text():
    '''
    Self-test: check that supports_text() accepts text the bundled
    calibreSymbols font is expected to cover and rejects plain ASCII letters.

    :raises RuntimeError: If either check gives the wrong answer.
    '''
    font_data = P('fonts/calibreSymbols.otf', data=True)
    # (sample text, expected answer, error to report on a mismatch)
    expectations = (
        ('.\u2605★', True,
            'Incorrectly returning that text is not supported'),
        ('abc', False,
            'Incorrectly claiming that text is supported'),
    )
    for sample, should_support, message in expectations:
        if bool(supports_text(font_data, sample)) != should_support:
            raise RuntimeError(message)
|
||||
|
||||
def test_find_font():
    '''
    Print the font family that the font scanner selects for a sample of
    Chinese text and for a sample of Arabic text.
    '''
    from calibre.utils.fonts.scanner import font_scanner
    abcd = '诶比西迪'
    family = font_scanner.find_font_for_text(abcd)[0]
    print ('Family for Chinese text:', family)
    # The sample must be switched *before* the second lookup, otherwise the
    # Chinese text is looked up twice and reported as the Arabic result.
    abcd = 'لوحة المفاتيح العربية'
    family = font_scanner.find_font_for_text(abcd)[0]
    print ('Family for Arabic text:', family)
|
||||
|
||||
|
||||
def test():
    '''
    Run the module's self-tests in order. An exception raised by any of them
    propagates to the caller.
    '''
    for check in (test_glyph_ids, test_supports_text, test_find_font):
        check()
|
||||
|
||||
def main():
|
||||
import sys, os
|
||||
for f in sys.argv[1:]:
|
||||
print (os.path.basename(f))
|
||||
@ -299,5 +408,5 @@ def test():
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test()
|
||||
main()
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user