Do not use fontconfig on Windows

This commit is contained in:
Kovid Goyal 2012-10-01 13:34:53 +05:30
parent d24d34bef7
commit 24a9d26176
4 changed files with 376 additions and 185 deletions

View File

@ -6,71 +6,22 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, sys
from calibre.constants import iswindows
from calibre.constants import plugins, iswindows, islinux, isbsd
_fc, _fc_err = plugins['fontconfig']
if _fc is None:
raise RuntimeError('Failed to load fontconfig with error:'+_fc_err)
if islinux or isbsd:
Thread = object
else:
from threading import Thread
class FontConfig(Thread):
class Fonts(object):
def __init__(self):
Thread.__init__(self)
self.daemon = True
self.failed = False
if iswindows:
from calibre.utils.fonts.win_fonts import load_winfonts
self.backend = load_winfonts()
else:
from calibre.utils.fonts.fc import fontconfig
self.backend = fontconfig
def run(self):
config = None
if getattr(sys, 'frameworks_dir', False):
config_dir = os.path.join(os.path.dirname(
getattr(sys, 'frameworks_dir')), 'Resources', 'fonts')
if isinstance(config_dir, unicode):
config_dir = config_dir.encode(sys.getfilesystemencoding())
config = os.path.join(config_dir, 'fonts.conf')
if iswindows and getattr(sys, 'frozen', False):
config_dir = os.path.join(os.path.dirname(sys.executable),
'fontconfig')
if isinstance(config_dir, unicode):
config_dir = config_dir.encode(sys.getfilesystemencoding())
config = os.path.join(config_dir, 'fonts.conf')
try:
_fc.initialize(config)
except:
import traceback
traceback.print_exc()
self.failed = True
def wait(self):
if not (islinux or isbsd):
self.join()
if self.failed:
raise RuntimeError('Failed to initialize fontconfig')
def find_font_families(self, allowed_extensions=['ttf', 'otf']):
'''
Return an alphabetically sorted list of font families available on the system.
`allowed_extensions`: A list of allowed extensions for font file types. Defaults to
`['ttf', 'otf']`. If it is empty, it is ignored.
'''
self.wait()
ans = _fc.find_font_families([bytes('.'+x) for x in allowed_extensions])
ans = sorted(set(ans), cmp=lambda x,y:cmp(x.lower(), y.lower()))
ans2 = []
for x in ans:
try:
ans2.append(x.decode('utf-8'))
except UnicodeDecodeError:
continue
return ans2
def find_font_families(self, allowed_extensions={'ttf', 'otf'}):
if iswindows:
return self.backend.font_families()
return self.backend.find_font_families(allowed_extensions=allowed_extensions)
def files_for_family(self, family, normalize=True):
'''
@ -80,89 +31,42 @@ class FontConfig(Thread):
they are a tuple (slant, weight) otherwise they are strings from the set
`('normal', 'bold', 'italic', 'bi', 'light', 'li')`
'''
self.wait()
if isinstance(family, unicode):
family = family.encode('utf-8')
fonts = {}
ofamily = str(family).decode('utf-8')
for fullname, path, style, nfamily, weight, slant in \
_fc.files_for_family(str(family)):
style = (slant, weight)
if normalize:
italic = slant > 0
normal = weight == 80
bold = weight > 80
if italic:
style = 'italic' if normal else 'bi' if bold else 'li'
else:
style = 'normal' if normal else 'bold' if bold else 'light'
try:
fullname, path = fullname.decode('utf-8'), path.decode('utf-8')
nfamily = nfamily.decode('utf-8')
except UnicodeDecodeError:
continue
if style in fonts:
if nfamily.lower().strip() == ofamily.lower().strip() \
and 'Condensed' not in fullname and 'ExtraLight' not in fullname:
fonts[style] = (path, fullname)
else:
fonts[style] = (path, fullname)
if iswindows:
from calibre.ptempfile import PersistentTemporaryFile
fonts = self.backend.fonts_for_family(family, normalize=normalize)
ans = {}
for ft, val in fonts.iteritems():
ext, name, data = val
pt = PersistentTemporaryFile('.'+ext)
pt.write(data)
pt.close()
ans[ft] = (name, pt.name)
return ans
return self.backend.files_for_family(family, normalize=normalize)
return fonts
def match(self, name, all=False, verbose=False):
def fonts_for_family(self, family, normalize=True):
'''
Find the system font that most closely matches `name`, where `name` is a specification
of the form::
familyname-<pointsize>:<property1=value1>:<property2=value2>...
Just like files for family, except that it returns 3-tuples of the form
(extension, full name, font data).
'''
if iswindows:
return self.backend.fonts_for_family(family, normalize=normalize)
files = self.backend.files_for_family(family, normalize=normalize)
ans = {}
for ft, val in files.iteritems():
name, f = val
ext = f.rpartition('.')[-1].lower()
ans[ft] = (ext, name, open(f, 'rb').read())
return ans
For example, `verdana:weight=bold:slant=italic`
Returns a list of dictionaries, or a single dictionary.
Each dictionary has the keys:
'weight', 'slant', 'family', 'file', 'fullname', 'style'
`all`: If `True` return a sorted list of matching fonts, where the sort
is in order of decreasing closeness of matching. If `False` only the
best match is returned. '''
self.wait()
if isinstance(name, unicode):
name = name.encode('utf-8')
fonts = []
for fullname, path, style, family, weight, slant in \
_fc.match(str(name), bool(all), bool(verbose)):
try:
fullname = fullname.decode('utf-8')
path = path.decode('utf-8')
style = style.decode('utf-8')
family = family.decode('utf-8')
fonts.append({
'fullname' : fullname,
'path' : path,
'style' : style,
'family' : family,
'weight' : weight,
'slant' : slant
})
except UnicodeDecodeError:
continue
return fonts if all else (fonts[0] if fonts else None)
fontconfig = FontConfig()
if islinux or isbsd:
# On X11 Qt also uses fontconfig, so initialization must happen in the
# main thread. In any case on X11 initializing fontconfig should be very
# fast
fontconfig.run()
else:
fontconfig.start()
fontconfig = Fonts()
def test():
from pprint import pprint;
pprint(fontconfig.find_font_families())
pprint(fontconfig.files_for_family('liberation serif'))
import os
print(fontconfig.find_font_families())
m = 'times new roman' if iswindows else 'liberation serif'
pprint(fontconfig.match(m+':slant=italic:weight=bold', verbose=True))
for ft, val in fontconfig.files_for_family(m).iteritems():
print val[0], ft, val[1], os.path.getsize(val[1])
if __name__ == '__main__':
test()

View File

@ -0,0 +1,168 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, sys
from calibre.constants import plugins, iswindows, islinux, isbsd
# plugins['fontconfig'] yields (extension module, error message); the module is
# None when loading failed, in which case this module cannot be used at all.
_fc, _fc_err = plugins['fontconfig']
if _fc is None:
    raise RuntimeError('Failed to load fontconfig with error:'+_fc_err)

# Base class for FontConfig: a real Thread everywhere except linux/bsd, where
# a plain object is used instead.
if islinux or isbsd:
    Thread = object
else:
    from threading import Thread
class FontConfig(Thread):
    '''
    Interface to the fontconfig extension module (``_fc``).

    :meth:`run` initializes fontconfig. Every query method calls :meth:`wait`
    first, so no query reaches fontconfig before initialization has finished.
    '''

    def __init__(self):
        Thread.__init__(self)
        self.daemon = True
        # Set to True by run() if _fc.initialize() raises; checked by wait()
        self.failed = False

    @staticmethod
    def _conf_file(config_dir):
        ''' Return the path to fonts.conf inside `config_dir`, encoding
        `config_dir` with the filesystem encoding if it is unicode. '''
        if isinstance(config_dir, unicode):
            config_dir = config_dir.encode(sys.getfilesystemencoding())
        return os.path.join(config_dir, 'fonts.conf')

    def run(self):
        '''
        Initialize fontconfig.

        A bundled ``fonts.conf`` is passed to ``_fc.initialize`` when
        ``sys.frameworks_dir`` is set, or when running frozen on windows;
        otherwise ``None`` is passed. A failure is printed and recorded in
        ``self.failed`` instead of being propagated.
        '''
        config = None
        frameworks_dir = getattr(sys, 'frameworks_dir', False)
        if frameworks_dir:
            config = self._conf_file(os.path.join(
                os.path.dirname(frameworks_dir), 'Resources', 'fonts'))
        if iswindows and getattr(sys, 'frozen', False):
            config = self._conf_file(os.path.join(
                os.path.dirname(sys.executable), 'fontconfig'))
        try:
            _fc.initialize(config)
        except Exception:
            # Exception rather than a bare except, so that KeyboardInterrupt
            # and SystemExit are not swallowed
            import traceback
            traceback.print_exc()
            self.failed = True

    def wait(self):
        '''
        Block until initialization has finished (a no-op join on linux/bsd,
        where this object is not a thread).

        Raises `RuntimeError` if initialization failed.
        '''
        if not (islinux or isbsd):
            self.join()
        if self.failed:
            raise RuntimeError('Failed to initialize fontconfig')

    def find_font_families(self, allowed_extensions={'ttf', 'otf'}):
        '''
        Return an alphabetically sorted list of font families available on the system.

        `allowed_extensions`: An iterable of allowed extensions (without the
        leading dot) for font file types. Defaults to `{'ttf', 'otf'}`. If it
        is empty, it is ignored.

        Family names that are not valid UTF-8 are left out of the result.
        '''
        self.wait()
        ans = _fc.find_font_families([bytes('.'+x) for x in allowed_extensions])
        ans2 = []
        # Case-insensitive ordering of the de-duplicated names
        for x in sorted(set(ans), key=lambda name: name.lower()):
            try:
                ans2.append(x.decode('utf-8'))
            except UnicodeDecodeError:
                continue
        return ans2

    def files_for_family(self, family, normalize=True):
        '''
        Find all the variants in the font family `family`.

        Returns a dictionary of tuples. Each tuple is of the form
        (path to font file, Full font name).
        The keys of the dictionary depend on `normalize`. If `normalize` is `False`,
        they are a tuple (slant, weight) otherwise they are strings from the set
        `('normal', 'bold', 'italic', 'bi', 'light', 'li')`

        NOTE(review): this docstring previously documented the tuple as
        (Full font name, path to font file), the reverse of what the code has
        always stored. Callers that unpack the value as (name, path) get the
        fields swapped -- confirm which order callers rely on.
        '''
        self.wait()
        if isinstance(family, unicode):
            family = family.encode('utf-8')
        fonts = {}
        wanted = str(family).decode('utf-8').lower().strip()
        for fullname, path, style, nfamily, weight, slant in \
                _fc.files_for_family(str(family)):
            style = (slant, weight)
            if normalize:
                italic = slant > 0
                normal = weight == 80
                bold = weight > 80
                if italic:
                    style = 'italic' if normal else 'bi' if bold else 'li'
                else:
                    style = 'normal' if normal else 'bold' if bold else 'light'
            try:
                fullname, path = fullname.decode('utf-8'), path.decode('utf-8')
                nfamily = nfamily.decode('utf-8')
            except UnicodeDecodeError:
                continue
            # The first font seen for a style always wins its slot; a later
            # one replaces it only when its family name matches exactly and
            # it is neither a Condensed nor an ExtraLight face.
            if style not in fonts or (
                    nfamily.lower().strip() == wanted
                    and 'Condensed' not in fullname
                    and 'ExtraLight' not in fullname):
                fonts[style] = (path, fullname)
        return fonts

    def match(self, name, all=False, verbose=False):
        '''
        Find the system font that most closely matches `name`, where `name` is a specification
        of the form::
            familyname-<pointsize>:<property1=value1>:<property2=value2>...

        For example, `verdana:weight=bold:slant=italic`

        Returns a list of dictionaries, or a single dictionary.
        Each dictionary has the keys:
        'weight', 'slant', 'family', 'path', 'fullname', 'style'

        `all`: If `True` return a sorted list of matching fonts, where the sort
        is in order of decreasing closeness of matching. If `False` only the
        best match is returned, or `None` if there is no match.
        '''
        self.wait()
        if isinstance(name, unicode):
            name = name.encode('utf-8')
        fonts = []
        for fullname, path, style, family, weight, slant in \
                _fc.match(str(name), bool(all), bool(verbose)):
            try:
                fonts.append({
                    'fullname' : fullname.decode('utf-8'),
                    'path'     : path.decode('utf-8'),
                    'style'    : style.decode('utf-8'),
                    'family'   : family.decode('utf-8'),
                    'weight'   : weight,
                    'slant'    : slant
                    })
            except UnicodeDecodeError:
                # Skip fonts whose metadata is not valid UTF-8
                continue
        return fonts if all else (fonts[0] if fonts else None)
# Module level instance; it is initialized here, as a side effect of importing
# this module.
fontconfig = FontConfig()
if islinux or isbsd:
    # On X11 Qt also uses fontconfig, so initialization must happen in the
    # main thread. In any case on X11 initializing fontconfig should be very
    # fast
    fontconfig.run()
else:
    # Initialize in a background daemon thread; FontConfig.wait() joins it
    # before any query is answered.
    fontconfig.start()
def test():
    ''' Smoke test: pretty print the result of each of the query methods. '''
    import pprint
    show = pprint.pprint
    show(fontconfig.find_font_families())
    show(fontconfig.files_for_family('liberation serif'))
    if iswindows:
        family = 'times new roman'
    else:
        family = 'liberation serif'
    spec = family + ':slant=italic:weight=bold'
    show(fontconfig.match(spec, verbose=True))
# Run the smoke test when this module is executed as a script
if __name__ == '__main__':
    test()

View File

@ -7,7 +7,9 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, struct
import struct
from io import BytesIO
from collections import defaultdict
class UnsupportedFont(ValueError):
    ''' Raised when font data is not in a format this module supports. '''
@ -16,75 +18,170 @@ def is_truetype_font(raw):
sfnt_version = raw[:4]
return (sfnt_version in {b'\x00\x01\x00\x00', b'OTTO'}, sfnt_version)
def get_font_characteristics(raw):
def get_table(raw, name):
    '''
    Look up the table tagged `name` in the table directory of the font
    `raw`. Tags are compared case-insensitively.

    Returns (table bytes, offset of the directory record, offset of the table,
    table checksum). All four are None if the font has no such table.
    '''
    record_count, = struct.unpack_from(b'>H', raw, 4)
    wanted = bytes(name.lower())
    record_size = 4*4
    pos = 4*3  # the table records start here
    end = pos + record_count*record_size
    table = table_index = table_offset = table_checksum = None
    while pos < end:
        if raw[pos:pos+4].lower() == wanted:
            # The tag is followed by checksum, offset and length
            table_checksum, table_offset, table_length = struct.unpack_from(
                    b'>3L', raw, pos+4)
            table_index = pos
            table = raw[table_offset:table_offset+table_length]
            break
        pos += record_size
    return table, table_index, table_offset, table_checksum
def get_font_characteristics(raw):
    '''
    Return (weight, is_italic, is_bold, is_regular, fs_type). These values are taken
    from the OS/2 table of the font. See
    http://www.microsoft.com/typography/otspec/os2.htm for details

    Raises `UnsupportedFont` if the font has no OS/2 table.
    '''
    os2_table = get_table(raw, 'os/2')[0]
    if os2_table is None:
        raise UnsupportedFont('Not a supported font, has no OS/2 table')

    # version, xAvgCharWidth, usWeightClass, usWidthClass, fsType, the ten
    # subscript/superscript/strikeout metrics and sFamilyClass
    common_fields = b'>Hh3H11h'
    fields = struct.unpack_from(common_fields, os2_table)
    weight, fs_type = fields[2], fields[4]

    offset = struct.calcsize(common_fields)
    offset += 10  # panose
    # All four ulUnicodeRange fields are present in every version of the
    # table, including version 0, so they are skipped unconditionally.
    # Skipping only the first for version 0 reads fsSelection from the wrong
    # place.
    offset += struct.calcsize(b'>4L')
    offset += 4  # achVendID
    selection, = struct.unpack_from(b'>H', os2_table, offset)

    # fsSelection: bit 0 is italic, bit 5 is bold, bit 6 is regular
    is_italic = (selection & 0b1) != 0
    is_bold = (selection & 0b100000) != 0
    is_regular = (selection & 0b1000000) != 0
    return weight, is_italic, is_bold, is_regular, fs_type
def decode_name_record(recs):
    '''
    Get the English names of this font. See
    http://www.microsoft.com/typography/otspec/name.htm for details.

    `recs`: A list of (platform_id, encoding_id, language_id, raw bytes)
    tuples, all for the same name id.

    Returns the decoded name, or None if `recs` is empty or contains no
    usable record. Records that cannot be decoded are ignored.
    '''
    if not recs:
        return None
    unicode_names = {}
    windows_names = {}
    mac_names = {}

    for platform_id, encoding_id, language_id, src in recs:
        # Language ids from 0x8000 upwards refer to language-tag records
        # rather than to a fixed language, so 0x8000 itself must be skipped too
        if language_id >= 0x8000:
            continue
        if platform_id == 0:
            codec = 'utf-16-be' if encoding_id < 4 else None
            dest = unicode_names
        elif platform_id == 1:
            codec, dest = 'utf-8', mac_names
        elif platform_id == 2:
            codec = {0:'ascii', 1:'utf-16-be', 2:'iso-8859-1'}.get(encoding_id)
            dest = unicode_names
        elif platform_id == 3:
            bits = {1:16, 10:32}.get(encoding_id)
            codec = None if bits is None else 'utf-%d-be'%bits
            dest = windows_names
        else:
            continue
        if codec is None:
            continue
        try:
            dest[language_id] = src.decode(codec)
        except ValueError:
            continue

    # First try the windows names
    # First look for the US English name
    if 1033 in windows_names:
        return windows_names[1033]
    # Look for some other english name variant
    for lang in (3081, 10249, 4105, 9225, 16393, 6153, 8201, 17417, 5129,
            13321, 18441, 7177, 11273, 2057, 12297):
        if lang in windows_names:
            return windows_names[lang]

    # Look for Mac name
    if 0 in mac_names:
        return mac_names[0]

    # Use unicode names: any one of them, or None if there are none
    return next(iter(unicode_names.values()), None)
def get_font_names(raw):
    '''
    Return (family name, subfamily name, full name) as read from the name
    table of the font `raw`. Each entry is None if it could not be
    determined.

    Raises `UnsupportedFont` if the font has no name table.
    '''
    table = get_table(raw, 'name')[0]
    if table is None:
        raise UnsupportedFont('Not a supported font, has no name table')
    header = struct.unpack_from(b'>3H', table)
    count, strings_start = header[1], header[2]

    by_name_id = defaultdict(list)
    for idx in xrange(count):
        try:
            # The 12 byte name records follow the 6 byte header
            rec = struct.unpack_from(b'>6H', table, 6 + idx*12)
        except struct.error:
            # Truncated table, use whatever was read so far
            break
        platform_id, encoding_id, language_id, name_id, length, rel = rec
        start = strings_start + rel
        by_name_id[name_id].append(
            (platform_id, encoding_id, language_id, table[start:start+length]))

    # Name ids: 1 is the family, 2 the subfamily and 4 the full name
    return tuple(decode_name_record(by_name_id[nid]) for nid in (1, 2, 4))
def remove_embed_restriction(raw):
    '''
    Return the font data `raw` with the fsType field of its OS/2 table set
    to zero. `raw` itself is returned if the field is already zero.

    Raises `UnsupportedFont` if `raw` is not a TrueType/OpenType font or has
    no OS/2 table.
    '''
    ok, sig = is_truetype_font(raw)
    if not ok:
        raise UnsupportedFont('Not a supported font, sfnt_version: %r'%sig)

    # get_table() returns a 4-tuple (table, index, offset, checksum);
    # unpacking it into three names raised ValueError on every call
    found = get_table(raw, 'os/2')
    table, table_offset = found[0], found[2]
    if table is None:
        raise UnsupportedFont('Not a supported font, has no OS/2 table')

    # fsType follows version, xAvgCharWidth, usWeightClass and usWidthClass
    fs_type_offset = struct.calcsize(b'>HhHH')
    fs_type = struct.unpack_from(b'>H', table, fs_type_offset)[0]
    if fs_type == 0:
        return raw

    # fs_type_offset is relative to the table, so add the table's own offset
    # to get the position in the font. The table checksum is left as it was.
    f = BytesIO(raw)
    f.seek(fs_type_offset + table_offset)
    f.write(struct.pack(b'>H', 0))
    return f.getvalue()
def test():
    ''' Print the names and OS/2 characteristics of every font file named on
    the command line. '''
    import sys, os
    for f in sys.argv[1:]:
        print (os.path.basename(f))
        # Close the file promptly instead of leaking the handle
        with open(f, 'rb') as src:
            raw = src.read()
        print (get_font_names(raw))
        print (get_font_characteristics(raw))
if __name__ == '__main__':
raw = remove_embed_restriction(open(sys.argv[-1], 'rb').read())
test()

View File

@ -12,7 +12,7 @@ from itertools import product
from calibre import prints
from calibre.constants import plugins
from calibre.utils.fonts.utils import (is_truetype_font,
from calibre.utils.fonts.utils import (is_truetype_font, get_font_names,
get_font_characteristics)
class WinFonts(object):
@ -57,13 +57,18 @@ class WinFonts(object):
ext = 'otf' if sig == b'OTTO' else 'ttf'
try:
weight, is_italic, is_bold, is_regular = get_font_characteristics(data)
weight, is_italic, is_bold, is_regular = get_font_characteristics(data)[:4]
except Exception as e:
prints('Failed to get font characteristic for font: %s [%s]'
' with error: %s'%(family,
self.get_normalized_name(is_italic, weight), e))
continue
try:
family_name, sub_family_name, full_name = get_font_names(data)
except:
pass
if normalize:
ft = {(True, True):'bi', (True, False):'italic', (False,
True):'bold', (False, False):'normal'}[(is_italic,
@ -71,7 +76,24 @@ class WinFonts(object):
else:
ft = (1 if is_italic else 0, weight//10)
ans[ft] = (ext, data)
if not (family_name or full_name):
# prints('Font %s [%s] has no names'%(family,
# self.get_normalized_name(is_italic, weight)))
family_name = family
name = full_name or family + ' ' + (sub_family_name or '')
try:
name.encode('ascii')
except ValueError:
try:
sub_family_name.encode('ascii')
subf = sub_family_name
except:
subf = ''
name = family + ((' ' + subf) if subf else '')
ans[ft] = (ext, name, data)
return ans
@ -105,8 +127,8 @@ if __name__ == '__main__':
print (families)
for family in families:
print (family + ':')
prints(family + ':')
for font, data in w.fonts_for_family(family).iteritems():
print (' ', font, data[0], len(data[1]))
prints(' ', font, data[0], data[1], len(data[2]))
print ()