From 24a9d26176a26b72fdbf3ea827bc80df6b92fe2b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 1 Oct 2012 13:34:53 +0530 Subject: [PATCH] Do not use fontconfig on windows --- src/calibre/utils/fonts/__init__.py | 180 +++++++------------------- src/calibre/utils/fonts/fc.py | 168 +++++++++++++++++++++++++ src/calibre/utils/fonts/utils.py | 181 ++++++++++++++++++++------- src/calibre/utils/fonts/win_fonts.py | 32 ++++- 4 files changed, 376 insertions(+), 185 deletions(-) create mode 100644 src/calibre/utils/fonts/fc.py diff --git a/src/calibre/utils/fonts/__init__.py b/src/calibre/utils/fonts/__init__.py index 7b4f0abea4..c847718153 100644 --- a/src/calibre/utils/fonts/__init__.py +++ b/src/calibre/utils/fonts/__init__.py @@ -6,71 +6,22 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os, sys +from calibre.constants import iswindows -from calibre.constants import plugins, iswindows, islinux, isbsd - -_fc, _fc_err = plugins['fontconfig'] - -if _fc is None: - raise RuntimeError('Failed to load fontconfig with error:'+_fc_err) - -if islinux or isbsd: - Thread = object -else: - from threading import Thread - -class FontConfig(Thread): +class Fonts(object): def __init__(self): - Thread.__init__(self) - self.daemon = True - self.failed = False + if iswindows: + from calibre.utils.fonts.win_fonts import load_winfonts + self.backend = load_winfonts() + else: + from calibre.utils.fonts.fc import fontconfig + self.backend = fontconfig - def run(self): - config = None - if getattr(sys, 'frameworks_dir', False): - config_dir = os.path.join(os.path.dirname( - getattr(sys, 'frameworks_dir')), 'Resources', 'fonts') - if isinstance(config_dir, unicode): - config_dir = config_dir.encode(sys.getfilesystemencoding()) - config = os.path.join(config_dir, 'fonts.conf') - if iswindows and getattr(sys, 'frozen', False): - config_dir = os.path.join(os.path.dirname(sys.executable), - 'fontconfig') - if isinstance(config_dir, unicode): - config_dir = config_dir.encode(sys.getfilesystemencoding()) - config = os.path.join(config_dir, 'fonts.conf') - try: - _fc.initialize(config) - except: - import traceback - traceback.print_exc() - self.failed = True - - def wait(self): - if not (islinux or isbsd): - self.join() - if self.failed: - raise RuntimeError('Failed to initialize fontconfig') - - def find_font_families(self, allowed_extensions=['ttf', 'otf']): - ''' - Return an alphabetically sorted list of font families available on the system. - - `allowed_extensions`: A list of allowed extensions for font file types. Defaults to - `['ttf', 'otf']`. If it is empty, it is ignored. - ''' - self.wait() - ans = _fc.find_font_families([bytes('.'+x) for x in allowed_extensions]) - ans = sorted(set(ans), cmp=lambda x,y:cmp(x.lower(), y.lower())) - ans2 = [] - for x in ans: - try: - ans2.append(x.decode('utf-8')) - except UnicodeDecodeError: - continue - return ans2 + def find_font_families(self, allowed_extensions={'ttf', 'otf'}): + if iswindows: + return self.backend.font_families() + return self.backend.find_font_families(allowed_extensions=allowed_extensions) def files_for_family(self, family, normalize=True): ''' @@ -80,89 +31,42 @@ class FontConfig(Thread): they are a tuple (slant, weight) otherwise they are strings from the set `('normal', 'bold', 'italic', 'bi', 'light', 'li')` ''' - self.wait() - if isinstance(family, unicode): - family = family.encode('utf-8') - fonts = {} - ofamily = str(family).decode('utf-8') - for fullname, path, style, nfamily, weight, slant in \ - _fc.files_for_family(str(family)): - style = (slant, weight) - if normalize: - italic = slant > 0 - normal = weight == 80 - bold = weight > 80 - if italic: - style = 'italic' if normal else 'bi' if bold else 'li' - else: - style = 'normal' if normal else 'bold' if bold else 'light' - try: - fullname, path = fullname.decode('utf-8'), path.decode('utf-8') - nfamily = nfamily.decode('utf-8') - except UnicodeDecodeError: - continue - if style in fonts: - if nfamily.lower().strip() == ofamily.lower().strip() \ - and 'Condensed' not in fullname and 'ExtraLight' not in fullname: - fonts[style] = (path, fullname) - else: - fonts[style] = (path, fullname) + if iswindows: + from calibre.ptempfile import PersistentTemporaryFile + fonts = self.backend.fonts_for_family(family, normalize=normalize) + ans = {} + for ft, val in fonts.iteritems(): + ext, name, data = val + pt = PersistentTemporaryFile('.'+ext) + pt.write(data) + pt.close() + ans[ft] = (name, pt.name) + return ans + return self.backend.files_for_family(family, normalize=normalize) - return fonts - - def match(self, name, all=False, verbose=False): + def fonts_for_family(self, family, normalize=True): ''' - Find the system font that most closely matches `name`, where `name` is a specification - of the form:: - familyname-::... + Just like files for family, except that it returns 3-tuples of the form + (extension, full name, font data). + ''' + if iswindows: + return self.backend.fonts_for_family(family, normalize=normalize) + files = self.backend.files_for_family(family, normalize=normalize) + ans = {} + for ft, val in files.iteritems(): + name, f = val + ext = f.rpartition('.')[-1].lower() + ans[ft] = (ext, name, open(f, 'rb').read()) + return ans - For example, `verdana:weight=bold:slant=italic` - - Returns a list of dictionaries, or a single dictionary. - Each dictionary has the keys: - 'weight', 'slant', 'family', 'file', 'fullname', 'style' - - `all`: If `True` return a sorted list of matching fonts, where the sort - is in order of decreasing closeness of matching. If `False` only the - best match is returned. ''' - self.wait() - if isinstance(name, unicode): - name = name.encode('utf-8') - fonts = [] - for fullname, path, style, family, weight, slant in \ - _fc.match(str(name), bool(all), bool(verbose)): - try: - fullname = fullname.decode('utf-8') - path = path.decode('utf-8') - style = style.decode('utf-8') - family = family.decode('utf-8') - fonts.append({ - 'fullname' : fullname, - 'path' : path, - 'style' : style, - 'family' : family, - 'weight' : weight, - 'slant' : slant - }) - except UnicodeDecodeError: - continue - return fonts if all else (fonts[0] if fonts else None) - -fontconfig = FontConfig() -if islinux or isbsd: - # On X11 Qt also uses fontconfig, so initialization must happen in the - # main thread. In any case on X11 initializing fontconfig should be very - # fast - fontconfig.run() -else: - fontconfig.start() +fontconfig = Fonts() def test(): - from pprint import pprint; - pprint(fontconfig.find_font_families()) - pprint(fontconfig.files_for_family('liberation serif')) + import os + print(fontconfig.find_font_families()) m = 'times new roman' if iswindows else 'liberation serif' - pprint(fontconfig.match(m+':slant=italic:weight=bold', verbose=True)) + for ft, val in fontconfig.files_for_family(m).iteritems(): + print val[0], ft, val[1], os.path.getsize(val[1]) if __name__ == '__main__': test() diff --git a/src/calibre/utils/fonts/fc.py b/src/calibre/utils/fonts/fc.py new file mode 100644 index 0000000000..a79b0e1963 --- /dev/null +++ b/src/calibre/utils/fonts/fc.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import os, sys + +from calibre.constants import plugins, iswindows, islinux, isbsd + +_fc, _fc_err = plugins['fontconfig'] + +if _fc is None: + raise RuntimeError('Failed to load fontconfig with error:'+_fc_err) + +if islinux or isbsd: + Thread = object +else: + from threading import Thread + +class FontConfig(Thread): + + def __init__(self): + Thread.__init__(self) + self.daemon = True + self.failed = False + + def run(self): + config = None + if getattr(sys, 'frameworks_dir', False): + config_dir = os.path.join(os.path.dirname( + getattr(sys, 'frameworks_dir')), 'Resources', 'fonts') + if isinstance(config_dir, unicode): + config_dir = config_dir.encode(sys.getfilesystemencoding()) + config = os.path.join(config_dir, 'fonts.conf') + if iswindows and getattr(sys, 'frozen', False): + config_dir = os.path.join(os.path.dirname(sys.executable), + 'fontconfig') + if isinstance(config_dir, unicode): + config_dir = config_dir.encode(sys.getfilesystemencoding()) + config = os.path.join(config_dir, 'fonts.conf') + try: + _fc.initialize(config) + except: + import traceback + traceback.print_exc() + self.failed = True + + def wait(self): + if not (islinux or isbsd): + self.join() + if self.failed: + raise RuntimeError('Failed to initialize fontconfig') + + def find_font_families(self, allowed_extensions={'ttf', 'otf'}): + ''' + Return an alphabetically sorted list of font families available on the system. + + `allowed_extensions`: A list of allowed extensions for font file types. Defaults to + `['ttf', 'otf']`. If it is empty, it is ignored. + ''' + self.wait() + ans = _fc.find_font_families([bytes('.'+x) for x in allowed_extensions]) + ans = sorted(set(ans), cmp=lambda x,y:cmp(x.lower(), y.lower())) + ans2 = [] + for x in ans: + try: + ans2.append(x.decode('utf-8')) + except UnicodeDecodeError: + continue + return ans2 + + def files_for_family(self, family, normalize=True): + ''' + Find all the variants in the font family `family`. + Returns a dictionary of tuples. Each tuple is of the form (Full font name, path to font file). + The keys of the dictionary depend on `normalize`. If `normalize` is `False`, + they are a tuple (slant, weight) otherwise they are strings from the set + `('normal', 'bold', 'italic', 'bi', 'light', 'li')` + ''' + self.wait() + if isinstance(family, unicode): + family = family.encode('utf-8') + fonts = {} + ofamily = str(family).decode('utf-8') + for fullname, path, style, nfamily, weight, slant in \ + _fc.files_for_family(str(family)): + style = (slant, weight) + if normalize: + italic = slant > 0 + normal = weight == 80 + bold = weight > 80 + if italic: + style = 'italic' if normal else 'bi' if bold else 'li' + else: + style = 'normal' if normal else 'bold' if bold else 'light' + try: + fullname, path = fullname.decode('utf-8'), path.decode('utf-8') + nfamily = nfamily.decode('utf-8') + except UnicodeDecodeError: + continue + if style in fonts: + if nfamily.lower().strip() == ofamily.lower().strip() \ + and 'Condensed' not in fullname and 'ExtraLight' not in fullname: + fonts[style] = (path, fullname) + else: + fonts[style] = (path, fullname) + + return fonts + + def match(self, name, all=False, verbose=False): + ''' + Find the system font that most closely matches `name`, where `name` is a specification + of the form:: + familyname-::... + + For example, `verdana:weight=bold:slant=italic` + + Returns a list of dictionaries, or a single dictionary. + Each dictionary has the keys: + 'weight', 'slant', 'family', 'file', 'fullname', 'style' + + `all`: If `True` return a sorted list of matching fonts, where the sort + is in order of decreasing closeness of matching. If `False` only the + best match is returned. ''' + self.wait() + if isinstance(name, unicode): + name = name.encode('utf-8') + fonts = [] + for fullname, path, style, family, weight, slant in \ + _fc.match(str(name), bool(all), bool(verbose)): + try: + fullname = fullname.decode('utf-8') + path = path.decode('utf-8') + style = style.decode('utf-8') + family = family.decode('utf-8') + fonts.append({ + 'fullname' : fullname, + 'path' : path, + 'style' : style, + 'family' : family, + 'weight' : weight, + 'slant' : slant + }) + except UnicodeDecodeError: + continue + return fonts if all else (fonts[0] if fonts else None) + +fontconfig = FontConfig() +if islinux or isbsd: + # On X11 Qt also uses fontconfig, so initialization must happen in the + # main thread. In any case on X11 initializing fontconfig should be very + # fast + fontconfig.run() +else: + fontconfig.start() + +def test(): + from pprint import pprint; + pprint(fontconfig.find_font_families()) + pprint(fontconfig.files_for_family('liberation serif')) + m = 'times new roman' if iswindows else 'liberation serif' + pprint(fontconfig.match(m+':slant=italic:weight=bold', verbose=True)) + +if __name__ == '__main__': + test() diff --git a/src/calibre/utils/fonts/utils.py b/src/calibre/utils/fonts/utils.py index 085373318b..6822cbe4dd 100644 --- a/src/calibre/utils/fonts/utils.py +++ b/src/calibre/utils/fonts/utils.py @@ -7,7 +7,9 @@ __license__ = 'GPL v3' __copyright__ = '2012, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import sys, struct +import struct +from io import BytesIO +from collections import defaultdict class UnsupportedFont(ValueError): pass @@ -16,75 +18,170 @@ def is_truetype_font(raw): sfnt_version = raw[:4] return (sfnt_version in {b'\x00\x01\x00\x00', b'OTTO'}, sfnt_version) -def get_font_characteristics(raw): +def get_table(raw, name): + ''' Get the raw table bytes for the specified table in the font ''' num_tables = struct.unpack_from(b'>H', raw, 4)[0] - - # Find OS/2 table - offset = 4 + 4*2 # Start of the Table record entries - os2_table_offset = None + offset = 4*3 # start of the table record entries + table_offset = table_checksum = table_length = table_index = table = None + name = bytes(name.lower()) for i in xrange(num_tables): table_tag = raw[offset:offset+4] - if table_tag == b'OS/2': - os2_table_offset = struct.unpack_from(b'>I', raw, offset+8)[0] + if table_tag.lower() == name: + table_checksum, table_offset, table_length = struct.unpack_from( + b'>3L', raw, offset+4) + table_index = offset break - offset += 16 # Size of a table record - if os2_table_offset is None: + offset += 4*4 + if table_offset is not None: + table = raw[table_offset:table_offset+table_length] + return table, table_index, table_offset, table_checksum + +def get_font_characteristics(raw): + ''' + Return (weight, is_italic, is_bold, is_regular, fs_type). These values are taken + from the OS/2 table of the font. See + http://www.microsoft.com/typography/otspec/os2.htm for details + ''' + os2_table = get_table(raw, 'os/2')[0] + if os2_table is None: raise UnsupportedFont('Not a supported font, has no OS/2 table') - common_fields = b'>HhHHHhhhhhhhhhhh' + common_fields = b'>Hh3H11h' (version, char_width, weight, width, fs_type, subscript_x_size, subscript_y_size, subscript_x_offset, subscript_y_offset, superscript_x_size, superscript_y_size, superscript_x_offset, superscript_y_offset, strikeout_size, strikeout_position, - family_class) = struct.unpack_from(common_fields, - raw, os2_table_offset) - offset = os2_table_offset + struct.calcsize(common_fields) - panose = struct.unpack_from(b'>'+b'B'*10, raw, offset) + family_class) = struct.unpack_from(common_fields, os2_table) + offset = struct.calcsize(common_fields) + panose = struct.unpack_from(b'>10B', os2_table, offset) panose offset += 10 - (range1,) = struct.unpack_from(b'>L', raw, offset) + (range1,) = struct.unpack_from(b'>L', os2_table, offset) offset += struct.calcsize(b'>L') if version > 0: - range2, range3, range4 = struct.unpack_from(b'>LLL', raw, offset) - offset += struct.calcsize(b'>LLL') - vendor_id = raw[offset:offset+4] + range2, range3, range4 = struct.unpack_from(b'>3L', os2_table, offset) + offset += struct.calcsize(b'>3L') + vendor_id = os2_table[offset:offset+4] vendor_id offset += 4 - selection, = struct.unpack_from(b'>H', raw, offset) + selection, = struct.unpack_from(b'>H', os2_table, offset) is_italic = (selection & 0b1) != 0 is_bold = (selection & 0b100000) != 0 is_regular = (selection & 0b1000000) != 0 - return weight, is_italic, is_bold, is_regular + return weight, is_italic, is_bold, is_regular, fs_type + +def decode_name_record(recs): + ''' + Get the English names of this font. See + http://www.microsoft.com/typography/otspec/name.htm for details. + ''' + if not recs: return None + unicode_names = {} + windows_names = {} + mac_names = {} + for platform_id, encoding_id, language_id, src in recs: + if language_id > 0x8000: continue + if platform_id == 0: + if encoding_id < 4: + try: + unicode_names[language_id] = src.decode('utf-16-be') + except ValueError: + continue + elif platform_id == 1: + try: + mac_names[language_id] = src.decode('utf-8') + except ValueError: + continue + elif platform_id == 2: + codec = {0:'ascii', 1:'utf-16-be', 2:'iso-8859-1'}.get(encoding_id, + None) + if codec is None: continue + try: + unicode_names[language_id] = src.decode(codec) + except ValueError: + continue + elif platform_id == 3: + codec = {1:16, 10:32}.get(encoding_id, None) + if codec is None: continue + try: + windows_names[language_id] = src.decode('utf-%d-be'%codec) + except ValueError: + continue + + # First try the windows names + # First look for the US English name + if 1033 in windows_names: + return windows_names[1033] + # Look for some other english name variant + for lang in (3081, 10249, 4105, 9225, 16393, 6153, 8201, 17417, 5129, + 13321, 18441, 7177, 11273, 2057, 12297): + if lang in windows_names: + return windows_names[lang] + + # Look for Mac name + if 0 in mac_names: + return mac_names[0] + + # Use unicode names + for val in unicode_names.itervalues(): + return val + + return None + +def get_font_names(raw): + table = get_table(raw, 'name')[0] + if table is None: + raise UnsupportedFont('Not a supported font, has no name table') + table_type, count, string_offset = struct.unpack_from(b'>3H', table) + + records = defaultdict(list) + + for i in xrange(count): + try: + platform_id, encoding_id, language_id, name_id, length, offset = \ + struct.unpack_from(b'>6H', table, 6+i*12) + except struct.error: + break + offset += string_offset + src = table[offset:offset+length] + records[name_id].append((platform_id, encoding_id, language_id, + src)) + + family_name = decode_name_record(records[1]) + subfamily_name = decode_name_record(records[2]) + full_name = decode_name_record(records[4]) + + return family_name, subfamily_name, full_name + def remove_embed_restriction(raw): - sfnt_version = raw[:4] - if sfnt_version not in {b'\x00\x01\x00\x00', b'OTTO'}: - raise UnsupportedFont('Not a supported font, sfnt_version: %r'%sfnt_version) + ok, sig = is_truetype_font(raw) + if not ok: + raise UnsupportedFont('Not a supported font, sfnt_version: %r'%sig) - num_tables = struct.unpack_from(b'>H', raw, 4)[0] - - # Find OS/2 table - offset = 4 + 4*2 # Start of the Table record entries - os2_table_offset = None - for i in xrange(num_tables): - table_tag = raw[offset:offset+4] - if table_tag == b'OS/2': - os2_table_offset = struct.unpack_from(b'>I', raw, offset+8)[0] - break - offset += 16 # Size of a table record - if os2_table_offset is None: + table, table_index, table_offset = get_table(raw, 'os/2') + if table is None: raise UnsupportedFont('Not a supported font, has no OS/2 table') - version, = struct.unpack_from(b'>H', raw, os2_table_offset) - - fs_type_offset = os2_table_offset + struct.calcsize(b'>HhHH') - fs_type = struct.unpack_from(b'>H', raw, fs_type_offset)[0] + fs_type_offset = struct.calcsize(b'>HhHH') + fs_type = struct.unpack_from(b'>H', table, fs_type_offset)[0] if fs_type == 0: return raw - return raw[:fs_type_offset] + struct.pack(b'>H', 0) + raw[fs_type_offset+2:] + f = BytesIO(raw) + f.seek(fs_type_offset + table_offset) + f.write(struct.pack(b'>H', 0)) + return f.getvalue() + +def test(): + import sys, os + for f in sys.argv[1:]: + print (os.path.basename(f)) + raw = open(f, 'rb').read() + print (get_font_names(raw)) + print (get_font_characteristics(raw)) if __name__ == '__main__': - raw = remove_embed_restriction(open(sys.argv[-1], 'rb').read()) + test() diff --git a/src/calibre/utils/fonts/win_fonts.py b/src/calibre/utils/fonts/win_fonts.py index 41e0081627..bcfa40758b 100644 --- a/src/calibre/utils/fonts/win_fonts.py +++ b/src/calibre/utils/fonts/win_fonts.py @@ -12,7 +12,7 @@ from itertools import product from calibre import prints from calibre.constants import plugins -from calibre.utils.fonts.utils import (is_truetype_font, +from calibre.utils.fonts.utils import (is_truetype_font, get_font_names, get_font_characteristics) class WinFonts(object): @@ -57,13 +57,18 @@ class WinFonts(object): ext = 'otf' if sig == b'OTTO' else 'ttf' try: - weight, is_italic, is_bold, is_regular = get_font_characteristics(data) + weight, is_italic, is_bold, is_regular = get_font_characteristics(data)[:4] except Exception as e: prints('Failed to get font characteristic for font: %s [%s]' ' with error: %s'%(family, self.get_normalized_name(is_italic, weight), e)) continue + try: + family_name, sub_family_name, full_name = get_font_names(data) + except: + pass + if normalize: ft = {(True, True):'bi', (True, False):'italic', (False, True):'bold', (False, False):'normal'}[(is_italic, @@ -71,7 +76,24 @@ class WinFonts(object): else: ft = (1 if is_italic else 0, weight//10) - ans[ft] = (ext, data) + if not (family_name or full_name): + # prints('Font %s [%s] has no names'%(family, + # self.get_normalized_name(is_italic, weight))) + family_name = family + name = full_name or family + ' ' + (sub_family_name or '') + + try: + name.encode('ascii') + except ValueError: + try: + sub_family_name.encode('ascii') + subf = sub_family_name + except: + subf = '' + + name = family + ((' ' + subf) if subf else '') + + ans[ft] = (ext, name, data) return ans @@ -105,8 +127,8 @@ if __name__ == '__main__': print (families) for family in families: - print (family + ':') + prints(family + ':') for font, data in w.fonts_for_family(family).iteritems(): - print (' ', font, data[0], len(data[1])) + prints(' ', font, data[0], data[1], len(data[2])) print ()