Do not use fontconfig on Windows

2025-08-11 09:13:57 -04:00 · 2012-10-01 13:34:53 +05:30 · 2012-10-01 13:34:53 +05:30 · 24a9d26176
commit 24a9d26176
parent d24d34bef7
4 changed files with 376 additions and 185 deletions
--- a/src/calibre/utils/fonts/init.py
+++ b/src/calibre/utils/fonts/init.py
@ -6,71 +6,22 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os, sys
+from calibre.constants import iswindows

-from calibre.constants import plugins, iswindows, islinux, isbsd
-
-_fc, _fc_err = plugins['fontconfig']
-
-if _fc is None:
-    raise RuntimeError('Failed to load fontconfig with error:'+_fc_err)
-
-if islinux or isbsd:
-    Thread = object
-else:
-    from threading import Thread
-
-class FontConfig(Thread):
+class Fonts(object):

    def __init__(self):
-        Thread.__init__(self)
-        self.daemon = True
-        self.failed = False
+        if iswindows:
+            from calibre.utils.fonts.win_fonts import load_winfonts
+            self.backend = load_winfonts()
+        else:
+            from calibre.utils.fonts.fc import fontconfig
+            self.backend = fontconfig

-    def run(self):
-        config = None
-        if getattr(sys, 'frameworks_dir', False):
-            config_dir = os.path.join(os.path.dirname(
-                getattr(sys, 'frameworks_dir')), 'Resources', 'fonts')
-            if isinstance(config_dir, unicode):
-                config_dir = config_dir.encode(sys.getfilesystemencoding())
-            config = os.path.join(config_dir, 'fonts.conf')
-        if iswindows and getattr(sys, 'frozen', False):
-            config_dir = os.path.join(os.path.dirname(sys.executable),
-                'fontconfig')
-            if isinstance(config_dir, unicode):
-                config_dir = config_dir.encode(sys.getfilesystemencoding())
-            config = os.path.join(config_dir, 'fonts.conf')
-        try:
-            _fc.initialize(config)
-        except:
-            import traceback
-            traceback.print_exc()
-            self.failed = True
-
-    def wait(self):
-        if not (islinux or isbsd):
-            self.join()
-        if self.failed:
-            raise RuntimeError('Failed to initialize fontconfig')
-
-    def find_font_families(self, allowed_extensions=['ttf', 'otf']):
-        '''
-        Return an alphabetically sorted list of font families available on the system.
-
-        `allowed_extensions`: A list of allowed extensions for font file types. Defaults to
-        `['ttf', 'otf']`. If it is empty, it is ignored.
-        '''
-        self.wait()
-        ans = _fc.find_font_families([bytes('.'+x) for x in allowed_extensions])
-        ans = sorted(set(ans), cmp=lambda x,y:cmp(x.lower(), y.lower()))
-        ans2 = []
-        for x in ans:
-            try:
-                ans2.append(x.decode('utf-8'))
-            except UnicodeDecodeError:
-                continue
-        return ans2
+    def find_font_families(self, allowed_extensions={'ttf', 'otf'}):
+        if iswindows:
+            return self.backend.font_families()
+        return self.backend.find_font_families(allowed_extensions=allowed_extensions)

    def files_for_family(self, family, normalize=True):
        '''
@ -80,89 +31,42 @@ class FontConfig(Thread):
        they are a tuple (slant, weight) otherwise they are strings from the set
        `('normal', 'bold', 'italic', 'bi', 'light', 'li')`
        '''
-        self.wait()
-        if isinstance(family, unicode):
-            family = family.encode('utf-8')
-        fonts = {}
-        ofamily = str(family).decode('utf-8')
-        for fullname, path, style, nfamily, weight, slant in \
-            _fc.files_for_family(str(family)):
-            style = (slant, weight)
-            if normalize:
-                italic = slant > 0
-                normal = weight == 80
-                bold = weight > 80
-                if italic:
-                    style = 'italic' if normal else 'bi' if bold else 'li'
-                else:
-                    style = 'normal' if normal else 'bold' if bold else 'light'
-            try:
-                fullname, path = fullname.decode('utf-8'), path.decode('utf-8')
-                nfamily = nfamily.decode('utf-8')
-            except UnicodeDecodeError:
-                continue
-            if style in fonts:
-                if nfamily.lower().strip() == ofamily.lower().strip() \
-                and 'Condensed' not in fullname and 'ExtraLight' not in fullname:
-                    fonts[style] = (path, fullname)
-            else:
-                fonts[style] = (path, fullname)
+        if iswindows:
+            from calibre.ptempfile import PersistentTemporaryFile
+            fonts = self.backend.fonts_for_family(family, normalize=normalize)
+            ans = {}
+            for ft, val in fonts.iteritems():
+                ext, name, data = val
+                pt = PersistentTemporaryFile('.'+ext)
+                pt.write(data)
+                pt.close()
+                ans[ft] = (name, pt.name)
+            return ans
+        return self.backend.files_for_family(family, normalize=normalize)

-        return fonts
-
-    def match(self, name, all=False, verbose=False):
+    def fonts_for_family(self, family, normalize=True):
        '''
-        Find the system font that most closely matches `name`, where `name` is a specification
-        of the form::
-        familyname-<pointsize>:<property1=value1>:<property2=value2>...
+        Just like files_for_family(), except that it returns 3-tuples of the form
+        (extension, full name, font data).
+        '''
+        if iswindows:
+            return self.backend.fonts_for_family(family, normalize=normalize)
+        files = self.backend.files_for_family(family, normalize=normalize)
+        ans = {}
+        for ft, val in files.iteritems():
+            name, f = val
+            ext = f.rpartition('.')[-1].lower()
+            ans[ft] = (ext, name, open(f, 'rb').read())
+        return ans

-        For example, `verdana:weight=bold:slant=italic`
-
-        Returns a list of dictionaries, or a single dictionary.
-        Each dictionary has the keys:
-        'weight', 'slant', 'family', 'file', 'fullname', 'style'
-
-        `all`: If `True` return a sorted list of matching fonts, where the sort
-        is in order of decreasing closeness of matching. If `False` only the
-        best match is returned.        '''
-        self.wait()
-        if isinstance(name, unicode):
-            name = name.encode('utf-8')
-        fonts = []
-        for fullname, path, style, family, weight, slant in \
-            _fc.match(str(name), bool(all), bool(verbose)):
-            try:
-                fullname = fullname.decode('utf-8')
-                path = path.decode('utf-8')
-                style = style.decode('utf-8')
-                family = family.decode('utf-8')
-                fonts.append({
-                    'fullname' : fullname,
-                    'path'     : path,
-                    'style'    : style,
-                    'family'   : family,
-                    'weight'   : weight,
-                    'slant'    : slant
-                    })
-            except UnicodeDecodeError:
-                continue
-        return fonts if all else (fonts[0] if fonts else None)
-
-fontconfig = FontConfig()
-if islinux or isbsd:
-    # On X11 Qt also uses fontconfig, so initialization must happen in the
-    # main thread. In any case on X11 initializing fontconfig should be very
-    # fast
-    fontconfig.run()
-else:
-    fontconfig.start()
+fontconfig = Fonts()

 def test():
-    from pprint import pprint;
-    pprint(fontconfig.find_font_families())
-    pprint(fontconfig.files_for_family('liberation serif'))
+    import os
+    print(fontconfig.find_font_families())
    m = 'times new roman' if iswindows else 'liberation serif'
-    pprint(fontconfig.match(m+':slant=italic:weight=bold', verbose=True))
+    for ft, val in fontconfig.files_for_family(m).iteritems():
+        print val[0], ft, val[1], os.path.getsize(val[1])

 if __name__ == '__main__':
    test()
--- a/src/calibre/utils/fonts/fc.py
+++ b/src/calibre/utils/fonts/fc.py
@ -0,0 +1,168 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os, sys
+
+from calibre.constants import plugins, iswindows, islinux, isbsd
+
+_fc, _fc_err = plugins['fontconfig']
+
+if _fc is None:
+    raise RuntimeError('Failed to load fontconfig with error:'+_fc_err)
+
+if islinux or isbsd:
+    Thread = object
+else:
+    from threading import Thread
+
+class FontConfig(Thread):
+
+    def __init__(self):
+        Thread.__init__(self)
+        self.daemon = True
+        self.failed = False
+
+    def run(self):
+        config = None
+        if getattr(sys, 'frameworks_dir', False):
+            config_dir = os.path.join(os.path.dirname(
+                getattr(sys, 'frameworks_dir')), 'Resources', 'fonts')
+            if isinstance(config_dir, unicode):
+                config_dir = config_dir.encode(sys.getfilesystemencoding())
+            config = os.path.join(config_dir, 'fonts.conf')
+        if iswindows and getattr(sys, 'frozen', False):
+            config_dir = os.path.join(os.path.dirname(sys.executable),
+                'fontconfig')
+            if isinstance(config_dir, unicode):
+                config_dir = config_dir.encode(sys.getfilesystemencoding())
+            config = os.path.join(config_dir, 'fonts.conf')
+        try:
+            _fc.initialize(config)
+        except:
+            import traceback
+            traceback.print_exc()
+            self.failed = True
+
+    def wait(self):
+        if not (islinux or isbsd):
+            self.join()
+        if self.failed:
+            raise RuntimeError('Failed to initialize fontconfig')
+
+    def find_font_families(self, allowed_extensions={'ttf', 'otf'}):
+        '''
+        Return an alphabetically sorted list of font families available on the system.
+
+        `allowed_extensions`: A set of allowed extensions (without the leading dot) for font file types. Defaults to
+        `{'ttf', 'otf'}`. If it is empty, it is ignored.
+        '''
+        self.wait()
+        ans = _fc.find_font_families([bytes('.'+x) for x in allowed_extensions])
+        ans = sorted(set(ans), cmp=lambda x,y:cmp(x.lower(), y.lower()))
+        ans2 = []
+        for x in ans:
+            try:
+                ans2.append(x.decode('utf-8'))
+            except UnicodeDecodeError:
+                continue
+        return ans2
+
+    def files_for_family(self, family, normalize=True):
+        '''
+        Find all the variants in the font family `family`.
+        Returns a dictionary of tuples. Each tuple is of the form (path to font file, Full font name).
+        The keys of the dictionary depend on `normalize`. If `normalize` is `False`,
+        they are a tuple (slant, weight) otherwise they are strings from the set
+        `('normal', 'bold', 'italic', 'bi', 'light', 'li')`
+        '''
+        self.wait()
+        if isinstance(family, unicode):
+            family = family.encode('utf-8')
+        fonts = {}
+        ofamily = str(family).decode('utf-8')
+        for fullname, path, style, nfamily, weight, slant in \
+            _fc.files_for_family(str(family)):
+            style = (slant, weight)
+            if normalize:
+                italic = slant > 0
+                normal = weight == 80
+                bold = weight > 80
+                if italic:
+                    style = 'italic' if normal else 'bi' if bold else 'li'
+                else:
+                    style = 'normal' if normal else 'bold' if bold else 'light'
+            try:
+                fullname, path = fullname.decode('utf-8'), path.decode('utf-8')
+                nfamily = nfamily.decode('utf-8')
+            except UnicodeDecodeError:
+                continue
+            if style in fonts:
+                if nfamily.lower().strip() == ofamily.lower().strip() \
+                and 'Condensed' not in fullname and 'ExtraLight' not in fullname:
+                    fonts[style] = (path, fullname)
+            else:
+                fonts[style] = (path, fullname)
+
+        return fonts
+
+    def match(self, name, all=False, verbose=False):
+        '''
+        Find the system font that most closely matches `name`, where `name` is a specification
+        of the form::
+        familyname-<pointsize>:<property1=value1>:<property2=value2>...
+
+        For example, `verdana:weight=bold:slant=italic`
+
+        Returns a list of dictionaries, or a single dictionary.
+        Each dictionary has the keys:
+        'weight', 'slant', 'family', 'path', 'fullname', 'style'
+
+        `all`: If `True` return a sorted list of matching fonts, where the sort
+        is in order of decreasing closeness of matching. If `False` only the
+        best match is returned.        '''
+        self.wait()
+        if isinstance(name, unicode):
+            name = name.encode('utf-8')
+        fonts = []
+        for fullname, path, style, family, weight, slant in \
+            _fc.match(str(name), bool(all), bool(verbose)):
+            try:
+                fullname = fullname.decode('utf-8')
+                path = path.decode('utf-8')
+                style = style.decode('utf-8')
+                family = family.decode('utf-8')
+                fonts.append({
+                    'fullname' : fullname,
+                    'path'     : path,
+                    'style'    : style,
+                    'family'   : family,
+                    'weight'   : weight,
+                    'slant'    : slant
+                    })
+            except UnicodeDecodeError:
+                continue
+        return fonts if all else (fonts[0] if fonts else None)
+
+fontconfig = FontConfig()
+if islinux or isbsd:
+    # On X11 Qt also uses fontconfig, so initialization must happen in the
+    # main thread. In any case on X11 initializing fontconfig should be very
+    # fast
+    fontconfig.run()
+else:
+    fontconfig.start()
+
+def test():
+    from pprint import pprint;
+    pprint(fontconfig.find_font_families())
+    pprint(fontconfig.files_for_family('liberation serif'))
+    m = 'times new roman' if iswindows else 'liberation serif'
+    pprint(fontconfig.match(m+':slant=italic:weight=bold', verbose=True))
+
+if __name__ == '__main__':
+    test()
--- a/src/calibre/utils/fonts/utils.py
+++ b/src/calibre/utils/fonts/utils.py
@ -7,7 +7,9 @@ __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import sys, struct
+import struct
+from io import BytesIO
+from collections import defaultdict

 class UnsupportedFont(ValueError):
    pass
@ -16,75 +18,170 @@ def is_truetype_font(raw):
    sfnt_version = raw[:4]
    return (sfnt_version in {b'\x00\x01\x00\x00', b'OTTO'}, sfnt_version)

-def get_font_characteristics(raw):
+def get_table(raw, name):
+    ''' Get the raw table bytes for the specified table in the font '''
    num_tables = struct.unpack_from(b'>H', raw, 4)[0]
-
-    # Find OS/2 table
-    offset = 4 + 4*2 # Start of the Table record entries
-    os2_table_offset = None
+    offset = 4*3 # start of the table record entries
+    table_offset = table_checksum = table_length = table_index = table = None
+    name = bytes(name.lower())
    for i in xrange(num_tables):
        table_tag = raw[offset:offset+4]
-        if table_tag == b'OS/2':
-            os2_table_offset = struct.unpack_from(b'>I', raw, offset+8)[0]
+        if table_tag.lower() == name:
+            table_checksum, table_offset, table_length = struct.unpack_from(
+                    b'>3L', raw, offset+4)
+            table_index = offset
            break
-        offset += 16 # Size of a table record
-    if os2_table_offset is None:
+        offset += 4*4
+    if table_offset is not None:
+        table = raw[table_offset:table_offset+table_length]
+    return table, table_index, table_offset, table_checksum
+
+def get_font_characteristics(raw):
+    '''
+    Return (weight, is_italic, is_bold, is_regular, fs_type). These values are taken
+    from the OS/2 table of the font. See
+    http://www.microsoft.com/typography/otspec/os2.htm for details
+    '''
+    os2_table = get_table(raw, 'os/2')[0]
+    if os2_table is None:
        raise UnsupportedFont('Not a supported font, has no OS/2 table')

-    common_fields = b'>HhHHHhhhhhhhhhhh'
+    common_fields = b'>Hh3H11h'
    (version, char_width, weight, width, fs_type, subscript_x_size,
            subscript_y_size, subscript_x_offset, subscript_y_offset,
            superscript_x_size, superscript_y_size, superscript_x_offset,
            superscript_y_offset, strikeout_size, strikeout_position,
-            family_class) = struct.unpack_from(common_fields,
-                    raw, os2_table_offset)
-    offset = os2_table_offset + struct.calcsize(common_fields)
-    panose = struct.unpack_from(b'>'+b'B'*10, raw, offset)
+            family_class) = struct.unpack_from(common_fields, os2_table)
+    offset = struct.calcsize(common_fields)
+    panose = struct.unpack_from(b'>10B', os2_table, offset)
    panose
    offset += 10
-    (range1,) = struct.unpack_from(b'>L', raw, offset)
+    (range1,) = struct.unpack_from(b'>L', os2_table, offset)
    offset += struct.calcsize(b'>L')
    if version > 0:
-        range2, range3, range4 = struct.unpack_from(b'>LLL', raw, offset)
-        offset += struct.calcsize(b'>LLL')
-    vendor_id = raw[offset:offset+4]
+        range2, range3, range4 = struct.unpack_from(b'>3L', os2_table, offset)
+        offset += struct.calcsize(b'>3L')
+    vendor_id = os2_table[offset:offset+4]
    vendor_id
    offset += 4
-    selection, = struct.unpack_from(b'>H', raw, offset)
+    selection, = struct.unpack_from(b'>H', os2_table, offset)

    is_italic = (selection & 0b1) != 0
    is_bold = (selection & 0b100000) != 0
    is_regular = (selection & 0b1000000) != 0
-    return weight, is_italic, is_bold, is_regular
+    return weight, is_italic, is_bold, is_regular, fs_type
+
+def decode_name_record(recs):
+    '''
+    Get the English names of this font. See
+    http://www.microsoft.com/typography/otspec/name.htm for details.
+    '''
+    if not recs: return None
+    unicode_names = {}
+    windows_names = {}
+    mac_names = {}
+    for platform_id, encoding_id, language_id, src in recs:
+        if language_id > 0x8000: continue
+        if platform_id == 0:
+            if encoding_id < 4:
+                try:
+                    unicode_names[language_id] = src.decode('utf-16-be')
+                except ValueError:
+                    continue
+        elif platform_id == 1:
+            try:
+                mac_names[language_id] = src.decode('utf-8')
+            except ValueError:
+                continue
+        elif platform_id == 2:
+            codec = {0:'ascii', 1:'utf-16-be', 2:'iso-8859-1'}.get(encoding_id,
+                    None)
+            if codec is None: continue
+            try:
+                unicode_names[language_id] = src.decode(codec)
+            except ValueError:
+                continue
+        elif platform_id == 3:
+            codec = {1:16, 10:32}.get(encoding_id, None)
+            if codec is None: continue
+            try:
+                windows_names[language_id] = src.decode('utf-%d-be'%codec)
+            except ValueError:
+                continue
+
+    # First try the windows names
+    # First look for the US English name
+    if 1033 in windows_names:
+        return windows_names[1033]
+    # Look for some other english name variant
+    for lang in (3081, 10249, 4105, 9225, 16393, 6153, 8201, 17417, 5129,
+            13321, 18441, 7177, 11273, 2057, 12297):
+        if lang in windows_names:
+            return windows_names[lang]
+
+    # Look for Mac name
+    if 0 in mac_names:
+        return mac_names[0]
+
+    # Use unicode names
+    for val in unicode_names.itervalues():
+        return val
+
+    return None
+
+def get_font_names(raw):
+    table = get_table(raw, 'name')[0]
+    if table is None:
+        raise UnsupportedFont('Not a supported font, has no name table')
+    table_type, count, string_offset = struct.unpack_from(b'>3H', table)
+
+    records = defaultdict(list)
+
+    for i in xrange(count):
+        try:
+            platform_id, encoding_id, language_id, name_id, length, offset = \
+                    struct.unpack_from(b'>6H', table, 6+i*12)
+        except struct.error:
+            break
+        offset += string_offset
+        src = table[offset:offset+length]
+        records[name_id].append((platform_id, encoding_id, language_id,
+            src))
+
+    family_name = decode_name_record(records[1])
+    subfamily_name = decode_name_record(records[2])
+    full_name = decode_name_record(records[4])
+
+    return family_name, subfamily_name, full_name
+

 def remove_embed_restriction(raw):
-    sfnt_version = raw[:4]
-    if sfnt_version not in {b'\x00\x01\x00\x00', b'OTTO'}:
-        raise UnsupportedFont('Not a supported font, sfnt_version: %r'%sfnt_version)
+    ok, sig = is_truetype_font(raw)
+    if not ok:
+        raise UnsupportedFont('Not a supported font, sfnt_version: %r'%sig)

-    num_tables = struct.unpack_from(b'>H', raw, 4)[0]
-
-    # Find OS/2 table
-    offset = 4 + 4*2 # Start of the Table record entries
-    os2_table_offset = None
-    for i in xrange(num_tables):
-        table_tag = raw[offset:offset+4]
-        if table_tag == b'OS/2':
-            os2_table_offset = struct.unpack_from(b'>I', raw, offset+8)[0]
-            break
-        offset += 16 # Size of a table record
-    if os2_table_offset is None:
+    table, table_index, table_offset = get_table(raw, 'os/2')
+    if table is None:
        raise UnsupportedFont('Not a supported font, has no OS/2 table')

-    version, = struct.unpack_from(b'>H', raw, os2_table_offset)
-
-    fs_type_offset = os2_table_offset + struct.calcsize(b'>HhHH')
-    fs_type = struct.unpack_from(b'>H', raw, fs_type_offset)[0]
+    fs_type_offset = struct.calcsize(b'>HhHH')
+    fs_type = struct.unpack_from(b'>H', table, fs_type_offset)[0]
    if fs_type == 0:
        return raw

-    return raw[:fs_type_offset] + struct.pack(b'>H', 0) + raw[fs_type_offset+2:]
+    f = BytesIO(raw)
+    f.seek(fs_type_offset + table_offset)
+    f.write(struct.pack(b'>H', 0))
+    return f.getvalue()
+
+def test():
+    import sys, os
+    for f in sys.argv[1:]:
+        print (os.path.basename(f))
+        raw = open(f, 'rb').read()
+        print (get_font_names(raw))
+        print (get_font_characteristics(raw))

 if __name__ == '__main__':
-    raw = remove_embed_restriction(open(sys.argv[-1], 'rb').read())
+    test()

--- a/src/calibre/utils/fonts/win_fonts.py
+++ b/src/calibre/utils/fonts/win_fonts.py
@ -12,7 +12,7 @@ from itertools import product

 from calibre import prints
 from calibre.constants import plugins
-from calibre.utils.fonts.utils import (is_truetype_font,
+from calibre.utils.fonts.utils import (is_truetype_font, get_font_names,
        get_font_characteristics)

 class WinFonts(object):
@ -57,13 +57,18 @@ class WinFonts(object):
            ext = 'otf' if sig == b'OTTO' else 'ttf'

            try:
-                weight, is_italic, is_bold, is_regular = get_font_characteristics(data)
+                weight, is_italic, is_bold, is_regular = get_font_characteristics(data)[:4]
            except Exception as e:
                prints('Failed to get font characteristic for font: %s [%s]'
                        ' with error: %s'%(family,
                            self.get_normalized_name(is_italic, weight), e))
                continue

+            try:
+                family_name, sub_family_name, full_name = get_font_names(data)
+            except:
+                pass
+
            if normalize:
                ft = {(True, True):'bi', (True, False):'italic', (False,
                    True):'bold', (False, False):'normal'}[(is_italic,
@ -71,7 +76,24 @@ class WinFonts(object):
            else:
                ft = (1 if is_italic else 0, weight//10)

-            ans[ft] = (ext, data)
+            if not (family_name or full_name):
+                # prints('Font %s [%s] has no names'%(family,
+                #     self.get_normalized_name(is_italic, weight)))
+                family_name = family
+            name = full_name or family + ' ' + (sub_family_name or '')
+
+            try:
+                name.encode('ascii')
+            except ValueError:
+                try:
+                    sub_family_name.encode('ascii')
+                    subf = sub_family_name
+                except:
+                    subf = ''
+
+                name = family + ((' ' + subf) if subf else '')
+
+            ans[ft] = (ext, name, data)

        return ans

@ -105,8 +127,8 @@ if __name__ == '__main__':
    print (families)

    for family in families:
-        print (family + ':')
+        prints(family + ':')
        for font, data in w.fonts_for_family(family).iteritems():
-            print ('  ', font, data[0], len(data[1]))
+            prints('  ', font, data[0], data[1], len(data[2]))
        print ()