More work on CFF subsetting. The subset table is written but character mapping is off

2025-07-09 03:04:10 -04:00 · 2012-11-09 11:58:02 +05:30 · 2012-11-09 11:58:02 +05:30 · 98f4cbb7e3
commit 98f4cbb7e3
parent ac0605ae04
5 changed files with 425 additions and 188 deletions
--- a/src/calibre/utils/fonts/sfnt/cff/constants.py
+++ b/src/calibre/utils/fonts/sfnt/cff/constants.py
@ -0,0 +1,183 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 # cff_standard_strings {{{
 # The 391 Standard Strings as used in the CFF format.
 # from Adobe Technical Note #5176, version 1.0, 18 March 1998
 cff_standard_strings = [
 '.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
 'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
 'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
 'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
 'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
 'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
 'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
 'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
 'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
 'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
 'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
 'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
 'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
 'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
 'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
 'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
 'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
 'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
 'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
 'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
 'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
 'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
 'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
 'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
 'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
 'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
 'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
 'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
 'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
 'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
 'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
 'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
 'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
 'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
 'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
 'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
 'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
 'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
 'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
 'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
 'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
 'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
 'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
 'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
 'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
 'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
 'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
 'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
 'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
 'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
 'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
 'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
 'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
 'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
 'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
 'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
 'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
 'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
 '001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
 'Semibold'
 ]
 # }}}
 STANDARD_CHARSETS = [ # {{{
 # ISOAdobe
 (".notdef", "space", "exclam", "quotedbl", "numbersign", "dollar",
    "percent", "ampersand", "quoteright", "parenleft", "parenright",
    "asterisk", "plus", "comma", "hyphen", "period", "slash", "zero",
    "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
    "colon", "semicolon", "less", "equal", "greater", "question", "at",
    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
    "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
    "bracketleft", "backslash", "bracketright", "asciicircum",
    "underscore", "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i",
    "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
    "x", "y", "z", "braceleft", "bar", "braceright", "asciitilde",
    "exclamdown", "cent", "sterling", "fraction", "yen", "florin",
    "section", "currency", "quotesingle", "quotedblleft", "guillemotleft",
    "guilsinglleft", "guilsinglright", "fi", "fl", "endash", "dagger",
    "daggerdbl", "periodcentered", "paragraph", "bullet", "quotesinglbase",
    "quotedblbase", "quotedblright", "guillemotright", "ellipsis",
    "perthousand", "questiondown", "grave", "acute", "circumflex", "tilde",
    "macron", "breve", "dotaccent", "dieresis", "ring", "cedilla",
    "hungarumlaut", "ogonek", "caron", "emdash", "AE", "ordfeminine",
    "Lslash", "Oslash", "OE", "ordmasculine", "ae", "dotlessi", "lslash",
    "oslash", "oe", "germandbls", "onesuperior", "logicalnot", "mu",
    "trademark", "Eth", "onehalf", "plusminus", "Thorn", "onequarter",
    "divide", "brokenbar", "degree", "thorn", "threequarters",
    "twosuperior", "registered", "minus", "eth", "multiply",
    "threesuperior", "copyright", "Aacute", "Acircumflex", "Adieresis",
    "Agrave", "Aring", "Atilde", "Ccedilla", "Eacute", "Ecircumflex",
    "Edieresis", "Egrave", "Iacute", "Icircumflex", "Idieresis", "Igrave",
    "Ntilde", "Oacute", "Ocircumflex", "Odieresis", "Ograve", "Otilde",
    "Scaron", "Uacute", "Ucircumflex", "Udieresis", "Ugrave", "Yacute",
    "Ydieresis", "Zcaron", "aacute", "acircumflex", "adieresis", "agrave",
    "aring", "atilde", "ccedilla", "eacute", "ecircumflex", "edieresis",
    "egrave", "iacute", "icircumflex", "idieresis", "igrave", "ntilde",
    "oacute", "ocircumflex", "odieresis", "ograve", "otilde", "scaron",
    "uacute", "ucircumflex", "udieresis", "ugrave", "yacute", "ydieresis",
    "zcaron"),
 # Expert
 ("notdef", "space", "exclamsmall", "Hungarumlautsmall", "dollaroldstyle",
    "dollarsuperior", "ampersandsmall", "Acutesmall", "parenleftsuperior",
    "parenrightsuperior", "twodotenleader", "onedotenleader", "comma",
    "hyphen", "period", "fraction", "zerooldstyle", "oneoldstyle",
    "twooldstyle", "threeoldstyle", "fouroldstyle", "fiveoldstyle",
    "sixoldstyle", "sevenoldstyle", "eightoldstyle", "nineoldstyle",
    "colon", "semicolon", "commasuperior", "threequartersemdash",
    "periodsuperior", "questionsmall", "asuperior", "bsuperior",
    "centsuperior", "dsuperior", "esuperior", "isuperior", "lsuperior",
    "msuperior", "nsuperior", "osuperior", "rsuperior", "ssuperior",
    "tsuperior", "ff", "fi", "fl", "ffi", "ffl", "parenleftinferior",
    "parenrightinferior", "Circumflexsmall", "hyphensuperior",
    "Gravesmall", "Asmall", "Bsmall", "Csmall", "Dsmall", "Esmall",
    "Fsmall", "Gsmall", "Hsmall", "Ismall", "Jsmall", "Ksmall", "Lsmall",
    "Msmall", "Nsmall", "Osmall", "Psmall", "Qsmall", "Rsmall", "Ssmall",
    "Tsmall", "Usmall", "Vsmall", "Wsmall", "Xsmall", "Ysmall", "Zsmall",
    "colonmonetary", "onefitted", "rupiah", "Tildesmall",
    "exclamdownsmall", "centoldstyle", "Lslashsmall", "Scaronsmall",
    "Zcaronsmall", "Dieresissmall", "Brevesmall", "Caronsmall",
    "Dotaccentsmall", "Macronsmall", "figuredash", "hypheninferior",
    "Ogoneksmall", "Ringsmall", "Cedillasmall", "onequarter", "onehalf",
    "threequarters", "questiondownsmall", "oneeighth", "threeeighths",
    "fiveeighths", "seveneighths", "onethird", "twothirds", "zerosuperior",
    "onesuperior", "twosuperior", "threesuperior", "foursuperior",
    "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
    "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
    "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
    "seveninferior", "eightinferior", "nineinferior", "centinferior",
    "dollarinferior", "periodinferior", "commainferior", "Agravesmall",
    "Aacutesmall", "Acircumflexsmall", "Atildesmall", "Adieresissmall",
    "Aringsmall", "AEsmall", "Ccedillasmall", "Egravesmall", "Eacutesmall",
    "Ecircumflexsmall", "Edieresissmall", "Igravesmall", "Iacutesmall",
    "Icircumflexsmall", "Idieresissmall", "Ethsmall", "Ntildesmall",
    "Ogravesmall", "Oacutesmall", "Ocircumflexsmall", "Otildesmall",
    "Odieresissmall", "OEsmall", "Oslashsmall", "Ugravesmall",
    "Uacutesmall", "Ucircumflexsmall", "Udieresissmall", "Yacutesmall",
    "Thornsmall", "Ydieresissmall"),
 # Expert Subset
 (".notdef", "space", "dollaroldstyle", "dollarsuperior",
        "parenleftsuperior", "parenrightsuperior", "twodotenleader",
        "onedotenleader", "comma", "hyphen", "period", "fraction",
        "zerooldstyle", "oneoldstyle", "twooldstyle", "threeoldstyle",
        "fouroldstyle", "fiveoldstyle", "sixoldstyle", "sevenoldstyle",
        "eightoldstyle", "nineoldstyle", "colon", "semicolon",
        "commasuperior", "threequartersemdash", "periodsuperior",
        "asuperior", "bsuperior", "centsuperior", "dsuperior", "esuperior",
        "isuperior", "lsuperior", "msuperior", "nsuperior", "osuperior",
        "rsuperior", "ssuperior", "tsuperior", "ff", "fi", "fl", "ffi",
        "ffl", "parenleftinferior", "parenrightinferior", "hyphensuperior",
        "colonmonetary", "onefitted", "rupiah", "centoldstyle",
        "figuredash", "hypheninferior", "onequarter", "onehalf",
        "threequarters", "oneeighth", "threeeighths", "fiveeighths",
        "seveneighths", "onethird", "twothirds", "zerosuperior",
        "onesuperior", "twosuperior", "threesuperior", "foursuperior",
        "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
        "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
        "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
        "seveninferior", "eightinferior", "nineinferior", "centinferior",
        "dollarinferior", "periodinferior", "commainferior"),
 ] # }}}
--- a/src/calibre/utils/fonts/sfnt/cff/dict_data.py
+++ b/src/calibre/utils/fonts/sfnt/cff/dict_data.py
@ -7,7 +7,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
-from struct import unpack
+from struct import unpack, pack
 t1_operand_encoding = [None] * 256
 t1_operand_encoding[0:32] = (32) * ["do_operator"]
@ -27,8 +27,9 @@ cff_dict_operand_encoding[255] = "reserved"
 real_nibbles = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
        '.', 'E', 'E-', None, '-']
 real_nibbles_map = {x:i for i, x in enumerate(real_nibbles)}
-class Reader(dict):
+class ByteCode(dict):
    def read_byte(self, b0, data, index):
        return b0 - 139, index
@ -71,13 +72,62 @@ class Reader(dict):
            number = number + real_nibbles[nibble1]
        return float(number), index
-class Dict(Reader):
+    def write_float(self, f, encoding='ignored'):
        s = type(u'')(f).upper()
        if s[:2] == "0.":
            s = s[1:]
        elif s[:3] == "-0.":
            s = "-" + s[2:]
        nibbles = []
        while s:
            c = s[0]
            s = s[1:]
            if c == "E" and s[:1] == "-":
                s = s[1:]
                c = "E-"
            nibbles.append(real_nibbles_map[c])
        nibbles.append(0xf)
        if len(nibbles) % 2:
            nibbles.append(0xf)
        d = bytearray([30])
        for i in xrange(0, len(nibbles), 2):
            d.append(nibbles[i] << 4 | nibbles[i+1])
        return bytes(d)
    def write_int(self, value, encoding="cff"):
        four_byte_op = {'cff':29, 't1':255}.get(encoding, None)
 		if -107 <= value <= 107:
 			code = bytes(bytearray([value + 139]))
 		elif 108 <= value <= 1131:
 			value = value - 108
 			code = bytes(bytearray([(value >> 8) + 247, (value & 0xFF)]))
 		elif -1131 <= value <= -108:
 			value = -value - 108
 			code = bytes(bytearray([(value >> 8) + 251, (value & 0xFF)]))
 		elif four_byte_op is None:
 			# T2 only supports 2 byte ints
            code = bytes(bytearray([28])) + pack(b">h", value)
 		else:
 			code = bytes(bytearray([four_byte_op])) + pack(b">l", value)
 		return code
    def write_offset(self, value):
        return bytes(bytearray([29])) + pack(b">l", value)
    def write_number(self, value, encoding="cff"):
        f = self.write_float if isinstance(value, float) else self.write_int
        return f(value, encoding)
 class Dict(ByteCode):
    operand_encoding = cff_dict_operand_encoding
-    TABLE = []
+    TABLE = ()
    FILTERED = frozenset()
    OFFSETS = frozenset()
    def __init__(self):
-        Reader.__init__(self)
+        ByteCode.__init__(self)
        self.operators = {op:(name, arg) for op, name, arg, default in
                self.TABLE}
@ -141,9 +191,53 @@ class Dict(Reader):
        del self.stack[:]
 		return out
    def compile(self, strings):
        data = []
        for op, name, arg, default in self.TABLE:
            if name in self.FILTERED:
                continue
            val = self.safe_get(name)
            opcode = bytes(bytearray(op if isinstance(op, tuple) else [op]))
            if val != self.defaults[name]:
                self.encoding_offset = name in self.OFFSETS
                if isinstance(arg, tuple):
                    if len(val) != len(arg):
                        raise ValueError('Invalid argument %s for operator: %s'
                                %(val, op))
                    for typ, v in zip(arg, val):
                        if typ == 'SID':
                            val = strings(val)
                        data.append(getattr(self, 'encode_'+typ)(v))
                else:
                    if arg == 'SID':
                        val = strings(val)
                    data.append(getattr(self, 'encode_'+arg)(val))
                data.append(opcode)
        self.raw = b''.join(data)
        return self.raw
    def encode_number(self, val):
        if self.encoding_offset:
            return self.write_offset(val)
        return self.write_number(val)
    def encode_SID(self, val):
        return self.write_int(val)
    def encode_array(self, val):
        return b''.join(map(self.encode_number, val))
 	def encode_delta(self, value):
 		out = []
 		last = 0
 		for v in value:
 			out.append(v - last)
 			last = v
        return self.encode_array(out)
 class TopDict(Dict):
-    TABLE = [
+    TABLE = (
 	#opcode     name                  argument type   default
 	((12, 30), 'ROS',        ('SID','SID','number'), None,      ),
 	((12, 20), 'SyntheticBase',      'number',       None,      ),
@ -179,11 +273,17 @@ class TopDict(Dict):
 	((12, 37), 'FDSelect',           'number',       None,      ),
 	((12, 36), 'FDArray',            'number',       None,      ),
 	(17,       'CharStrings',        'number',       None,      ),
-    ]
+    )
    # We will not write these operators out
    FILTERED = {'ROS', 'SyntheticBase', 'UniqueID', 'XUID',
            'CIDFontVersion', 'CIDFontRevision', 'CIDFontType', 'CIDCount',
            'UIDBase', 'Encoding', 'FDSelect', 'FDArray'}
    OFFSETS = {'charset', 'Encoding', 'CharStrings', 'Private'}
 class PrivateDict(Dict):
-    TABLE = [
+    TABLE = (
    #	opcode     name                  argument type   default
 	(6,        'BlueValues',         'delta',        None,      ),
 	(7,        'OtherBlues',         'delta',        None,      ),
@ -205,5 +305,7 @@ class PrivateDict(Dict):
 	(20,       'defaultWidthX',      'number',       0,         ),
 	(21,       'nominalWidthX',      'number',       0,         ),
 	(19,       'Subrs',              'number',       None,      ),
-    ]
+    )
    OFFSETS = {'Subrs'}
--- a/src/calibre/utils/fonts/sfnt/cff/table.py
+++ b/src/calibre/utils/fonts/sfnt/cff/table.py
@ -11,8 +11,10 @@ from struct import unpack_from, unpack, calcsize
 from functools import partial
 from calibre.utils.fonts.sfnt import UnknownTable
-from calibre.utils.fonts.sfnt.errors import UnsupportedFont
+from calibre.utils.fonts.sfnt.errors import UnsupportedFont, NoGlyphs
 from calibre.utils.fonts.sfnt.cff.dict_data import TopDict, PrivateDict
 from calibre.utils.fonts.sfnt.cff.constants import (cff_standard_strings,
        STANDARD_CHARSETS)
 # Useful links
 # http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
@ -81,9 +83,9 @@ class CFF(object):
        self.charset = Charset(raw, self.top_dict.safe_get('charset'),
                self.strings, self.num_glyphs, self.is_CID)
-        import pprint
+        # import pprint
-        pprint.pprint(self.top_dict)
+        # pprint.pprint(self.top_dict)
-        pprint.pprint(self.private_dict)
+        # pprint.pprint(self.private_dict)
 class Index(list):
@ -125,107 +127,6 @@ class Strings(Index):
 class Charset(list):
    STANDARD_CHARSETS = [ # {{{
    # ISOAdobe
    (".notdef", "space", "exclam", "quotedbl", "numbersign", "dollar",
        "percent", "ampersand", "quoteright", "parenleft", "parenright",
        "asterisk", "plus", "comma", "hyphen", "period", "slash", "zero",
        "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
        "colon", "semicolon", "less", "equal", "greater", "question", "at",
        "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
        "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
        "bracketleft", "backslash", "bracketright", "asciicircum",
        "underscore", "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i",
        "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
        "x", "y", "z", "braceleft", "bar", "braceright", "asciitilde",
        "exclamdown", "cent", "sterling", "fraction", "yen", "florin",
        "section", "currency", "quotesingle", "quotedblleft", "guillemotleft",
        "guilsinglleft", "guilsinglright", "fi", "fl", "endash", "dagger",
        "daggerdbl", "periodcentered", "paragraph", "bullet", "quotesinglbase",
        "quotedblbase", "quotedblright", "guillemotright", "ellipsis",
        "perthousand", "questiondown", "grave", "acute", "circumflex", "tilde",
        "macron", "breve", "dotaccent", "dieresis", "ring", "cedilla",
        "hungarumlaut", "ogonek", "caron", "emdash", "AE", "ordfeminine",
        "Lslash", "Oslash", "OE", "ordmasculine", "ae", "dotlessi", "lslash",
        "oslash", "oe", "germandbls", "onesuperior", "logicalnot", "mu",
        "trademark", "Eth", "onehalf", "plusminus", "Thorn", "onequarter",
        "divide", "brokenbar", "degree", "thorn", "threequarters",
        "twosuperior", "registered", "minus", "eth", "multiply",
        "threesuperior", "copyright", "Aacute", "Acircumflex", "Adieresis",
        "Agrave", "Aring", "Atilde", "Ccedilla", "Eacute", "Ecircumflex",
        "Edieresis", "Egrave", "Iacute", "Icircumflex", "Idieresis", "Igrave",
        "Ntilde", "Oacute", "Ocircumflex", "Odieresis", "Ograve", "Otilde",
        "Scaron", "Uacute", "Ucircumflex", "Udieresis", "Ugrave", "Yacute",
        "Ydieresis", "Zcaron", "aacute", "acircumflex", "adieresis", "agrave",
        "aring", "atilde", "ccedilla", "eacute", "ecircumflex", "edieresis",
        "egrave", "iacute", "icircumflex", "idieresis", "igrave", "ntilde",
        "oacute", "ocircumflex", "odieresis", "ograve", "otilde", "scaron",
        "uacute", "ucircumflex", "udieresis", "ugrave", "yacute", "ydieresis",
        "zcaron"),
    # Expert
    ("notdef", "space", "exclamsmall", "Hungarumlautsmall", "dollaroldstyle",
        "dollarsuperior", "ampersandsmall", "Acutesmall", "parenleftsuperior",
        "parenrightsuperior", "twodotenleader", "onedotenleader", "comma",
        "hyphen", "period", "fraction", "zerooldstyle", "oneoldstyle",
        "twooldstyle", "threeoldstyle", "fouroldstyle", "fiveoldstyle",
        "sixoldstyle", "sevenoldstyle", "eightoldstyle", "nineoldstyle",
        "colon", "semicolon", "commasuperior", "threequartersemdash",
        "periodsuperior", "questionsmall", "asuperior", "bsuperior",
        "centsuperior", "dsuperior", "esuperior", "isuperior", "lsuperior",
        "msuperior", "nsuperior", "osuperior", "rsuperior", "ssuperior",
        "tsuperior", "ff", "fi", "fl", "ffi", "ffl", "parenleftinferior",
        "parenrightinferior", "Circumflexsmall", "hyphensuperior",
        "Gravesmall", "Asmall", "Bsmall", "Csmall", "Dsmall", "Esmall",
        "Fsmall", "Gsmall", "Hsmall", "Ismall", "Jsmall", "Ksmall", "Lsmall",
        "Msmall", "Nsmall", "Osmall", "Psmall", "Qsmall", "Rsmall", "Ssmall",
        "Tsmall", "Usmall", "Vsmall", "Wsmall", "Xsmall", "Ysmall", "Zsmall",
        "colonmonetary", "onefitted", "rupiah", "Tildesmall",
        "exclamdownsmall", "centoldstyle", "Lslashsmall", "Scaronsmall",
        "Zcaronsmall", "Dieresissmall", "Brevesmall", "Caronsmall",
        "Dotaccentsmall", "Macronsmall", "figuredash", "hypheninferior",
        "Ogoneksmall", "Ringsmall", "Cedillasmall", "onequarter", "onehalf",
        "threequarters", "questiondownsmall", "oneeighth", "threeeighths",
        "fiveeighths", "seveneighths", "onethird", "twothirds", "zerosuperior",
        "onesuperior", "twosuperior", "threesuperior", "foursuperior",
        "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
        "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
        "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
        "seveninferior", "eightinferior", "nineinferior", "centinferior",
        "dollarinferior", "periodinferior", "commainferior", "Agravesmall",
        "Aacutesmall", "Acircumflexsmall", "Atildesmall", "Adieresissmall",
        "Aringsmall", "AEsmall", "Ccedillasmall", "Egravesmall", "Eacutesmall",
        "Ecircumflexsmall", "Edieresissmall", "Igravesmall", "Iacutesmall",
        "Icircumflexsmall", "Idieresissmall", "Ethsmall", "Ntildesmall",
        "Ogravesmall", "Oacutesmall", "Ocircumflexsmall", "Otildesmall",
        "Odieresissmall", "OEsmall", "Oslashsmall", "Ugravesmall",
        "Uacutesmall", "Ucircumflexsmall", "Udieresissmall", "Yacutesmall",
        "Thornsmall", "Ydieresissmall"),
    # Expert Subset
    (".notdef", "space", "dollaroldstyle", "dollarsuperior",
            "parenleftsuperior", "parenrightsuperior", "twodotenleader",
            "onedotenleader", "comma", "hyphen", "period", "fraction",
            "zerooldstyle", "oneoldstyle", "twooldstyle", "threeoldstyle",
            "fouroldstyle", "fiveoldstyle", "sixoldstyle", "sevenoldstyle",
            "eightoldstyle", "nineoldstyle", "colon", "semicolon",
            "commasuperior", "threequartersemdash", "periodsuperior",
            "asuperior", "bsuperior", "centsuperior", "dsuperior", "esuperior",
            "isuperior", "lsuperior", "msuperior", "nsuperior", "osuperior",
            "rsuperior", "ssuperior", "tsuperior", "ff", "fi", "fl", "ffi",
            "ffl", "parenleftinferior", "parenrightinferior", "hyphensuperior",
            "colonmonetary", "onefitted", "rupiah", "centoldstyle",
            "figuredash", "hypheninferior", "onequarter", "onehalf",
            "threequarters", "oneeighth", "threeeighths", "fiveeighths",
            "seveneighths", "onethird", "twothirds", "zerosuperior",
            "onesuperior", "twosuperior", "threesuperior", "foursuperior",
            "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
            "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
            "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
            "seveninferior", "eightinferior", "nineinferior", "centinferior",
            "dollarinferior", "periodinferior", "commainferior"),
    ] # }}}
    def __init__(self, raw, offset, strings, num_glyphs, is_CID):
        super(Charset, self).__init__()
        self.standard_charset = offset if offset in {0, 1, 2} else None
@ -266,7 +167,7 @@ class Charset(list):
    def lookup(self, glyph_id):
        if self.standard_charset is None:
            return self[glyph_id]
-        return self.STANDARD_CHARSETS[self.standard_charset][glyph_id].encode('ascii')
+        return STANDARD_CHARSETS[self.standard_charset][glyph_id].encode('ascii')
    def safe_lookup(self, glyph_id):
        try:
@ -293,6 +194,8 @@ class CFFTable(UnknownTable):
                glyph_id in character_map.iteritems()}
        charset = set(charset_map.itervalues())
        charset.discard(None)
        if not charset:
            raise NoGlyphs('This font has no glyphs for the specified characters')
        s = Subset(self.cff, charset)
        # Rebuild character_map with the glyph ids from the subset font
@ -302,74 +205,10 @@ class CFFTable(UnknownTable):
            if glyph_id:
                character_map[code] = glyph_id
-# cff_standard_strings {{{
+        # Check that raw is parseable
-# The 391 Standard Strings as used in the CFF format.
+        CFF(s.raw)
-# from Adobe Technical None #5176, version 1.0, 18 March 1998
+
        self.raw = s.raw
 cff_standard_strings = [
 '.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
 'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
 'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
 'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
 'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
 'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
 'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
 'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
 'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
 'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
 'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
 'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
 'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
 'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
 'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
 'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
 'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
 'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
 'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
 'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
 'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
 'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
 'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
 'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
 'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
 'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
 'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
 'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
 'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
 'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
 'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
 'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
 'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
 'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
 'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
 'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
 'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
 'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
 'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
 'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
 'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
 'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
 'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
 'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
 'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
 'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
 'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
 'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
 'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
 'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
 'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
 'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
 'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
 'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
 'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
 'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
 'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
 'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
 '001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
 'Semibold'
 ]
 # }}}
--- a/src/calibre/utils/fonts/sfnt/cff/writer.py
+++ b/src/calibre/utils/fonts/sfnt/cff/writer.py
@ -10,6 +10,8 @@ __docformat__ = 'restructuredtext en'
 from struct import pack
 from collections import OrderedDict
 from calibre.utils.fonts.sfnt.cff.constants import cff_standard_strings
 class Index(list):
    def __init__(self):
@ -40,12 +42,66 @@ class Index(list):
                offsets = b''.join(pack(b'>L', x)[1:] for x in offsets)
            else:
                fmt = {1:'B', 2:'H', 4:'L'}[offsize]
-                offsets = pack( ('>%d%s'%(len(self), fmt)).encode('ascii'),
+                offsets = pack( ('>%d%s'%(len(offsets), fmt)).encode('ascii'),
                        *offsets)
            self.raw = prefix + offsets + obj_data
        return self.raw
 class Strings(Index):
    '''
    The index of strings for the font being written. Calling an instance
    with a string returns the id (SID) of that string, adding the string
    to this index first if it has not been seen before.
    '''
    def __init__(self):
        Index.__init__(self)
        # Seed the lookup table with the standard strings: their ids are
        # simply their positions in cff_standard_strings and they are
        # never stored in this index.
        self.added = dict(
            (name, sid) for sid, name in enumerate(cff_standard_strings))
    def __call__(self, x):
        '''
        Return the id of the string x. Strings that are neither standard
        nor previously added are appended to this index and numbered
        after the standard strings, in order of insertion.
        '''
        if x not in self.added:
            # The id must be computed before appending, as it depends on
            # the number of custom strings stored so far.
            self.added[x] = len(self) + len(cff_standard_strings)
            self.append(x)
        return self.added[x]
 class Dict(Index):
    '''
    An index holding exactly one entry: the compiled form of the
    dictionary src.

    :param src: The dictionary object to serialize. It must provide a
                compile(strings) method returning the serialized data.
    :param strings: The strings object that is passed to src.compile().
    '''
    def __init__(self, src, strings):
        Index.__init__(self)
        self.src, self.strings = src, strings
    def compile(self):
        '''
        Re-serialize src and rebuild this index around the result. src is
        compiled afresh on every call, so changes made to src between
        calls (for example updated offsets) are picked up.

        :return: The compiled index data (also stored in self.raw), like
                 the other compile() methods in this module.
        '''
        self[:] = [self.src.compile(self.strings)]
        return Index.compile(self)
 class PrivateDict(object):
    '''
    The private dictionary of the font, together with its optional local
    subroutines.

    :param src: The private dictionary object. It must support item
                assignment and provide a compile(strings) method.
    :param subrs: An iterable of local subroutines, or None if there are
                  none. When present they are compiled immediately into
                  self.subrs.
    :param strings: The strings object that is passed to src.compile().
    '''
    def __init__(self, src, subrs, strings):
        self.src, self.strings = src, strings
        self.subrs = None
        if subrs is not None:
            self.subrs = Index()
            self.subrs.extend(subrs)
            self.subrs.compile()
    def compile(self):
        '''
        Serialize the private dictionary into self.raw and return it.

        When there are local subroutines, the Subrs entry of src is set
        to the length of the serialized dictionary, since the subroutines
        are written directly after the dictionary by Subset.
        '''
        raw = self.src.compile(self.strings)
        if self.subrs is not None:
            # Storing a new Subrs value can change the size of the
            # serialized dictionary, which would make the stored value
            # stale. Repeat until the stored offset matches the length of
            # the data actually produced. The number of passes is bounded
            # so that a misbehaving src cannot cause an endless loop.
            for _ in range(8):
                offset = len(raw)
                self.src['Subrs'] = offset
                raw = self.src.compile(self.strings)
                if len(raw) == offset:
                    break
        self.raw = raw
        return raw
 class Charsets(list):
    '''
    The list of glyph names that make up the charset table of the font.

    :param strings: A callable that maps a glyph name to its string id.
    '''
    def __init__(self, strings):
        list.__init__(self)
        self.strings = strings
    def compile(self):
        '''
        Serialize the glyph names: a single zero byte followed by the
        string id of every name, each as a big-endian unsigned 16 bit
        number. The result is stored in self.raw and returned.
        '''
        fmt_byte = pack(b'>B', 0)
        sids = []
        for name in self:
            sids.append(self.strings(name))
        sid_fmt = ('>%dH'%len(self)).encode('ascii')
        self.raw = fmt_byte + pack(sid_fmt, *sids)
        return self.raw
 class Subset(object):
@ -53,19 +109,77 @@ class Subset(object):
        self.cff = cff
        self.keep_charnames = keep_charnames
        header = pack(b'>4B', 1, 0, 4, cff.offset_size)
        # Font names Index
        font_names = Index()
        font_names.extend(self.cff.font_names)
-        # CharStrings Index
+        # Strings Index
        strings = Strings()
        # CharStrings Index and charsets
        char_strings = Index()
        self.charname_map = OrderedDict()
        charsets = Charsets(strings)
        for i in xrange(self.cff.num_glyphs):
            cname = self.cff.charset.safe_lookup(i)
            if cname in keep_charnames:
                char_strings.append(self.cff.char_strings[i])
                self.charname_map[cname] = i
            if i > 0: # .notdef is not included
                charsets.append(cname)
        # Add the strings
        char_strings.compile()
        charsets.compile()
        # Global subroutines
        global_subrs = Index()
        global_subrs.extend(cff.global_subrs)
        global_subrs.compile()
        # TOP DICT
        top_dict = Dict(cff.top_dict, strings)
        top_dict.compile() # Add strings
        private_dict = None
        if cff.private_dict is not None:
            private_dict = PrivateDict(cff.private_dict, cff.private_subrs,
                    strings)
            private_dict.compile() # Add strings
        fixed_prefix = header + font_names.compile()
        t = top_dict.src
        # Put in dummy offsets
        t['charset'] = 1
        t['CharStrings'] = 1
        if private_dict is not None:
            t['Private'] = (len(private_dict.raw), 1)
        top_dict.compile()
        strings.compile()
        # Calculate real offsets
        pos = len(fixed_prefix)
        pos += len(top_dict.raw)
        pos += len(strings.raw)
        pos += len(global_subrs.raw)
        t['charset'] = pos
        pos += len(charsets.raw)
        t['CharStrings'] = pos
        pos += len(char_strings.raw)
        if private_dict is not None:
            t['Private'] = (len(private_dict.raw), pos)
        top_dict.compile()
        self.raw = (fixed_prefix + top_dict.raw + strings.raw +
                global_subrs.raw + charsets.raw + char_strings.raw)
        if private_dict is not None:
            self.raw += private_dict.raw
            if private_dict.subrs is not None:
                self.raw += private_dict.subrs.raw
--- a/src/calibre/utils/fonts/sfnt/subset.py
+++ b/src/calibre/utils/fonts/sfnt/subset.py
@ -70,7 +70,6 @@ def subset_postscript(sfnt, character_map):
    cff = sfnt[b'CFF ']
    cff.decompile()
    cff.subset(character_map)
    raise Exception('TODO: Implement CFF subsetting')
 def subset(raw, individual_chars, ranges=()):
    chars = list(map(ord, individual_chars))