More work on CFF subsetting. The subset table is written but character mapping is off

2025-07-09 03:04:10 -04:00 · 2012-11-09 11:58:02 +05:30 · 2012-11-09 11:58:02 +05:30 · 98f4cbb7e3
commit 98f4cbb7e3
parent ac0605ae04
5 changed files with 425 additions and 188 deletions
--- a/src/calibre/utils/fonts/sfnt/cff/constants.py
+++ b/src/calibre/utils/fonts/sfnt/cff/constants.py
@ -0,0 +1,183 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+# cff_standard_strings {{{
+# The 391 Standard Strings as used in the CFF format.
+# from Adobe Technical Note #5176, version 1.0, 18 March 1998
+
+cff_standard_strings = [
+'.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
+'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
+'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
+'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
+'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
+'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
+'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
+'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
+'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
+'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
+'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
+'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
+'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
+'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
+'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
+'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
+'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
+'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
+'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
+'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
+'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
+'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
+'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
+'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
+'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
+'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
+'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
+'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
+'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
+'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
+'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
+'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
+'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
+'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
+'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
+'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
+'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
+'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
+'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
+'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
+'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
+'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
+'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
+'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
+'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
+'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
+'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
+'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
+'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
+'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
+'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
+'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
+'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
+'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
+'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
+'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
+'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
+'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
+'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
+'001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
+'Semibold'
+]
+# }}}
+
+
+STANDARD_CHARSETS = [ # {{{
+# The predefined charsets. This list is indexed by the charset id (0, 1 or 2)
+# and each entry is indexed by glyph id to get the glyph name. Glyph 0 is
+# always .notdef.
+
+# ISOAdobe
+(".notdef", "space", "exclam", "quotedbl", "numbersign", "dollar",
+    "percent", "ampersand", "quoteright", "parenleft", "parenright",
+    "asterisk", "plus", "comma", "hyphen", "period", "slash", "zero",
+    "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
+    "colon", "semicolon", "less", "equal", "greater", "question", "at",
+    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
+    "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
+    "bracketleft", "backslash", "bracketright", "asciicircum",
+    "underscore", "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i",
+    "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
+    "x", "y", "z", "braceleft", "bar", "braceright", "asciitilde",
+    "exclamdown", "cent", "sterling", "fraction", "yen", "florin",
+    "section", "currency", "quotesingle", "quotedblleft", "guillemotleft",
+    "guilsinglleft", "guilsinglright", "fi", "fl", "endash", "dagger",
+    "daggerdbl", "periodcentered", "paragraph", "bullet", "quotesinglbase",
+    "quotedblbase", "quotedblright", "guillemotright", "ellipsis",
+    "perthousand", "questiondown", "grave", "acute", "circumflex", "tilde",
+    "macron", "breve", "dotaccent", "dieresis", "ring", "cedilla",
+    "hungarumlaut", "ogonek", "caron", "emdash", "AE", "ordfeminine",
+    "Lslash", "Oslash", "OE", "ordmasculine", "ae", "dotlessi", "lslash",
+    "oslash", "oe", "germandbls", "onesuperior", "logicalnot", "mu",
+    "trademark", "Eth", "onehalf", "plusminus", "Thorn", "onequarter",
+    "divide", "brokenbar", "degree", "thorn", "threequarters",
+    "twosuperior", "registered", "minus", "eth", "multiply",
+    "threesuperior", "copyright", "Aacute", "Acircumflex", "Adieresis",
+    "Agrave", "Aring", "Atilde", "Ccedilla", "Eacute", "Ecircumflex",
+    "Edieresis", "Egrave", "Iacute", "Icircumflex", "Idieresis", "Igrave",
+    "Ntilde", "Oacute", "Ocircumflex", "Odieresis", "Ograve", "Otilde",
+    "Scaron", "Uacute", "Ucircumflex", "Udieresis", "Ugrave", "Yacute",
+    "Ydieresis", "Zcaron", "aacute", "acircumflex", "adieresis", "agrave",
+    "aring", "atilde", "ccedilla", "eacute", "ecircumflex", "edieresis",
+    "egrave", "iacute", "icircumflex", "idieresis", "igrave", "ntilde",
+    "oacute", "ocircumflex", "odieresis", "ograve", "otilde", "scaron",
+    "uacute", "ucircumflex", "udieresis", "ugrave", "yacute", "ydieresis",
+    "zcaron"),
+
+# Expert
+(".notdef", "space", "exclamsmall", "Hungarumlautsmall", "dollaroldstyle",
+    "dollarsuperior", "ampersandsmall", "Acutesmall", "parenleftsuperior",
+    "parenrightsuperior", "twodotenleader", "onedotenleader", "comma",
+    "hyphen", "period", "fraction", "zerooldstyle", "oneoldstyle",
+    "twooldstyle", "threeoldstyle", "fouroldstyle", "fiveoldstyle",
+    "sixoldstyle", "sevenoldstyle", "eightoldstyle", "nineoldstyle",
+    "colon", "semicolon", "commasuperior", "threequartersemdash",
+    "periodsuperior", "questionsmall", "asuperior", "bsuperior",
+    "centsuperior", "dsuperior", "esuperior", "isuperior", "lsuperior",
+    "msuperior", "nsuperior", "osuperior", "rsuperior", "ssuperior",
+    "tsuperior", "ff", "fi", "fl", "ffi", "ffl", "parenleftinferior",
+    "parenrightinferior", "Circumflexsmall", "hyphensuperior",
+    "Gravesmall", "Asmall", "Bsmall", "Csmall", "Dsmall", "Esmall",
+    "Fsmall", "Gsmall", "Hsmall", "Ismall", "Jsmall", "Ksmall", "Lsmall",
+    "Msmall", "Nsmall", "Osmall", "Psmall", "Qsmall", "Rsmall", "Ssmall",
+    "Tsmall", "Usmall", "Vsmall", "Wsmall", "Xsmall", "Ysmall", "Zsmall",
+    "colonmonetary", "onefitted", "rupiah", "Tildesmall",
+    "exclamdownsmall", "centoldstyle", "Lslashsmall", "Scaronsmall",
+    "Zcaronsmall", "Dieresissmall", "Brevesmall", "Caronsmall",
+    "Dotaccentsmall", "Macronsmall", "figuredash", "hypheninferior",
+    "Ogoneksmall", "Ringsmall", "Cedillasmall", "onequarter", "onehalf",
+    "threequarters", "questiondownsmall", "oneeighth", "threeeighths",
+    "fiveeighths", "seveneighths", "onethird", "twothirds", "zerosuperior",
+    "onesuperior", "twosuperior", "threesuperior", "foursuperior",
+    "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
+    "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
+    "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
+    "seveninferior", "eightinferior", "nineinferior", "centinferior",
+    "dollarinferior", "periodinferior", "commainferior", "Agravesmall",
+    "Aacutesmall", "Acircumflexsmall", "Atildesmall", "Adieresissmall",
+    "Aringsmall", "AEsmall", "Ccedillasmall", "Egravesmall", "Eacutesmall",
+    "Ecircumflexsmall", "Edieresissmall", "Igravesmall", "Iacutesmall",
+    "Icircumflexsmall", "Idieresissmall", "Ethsmall", "Ntildesmall",
+    "Ogravesmall", "Oacutesmall", "Ocircumflexsmall", "Otildesmall",
+    "Odieresissmall", "OEsmall", "Oslashsmall", "Ugravesmall",
+    "Uacutesmall", "Ucircumflexsmall", "Udieresissmall", "Yacutesmall",
+    "Thornsmall", "Ydieresissmall"),
+
+# Expert Subset
+(".notdef", "space", "dollaroldstyle", "dollarsuperior",
+        "parenleftsuperior", "parenrightsuperior", "twodotenleader",
+        "onedotenleader", "comma", "hyphen", "period", "fraction",
+        "zerooldstyle", "oneoldstyle", "twooldstyle", "threeoldstyle",
+        "fouroldstyle", "fiveoldstyle", "sixoldstyle", "sevenoldstyle",
+        "eightoldstyle", "nineoldstyle", "colon", "semicolon",
+        "commasuperior", "threequartersemdash", "periodsuperior",
+        "asuperior", "bsuperior", "centsuperior", "dsuperior", "esuperior",
+        "isuperior", "lsuperior", "msuperior", "nsuperior", "osuperior",
+        "rsuperior", "ssuperior", "tsuperior", "ff", "fi", "fl", "ffi",
+        "ffl", "parenleftinferior", "parenrightinferior", "hyphensuperior",
+        "colonmonetary", "onefitted", "rupiah", "centoldstyle",
+        "figuredash", "hypheninferior", "onequarter", "onehalf",
+        "threequarters", "oneeighth", "threeeighths", "fiveeighths",
+        "seveneighths", "onethird", "twothirds", "zerosuperior",
+        "onesuperior", "twosuperior", "threesuperior", "foursuperior",
+        "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
+        "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
+        "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
+        "seveninferior", "eightinferior", "nineinferior", "centinferior",
+        "dollarinferior", "periodinferior", "commainferior"),
+] # }}}
+
--- a/src/calibre/utils/fonts/sfnt/cff/dict_data.py
+++ b/src/calibre/utils/fonts/sfnt/cff/dict_data.py
@ -7,7 +7,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-from struct import unpack
+from struct import unpack, pack

 t1_operand_encoding = [None] * 256
 t1_operand_encoding[0:32] = (32) * ["do_operator"]
@ -27,8 +27,9 @@ cff_dict_operand_encoding[255] = "reserved"

 real_nibbles = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
        '.', 'E', 'E-', None, '-']
+real_nibbles_map = {x:i for i, x in enumerate(real_nibbles)}

-class Reader(dict):
+class ByteCode(dict):

    def read_byte(self, b0, data, index):
        return b0 - 139, index
@ -71,13 +72,62 @@ class Reader(dict):
            number = number + real_nibbles[nibble1]
        return float(number), index

-class Dict(Reader):
+    def write_float(self, f, encoding='ignored'):
+        '''
+        Encode the number ``f`` as a real number operand.
+
+        The text form of ``f`` is packed one symbol per nibble (looked up in
+        real_nibbles_map), after a leading byte of 30. A 0xf nibble ends the
+        number and a second one is added if needed to fill the last byte.
+
+        :param f: The value to encode
+        :param encoding: Unused, accepted so that this method can be called
+                         with the same arguments as :meth:`write_int`
+        :return: The encoded operand as a bytestring
+        '''
+        s = type(u'')(f).upper()
+        # Drop a redundant leading zero: 0.5 -> .5 and -0.5 -> -.5
+        if s[:2] == "0.":
+            s = s[1:]
+        elif s[:3] == "-0.":
+            s = "-" + s[2:]
+        nibbles = []
+        while s:
+            c = s[0]
+            s = s[1:]
+            if c == "E":
+                sign = s[:1]
+                if sign == "-":
+                    # Negative exponents have their own combined symbol
+                    s = s[1:]
+                    c = "E-"
+                elif sign == "+":
+                    # Large values are rendered as e.g. 1E+20. There is no
+                    # nibble for '+', a plain E is the positive exponent
+                    # symbol, so drop the sign instead of failing on it.
+                    s = s[1:]
+            nibbles.append(real_nibbles_map[c])
+        nibbles.append(0xf)
+        if len(nibbles) % 2:
+            nibbles.append(0xf)
+        d = bytearray([30])
+        for i in range(0, len(nibbles), 2):
+            d.append(nibbles[i] << 4 | nibbles[i+1])
+        return bytes(d)
+
+    def write_int(self, value, encoding="cff"):
+        '''
+        Encode the integer ``value`` as an operand, using the shortest of
+        the available forms that can hold it.
+
+        :param value: The integer to encode
+        :param encoding: 'cff' or 't1' select the leading byte (29 or 255
+            respectively) of the four byte form. With any other value there
+            is no four byte form and the two byte form (leading byte 28) is
+            used for everything that does not fit the shorter forms.
+        :return: The encoded operand as a bytestring
+        '''
+        four_byte_op = {'cff':29, 't1':255}.get(encoding, None)
+
+        if -107 <= value <= 107:
+            code = bytes(bytearray([value + 139]))
+        elif 108 <= value <= 1131:
+            value = value - 108
+            code = bytes(bytearray([(value >> 8) + 247, (value & 0xFF)]))
+        elif -1131 <= value <= -108:
+            value = -value - 108
+            code = bytes(bytearray([(value >> 8) + 251, (value & 0xFF)]))
+        elif four_byte_op is None:
+            # T2 only supports 2 byte ints
+            code = bytes(bytearray([28])) + pack(b">h", value)
+        else:
+            code = bytes(bytearray([four_byte_op])) + pack(b">l", value)
+        return code
+
+    def write_offset(self, value):
+        '''
+        Encode ``value`` in the fixed width form: the byte 29 followed by a
+        four byte big-endian signed integer, however small the value is.
+        '''
+        prefix = bytes(bytearray((29,)))
+        return prefix + pack(b">l", value)
+
+    def write_number(self, value, encoding="cff"):
+        '''
+        Encode ``value`` with :meth:`write_float` if it is a float and with
+        :meth:`write_int` otherwise. ``encoding`` is passed on unchanged.
+        '''
+        if isinstance(value, float):
+            return self.write_float(value, encoding)
+        return self.write_int(value, encoding)
+
+class Dict(ByteCode):

    operand_encoding = cff_dict_operand_encoding
-    TABLE = []
+    TABLE = ()
+    FILTERED = frozenset()
+    OFFSETS = frozenset()

    def __init__(self):
-        Reader.__init__(self)
+        ByteCode.__init__(self)

        self.operators = {op:(name, arg) for op, name, arg, default in
                self.TABLE}
@ -141,9 +191,53 @@ class Dict(Reader):
        del self.stack[:]
 		return out

+    def compile(self, strings):
+        '''
+        Serialize this dictionary to its binary form.
+
+        The operators in TABLE are written in table order, each one as its
+        encoded operand(s) followed by its opcode. Operators named in
+        FILTERED, and operators whose value equals the default, are left
+        out. The result is also stored in ``self.raw``.
+
+        :param strings: A callable that is passed every operand of type SID
+                        and returns the value that is written in its place
+        :return: The serialized dictionary as a bytestring
+        :raises ValueError: If an operator that takes several operands has a
+                            value with a different number of items
+        '''
+        data = []
+        for op, name, arg, default in self.TABLE:
+            if name in self.FILTERED:
+                continue
+            val = self.safe_get(name)
+            opcode = bytes(bytearray(op if isinstance(op, tuple) else [op]))
+            if val != self.defaults[name]:
+                # encode_number() consults this flag to decide between the
+                # fixed width offset form and the compact number form
+                self.encoding_offset = name in self.OFFSETS
+                if isinstance(arg, tuple):
+                    if len(val) != len(arg):
+                        raise ValueError('Invalid argument %s for operator: %s'
+                                %(val, op))
+                    for typ, v in zip(arg, val):
+                        if typ == 'SID':
+                            # Convert the individual operand, not the whole
+                            # tuple of operands
+                            v = strings(v)
+                        data.append(getattr(self, 'encode_'+typ)(v))
+                else:
+                    if arg == 'SID':
+                        val = strings(val)
+                    data.append(getattr(self, 'encode_'+arg)(val))
+                data.append(opcode)
+        self.raw = b''.join(data)
+        return self.raw
+
+    def encode_number(self, val):
+        '''
+        Encode a single number. The fixed width offset form is used while
+        ``self.encoding_offset`` is set, the compact number form otherwise.
+        '''
+        writer = self.write_offset if self.encoding_offset else self.write_number
+        return writer(val)
+
+    def encode_SID(self, val):
+        '''
+        Encode a string id. It is written as a plain integer with the
+        default encoding of :meth:`write_int`.
+        '''
+        sid = val
+        return self.write_int(sid)
+
+    def encode_array(self, val):
+        '''
+        Encode every item of ``val`` with :meth:`encode_number` and return
+        the results joined together, in order.
+        '''
+        return b''.join(self.encode_number(item) for item in val)
+
+    def encode_delta(self, value):
+        '''
+        Encode a sequence of numbers as deltas: the first item is written as
+        is and every later item as its difference from the item before it.
+
+        :param value: An iterable of numbers
+        :return: The encoded operands as a bytestring
+        '''
+        out = []
+        last = 0
+        for v in value:
+            out.append(v - last)
+            last = v
+        return self.encode_array(out)
+
 class TopDict(Dict):

-    TABLE = [
+    TABLE = (
 	#opcode     name                  argument type   default
 	((12, 30), 'ROS',        ('SID','SID','number'), None,      ),
 	((12, 20), 'SyntheticBase',      'number',       None,      ),
@ -179,12 +273,18 @@ class TopDict(Dict):
 	((12, 37), 'FDSelect',           'number',       None,      ),
 	((12, 36), 'FDArray',            'number',       None,      ),
 	(17,       'CharStrings',        'number',       None,      ),
-    ]
+    )
+
+    # We will not write these operators out
+    FILTERED = {'ROS', 'SyntheticBase', 'UniqueID', 'XUID',
+            'CIDFontVersion', 'CIDFontRevision', 'CIDFontType', 'CIDCount',
+            'UIDBase', 'Encoding', 'FDSelect', 'FDArray'}
+    OFFSETS = {'charset', 'Encoding', 'CharStrings', 'Private'}

 class PrivateDict(Dict):

-    TABLE = [
-#	opcode     name                  argument type   default
+    TABLE = (
+    #	opcode     name                  argument type   default
 	(6,        'BlueValues',         'delta',        None,      ),
 	(7,        'OtherBlues',         'delta',        None,      ),
 	(8,        'FamilyBlues',        'delta',        None,      ),
@ -205,5 +305,7 @@ class PrivateDict(Dict):
 	(20,       'defaultWidthX',      'number',       0,         ),
 	(21,       'nominalWidthX',      'number',       0,         ),
 	(19,       'Subrs',              'number',       None,      ),
-    ]
+    )
+
+    OFFSETS = {'Subrs'}

--- a/src/calibre/utils/fonts/sfnt/cff/table.py
+++ b/src/calibre/utils/fonts/sfnt/cff/table.py
@ -11,8 +11,10 @@ from struct import unpack_from, unpack, calcsize
 from functools import partial

 from calibre.utils.fonts.sfnt import UnknownTable
-from calibre.utils.fonts.sfnt.errors import UnsupportedFont
+from calibre.utils.fonts.sfnt.errors import UnsupportedFont, NoGlyphs
 from calibre.utils.fonts.sfnt.cff.dict_data import TopDict, PrivateDict
+from calibre.utils.fonts.sfnt.cff.constants import (cff_standard_strings,
+        STANDARD_CHARSETS)

 # Useful links
 # http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
@ -81,9 +83,9 @@ class CFF(object):
        self.charset = Charset(raw, self.top_dict.safe_get('charset'),
                self.strings, self.num_glyphs, self.is_CID)

-        import pprint
-        pprint.pprint(self.top_dict)
-        pprint.pprint(self.private_dict)
+        # import pprint
+        # pprint.pprint(self.top_dict)
+        # pprint.pprint(self.private_dict)

 class Index(list):

@ -125,107 +127,6 @@ class Strings(Index):

 class Charset(list):

-    STANDARD_CHARSETS = [ # {{{
-    # ISOAdobe
-    (".notdef", "space", "exclam", "quotedbl", "numbersign", "dollar",
-        "percent", "ampersand", "quoteright", "parenleft", "parenright",
-        "asterisk", "plus", "comma", "hyphen", "period", "slash", "zero",
-        "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
-        "colon", "semicolon", "less", "equal", "greater", "question", "at",
-        "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
-        "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
-        "bracketleft", "backslash", "bracketright", "asciicircum",
-        "underscore", "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i",
-        "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
-        "x", "y", "z", "braceleft", "bar", "braceright", "asciitilde",
-        "exclamdown", "cent", "sterling", "fraction", "yen", "florin",
-        "section", "currency", "quotesingle", "quotedblleft", "guillemotleft",
-        "guilsinglleft", "guilsinglright", "fi", "fl", "endash", "dagger",
-        "daggerdbl", "periodcentered", "paragraph", "bullet", "quotesinglbase",
-        "quotedblbase", "quotedblright", "guillemotright", "ellipsis",
-        "perthousand", "questiondown", "grave", "acute", "circumflex", "tilde",
-        "macron", "breve", "dotaccent", "dieresis", "ring", "cedilla",
-        "hungarumlaut", "ogonek", "caron", "emdash", "AE", "ordfeminine",
-        "Lslash", "Oslash", "OE", "ordmasculine", "ae", "dotlessi", "lslash",
-        "oslash", "oe", "germandbls", "onesuperior", "logicalnot", "mu",
-        "trademark", "Eth", "onehalf", "plusminus", "Thorn", "onequarter",
-        "divide", "brokenbar", "degree", "thorn", "threequarters",
-        "twosuperior", "registered", "minus", "eth", "multiply",
-        "threesuperior", "copyright", "Aacute", "Acircumflex", "Adieresis",
-        "Agrave", "Aring", "Atilde", "Ccedilla", "Eacute", "Ecircumflex",
-        "Edieresis", "Egrave", "Iacute", "Icircumflex", "Idieresis", "Igrave",
-        "Ntilde", "Oacute", "Ocircumflex", "Odieresis", "Ograve", "Otilde",
-        "Scaron", "Uacute", "Ucircumflex", "Udieresis", "Ugrave", "Yacute",
-        "Ydieresis", "Zcaron", "aacute", "acircumflex", "adieresis", "agrave",
-        "aring", "atilde", "ccedilla", "eacute", "ecircumflex", "edieresis",
-        "egrave", "iacute", "icircumflex", "idieresis", "igrave", "ntilde",
-        "oacute", "ocircumflex", "odieresis", "ograve", "otilde", "scaron",
-        "uacute", "ucircumflex", "udieresis", "ugrave", "yacute", "ydieresis",
-        "zcaron"),
-
-    # Expert
-    ("notdef", "space", "exclamsmall", "Hungarumlautsmall", "dollaroldstyle",
-        "dollarsuperior", "ampersandsmall", "Acutesmall", "parenleftsuperior",
-        "parenrightsuperior", "twodotenleader", "onedotenleader", "comma",
-        "hyphen", "period", "fraction", "zerooldstyle", "oneoldstyle",
-        "twooldstyle", "threeoldstyle", "fouroldstyle", "fiveoldstyle",
-        "sixoldstyle", "sevenoldstyle", "eightoldstyle", "nineoldstyle",
-        "colon", "semicolon", "commasuperior", "threequartersemdash",
-        "periodsuperior", "questionsmall", "asuperior", "bsuperior",
-        "centsuperior", "dsuperior", "esuperior", "isuperior", "lsuperior",
-        "msuperior", "nsuperior", "osuperior", "rsuperior", "ssuperior",
-        "tsuperior", "ff", "fi", "fl", "ffi", "ffl", "parenleftinferior",
-        "parenrightinferior", "Circumflexsmall", "hyphensuperior",
-        "Gravesmall", "Asmall", "Bsmall", "Csmall", "Dsmall", "Esmall",
-        "Fsmall", "Gsmall", "Hsmall", "Ismall", "Jsmall", "Ksmall", "Lsmall",
-        "Msmall", "Nsmall", "Osmall", "Psmall", "Qsmall", "Rsmall", "Ssmall",
-        "Tsmall", "Usmall", "Vsmall", "Wsmall", "Xsmall", "Ysmall", "Zsmall",
-        "colonmonetary", "onefitted", "rupiah", "Tildesmall",
-        "exclamdownsmall", "centoldstyle", "Lslashsmall", "Scaronsmall",
-        "Zcaronsmall", "Dieresissmall", "Brevesmall", "Caronsmall",
-        "Dotaccentsmall", "Macronsmall", "figuredash", "hypheninferior",
-        "Ogoneksmall", "Ringsmall", "Cedillasmall", "onequarter", "onehalf",
-        "threequarters", "questiondownsmall", "oneeighth", "threeeighths",
-        "fiveeighths", "seveneighths", "onethird", "twothirds", "zerosuperior",
-        "onesuperior", "twosuperior", "threesuperior", "foursuperior",
-        "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
-        "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
-        "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
-        "seveninferior", "eightinferior", "nineinferior", "centinferior",
-        "dollarinferior", "periodinferior", "commainferior", "Agravesmall",
-        "Aacutesmall", "Acircumflexsmall", "Atildesmall", "Adieresissmall",
-        "Aringsmall", "AEsmall", "Ccedillasmall", "Egravesmall", "Eacutesmall",
-        "Ecircumflexsmall", "Edieresissmall", "Igravesmall", "Iacutesmall",
-        "Icircumflexsmall", "Idieresissmall", "Ethsmall", "Ntildesmall",
-        "Ogravesmall", "Oacutesmall", "Ocircumflexsmall", "Otildesmall",
-        "Odieresissmall", "OEsmall", "Oslashsmall", "Ugravesmall",
-        "Uacutesmall", "Ucircumflexsmall", "Udieresissmall", "Yacutesmall",
-        "Thornsmall", "Ydieresissmall"),
-
-    # Expert Subset
-    (".notdef", "space", "dollaroldstyle", "dollarsuperior",
-            "parenleftsuperior", "parenrightsuperior", "twodotenleader",
-            "onedotenleader", "comma", "hyphen", "period", "fraction",
-            "zerooldstyle", "oneoldstyle", "twooldstyle", "threeoldstyle",
-            "fouroldstyle", "fiveoldstyle", "sixoldstyle", "sevenoldstyle",
-            "eightoldstyle", "nineoldstyle", "colon", "semicolon",
-            "commasuperior", "threequartersemdash", "periodsuperior",
-            "asuperior", "bsuperior", "centsuperior", "dsuperior", "esuperior",
-            "isuperior", "lsuperior", "msuperior", "nsuperior", "osuperior",
-            "rsuperior", "ssuperior", "tsuperior", "ff", "fi", "fl", "ffi",
-            "ffl", "parenleftinferior", "parenrightinferior", "hyphensuperior",
-            "colonmonetary", "onefitted", "rupiah", "centoldstyle",
-            "figuredash", "hypheninferior", "onequarter", "onehalf",
-            "threequarters", "oneeighth", "threeeighths", "fiveeighths",
-            "seveneighths", "onethird", "twothirds", "zerosuperior",
-            "onesuperior", "twosuperior", "threesuperior", "foursuperior",
-            "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
-            "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
-            "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
-            "seveninferior", "eightinferior", "nineinferior", "centinferior",
-            "dollarinferior", "periodinferior", "commainferior"),
-    ] # }}}
-
    def __init__(self, raw, offset, strings, num_glyphs, is_CID):
        super(Charset, self).__init__()
        self.standard_charset = offset if offset in {0, 1, 2} else None
@ -266,7 +167,7 @@ class Charset(list):
    def lookup(self, glyph_id):
        if self.standard_charset is None:
            return self[glyph_id]
-        return self.STANDARD_CHARSETS[self.standard_charset][glyph_id].encode('ascii')
+        return STANDARD_CHARSETS[self.standard_charset][glyph_id].encode('ascii')

    def safe_lookup(self, glyph_id):
        try:
@ -293,6 +194,8 @@ class CFFTable(UnknownTable):
                glyph_id in character_map.iteritems()}
        charset = set(charset_map.itervalues())
        charset.discard(None)
+        if not charset:
+            raise NoGlyphs('This font has no glyphs for the specified characters')
        s = Subset(self.cff, charset)

        # Rebuild character_map with the glyph ids from the subset font
@ -302,74 +205,10 @@ class CFFTable(UnknownTable):
            if glyph_id:
                character_map[code] = glyph_id

-# cff_standard_strings {{{
-# The 391 Standard Strings as used in the CFF format.
-# from Adobe Technical None #5176, version 1.0, 18 March 1998
+        # Check that raw is parseable
+        CFF(s.raw)
+
+        self.raw = s.raw
+

-cff_standard_strings = [
-'.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
-'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
-'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
-'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
-'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
-'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
-'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
-'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
-'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
-'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
-'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
-'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
-'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
-'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
-'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
-'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
-'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
-'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
-'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
-'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
-'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
-'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
-'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
-'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
-'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
-'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
-'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
-'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
-'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
-'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
-'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
-'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
-'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
-'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
-'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
-'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
-'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
-'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
-'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
-'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
-'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
-'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
-'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
-'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
-'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
-'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
-'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
-'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
-'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
-'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
-'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
-'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
-'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
-'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
-'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
-'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
-'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
-'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
-'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
-'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
-'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
-'001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
-'Semibold'
-]
-# }}}

--- a/src/calibre/utils/fonts/sfnt/cff/writer.py
+++ b/src/calibre/utils/fonts/sfnt/cff/writer.py
@ -10,6 +10,8 @@ __docformat__ = 'restructuredtext en'
 from struct import pack
 from collections import OrderedDict

+from calibre.utils.fonts.sfnt.cff.constants import cff_standard_strings
+
 class Index(list):

    def __init__(self):
@ -40,12 +42,66 @@ class Index(list):
                offsets = b''.join(pack(b'>L', x)[1:] for x in offsets)
            else:
                fmt = {1:'B', 2:'H', 4:'L'}[offsize]
-                offsets = pack( ('>%d%s'%(len(self), fmt)).encode('ascii'),
+                offsets = pack( ('>%d%s'%(len(offsets), fmt)).encode('ascii'),
                        *offsets)

            self.raw = prefix + offsets + obj_data
        return self.raw

+class Strings(Index):
+    '''
+    INDEX of the strings that are not part of cff_standard_strings.
+
+    Calling an instance with a string returns the id of that string. The
+    standard strings keep their position in cff_standard_strings as their
+    id, any other string is appended to this INDEX the first time it is
+    seen and is numbered after the standard ones.
+    '''
+
+    def __init__(self):
+        Index.__init__(self)
+        # Maps every known string to its id, pre-populated with the
+        # standard strings
+        self.added = dict(
+            (name, sid) for sid, name in enumerate(cff_standard_strings))
+
+    def __call__(self, x):
+        if x in self.added:
+            return self.added[x]
+        # First time this string is seen: ids of custom strings continue
+        # where the standard strings end
+        sid = len(cff_standard_strings) + len(self)
+        self.added[x] = sid
+        self.append(x)
+        return sid
+
+class Dict(Index):
+    '''
+    An INDEX with exactly one entry: the compiled form of the dictionary
+    ``src``.
+
+    :param src: Object with a ``compile(strings)`` method that returns the
+                serialized dictionary.
+    :param strings: Passed through to ``src.compile()``.
+    '''
+
+    def __init__(self, src, strings):
+        Index.__init__(self)
+        self.src, self.strings = src, strings
+
+    def compile(self):
+        '''
+        Recompile ``src`` and rebuild the INDEX around it.
+
+        :return: Whatever Index.compile() returns, so that this method can
+                 be used like the compile() of any other Index.
+        '''
+        # src is serialized afresh on every call, so entries that were
+        # changed in src since the last call are picked up
+        self[:] = [self.src.compile(self.strings)]
+        return Index.compile(self)
+
+class PrivateDict(object):
+    '''
+    Serializer for a private dictionary and its optional subroutines.
+
+    :param src: Object with a ``compile(strings)`` method and item
+                assignment (``src['Subrs'] = offset``).
+    :param subrs: Iterable of subroutines or None. When not None, they are
+                  stored in ``self.subrs`` as an already compiled Index.
+    :param strings: Passed through to ``src.compile()``.
+    '''
+
+    def __init__(self, src, subrs, strings):
+        self.src, self.strings = src, strings
+        self.subrs = None
+        if subrs is not None:
+            self.subrs = Index()
+            self.subrs.extend(subrs)
+            self.subrs.compile()
+
+    def compile(self):
+        '''
+        Serialize ``src``, store the result in ``self.raw`` and return it.
+
+        When there are subroutines, ``src['Subrs']`` is set to the length
+        of the serialized dictionary.
+        '''
+        raw = self.src.compile(self.strings)
+        if self.subrs is not None:
+            # Storing the offset can change the size of the serialized
+            # dictionary (NOTE(review): assumes src.compile() encodes
+            # integers with a variable width, as described in the CFF
+            # spec), which would make the offset just stored stale. So
+            # recompile until the length stops changing. The encoded size
+            # of an integer only takes a handful of distinct values, a
+            # few passes are enough.
+            for _ in range(5):
+                self.src['Subrs'] = len(raw)
+                updated = self.src.compile(self.strings)
+                converged = len(updated) == len(raw)
+                raw = updated
+                if converged:
+                    break
+        self.raw = raw
+        return raw
+
+class Charsets(list):
+    '''
+    List of glyph names that is serialized as a single zero byte followed
+    by one big-endian unsigned 16 bit id per name.
+
+    :param strings: Callable that maps a glyph name to its id.
+    '''
+
+    def __init__(self, strings):
+        list.__init__(self)
+        self.strings = strings
+
+    def compile(self):
+        ''' Serialize the names, store the bytes in ``self.raw`` and
+        return them. '''
+        lookup = self.strings
+        sids = [lookup(name) for name in self]
+        # A single format string: the leading zero byte and then the ids
+        fmt = ('>B%dH' % len(sids)).encode('ascii')
+        self.raw = pack(fmt, 0, *sids)
+        return self.raw

 class Subset(object):

@ -53,19 +109,77 @@ class Subset(object):
        self.cff = cff
        self.keep_charnames = keep_charnames

+        header = pack(b'>4B', 1, 0, 4, cff.offset_size)
+
        # Font names Index
        font_names = Index()
        font_names.extend(self.cff.font_names)

-        # CharStrings Index
+        # Strings Index
+        strings = Strings()
+
+        # CharStrings Index and charsets
        char_strings = Index()
        self.charname_map = OrderedDict()
+        charsets = Charsets(strings)

        for i in xrange(self.cff.num_glyphs):
            cname = self.cff.charset.safe_lookup(i)
            if cname in keep_charnames:
                char_strings.append(self.cff.char_strings[i])
                self.charname_map[cname] = i
+            if i > 0: # .notdef is not included
+                charsets.append(cname)

+        # Add the strings
        char_strings.compile()
+        charsets.compile()
+
+        # Global subroutines
+        global_subrs = Index()
+        global_subrs.extend(cff.global_subrs)
+        global_subrs.compile()
+
+        # TOP DICT
+        top_dict = Dict(cff.top_dict, strings)
+        top_dict.compile() # Add strings
+
+        private_dict = None
+        if cff.private_dict is not None:
+            private_dict = PrivateDict(cff.private_dict, cff.private_subrs,
+                    strings)
+            private_dict.compile() # Add strings
+
+        fixed_prefix = header + font_names.compile()
+
+        t = top_dict.src
+        # Put in dummy offsets
+        t['charset'] = 1
+        t['CharStrings'] = 1
+        if private_dict is not None:
+            t['Private'] = (len(private_dict.raw), 1)
+        top_dict.compile()
+
+        strings.compile()
+
+        # Calculate real offsets
+        pos = len(fixed_prefix)
+        pos += len(top_dict.raw)
+        pos += len(strings.raw)
+        pos += len(global_subrs.raw)
+        t['charset'] = pos
+        pos += len(charsets.raw)
+        t['CharStrings'] = pos
+        pos += len(char_strings.raw)
+        if private_dict is not None:
+            t['Private'] = (len(private_dict.raw), pos)
+        top_dict.compile()
+
+        self.raw = (fixed_prefix + top_dict.raw + strings.raw +
+                global_subrs.raw + charsets.raw + char_strings.raw)
+        if private_dict is not None:
+            self.raw += private_dict.raw
+            if private_dict.subrs is not None:
+                self.raw += private_dict.subrs.raw
+

--- a/src/calibre/utils/fonts/sfnt/subset.py
+++ b/src/calibre/utils/fonts/sfnt/subset.py
@ -70,7 +70,6 @@ def subset_postscript(sfnt, character_map):
    cff = sfnt[b'CFF ']
    cff.decompile()
    cff.subset(character_map)
-    raise Exception('TODO: Implement CFF subsetting')

 def subset(raw, individual_chars, ranges=()):
    chars = list(map(ord, individual_chars))