More work on CFF subsetting. The subset table is written but character mapping is off

This commit is contained in:
Kovid Goyal 2012-11-09 11:58:02 +05:30
parent ac0605ae04
commit 98f4cbb7e3
5 changed files with 425 additions and 188 deletions

View File

@ -0,0 +1,183 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
# cff_standard_strings {{{
# The 391 Standard Strings as used in the CFF format.
# from Adobe Technical Note #5176, version 1.0, 18 March 1998
# The font writer maps every string in this list to its index in the list, so
# the order of the entries is significant and must not be changed.
cff_standard_strings = [
'.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
'001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
'Semibold'
]
# }}}
# The predefined CFF charsets. The list is indexed by the predefined charset
# id (0: ISOAdobe, 1: Expert, 2: Expert Subset) and every tuple is indexed by
# glyph id to obtain the glyph name. Glyph 0 is always ".notdef".
STANDARD_CHARSETS = [ # {{{
# ISOAdobe
(".notdef", "space", "exclam", "quotedbl", "numbersign", "dollar",
"percent", "ampersand", "quoteright", "parenleft", "parenright",
"asterisk", "plus", "comma", "hyphen", "period", "slash", "zero",
"one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
"colon", "semicolon", "less", "equal", "greater", "question", "at",
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
"O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
"bracketleft", "backslash", "bracketright", "asciicircum",
"underscore", "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i",
"j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
"x", "y", "z", "braceleft", "bar", "braceright", "asciitilde",
"exclamdown", "cent", "sterling", "fraction", "yen", "florin",
"section", "currency", "quotesingle", "quotedblleft", "guillemotleft",
"guilsinglleft", "guilsinglright", "fi", "fl", "endash", "dagger",
"daggerdbl", "periodcentered", "paragraph", "bullet", "quotesinglbase",
"quotedblbase", "quotedblright", "guillemotright", "ellipsis",
"perthousand", "questiondown", "grave", "acute", "circumflex", "tilde",
"macron", "breve", "dotaccent", "dieresis", "ring", "cedilla",
"hungarumlaut", "ogonek", "caron", "emdash", "AE", "ordfeminine",
"Lslash", "Oslash", "OE", "ordmasculine", "ae", "dotlessi", "lslash",
"oslash", "oe", "germandbls", "onesuperior", "logicalnot", "mu",
"trademark", "Eth", "onehalf", "plusminus", "Thorn", "onequarter",
"divide", "brokenbar", "degree", "thorn", "threequarters",
"twosuperior", "registered", "minus", "eth", "multiply",
"threesuperior", "copyright", "Aacute", "Acircumflex", "Adieresis",
"Agrave", "Aring", "Atilde", "Ccedilla", "Eacute", "Ecircumflex",
"Edieresis", "Egrave", "Iacute", "Icircumflex", "Idieresis", "Igrave",
"Ntilde", "Oacute", "Ocircumflex", "Odieresis", "Ograve", "Otilde",
"Scaron", "Uacute", "Ucircumflex", "Udieresis", "Ugrave", "Yacute",
"Ydieresis", "Zcaron", "aacute", "acircumflex", "adieresis", "agrave",
"aring", "atilde", "ccedilla", "eacute", "ecircumflex", "edieresis",
"egrave", "iacute", "icircumflex", "idieresis", "igrave", "ntilde",
"oacute", "ocircumflex", "odieresis", "ograve", "otilde", "scaron",
"uacute", "ucircumflex", "udieresis", "ugrave", "yacute", "ydieresis",
"zcaron"),
# Expert
# The first entry must be ".notdef" (with the leading period), exactly as in
# the other two charsets.
(".notdef", "space", "exclamsmall", "Hungarumlautsmall", "dollaroldstyle",
"dollarsuperior", "ampersandsmall", "Acutesmall", "parenleftsuperior",
"parenrightsuperior", "twodotenleader", "onedotenleader", "comma",
"hyphen", "period", "fraction", "zerooldstyle", "oneoldstyle",
"twooldstyle", "threeoldstyle", "fouroldstyle", "fiveoldstyle",
"sixoldstyle", "sevenoldstyle", "eightoldstyle", "nineoldstyle",
"colon", "semicolon", "commasuperior", "threequartersemdash",
"periodsuperior", "questionsmall", "asuperior", "bsuperior",
"centsuperior", "dsuperior", "esuperior", "isuperior", "lsuperior",
"msuperior", "nsuperior", "osuperior", "rsuperior", "ssuperior",
"tsuperior", "ff", "fi", "fl", "ffi", "ffl", "parenleftinferior",
"parenrightinferior", "Circumflexsmall", "hyphensuperior",
"Gravesmall", "Asmall", "Bsmall", "Csmall", "Dsmall", "Esmall",
"Fsmall", "Gsmall", "Hsmall", "Ismall", "Jsmall", "Ksmall", "Lsmall",
"Msmall", "Nsmall", "Osmall", "Psmall", "Qsmall", "Rsmall", "Ssmall",
"Tsmall", "Usmall", "Vsmall", "Wsmall", "Xsmall", "Ysmall", "Zsmall",
"colonmonetary", "onefitted", "rupiah", "Tildesmall",
"exclamdownsmall", "centoldstyle", "Lslashsmall", "Scaronsmall",
"Zcaronsmall", "Dieresissmall", "Brevesmall", "Caronsmall",
"Dotaccentsmall", "Macronsmall", "figuredash", "hypheninferior",
"Ogoneksmall", "Ringsmall", "Cedillasmall", "onequarter", "onehalf",
"threequarters", "questiondownsmall", "oneeighth", "threeeighths",
"fiveeighths", "seveneighths", "onethird", "twothirds", "zerosuperior",
"onesuperior", "twosuperior", "threesuperior", "foursuperior",
"fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
"ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
"threeinferior", "fourinferior", "fiveinferior", "sixinferior",
"seveninferior", "eightinferior", "nineinferior", "centinferior",
"dollarinferior", "periodinferior", "commainferior", "Agravesmall",
"Aacutesmall", "Acircumflexsmall", "Atildesmall", "Adieresissmall",
"Aringsmall", "AEsmall", "Ccedillasmall", "Egravesmall", "Eacutesmall",
"Ecircumflexsmall", "Edieresissmall", "Igravesmall", "Iacutesmall",
"Icircumflexsmall", "Idieresissmall", "Ethsmall", "Ntildesmall",
"Ogravesmall", "Oacutesmall", "Ocircumflexsmall", "Otildesmall",
"Odieresissmall", "OEsmall", "Oslashsmall", "Ugravesmall",
"Uacutesmall", "Ucircumflexsmall", "Udieresissmall", "Yacutesmall",
"Thornsmall", "Ydieresissmall"),
# Expert Subset
(".notdef", "space", "dollaroldstyle", "dollarsuperior",
"parenleftsuperior", "parenrightsuperior", "twodotenleader",
"onedotenleader", "comma", "hyphen", "period", "fraction",
"zerooldstyle", "oneoldstyle", "twooldstyle", "threeoldstyle",
"fouroldstyle", "fiveoldstyle", "sixoldstyle", "sevenoldstyle",
"eightoldstyle", "nineoldstyle", "colon", "semicolon",
"commasuperior", "threequartersemdash", "periodsuperior",
"asuperior", "bsuperior", "centsuperior", "dsuperior", "esuperior",
"isuperior", "lsuperior", "msuperior", "nsuperior", "osuperior",
"rsuperior", "ssuperior", "tsuperior", "ff", "fi", "fl", "ffi",
"ffl", "parenleftinferior", "parenrightinferior", "hyphensuperior",
"colonmonetary", "onefitted", "rupiah", "centoldstyle",
"figuredash", "hypheninferior", "onequarter", "onehalf",
"threequarters", "oneeighth", "threeeighths", "fiveeighths",
"seveneighths", "onethird", "twothirds", "zerosuperior",
"onesuperior", "twosuperior", "threesuperior", "foursuperior",
"fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
"ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
"threeinferior", "fourinferior", "fiveinferior", "sixinferior",
"seveninferior", "eightinferior", "nineinferior", "centinferior",
"dollarinferior", "periodinferior", "commainferior"),
] # }}}

View File

@ -7,7 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from struct import unpack from struct import unpack, pack
t1_operand_encoding = [None] * 256 t1_operand_encoding = [None] * 256
t1_operand_encoding[0:32] = (32) * ["do_operator"] t1_operand_encoding[0:32] = (32) * ["do_operator"]
@ -27,8 +27,9 @@ cff_dict_operand_encoding[255] = "reserved"
real_nibbles = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', real_nibbles = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'.', 'E', 'E-', None, '-'] '.', 'E', 'E-', None, '-']
real_nibbles_map = {x:i for i, x in enumerate(real_nibbles)}
class Reader(dict): class ByteCode(dict):
def read_byte(self, b0, data, index): def read_byte(self, b0, data, index):
return b0 - 139, index return b0 - 139, index
@ -71,13 +72,62 @@ class Reader(dict):
number = number + real_nibbles[nibble1] number = number + real_nibbles[nibble1]
return float(number), index return float(number), index
def write_float(self, f, encoding='ignored'):
    '''
    Encode f as a real number operand.

    The result is the byte 30 followed by the textual form of f packed two
    nibbles per byte. The text is closed by an 0xf nibble and a second 0xf
    nibble is added when that is needed to fill the last byte.

    :param f: The value to encode
    :param encoding: Not used. Accepted so that this method can be called
        with the same arguments as write_int()
    :return: The encoded operand as a bytestring
    '''
    s = type(u'')(f).upper()
    # A leading zero before the decimal point is dropped to save a nibble
    if s[:2] == "0.":
        s = s[1:]
    elif s[:3] == "-0.":
        s = "-" + s[2:]
    nibbles = []
    while s:
        c, s = s[0], s[1:]
        if c == "E":
            if s[:1] == "-":
                # Negative exponents have a dedicated nibble
                c, s = "E-", s[1:]
            elif s[:1] == "+":
                # Large values are formatted as, for example, 1E+16. There is
                # no nibble for an explicit plus sign, a bare E already means
                # a positive exponent, so the sign is simply skipped.
                s = s[1:]
        nibbles.append(real_nibbles_map[c])
    nibbles.append(0xf)
    if len(nibbles) % 2:
        nibbles.append(0xf)
    d = bytearray([30])
    # nibbles now has an even length, pack it high nibble first
    for high, low in zip(nibbles[::2], nibbles[1::2]):
        d.append(high << 4 | low)
    return bytes(d)
def write_int(self, value, encoding="cff"):
    '''
    Encode the integer value using the shortest form available for it.

    :param value: The integer to encode
    :param encoding: 'cff' or 't1' select the operator byte (29 or 255) that
        introduces a four byte integer. With any other encoding a two byte
        integer introduced by the byte 28 is written instead.
    :return: The encoded integer as a bytestring
    '''
    def octets(*values):
        return bytes(bytearray(values))

    wide_op = {'cff':29, 't1':255}.get(encoding, None)

    # One byte form
    if -107 <= value <= 107:
        return octets(value + 139)

    # Two byte forms, for positive and negative values respectively
    if 108 <= value <= 1131:
        biased = value - 108
        return octets((biased >> 8) + 247, biased & 0xFF)
    if -1131 <= value <= -108:
        biased = -value - 108
        return octets((biased >> 8) + 251, biased & 0xFF)

    if wide_op is None:
        # T2 only supports 2 byte ints
        return octets(28) + pack(b">h", value)
    return octets(wide_op) + pack(b">l", value)
def write_offset(self, value):
    '''
    Encode value as a four byte integer introduced by the byte 29,
    whatever its magnitude, so that the result is always five bytes long.
    '''
    return pack(b">Bl", 29, value)
def write_number(self, value, encoding="cff"):
    '''
    Encode value with write_float() if it is a float and with write_int()
    otherwise. encoding is passed through to the chosen method.
    '''
    if isinstance(value, float):
        return self.write_float(value, encoding)
    return self.write_int(value, encoding)
class Dict(ByteCode):
operand_encoding = cff_dict_operand_encoding operand_encoding = cff_dict_operand_encoding
TABLE = [] TABLE = ()
FILTERED = frozenset()
OFFSETS = frozenset()
def __init__(self): def __init__(self):
Reader.__init__(self) ByteCode.__init__(self)
self.operators = {op:(name, arg) for op, name, arg, default in self.operators = {op:(name, arg) for op, name, arg, default in
self.TABLE} self.TABLE}
@ -141,9 +191,53 @@ class Dict(Reader):
del self.stack[:] del self.stack[:]
return out return out
def compile(self, strings):
    '''
    Serialize this dictionary to a bytestring.

    The operators are written in the order in which they appear in TABLE.
    Operators listed in FILTERED and operators whose value is equal to the
    default value are not written. The result is also stored in self.raw.

    :param strings: A callable that maps a string to its SID
    :return: The serialized dictionary
    :raises ValueError: If an operator that takes several operands has a
        value with the wrong number of items
    '''
    data = []
    for op, name, arg, default in self.TABLE:
        if name in self.FILTERED:
            continue
        val = self.safe_get(name)
        if val == self.defaults[name]:
            continue
        opcode = bytes(bytearray(op if isinstance(op, tuple) else [op]))
        # Consulted by encode_number() to decide how numbers are written
        self.encoding_offset = name in self.OFFSETS
        if isinstance(arg, tuple):
            if len(val) != len(arg):
                raise ValueError('Invalid argument %s for operator: %s'
                        %(val, op))
            for typ, v in zip(arg, val):
                if typ == 'SID':
                    # Convert only this operand, not the whole tuple
                    v = strings(v)
                data.append(getattr(self, 'encode_'+typ)(v))
        else:
            if arg == 'SID':
                val = strings(val)
            data.append(getattr(self, 'encode_'+arg)(val))
        # The operator follows its operands
        data.append(opcode)
    self.raw = b''.join(data)
    return self.raw
def encode_number(self, val):
    '''
    Encode val with write_offset() when self.encoding_offset is set and
    with write_number() otherwise.
    '''
    writer = self.write_offset if self.encoding_offset else self.write_number
    return writer(val)
def encode_SID(self, val):
    ''' Encode the SID val as an integer, using write_int() '''
    encoded = self.write_int(val)
    return encoded
def encode_array(self, val):
    '''
    Encode every item of val with encode_number() and return the
    concatenated result.
    '''
    return b''.join(self.encode_number(item) for item in val)
def encode_delta(self, value):
    '''
    Encode value as a delta array: the first item is written as is and
    every following item as the difference from the item before it. The
    differences are encoded with encode_array().
    '''
    items = list(value)
    deltas = [cur - prev for prev, cur in zip([0] + items[:-1], items)]
    return self.encode_array(deltas)
class TopDict(Dict): class TopDict(Dict):
TABLE = [ TABLE = (
#opcode name argument type default #opcode name argument type default
((12, 30), 'ROS', ('SID','SID','number'), None, ), ((12, 30), 'ROS', ('SID','SID','number'), None, ),
((12, 20), 'SyntheticBase', 'number', None, ), ((12, 20), 'SyntheticBase', 'number', None, ),
@ -179,11 +273,17 @@ class TopDict(Dict):
((12, 37), 'FDSelect', 'number', None, ), ((12, 37), 'FDSelect', 'number', None, ),
((12, 36), 'FDArray', 'number', None, ), ((12, 36), 'FDArray', 'number', None, ),
(17, 'CharStrings', 'number', None, ), (17, 'CharStrings', 'number', None, ),
] )
# We will not write these operators out
FILTERED = {'ROS', 'SyntheticBase', 'UniqueID', 'XUID',
'CIDFontVersion', 'CIDFontRevision', 'CIDFontType', 'CIDCount',
'UIDBase', 'Encoding', 'FDSelect', 'FDArray'}
OFFSETS = {'charset', 'Encoding', 'CharStrings', 'Private'}
class PrivateDict(Dict): class PrivateDict(Dict):
TABLE = [ TABLE = (
# opcode name argument type default # opcode name argument type default
(6, 'BlueValues', 'delta', None, ), (6, 'BlueValues', 'delta', None, ),
(7, 'OtherBlues', 'delta', None, ), (7, 'OtherBlues', 'delta', None, ),
@ -205,5 +305,7 @@ class PrivateDict(Dict):
(20, 'defaultWidthX', 'number', 0, ), (20, 'defaultWidthX', 'number', 0, ),
(21, 'nominalWidthX', 'number', 0, ), (21, 'nominalWidthX', 'number', 0, ),
(19, 'Subrs', 'number', None, ), (19, 'Subrs', 'number', None, ),
] )
OFFSETS = {'Subrs'}

View File

@ -11,8 +11,10 @@ from struct import unpack_from, unpack, calcsize
from functools import partial from functools import partial
from calibre.utils.fonts.sfnt import UnknownTable from calibre.utils.fonts.sfnt import UnknownTable
from calibre.utils.fonts.sfnt.errors import UnsupportedFont from calibre.utils.fonts.sfnt.errors import UnsupportedFont, NoGlyphs
from calibre.utils.fonts.sfnt.cff.dict_data import TopDict, PrivateDict from calibre.utils.fonts.sfnt.cff.dict_data import TopDict, PrivateDict
from calibre.utils.fonts.sfnt.cff.constants import (cff_standard_strings,
STANDARD_CHARSETS)
# Useful links # Useful links
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf # http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
@ -81,9 +83,9 @@ class CFF(object):
self.charset = Charset(raw, self.top_dict.safe_get('charset'), self.charset = Charset(raw, self.top_dict.safe_get('charset'),
self.strings, self.num_glyphs, self.is_CID) self.strings, self.num_glyphs, self.is_CID)
import pprint # import pprint
pprint.pprint(self.top_dict) # pprint.pprint(self.top_dict)
pprint.pprint(self.private_dict) # pprint.pprint(self.private_dict)
class Index(list): class Index(list):
@ -125,107 +127,6 @@ class Strings(Index):
class Charset(list): class Charset(list):
STANDARD_CHARSETS = [ # {{{
# ISOAdobe
(".notdef", "space", "exclam", "quotedbl", "numbersign", "dollar",
"percent", "ampersand", "quoteright", "parenleft", "parenright",
"asterisk", "plus", "comma", "hyphen", "period", "slash", "zero",
"one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
"colon", "semicolon", "less", "equal", "greater", "question", "at",
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
"O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
"bracketleft", "backslash", "bracketright", "asciicircum",
"underscore", "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i",
"j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
"x", "y", "z", "braceleft", "bar", "braceright", "asciitilde",
"exclamdown", "cent", "sterling", "fraction", "yen", "florin",
"section", "currency", "quotesingle", "quotedblleft", "guillemotleft",
"guilsinglleft", "guilsinglright", "fi", "fl", "endash", "dagger",
"daggerdbl", "periodcentered", "paragraph", "bullet", "quotesinglbase",
"quotedblbase", "quotedblright", "guillemotright", "ellipsis",
"perthousand", "questiondown", "grave", "acute", "circumflex", "tilde",
"macron", "breve", "dotaccent", "dieresis", "ring", "cedilla",
"hungarumlaut", "ogonek", "caron", "emdash", "AE", "ordfeminine",
"Lslash", "Oslash", "OE", "ordmasculine", "ae", "dotlessi", "lslash",
"oslash", "oe", "germandbls", "onesuperior", "logicalnot", "mu",
"trademark", "Eth", "onehalf", "plusminus", "Thorn", "onequarter",
"divide", "brokenbar", "degree", "thorn", "threequarters",
"twosuperior", "registered", "minus", "eth", "multiply",
"threesuperior", "copyright", "Aacute", "Acircumflex", "Adieresis",
"Agrave", "Aring", "Atilde", "Ccedilla", "Eacute", "Ecircumflex",
"Edieresis", "Egrave", "Iacute", "Icircumflex", "Idieresis", "Igrave",
"Ntilde", "Oacute", "Ocircumflex", "Odieresis", "Ograve", "Otilde",
"Scaron", "Uacute", "Ucircumflex", "Udieresis", "Ugrave", "Yacute",
"Ydieresis", "Zcaron", "aacute", "acircumflex", "adieresis", "agrave",
"aring", "atilde", "ccedilla", "eacute", "ecircumflex", "edieresis",
"egrave", "iacute", "icircumflex", "idieresis", "igrave", "ntilde",
"oacute", "ocircumflex", "odieresis", "ograve", "otilde", "scaron",
"uacute", "ucircumflex", "udieresis", "ugrave", "yacute", "ydieresis",
"zcaron"),
# Expert
("notdef", "space", "exclamsmall", "Hungarumlautsmall", "dollaroldstyle",
"dollarsuperior", "ampersandsmall", "Acutesmall", "parenleftsuperior",
"parenrightsuperior", "twodotenleader", "onedotenleader", "comma",
"hyphen", "period", "fraction", "zerooldstyle", "oneoldstyle",
"twooldstyle", "threeoldstyle", "fouroldstyle", "fiveoldstyle",
"sixoldstyle", "sevenoldstyle", "eightoldstyle", "nineoldstyle",
"colon", "semicolon", "commasuperior", "threequartersemdash",
"periodsuperior", "questionsmall", "asuperior", "bsuperior",
"centsuperior", "dsuperior", "esuperior", "isuperior", "lsuperior",
"msuperior", "nsuperior", "osuperior", "rsuperior", "ssuperior",
"tsuperior", "ff", "fi", "fl", "ffi", "ffl", "parenleftinferior",
"parenrightinferior", "Circumflexsmall", "hyphensuperior",
"Gravesmall", "Asmall", "Bsmall", "Csmall", "Dsmall", "Esmall",
"Fsmall", "Gsmall", "Hsmall", "Ismall", "Jsmall", "Ksmall", "Lsmall",
"Msmall", "Nsmall", "Osmall", "Psmall", "Qsmall", "Rsmall", "Ssmall",
"Tsmall", "Usmall", "Vsmall", "Wsmall", "Xsmall", "Ysmall", "Zsmall",
"colonmonetary", "onefitted", "rupiah", "Tildesmall",
"exclamdownsmall", "centoldstyle", "Lslashsmall", "Scaronsmall",
"Zcaronsmall", "Dieresissmall", "Brevesmall", "Caronsmall",
"Dotaccentsmall", "Macronsmall", "figuredash", "hypheninferior",
"Ogoneksmall", "Ringsmall", "Cedillasmall", "onequarter", "onehalf",
"threequarters", "questiondownsmall", "oneeighth", "threeeighths",
"fiveeighths", "seveneighths", "onethird", "twothirds", "zerosuperior",
"onesuperior", "twosuperior", "threesuperior", "foursuperior",
"fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
"ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
"threeinferior", "fourinferior", "fiveinferior", "sixinferior",
"seveninferior", "eightinferior", "nineinferior", "centinferior",
"dollarinferior", "periodinferior", "commainferior", "Agravesmall",
"Aacutesmall", "Acircumflexsmall", "Atildesmall", "Adieresissmall",
"Aringsmall", "AEsmall", "Ccedillasmall", "Egravesmall", "Eacutesmall",
"Ecircumflexsmall", "Edieresissmall", "Igravesmall", "Iacutesmall",
"Icircumflexsmall", "Idieresissmall", "Ethsmall", "Ntildesmall",
"Ogravesmall", "Oacutesmall", "Ocircumflexsmall", "Otildesmall",
"Odieresissmall", "OEsmall", "Oslashsmall", "Ugravesmall",
"Uacutesmall", "Ucircumflexsmall", "Udieresissmall", "Yacutesmall",
"Thornsmall", "Ydieresissmall"),
# Expert Subset
(".notdef", "space", "dollaroldstyle", "dollarsuperior",
"parenleftsuperior", "parenrightsuperior", "twodotenleader",
"onedotenleader", "comma", "hyphen", "period", "fraction",
"zerooldstyle", "oneoldstyle", "twooldstyle", "threeoldstyle",
"fouroldstyle", "fiveoldstyle", "sixoldstyle", "sevenoldstyle",
"eightoldstyle", "nineoldstyle", "colon", "semicolon",
"commasuperior", "threequartersemdash", "periodsuperior",
"asuperior", "bsuperior", "centsuperior", "dsuperior", "esuperior",
"isuperior", "lsuperior", "msuperior", "nsuperior", "osuperior",
"rsuperior", "ssuperior", "tsuperior", "ff", "fi", "fl", "ffi",
"ffl", "parenleftinferior", "parenrightinferior", "hyphensuperior",
"colonmonetary", "onefitted", "rupiah", "centoldstyle",
"figuredash", "hypheninferior", "onequarter", "onehalf",
"threequarters", "oneeighth", "threeeighths", "fiveeighths",
"seveneighths", "onethird", "twothirds", "zerosuperior",
"onesuperior", "twosuperior", "threesuperior", "foursuperior",
"fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
"ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
"threeinferior", "fourinferior", "fiveinferior", "sixinferior",
"seveninferior", "eightinferior", "nineinferior", "centinferior",
"dollarinferior", "periodinferior", "commainferior"),
] # }}}
def __init__(self, raw, offset, strings, num_glyphs, is_CID): def __init__(self, raw, offset, strings, num_glyphs, is_CID):
super(Charset, self).__init__() super(Charset, self).__init__()
self.standard_charset = offset if offset in {0, 1, 2} else None self.standard_charset = offset if offset in {0, 1, 2} else None
@ -266,7 +167,7 @@ class Charset(list):
def lookup(self, glyph_id): def lookup(self, glyph_id):
if self.standard_charset is None: if self.standard_charset is None:
return self[glyph_id] return self[glyph_id]
return self.STANDARD_CHARSETS[self.standard_charset][glyph_id].encode('ascii') return STANDARD_CHARSETS[self.standard_charset][glyph_id].encode('ascii')
def safe_lookup(self, glyph_id): def safe_lookup(self, glyph_id):
try: try:
@ -293,6 +194,8 @@ class CFFTable(UnknownTable):
glyph_id in character_map.iteritems()} glyph_id in character_map.iteritems()}
charset = set(charset_map.itervalues()) charset = set(charset_map.itervalues())
charset.discard(None) charset.discard(None)
if not charset:
raise NoGlyphs('This font has no glyphs for the specified characters')
s = Subset(self.cff, charset) s = Subset(self.cff, charset)
# Rebuild character_map with the glyph ids from the subset font # Rebuild character_map with the glyph ids from the subset font
@ -302,74 +205,10 @@ class CFFTable(UnknownTable):
if glyph_id: if glyph_id:
character_map[code] = glyph_id character_map[code] = glyph_id
# cff_standard_strings {{{ # Check that raw is parseable
# The 391 Standard Strings as used in the CFF format. CFF(s.raw)
# from Adobe Technical None #5176, version 1.0, 18 March 1998
self.raw = s.raw
cff_standard_strings = [
'.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
'001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
'Semibold'
]
# }}}

View File

@ -10,6 +10,8 @@ __docformat__ = 'restructuredtext en'
from struct import pack from struct import pack
from collections import OrderedDict from collections import OrderedDict
from calibre.utils.fonts.sfnt.cff.constants import cff_standard_strings
class Index(list): class Index(list):
def __init__(self): def __init__(self):
@ -40,12 +42,66 @@ class Index(list):
offsets = b''.join(pack(b'>L', x)[1:] for x in offsets) offsets = b''.join(pack(b'>L', x)[1:] for x in offsets)
else: else:
fmt = {1:'B', 2:'H', 4:'L'}[offsize] fmt = {1:'B', 2:'H', 4:'L'}[offsize]
offsets = pack( ('>%d%s'%(len(self), fmt)).encode('ascii'), offsets = pack( ('>%d%s'%(len(offsets), fmt)).encode('ascii'),
*offsets) *offsets)
self.raw = prefix + offsets + obj_data self.raw = prefix + offsets + obj_data
return self.raw return self.raw
class Strings(Index):

    '''
    The strings index of the font being written. Calling an instance with
    a string returns the SID of that string, adding the string to the index
    if it is neither a standard string nor already present.
    '''

    def __init__(self):
        Index.__init__(self)
        # Maps every known string to its SID, pre-populated with the
        # standard strings
        self.added = dict((name, sid) for sid, name in
                enumerate(cff_standard_strings))

    def __call__(self, x):
        if self.added.get(x, None) is None:
            # SIDs of strings stored in this index start right after the
            # SIDs of the standard strings
            self.added[x] = len(self) + len(cff_standard_strings)
            self.append(x)
        return self.added[x]
class Dict(Index):

    '''
    An index that contains a single item: the compiled form of the
    dictionary src.
    '''

    def __init__(self, src, strings):
        Index.__init__(self)
        self.src, self.strings = src, strings

    def compile(self):
        '''
        Compile src and then this index. src is re-compiled on every call,
        so values changed in src since the last call are picked up.

        :return: Whatever Index.compile() returns, for consistency with the
            other compile() methods, which all return their result
        '''
        self[:] = [self.src.compile(self.strings)]
        return Index.compile(self)
class PrivateDict(object):

    '''
    Writer for the private dictionary src and, when subrs is not None, for
    the index of subroutines that goes with it.
    '''

    def __init__(self, src, subrs, strings):
        self.src = src
        self.strings = strings
        self.subrs = None
        if subrs is not None:
            self.subrs = index = Index()
            index.extend(subrs)
            index.compile()

    def compile(self):
        '''
        Compile src, store the result in self.raw and return it. When there
        are subroutines, src is compiled once to find out its length, that
        length is stored as the Subrs value and src is compiled again.
        '''
        if self.subrs is None:
            self.raw = self.src.compile(self.strings)
        else:
            self.src['Subrs'] = len(self.src.compile(self.strings))
            self.raw = self.src.compile(self.strings)
        return self.raw
class Charsets(list):

    '''
    A list of glyph names that is compiled into a charset table: a zero
    byte followed by the SID of every name as an unsigned 16 bit big-endian
    number.
    '''

    def __init__(self, strings):
        list.__init__(self)
        self.strings = strings

    def compile(self):
        ''' Build the table, store it in self.raw and return it '''
        sids = [self.strings(name) for name in self]
        fmt = ('>B%dH' % len(sids)).encode('ascii')
        self.raw = pack(fmt, 0, *sids)
        return self.raw
class Subset(object): class Subset(object):
@ -53,19 +109,77 @@ class Subset(object):
self.cff = cff self.cff = cff
self.keep_charnames = keep_charnames self.keep_charnames = keep_charnames
header = pack(b'>4B', 1, 0, 4, cff.offset_size)
# Font names Index # Font names Index
font_names = Index() font_names = Index()
font_names.extend(self.cff.font_names) font_names.extend(self.cff.font_names)
# CharStrings Index # Strings Index
strings = Strings()
# CharStrings Index and charsets
char_strings = Index() char_strings = Index()
self.charname_map = OrderedDict() self.charname_map = OrderedDict()
charsets = Charsets(strings)
for i in xrange(self.cff.num_glyphs): for i in xrange(self.cff.num_glyphs):
cname = self.cff.charset.safe_lookup(i) cname = self.cff.charset.safe_lookup(i)
if cname in keep_charnames: if cname in keep_charnames:
char_strings.append(self.cff.char_strings[i]) char_strings.append(self.cff.char_strings[i])
self.charname_map[cname] = i self.charname_map[cname] = i
if i > 0: # .notdef is not included
charsets.append(cname)
# Add the strings
char_strings.compile() char_strings.compile()
charsets.compile()
# Global subroutines
global_subrs = Index()
global_subrs.extend(cff.global_subrs)
global_subrs.compile()
# TOP DICT
top_dict = Dict(cff.top_dict, strings)
top_dict.compile() # Add strings
private_dict = None
if cff.private_dict is not None:
private_dict = PrivateDict(cff.private_dict, cff.private_subrs,
strings)
private_dict.compile() # Add strings
fixed_prefix = header + font_names.compile()
t = top_dict.src
# Put in dummy offsets
t['charset'] = 1
t['CharStrings'] = 1
if private_dict is not None:
t['Private'] = (len(private_dict.raw), 1)
top_dict.compile()
strings.compile()
# Calculate real offsets
pos = len(fixed_prefix)
pos += len(top_dict.raw)
pos += len(strings.raw)
pos += len(global_subrs.raw)
t['charset'] = pos
pos += len(charsets.raw)
t['CharStrings'] = pos
pos += len(char_strings.raw)
if private_dict is not None:
t['Private'] = (len(private_dict.raw), pos)
top_dict.compile()
self.raw = (fixed_prefix + top_dict.raw + strings.raw +
global_subrs.raw + charsets.raw + char_strings.raw)
if private_dict is not None:
self.raw += private_dict.raw
if private_dict.subrs is not None:
self.raw += private_dict.subrs.raw

View File

@ -70,7 +70,6 @@ def subset_postscript(sfnt, character_map):
cff = sfnt[b'CFF '] cff = sfnt[b'CFF ']
cff.decompile() cff.decompile()
cff.subset(character_map) cff.subset(character_map)
raise Exception('TODO: Implement CFF subsetting')
def subset(raw, individual_chars, ranges=()): def subset(raw, individual_chars, ranges=()):
chars = list(map(ord, individual_chars)) chars = list(map(ord, individual_chars))