More work on CFF subsetting. The subset table is written but character mapping is off

This commit is contained in:
Kovid Goyal 2012-11-09 11:58:02 +05:30
parent ac0605ae04
commit 98f4cbb7e3
5 changed files with 425 additions and 188 deletions

View File

@ -0,0 +1,183 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
# cff_standard_strings {{{
# The 391 Standard Strings as used in the CFF format.
# From Adobe Technical Note #5176, version 1.0, 18 March 1998.
# The position of a name in this list is what the font writer uses as that
# string's id: it enumerates this list to seed its name -> id map and numbers
# any additional strings starting at len(cff_standard_strings).
cff_standard_strings = [
'.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
'001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
'Semibold'
]
# }}}
# Predefined charsets. The outer index is the charset id (0, 1 or 2) that the
# reader recognises in place of a charset offset; each tuple is indexed by
# glyph id and yields the glyph name. Glyph 0 is always ".notdef".
STANDARD_CHARSETS = [ # {{{
    # ISOAdobe
    (".notdef", "space", "exclam", "quotedbl", "numbersign", "dollar",
     "percent", "ampersand", "quoteright", "parenleft", "parenright",
     "asterisk", "plus", "comma", "hyphen", "period", "slash", "zero",
     "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
     "colon", "semicolon", "less", "equal", "greater", "question", "at",
     "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
     "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
     "bracketleft", "backslash", "bracketright", "asciicircum",
     "underscore", "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i",
     "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
     "x", "y", "z", "braceleft", "bar", "braceright", "asciitilde",
     "exclamdown", "cent", "sterling", "fraction", "yen", "florin",
     "section", "currency", "quotesingle", "quotedblleft", "guillemotleft",
     "guilsinglleft", "guilsinglright", "fi", "fl", "endash", "dagger",
     "daggerdbl", "periodcentered", "paragraph", "bullet", "quotesinglbase",
     "quotedblbase", "quotedblright", "guillemotright", "ellipsis",
     "perthousand", "questiondown", "grave", "acute", "circumflex", "tilde",
     "macron", "breve", "dotaccent", "dieresis", "ring", "cedilla",
     "hungarumlaut", "ogonek", "caron", "emdash", "AE", "ordfeminine",
     "Lslash", "Oslash", "OE", "ordmasculine", "ae", "dotlessi", "lslash",
     "oslash", "oe", "germandbls", "onesuperior", "logicalnot", "mu",
     "trademark", "Eth", "onehalf", "plusminus", "Thorn", "onequarter",
     "divide", "brokenbar", "degree", "thorn", "threequarters",
     "twosuperior", "registered", "minus", "eth", "multiply",
     "threesuperior", "copyright", "Aacute", "Acircumflex", "Adieresis",
     "Agrave", "Aring", "Atilde", "Ccedilla", "Eacute", "Ecircumflex",
     "Edieresis", "Egrave", "Iacute", "Icircumflex", "Idieresis", "Igrave",
     "Ntilde", "Oacute", "Ocircumflex", "Odieresis", "Ograve", "Otilde",
     "Scaron", "Uacute", "Ucircumflex", "Udieresis", "Ugrave", "Yacute",
     "Ydieresis", "Zcaron", "aacute", "acircumflex", "adieresis", "agrave",
     "aring", "atilde", "ccedilla", "eacute", "ecircumflex", "edieresis",
     "egrave", "iacute", "icircumflex", "idieresis", "igrave", "ntilde",
     "oacute", "ocircumflex", "odieresis", "ograve", "otilde", "scaron",
     "uacute", "ucircumflex", "udieresis", "ugrave", "yacute", "ydieresis",
     "zcaron"),
    # Expert
    # The first entry was "notdef" (missing the leading dot); it now matches
    # the other charsets and the first standard string.
    (".notdef", "space", "exclamsmall", "Hungarumlautsmall", "dollaroldstyle",
     "dollarsuperior", "ampersandsmall", "Acutesmall", "parenleftsuperior",
     "parenrightsuperior", "twodotenleader", "onedotenleader", "comma",
     "hyphen", "period", "fraction", "zerooldstyle", "oneoldstyle",
     "twooldstyle", "threeoldstyle", "fouroldstyle", "fiveoldstyle",
     "sixoldstyle", "sevenoldstyle", "eightoldstyle", "nineoldstyle",
     "colon", "semicolon", "commasuperior", "threequartersemdash",
     "periodsuperior", "questionsmall", "asuperior", "bsuperior",
     "centsuperior", "dsuperior", "esuperior", "isuperior", "lsuperior",
     "msuperior", "nsuperior", "osuperior", "rsuperior", "ssuperior",
     "tsuperior", "ff", "fi", "fl", "ffi", "ffl", "parenleftinferior",
     "parenrightinferior", "Circumflexsmall", "hyphensuperior",
     "Gravesmall", "Asmall", "Bsmall", "Csmall", "Dsmall", "Esmall",
     "Fsmall", "Gsmall", "Hsmall", "Ismall", "Jsmall", "Ksmall", "Lsmall",
     "Msmall", "Nsmall", "Osmall", "Psmall", "Qsmall", "Rsmall", "Ssmall",
     "Tsmall", "Usmall", "Vsmall", "Wsmall", "Xsmall", "Ysmall", "Zsmall",
     "colonmonetary", "onefitted", "rupiah", "Tildesmall",
     "exclamdownsmall", "centoldstyle", "Lslashsmall", "Scaronsmall",
     "Zcaronsmall", "Dieresissmall", "Brevesmall", "Caronsmall",
     "Dotaccentsmall", "Macronsmall", "figuredash", "hypheninferior",
     "Ogoneksmall", "Ringsmall", "Cedillasmall", "onequarter", "onehalf",
     "threequarters", "questiondownsmall", "oneeighth", "threeeighths",
     "fiveeighths", "seveneighths", "onethird", "twothirds", "zerosuperior",
     "onesuperior", "twosuperior", "threesuperior", "foursuperior",
     "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
     "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
     "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
     "seveninferior", "eightinferior", "nineinferior", "centinferior",
     "dollarinferior", "periodinferior", "commainferior", "Agravesmall",
     "Aacutesmall", "Acircumflexsmall", "Atildesmall", "Adieresissmall",
     "Aringsmall", "AEsmall", "Ccedillasmall", "Egravesmall", "Eacutesmall",
     "Ecircumflexsmall", "Edieresissmall", "Igravesmall", "Iacutesmall",
     "Icircumflexsmall", "Idieresissmall", "Ethsmall", "Ntildesmall",
     "Ogravesmall", "Oacutesmall", "Ocircumflexsmall", "Otildesmall",
     "Odieresissmall", "OEsmall", "Oslashsmall", "Ugravesmall",
     "Uacutesmall", "Ucircumflexsmall", "Udieresissmall", "Yacutesmall",
     "Thornsmall", "Ydieresissmall"),
    # Expert Subset
    (".notdef", "space", "dollaroldstyle", "dollarsuperior",
     "parenleftsuperior", "parenrightsuperior", "twodotenleader",
     "onedotenleader", "comma", "hyphen", "period", "fraction",
     "zerooldstyle", "oneoldstyle", "twooldstyle", "threeoldstyle",
     "fouroldstyle", "fiveoldstyle", "sixoldstyle", "sevenoldstyle",
     "eightoldstyle", "nineoldstyle", "colon", "semicolon",
     "commasuperior", "threequartersemdash", "periodsuperior",
     "asuperior", "bsuperior", "centsuperior", "dsuperior", "esuperior",
     "isuperior", "lsuperior", "msuperior", "nsuperior", "osuperior",
     "rsuperior", "ssuperior", "tsuperior", "ff", "fi", "fl", "ffi",
     "ffl", "parenleftinferior", "parenrightinferior", "hyphensuperior",
     "colonmonetary", "onefitted", "rupiah", "centoldstyle",
     "figuredash", "hypheninferior", "onequarter", "onehalf",
     "threequarters", "oneeighth", "threeeighths", "fiveeighths",
     "seveneighths", "onethird", "twothirds", "zerosuperior",
     "onesuperior", "twosuperior", "threesuperior", "foursuperior",
     "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
     "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
     "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
     "seveninferior", "eightinferior", "nineinferior", "centinferior",
     "dollarinferior", "periodinferior", "commainferior"),
] # }}}

View File

@ -7,7 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from struct import unpack
from struct import unpack, pack
t1_operand_encoding = [None] * 256
t1_operand_encoding[0:32] = (32) * ["do_operator"]
@ -27,8 +27,9 @@ cff_dict_operand_encoding[255] = "reserved"
real_nibbles = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'.', 'E', 'E-', None, '-']
real_nibbles_map = {x:i for i, x in enumerate(real_nibbles)}
class Reader(dict):
class ByteCode(dict):
def read_byte(self, b0, data, index):
return b0 - 139, index
@ -71,13 +72,62 @@ class Reader(dict):
number = number + real_nibbles[nibble1]
return float(number), index
class Dict(Reader):
def write_float(self, f, encoding='ignored'):
    """Encode the float ``f`` as a DICT real-number operand.

    The result is the byte 30 followed by the textual form of the number
    packed two nibbles per byte (codes come from ``real_nibbles_map``) and
    terminated by one 0xf nibble, or two when needed to fill the last byte.

    :param f: the number to encode
    :param encoding: accepted so the signature matches ``write_int``; unused
    :return: the encoded operand as a byte string
    """
    s = type(u'')(f).upper()
    # Drop a redundant leading zero: "0.5" -> ".5", "-0.5" -> "-.5"
    if s[:2] == "0.":
        s = s[1:]
    elif s[:3] == "-0.":
        s = "-" + s[2:]
    nibbles = []
    while s:
        c = s[0]
        s = s[1:]
        if c == "E":
            sign = s[:1]
            if sign == "-":
                # Negative exponents have a dedicated nibble
                s = s[1:]
                c = "E-"
            elif sign == "+":
                # Large floats are formatted as e.g. "1E+20"; there is no
                # nibble for "+", a plain "E" already means a positive
                # exponent, so the sign is simply dropped.
                s = s[1:]
        nibbles.append(real_nibbles_map[c])
    nibbles.append(0xf)
    if len(nibbles) % 2:
        nibbles.append(0xf)
    d = bytearray([30])
    # Pair the nibbles up: even positions are the high half of each byte
    for high, low in zip(nibbles[::2], nibbles[1::2]):
        d.append(high << 4 | low)
    return bytes(d)
def write_int(self, value, encoding="cff"):
    """Encode the integer ``value`` as a DICT/charstring operand.

    Values in [-107, 107] take one byte, values with a magnitude in
    [108, 1131] take two bytes. Anything larger is written as a prefix byte
    plus a big-endian integer: 29 + 4 bytes for ``encoding='cff'``,
    255 + 4 bytes for ``encoding='t1'`` and 28 + 2 bytes for any other
    encoding.

    :return: the encoded operand as a byte string
    """
    wide_prefix = {'cff': 29, 't1': 255}.get(encoding, None)
    if -107 <= value <= 107:
        return bytes(bytearray((value + 139,)))
    if 108 <= value <= 1131:
        high, low = divmod(value - 108, 256)
        return bytes(bytearray((247 + high, low)))
    if -1131 <= value <= -108:
        high, low = divmod(-value - 108, 256)
        return bytes(bytearray((251 + high, low)))
    if wide_prefix is None:
        # T2 only supports 2 byte ints
        return bytes(bytearray((28,))) + pack(b">h", value)
    return bytes(bytearray((wide_prefix,))) + pack(b">l", value)
def write_offset(self, value):
    """Encode ``value`` in the fixed five byte form: 29 + 4 byte big-endian.

    Unlike ``write_int`` the size of the result never depends on the value.
    """
    encoded = bytearray(pack(b">l", value))
    encoded.insert(0, 29)
    return bytes(encoded)
def write_number(self, value, encoding="cff"):
    """Encode ``value`` with ``write_float`` if it is a float, otherwise
    with ``write_int``; ``encoding`` is passed through unchanged."""
    if isinstance(value, float):
        return self.write_float(value, encoding)
    return self.write_int(value, encoding)
class Dict(ByteCode):
operand_encoding = cff_dict_operand_encoding
TABLE = []
TABLE = ()
FILTERED = frozenset()
OFFSETS = frozenset()
def __init__(self):
Reader.__init__(self)
ByteCode.__init__(self)
self.operators = {op:(name, arg) for op, name, arg, default in
self.TABLE}
@ -141,9 +191,53 @@ class Dict(Reader):
del self.stack[:]
return out
def compile(self, strings):
    """Serialize this DICT to bytes, store them in ``self.raw`` and return
    them.

    Operators are written in ``self.TABLE`` order. An operator is skipped
    when its name is in ``self.FILTERED`` or when its current value equals
    its default. Each written entry is the encoded operand(s) followed by
    the opcode byte(s).

    :param strings: callable mapping a string to its string id; it is
        applied to every operand whose argument type is ``'SID'``
    :raises ValueError: if a multi-operand value does not have as many
        items as the operator's argument type tuple
    """
    data = []
    for op, name, arg, default in self.TABLE:
        if name in self.FILTERED:
            continue
        val = self.safe_get(name)
        if val == self.defaults[name]:
            continue
        opcode = bytes(bytearray(op if isinstance(op, tuple) else [op]))
        # Read by encode_number to choose the fixed-width offset encoding
        self.encoding_offset = name in self.OFFSETS
        if isinstance(arg, tuple):
            if len(val) != len(arg):
                raise ValueError('Invalid argument %s for operator: %s'
                        %(val, op))
            for typ, v in zip(arg, val):
                if typ == 'SID':
                    # Convert this element only. Previously the whole
                    # tuple was passed to strings() and the element was
                    # encoded unconverted.
                    v = strings(v)
                data.append(getattr(self, 'encode_'+typ)(v))
        else:
            if arg == 'SID':
                val = strings(val)
            data.append(getattr(self, 'encode_'+arg)(val))
        data.append(opcode)
    self.raw = b''.join(data)
    return self.raw
def encode_number(self, val):
    """Encode one numeric operand.

    Uses ``write_offset`` while ``self.encoding_offset`` is true (set by
    ``compile`` for operators listed in ``OFFSETS``), else ``write_number``.
    """
    writer = self.write_offset if self.encoding_offset else self.write_number
    return writer(val)
def encode_SID(self, val):
    """Encode a string id operand; it is written like any other integer."""
    encoded = self.write_int(val)
    return encoded
def encode_array(self, val):
    """Encode every item of ``val`` with ``encode_number`` and concatenate
    the results in order."""
    encode = self.encode_number
    pieces = [encode(item) for item in val]
    return b''.join(pieces)
def encode_delta(self, value):
    """Encode ``value`` as a delta array.

    The first item is kept as is, every following item is replaced by its
    difference from the previous one, and the resulting list is handed to
    ``encode_array``.
    """
    absolute = list(value)
    # Pair each item with its predecessor; the first one is paired with 0
    predecessors = [0] + absolute[:-1]
    deltas = [cur - prev for prev, cur in zip(predecessors, absolute)]
    return self.encode_array(deltas)
class TopDict(Dict):
TABLE = [
TABLE = (
#opcode name argument type default
((12, 30), 'ROS', ('SID','SID','number'), None, ),
((12, 20), 'SyntheticBase', 'number', None, ),
@ -179,12 +273,18 @@ class TopDict(Dict):
((12, 37), 'FDSelect', 'number', None, ),
((12, 36), 'FDArray', 'number', None, ),
(17, 'CharStrings', 'number', None, ),
]
)
# We will not write these operators out
FILTERED = {'ROS', 'SyntheticBase', 'UniqueID', 'XUID',
'CIDFontVersion', 'CIDFontRevision', 'CIDFontType', 'CIDCount',
'UIDBase', 'Encoding', 'FDSelect', 'FDArray'}
OFFSETS = {'charset', 'Encoding', 'CharStrings', 'Private'}
class PrivateDict(Dict):
TABLE = [
# opcode name argument type default
TABLE = (
# opcode name argument type default
(6, 'BlueValues', 'delta', None, ),
(7, 'OtherBlues', 'delta', None, ),
(8, 'FamilyBlues', 'delta', None, ),
@ -205,5 +305,7 @@ class PrivateDict(Dict):
(20, 'defaultWidthX', 'number', 0, ),
(21, 'nominalWidthX', 'number', 0, ),
(19, 'Subrs', 'number', None, ),
]
)
OFFSETS = {'Subrs'}

View File

@ -11,8 +11,10 @@ from struct import unpack_from, unpack, calcsize
from functools import partial
from calibre.utils.fonts.sfnt import UnknownTable
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
from calibre.utils.fonts.sfnt.errors import UnsupportedFont, NoGlyphs
from calibre.utils.fonts.sfnt.cff.dict_data import TopDict, PrivateDict
from calibre.utils.fonts.sfnt.cff.constants import (cff_standard_strings,
STANDARD_CHARSETS)
# Useful links
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
@ -81,9 +83,9 @@ class CFF(object):
self.charset = Charset(raw, self.top_dict.safe_get('charset'),
self.strings, self.num_glyphs, self.is_CID)
import pprint
pprint.pprint(self.top_dict)
pprint.pprint(self.private_dict)
# import pprint
# pprint.pprint(self.top_dict)
# pprint.pprint(self.private_dict)
class Index(list):
@ -125,107 +127,6 @@ class Strings(Index):
class Charset(list):
STANDARD_CHARSETS = [ # {{{
# ISOAdobe
(".notdef", "space", "exclam", "quotedbl", "numbersign", "dollar",
"percent", "ampersand", "quoteright", "parenleft", "parenright",
"asterisk", "plus", "comma", "hyphen", "period", "slash", "zero",
"one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
"colon", "semicolon", "less", "equal", "greater", "question", "at",
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
"O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
"bracketleft", "backslash", "bracketright", "asciicircum",
"underscore", "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i",
"j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
"x", "y", "z", "braceleft", "bar", "braceright", "asciitilde",
"exclamdown", "cent", "sterling", "fraction", "yen", "florin",
"section", "currency", "quotesingle", "quotedblleft", "guillemotleft",
"guilsinglleft", "guilsinglright", "fi", "fl", "endash", "dagger",
"daggerdbl", "periodcentered", "paragraph", "bullet", "quotesinglbase",
"quotedblbase", "quotedblright", "guillemotright", "ellipsis",
"perthousand", "questiondown", "grave", "acute", "circumflex", "tilde",
"macron", "breve", "dotaccent", "dieresis", "ring", "cedilla",
"hungarumlaut", "ogonek", "caron", "emdash", "AE", "ordfeminine",
"Lslash", "Oslash", "OE", "ordmasculine", "ae", "dotlessi", "lslash",
"oslash", "oe", "germandbls", "onesuperior", "logicalnot", "mu",
"trademark", "Eth", "onehalf", "plusminus", "Thorn", "onequarter",
"divide", "brokenbar", "degree", "thorn", "threequarters",
"twosuperior", "registered", "minus", "eth", "multiply",
"threesuperior", "copyright", "Aacute", "Acircumflex", "Adieresis",
"Agrave", "Aring", "Atilde", "Ccedilla", "Eacute", "Ecircumflex",
"Edieresis", "Egrave", "Iacute", "Icircumflex", "Idieresis", "Igrave",
"Ntilde", "Oacute", "Ocircumflex", "Odieresis", "Ograve", "Otilde",
"Scaron", "Uacute", "Ucircumflex", "Udieresis", "Ugrave", "Yacute",
"Ydieresis", "Zcaron", "aacute", "acircumflex", "adieresis", "agrave",
"aring", "atilde", "ccedilla", "eacute", "ecircumflex", "edieresis",
"egrave", "iacute", "icircumflex", "idieresis", "igrave", "ntilde",
"oacute", "ocircumflex", "odieresis", "ograve", "otilde", "scaron",
"uacute", "ucircumflex", "udieresis", "ugrave", "yacute", "ydieresis",
"zcaron"),
# Expert
("notdef", "space", "exclamsmall", "Hungarumlautsmall", "dollaroldstyle",
"dollarsuperior", "ampersandsmall", "Acutesmall", "parenleftsuperior",
"parenrightsuperior", "twodotenleader", "onedotenleader", "comma",
"hyphen", "period", "fraction", "zerooldstyle", "oneoldstyle",
"twooldstyle", "threeoldstyle", "fouroldstyle", "fiveoldstyle",
"sixoldstyle", "sevenoldstyle", "eightoldstyle", "nineoldstyle",
"colon", "semicolon", "commasuperior", "threequartersemdash",
"periodsuperior", "questionsmall", "asuperior", "bsuperior",
"centsuperior", "dsuperior", "esuperior", "isuperior", "lsuperior",
"msuperior", "nsuperior", "osuperior", "rsuperior", "ssuperior",
"tsuperior", "ff", "fi", "fl", "ffi", "ffl", "parenleftinferior",
"parenrightinferior", "Circumflexsmall", "hyphensuperior",
"Gravesmall", "Asmall", "Bsmall", "Csmall", "Dsmall", "Esmall",
"Fsmall", "Gsmall", "Hsmall", "Ismall", "Jsmall", "Ksmall", "Lsmall",
"Msmall", "Nsmall", "Osmall", "Psmall", "Qsmall", "Rsmall", "Ssmall",
"Tsmall", "Usmall", "Vsmall", "Wsmall", "Xsmall", "Ysmall", "Zsmall",
"colonmonetary", "onefitted", "rupiah", "Tildesmall",
"exclamdownsmall", "centoldstyle", "Lslashsmall", "Scaronsmall",
"Zcaronsmall", "Dieresissmall", "Brevesmall", "Caronsmall",
"Dotaccentsmall", "Macronsmall", "figuredash", "hypheninferior",
"Ogoneksmall", "Ringsmall", "Cedillasmall", "onequarter", "onehalf",
"threequarters", "questiondownsmall", "oneeighth", "threeeighths",
"fiveeighths", "seveneighths", "onethird", "twothirds", "zerosuperior",
"onesuperior", "twosuperior", "threesuperior", "foursuperior",
"fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
"ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
"threeinferior", "fourinferior", "fiveinferior", "sixinferior",
"seveninferior", "eightinferior", "nineinferior", "centinferior",
"dollarinferior", "periodinferior", "commainferior", "Agravesmall",
"Aacutesmall", "Acircumflexsmall", "Atildesmall", "Adieresissmall",
"Aringsmall", "AEsmall", "Ccedillasmall", "Egravesmall", "Eacutesmall",
"Ecircumflexsmall", "Edieresissmall", "Igravesmall", "Iacutesmall",
"Icircumflexsmall", "Idieresissmall", "Ethsmall", "Ntildesmall",
"Ogravesmall", "Oacutesmall", "Ocircumflexsmall", "Otildesmall",
"Odieresissmall", "OEsmall", "Oslashsmall", "Ugravesmall",
"Uacutesmall", "Ucircumflexsmall", "Udieresissmall", "Yacutesmall",
"Thornsmall", "Ydieresissmall"),
# Expert Subset
(".notdef", "space", "dollaroldstyle", "dollarsuperior",
"parenleftsuperior", "parenrightsuperior", "twodotenleader",
"onedotenleader", "comma", "hyphen", "period", "fraction",
"zerooldstyle", "oneoldstyle", "twooldstyle", "threeoldstyle",
"fouroldstyle", "fiveoldstyle", "sixoldstyle", "sevenoldstyle",
"eightoldstyle", "nineoldstyle", "colon", "semicolon",
"commasuperior", "threequartersemdash", "periodsuperior",
"asuperior", "bsuperior", "centsuperior", "dsuperior", "esuperior",
"isuperior", "lsuperior", "msuperior", "nsuperior", "osuperior",
"rsuperior", "ssuperior", "tsuperior", "ff", "fi", "fl", "ffi",
"ffl", "parenleftinferior", "parenrightinferior", "hyphensuperior",
"colonmonetary", "onefitted", "rupiah", "centoldstyle",
"figuredash", "hypheninferior", "onequarter", "onehalf",
"threequarters", "oneeighth", "threeeighths", "fiveeighths",
"seveneighths", "onethird", "twothirds", "zerosuperior",
"onesuperior", "twosuperior", "threesuperior", "foursuperior",
"fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
"ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
"threeinferior", "fourinferior", "fiveinferior", "sixinferior",
"seveninferior", "eightinferior", "nineinferior", "centinferior",
"dollarinferior", "periodinferior", "commainferior"),
] # }}}
def __init__(self, raw, offset, strings, num_glyphs, is_CID):
super(Charset, self).__init__()
self.standard_charset = offset if offset in {0, 1, 2} else None
@ -266,7 +167,7 @@ class Charset(list):
def lookup(self, glyph_id):
    """Return the glyph name for ``glyph_id``.

    A font with its own charset stores the names in this list itself. A
    font using one of the predefined charsets takes the name from the
    module-level ``STANDARD_CHARSETS`` table, encoded to ASCII bytes.
    """
    if self.standard_charset is None:
        return self[glyph_id]
    # Only one return is kept: a second, unreachable return that duplicated
    # this lookup through a class attribute has been dropped.
    return STANDARD_CHARSETS[self.standard_charset][glyph_id].encode('ascii')
def safe_lookup(self, glyph_id):
try:
@ -293,6 +194,8 @@ class CFFTable(UnknownTable):
glyph_id in character_map.iteritems()}
charset = set(charset_map.itervalues())
charset.discard(None)
if not charset:
raise NoGlyphs('This font has no glyphs for the specified characters')
s = Subset(self.cff, charset)
# Rebuild character_map with the glyph ids from the subset font
@ -302,74 +205,10 @@ class CFFTable(UnknownTable):
if glyph_id:
character_map[code] = glyph_id
# cff_standard_strings {{{
# The 391 Standard Strings as used in the CFF format.
# from Adobe Technical None #5176, version 1.0, 18 March 1998
# Check that raw is parseable
CFF(s.raw)
self.raw = s.raw
cff_standard_strings = [
'.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
'001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
'Semibold'
]
# }}}

View File

@ -10,6 +10,8 @@ __docformat__ = 'restructuredtext en'
from struct import pack
from collections import OrderedDict
from calibre.utils.fonts.sfnt.cff.constants import cff_standard_strings
class Index(list):
def __init__(self):
@ -40,12 +42,66 @@ class Index(list):
offsets = b''.join(pack(b'>L', x)[1:] for x in offsets)
else:
fmt = {1:'B', 2:'H', 4:'L'}[offsize]
offsets = pack( ('>%d%s'%(len(self), fmt)).encode('ascii'),
offsets = pack( ('>%d%s'%(len(offsets), fmt)).encode('ascii'),
*offsets)
self.raw = prefix + offsets + obj_data
return self.raw
class Strings(Index):
    """The String INDEX of the font being written.

    Calling the instance with a string returns its string id. The standard
    strings keep their position in ``cff_standard_strings`` and are never
    stored in the index; any other string is appended on first use and
    numbered after the standard ones.
    """

    def __init__(self):
        Index.__init__(self)
        # string -> id, pre-seeded with the standard strings
        self.added = dict(
            (name, sid) for sid, name in enumerate(cff_standard_strings))

    def __call__(self, x):
        sid = self.added.get(x, None)
        if sid is not None:
            return sid
        # New string: its id is its position in this index, offset by the
        # number of standard strings
        sid = len(self) + len(cff_standard_strings)
        self.added[x] = sid
        self.append(x)
        return sid
class Dict(Index):
    """An INDEX whose single entry is the compiled form of ``src``.

    :param src: object whose ``compile(strings)`` returns the DICT bytes
    :param strings: string table passed to ``src.compile``
    """

    def __init__(self, src, strings):
        Index.__init__(self)
        self.src, self.strings = src, strings

    def compile(self):
        """Recompile ``src``, make it the only entry of this index and
        compile the index.

        Returns whatever ``Index.compile`` returns. Previously that result
        was dropped and ``None`` was returned, unlike the other ``compile``
        methods in this module.
        """
        self[:] = [self.src.compile(self.strings)]
        return Index.compile(self)
class PrivateDict(object):
    """Writer side wrapper for a Private DICT and its optional local
    subroutines.

    :param src: object whose ``compile(strings)`` returns the DICT bytes and
        which supports item assignment
    :param subrs: iterable of local subroutines, or None if there are none
    :param strings: string table passed to ``src.compile``
    """

    def __init__(self, src, subrs, strings):
        self.src = src
        self.strings = strings
        if subrs is None:
            self.subrs = None
        else:
            # The subroutine index is compiled right away
            self.subrs = Index()
            self.subrs.extend(subrs)
            self.subrs.compile()

    def compile(self):
        """Compile the DICT, store the bytes in ``self.raw`` and return
        them."""
        compiled = self.src.compile(self.strings)
        if self.subrs is not None:
            # Record the size of the compiled DICT as the Subrs value and
            # compile again so that the value is part of the output.
            # NOTE(review): this presumes the second pass has the same
            # length as the first - confirm.
            self.src['Subrs'] = len(compiled)
            compiled = self.src.compile(self.strings)
        self.raw = compiled
        return compiled
class Charsets(list):
    """List of glyph names that compiles to charset data.

    :param strings: callable mapping a glyph name to its string id
    """

    def __init__(self, strings):
        list.__init__(self)
        self.strings = strings

    def compile(self):
        """Return (and store in ``self.raw``) a zero byte followed by the
        string id of every name as a big-endian unsigned 16 bit integer."""
        to_sid = self.strings
        sids = [to_sid(name) for name in self]
        layout = ('>B%dH' % len(self)).encode('ascii')
        self.raw = pack(layout, 0, *sids)
        return self.raw
class Subset(object):
@ -53,19 +109,77 @@ class Subset(object):
self.cff = cff
self.keep_charnames = keep_charnames
header = pack(b'>4B', 1, 0, 4, cff.offset_size)
# Font names Index
font_names = Index()
font_names.extend(self.cff.font_names)
# CharStrings Index
# Strings Index
strings = Strings()
# CharStrings Index and charsets
char_strings = Index()
self.charname_map = OrderedDict()
charsets = Charsets(strings)
for i in xrange(self.cff.num_glyphs):
cname = self.cff.charset.safe_lookup(i)
if cname in keep_charnames:
char_strings.append(self.cff.char_strings[i])
self.charname_map[cname] = i
if i > 0: # .notdef is not included
charsets.append(cname)
# Add the strings
char_strings.compile()
charsets.compile()
# Global subroutines
global_subrs = Index()
global_subrs.extend(cff.global_subrs)
global_subrs.compile()
# TOP DICT
top_dict = Dict(cff.top_dict, strings)
top_dict.compile() # Add strings
private_dict = None
if cff.private_dict is not None:
private_dict = PrivateDict(cff.private_dict, cff.private_subrs,
strings)
private_dict.compile() # Add strings
fixed_prefix = header + font_names.compile()
t = top_dict.src
# Put in dummy offsets
t['charset'] = 1
t['CharStrings'] = 1
if private_dict is not None:
t['Private'] = (len(private_dict.raw), 1)
top_dict.compile()
strings.compile()
# Calculate real offsets
pos = len(fixed_prefix)
pos += len(top_dict.raw)
pos += len(strings.raw)
pos += len(global_subrs.raw)
t['charset'] = pos
pos += len(charsets.raw)
t['CharStrings'] = pos
pos += len(char_strings.raw)
if private_dict is not None:
t['Private'] = (len(private_dict.raw), pos)
top_dict.compile()
self.raw = (fixed_prefix + top_dict.raw + strings.raw +
global_subrs.raw + charsets.raw + char_strings.raw)
if private_dict is not None:
self.raw += private_dict.raw
if private_dict.subrs is not None:
self.raw += private_dict.subrs.raw

View File

@ -70,7 +70,6 @@ def subset_postscript(sfnt, character_map):
cff = sfnt[b'CFF ']
cff.decompile()
cff.subset(character_map)
raise Exception('TODO: Implement CFF subsetting')
def subset(raw, individual_chars, ranges=()):
chars = list(map(ord, individual_chars))