mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Start work on pure python implementation of font subsetting, since I really dont like sfntly
This commit is contained in:
parent
f2e6dd1cce
commit
a7f054ec5c
@ -7,6 +7,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
def align_block(raw, multiple=4, pad=b'\0'):
|
||||
'''
|
||||
@ -17,5 +18,48 @@ def align_block(raw, multiple=4, pad=b'\0'):
|
||||
if extra == 0: return raw
|
||||
return raw + pad*(multiple - extra)
|
||||
|
||||
class UnknownTable(object):
    '''
    Opaque holder for the raw data of a table. Calling an instance gives
    back the data it holds.
    '''

    def __init__(self, raw):
        # Keep the data exactly as it was handed in
        self.raw = raw

    def __call__(self):
        '''Return the current contents of ``self.raw``.'''
        return self.raw
|
||||
|
||||
class DateTimeProperty(object):
    '''
    Descriptor that presents an integer attribute, holding a number of seconds
    counted from midnight on January 1st 1904, as a naive datetime object.

    :param name: Name of the instance attribute in which the number of seconds
                 is stored.
    '''

    # The point in time that the stored second counts are relative to
    epoch = datetime(1904, 1, 1)

    def __init__(self, name):
        self.name = name

    def __get__(self, obj, type=None):
        '''
        Return the stored value of ``obj`` as a datetime. When accessed on the
        class instead of an instance, return the descriptor itself.
        '''
        if obj is None:
            # Class level access, there is no instance to read the seconds
            # from
            return self
        return self.epoch + timedelta(seconds=getattr(obj, self.name))

    def __set__(self, obj, val):
        '''
        Store the datetime ``val`` on ``obj`` as a whole number of seconds
        since the epoch (any fractional part is truncated).
        '''
        td = val - self.epoch
        setattr(obj, self.name, int(td.total_seconds()))
|
||||
|
||||
class FixedProperty(object):
    '''
    Descriptor that presents an integer attribute, interpreted as a fixed
    point number with 16 fractional bits, as a float.

    :param name: Name of the instance attribute in which the integer form of
                 the number is stored.
    '''

    def __init__(self, name):
        self.name = name

    def __get__(self, obj, type=None):
        '''
        Return the stored integer of ``obj`` scaled down by 2**16. When
        accessed on the class instead of an instance, return the descriptor
        itself.
        '''
        if obj is None:
            # Class level access, there is no instance to read from
            return self
        val = getattr(obj, self.name)
        return val * (2**-16)

    def __set__(self, obj, val):
        '''
        Store ``val`` on ``obj``, scaled up by 2**16 and rounded to the
        nearest integer.
        '''
        # The converted value has to be written to the instance; the return
        # value of __set__ is ignored by Python, so returning it stores
        # nothing
        setattr(obj, self.name, int(round(val*(2**16))))
|
||||
|
||||
def max_power_of_two(x):
    """
    Return the largest exponent for which (2 ** exponent) <= x holds.
    Zero is returned when x is 0.
    """
    # Count how many times x can be shifted right before nothing is left
    shifts = 0
    while x:
        x >>= 1
        shifts += 1
    # The highest set bit sits one position below the number of shifts
    return shifts - 1 if shifts else 0
|
||||
|
||||
|
||||
|
235
src/calibre/utils/fonts/sfnt/cmap.py
Normal file
235
src/calibre/utils/fonts/sfnt/cmap.py
Normal file
@ -0,0 +1,235 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
# Note that the code for creating a BMP table (cmap format 4) is taken with
|
||||
# thanks from the fonttools project (BSD licensed).
|
||||
|
||||
from struct import unpack_from, calcsize, pack
|
||||
from collections import OrderedDict
|
||||
|
||||
from calibre.utils.fonts.utils import get_bmp_glyph_ids
|
||||
from calibre.utils.fonts.sfnt import UnknownTable, max_power_of_two
|
||||
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
|
||||
|
||||
def split_range(start_code, end_code, cmap):  # {{{
    '''
    Try to cut the contiguous range of character codes
    [start_code, end_code] into segments, such that runs of codes that map to
    consecutive glyph ids get a segment of their own whenever that makes the
    cmap format 4 subtable smaller.

    :param cmap: Mapping of character code to glyph id. It must have an entry
                 for every code in the range.
    :return: A tuple ``(start, end)``: the list of extra segment start codes
             and the list of segment end codes. ``start`` is one shorter than
             ``end`` as the first segment starts at start_code, which the
             caller already knows about.
    '''
    if start_code == end_code:
        return [], [end_code]

    # Collect the runs of codes in which every glyph id is exactly one more
    # than the glyph id of the preceding code.
    runs = []
    run_start = None  # first code of the run in progress, if there is one
    prev_code, prev_id = start_code, cmap[start_code]
    for code in range(start_code + 1, end_code + 1):
        glyph_id = cmap[code]
        if glyph_id - 1 == prev_id:
            if run_start is None:
                run_start = prev_code
        elif run_start is not None:
            runs.append((run_start, prev_code))
            run_start = None
        prev_code, prev_id = code, glyph_id

    if run_start is not None:
        runs.append((run_start, prev_code))
    assert prev_code == end_code

    # Drop the runs that are too short to pay for themselves. An additional
    # segment costs 8 bytes, while leaving a character in a non consecutive
    # segment costs 2 bytes.
    worthwhile = []
    for first, last in runs:
        if first == start_code and last == end_code:
            break  # the run is the whole range, nothing to split
        if first == start_code or last == end_code:
            threshold = 4  # cutting at an edge adds one segment
        else:
            threshold = 8  # cutting in the middle adds two segments
        if (last - first + 1) > threshold:
            worthwhile.append((first, last))

    if not worthwhile:
        return [], [end_code]

    # Make sure the segments reach both ends of the range
    if worthwhile[0][0] != start_code:
        worthwhile.insert(0, (start_code, worthwhile[0][0] - 1))
    if worthwhile[-1][1] != end_code:
        worthwhile.append((worthwhile[-1][1] + 1, end_code))

    # Plug the gaps between neighbouring segments, these are the stretches
    # in which the glyph ids are _not_ consecutive.
    segments = [worthwhile[0]]
    for segment in worthwhile[1:]:
        gap_begin = segments[-1][1] + 1
        if gap_begin != segment[0]:
            segments.append((gap_begin, segment[0] - 1))
        segments.append(segment)

    # Turn the segments into the start/end code lists, leaving out the start
    # of the first segment.
    start = [first for first, last in segments[1:]]
    end = [last for first, last in segments]

    assert len(start) + 1 == len(end)
    return start, end
# }}}
|
||||
|
||||
def set_id_delta(id_delta):  # {{{
    '''
    Fold id_delta into the range of a signed short (-0x8000 to 0x7FFF) by
    adding or subtracting 0x10000.

    :param id_delta: The difference between a glyph id and a character code,
                     expected to lie between -0xFFFF and 0xFFFF.
    :return: A value that equals id_delta modulo 0x10000.
    '''
    # id_delta is stored as a short, and so must be between -32K and 32K-1.
    # The start code can be between 0 and 64K-1, and the first glyph index can
    # be between 1 and 64K-1. This means that we have a problem, because we
    # can need to assign to id_delta values between -(64K-2) and 64K-1.
    # Since the final glyph id is reconstructed by:
    #     (short)final_gid = (code + id_delta) % 0x10000
    # we can get from a start code of 0 to a final glyph id of 64K-1 by
    # subtracting 1, and casting the negative number to an unsigned short.
    # Similarly, we can get from a start code of 64K-1 to a final glyph id of
    # 1 by adding 2, because of the modulo arithmetic.

    if id_delta > 0x7FFF:
        id_delta = id_delta - 0x10000
    elif id_delta < -0x8000:
        # -0x8000 itself is a valid short and must be left alone, wrapping it
        # would give 0x8000 which cannot be stored
        id_delta = id_delta + 0x10000

    return id_delta
# }}}
|
||||
|
||||
class CmapTable(UnknownTable):

    '''
    The cmap table of a font. On construction the table header and its
    encoding records are read and the first part of the subtable with
    platform 3, encoding 1 and format 4 is kept in ``self.bmp_table``
    (None if the font has no such subtable).
    '''

    def __init__(self, *args, **kwargs):
        super(CmapTable, self).__init__(*args, **kwargs)

        self.version, self.num_tables = unpack_from(b'>HH', self.raw)

        self.tables = {}

        # The encoding records (platform, encoding, subtable offset) follow
        # directly after the four byte header
        rec_fmt = b'>HHL'
        rec_size = calcsize(rec_fmt)
        recs = [unpack_from(rec_fmt, self.raw, 4 + i * rec_size)
                for i in range(self.num_tables)]

        self.bmp_table = None

        for i, (platform, encoding, offset) in enumerate(recs):
            # A subtable is taken to run up to the offset of the next record,
            # or to the end of the data for the last record
            if i + 1 < len(recs):
                next_offset = recs[i+1][-1]
            else:
                next_offset = len(self.raw)
            table = self.raw[offset:next_offset]
            fmt = unpack_from(b'>H', table)[0]
            if platform == 3 and encoding == 1 and fmt == 4:
                self.bmp_table = table

    def get_character_map(self, chars):
        '''
        Get a mapping of character codes to glyph ids in the font.

        :param chars: Iterable of character codes to look up. Duplicates are
                      ignored.
        :return: An OrderedDict, sorted by character code, that maps every
                 code for which a glyph id greater than zero was found to
                 that glyph id.
        :raises UnsupportedFont: If the font has no Windows BMP subtable.
        '''
        if self.bmp_table is None:
            raise UnsupportedFont('This font has no Windows BMP cmap subtable.'
                    ' Most likely a special purpose font.')
        chars = sorted(set(chars))
        ans = OrderedDict()
        for i, glyph_id in enumerate(get_bmp_glyph_ids(self.bmp_table, 0,
            chars)):
            if glyph_id > 0:
                ans[chars[i]] = glyph_id
        return ans

    def set_character_map(self, cmap):
        '''
        Replace the contents of this table with a single Windows BMP
        (format 4) subtable that contains exactly the entries of cmap.
        ``self.raw``, ``self.bmp_table``, ``self.version`` and
        ``self.num_tables`` are all updated.

        :param cmap: Mapping of character code to glyph id.
        '''
        self.version, self.num_tables = 0, 1
        fmt = b'>7H'
        codes = sorted(cmap)

        if not codes:
            start_code = [0xffff]
            end_code = [0xffff]
        else:
            last_code = codes[0]
            end_code = []
            start_code = [last_code]

            for code in codes[1:]:
                if code == last_code + 1:
                    last_code = code
                    continue
                # There is a gap before code, so a contiguous range of codes
                # has just ended. See if it is worth cutting it up further.
                start, end = split_range(start_code[-1], last_code, cmap)
                start_code.extend(start)
                end_code.extend(end)
                start_code.append(code)
                last_code = code
            end_code.append(last_code)
            start_code.append(0xffff)
            end_code.append(0xffff)

        id_delta = []
        id_range_offset = []
        glyph_index_array = []
        for i in range(len(end_code)-1):  # skip the closing codes (0xffff)
            indices = [cmap[char_code] for char_code in
                    range(start_code[i], end_code[i] + 1)]
            # The comparison has to be between two lists. Comparing the list
            # against a lazy range object is never true, which would make
            # every segment take the glyph index array branch below.
            if indices == list(range(indices[0], indices[0] + len(indices))):
                # Consecutive glyph ids, a delta is all that is needed
                delta = set_id_delta(indices[0] - start_code[i])
                # Fold into the range of a signed short, which is what the
                # deltas are packed as below
                delta = ((delta + 0x8000) % 0x10000) - 0x8000
                id_delta.append(delta)
                id_range_offset.append(0)
            else:
                id_delta.append(0)
                # Distance in bytes from this entry of the id_range_offset
                # array to the place in the glyph index array where the ids
                # of this segment are about to be stored
                id_range_offset.append(2 * (len(end_code) +
                    len(glyph_index_array) - i))
                glyph_index_array.extend(indices)
        id_delta.append(1)  # 0xffff + 1 == 0. So this end code maps to .notdef
        id_range_offset.append(0)

        seg_count = len(end_code)
        max_exponent = max_power_of_two(seg_count)
        search_range = 2 * (2 ** max_exponent)
        entry_selector = max_exponent
        range_shift = 2 * seg_count - search_range

        # The zero between the end and start codes is the padding field that
        # separates the two arrays in the subtable
        char_code_array = end_code + [0] + start_code
        char_code_array = pack(b'>%dH'%len(char_code_array), *char_code_array)
        id_delta_array = pack(b'>%dh'%len(id_delta), *id_delta)
        rest_array = id_range_offset + glyph_index_array
        rest_array = pack(b'>%dH'%len(rest_array), *rest_array)
        data = char_code_array + id_delta_array + rest_array

        length = calcsize(fmt) + len(data)
        header = pack(fmt, 4, length, 0,
                2*seg_count, search_range, entry_selector, range_shift)
        self.bmp_table = header + data

        # Table header followed by the single encoding record (platform 3,
        # encoding 1), whose subtable starts directly after it
        fmt = b'>4HL'
        offset = calcsize(fmt)
        self.raw = pack(fmt, self.version, self.num_tables, 3, 1, offset) + \
                self.bmp_table
|
||||
|
@ -7,22 +7,17 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from math import log
|
||||
from struct import pack, calcsize
|
||||
from io import BytesIO
|
||||
|
||||
from calibre.utils.fonts.utils import (get_tables, checksum_of_block,
|
||||
verify_checksums)
|
||||
from calibre.utils.fonts.sfnt import align_block
|
||||
from calibre.utils.fonts.sfnt import align_block, UnknownTable, max_power_of_two
|
||||
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
|
||||
|
||||
class UnknownTable(object):
|
||||
|
||||
def __init__(self, raw):
|
||||
self.raw = raw
|
||||
|
||||
def __call__(self):
|
||||
return self.raw
|
||||
from calibre.utils.fonts.sfnt.head import HeadTable
|
||||
from calibre.utils.fonts.sfnt.maxp import MaxpTable
|
||||
from calibre.utils.fonts.sfnt.loca import LocaTable
|
||||
|
||||
class Sfnt(object):
|
||||
|
||||
@ -37,8 +32,23 @@ class Sfnt(object):
|
||||
self.tables = {}
|
||||
for table_tag, table, table_index, table_offset, table_checksum in get_tables(raw):
|
||||
self.tables[table_tag] = {
|
||||
b'head' : HeadTable,
|
||||
b'maxp' : MaxpTable,
|
||||
b'loca' : LocaTable,
|
||||
}.get(table_tag, UnknownTable)(table)
|
||||
|
||||
def __getitem__(self, key):
|
||||
return self.tables[key]
|
||||
|
||||
def __contains__(self, key):
|
||||
return key in self.tables
|
||||
|
||||
def __delitem__(self, key):
|
||||
del self.tables[key]
|
||||
|
||||
def pop(self, key, default=None):
|
||||
return self.tables.pop(key, default)
|
||||
|
||||
def __call__(self):
|
||||
stream = BytesIO()
|
||||
|
||||
@ -49,7 +59,7 @@ class Sfnt(object):
|
||||
|
||||
# Write header
|
||||
num_tables = len(self.tables)
|
||||
ln2 = int(log(num_tables, 2))
|
||||
ln2 = max_power_of_two(num_tables)
|
||||
srange = (2**ln2) * 16
|
||||
spack(b'>4s4H',
|
||||
self.sfnt_version, num_tables, srange, ln2, num_tables * 16 - srange)
|
||||
|
53
src/calibre/utils/fonts/sfnt/head.py
Normal file
53
src/calibre/utils/fonts/sfnt/head.py
Normal file
@ -0,0 +1,53 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from itertools import izip
|
||||
from struct import unpack_from, pack
|
||||
|
||||
from calibre.utils.fonts.sfnt import UnknownTable, DateTimeProperty
|
||||
|
||||
class HeadTable(UnknownTable):

    '''
    The head table of a font. All the fields of the table are unpacked into
    attributes of the same name as listed in ``self._fields``. The creation
    and modification times are additionally available as datetime objects
    via ``created`` and ``modified``.
    '''

    created = DateTimeProperty('_created')
    modified = DateTimeProperty('_modified')

    def __init__(self, *args, **kwargs):
        super(HeadTable, self).__init__(*args, **kwargs)

        # Alternating field names and struct format characters, in the order
        # in which the fields are stored in the table
        field_types = (
                'version_number' , 'L',
                'font_revision' , 'L',
                'checksum_adjustment' , 'L',
                'magic_number' , 'L',
                'flags' , 'H',
                'units_per_em' , 'H',
                '_created' , 'q',
                '_modified' , 'q',
                # The bounding box values are signed, glyphs routinely extend
                # to the left of/below the origin
                'x_min' , 'h',
                'y_min' , 'h',
                'x_max' , 'h',
                'y_max' , 'h',
                'mac_style' , 'H',
                'lowest_rec_ppem' , 'H',
                'font_direction_hint' , 'h',
                'index_to_loc_format' , 'h',
                'glyph_data_format' , 'h'
        )

        self._fmt = ('>%s'%(''.join(field_types[1::2]))).encode('ascii')
        self._fields = field_types[0::2]

        for f, val in zip(self._fields, unpack_from(self._fmt, self.raw)):
            setattr(self, f, val)

    def update(self):
        '''
        Rebuild ``self.raw`` from the current values of the field attributes.
        '''
        vals = [getattr(self, f) for f in self._fields]
        self.raw = pack(self._fmt, *vals)
|
||||
|
||||
|
31
src/calibre/utils/fonts/sfnt/loca.py
Normal file
31
src/calibre/utils/fonts/sfnt/loca.py
Normal file
@ -0,0 +1,31 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from struct import calcsize, unpack_from
|
||||
|
||||
from calibre.utils.fonts.sfnt import UnknownTable
|
||||
|
||||
class LocaTable(UnknownTable):

    '''
    The loca table of a font. :meth:`load_offsets` must be called before
    :meth:`glyph_location` can be used.
    '''

    def load_offsets(self, head_table, maxp_table):
        '''
        Decode the raw data into ``self.offset_map``, a sequence of offsets
        indexed by glyph id, with at most ``maxp_table.num_glyphs + 1``
        entries.

        :param head_table: Object whose ``index_to_loc_format`` attribute
                           selects the entry size: 0 means two byte entries,
                           anything else four byte entries.
        :param maxp_table: Object whose ``num_glyphs`` attribute gives the
                           number of glyphs.
        '''
        short_entries = head_table.index_to_loc_format == 0
        code = 'H' if short_entries else 'L'
        num_glyphs = maxp_table.num_glyphs
        entry_size = calcsize(('>%s'%code).encode('ascii'))
        count = len(self.raw)//entry_size
        unpack_fmt = ('>%d%s'%(count, code)).encode('ascii')
        offsets = unpack_from(unpack_fmt, self.raw)[:num_glyphs+1]
        if short_entries:
            # Two byte entries store the offsets divided by two
            offsets = [2*off for off in offsets]
        self.offset_map = offsets

    def glyph_location(self, glyph_id):
        '''
        Return the tuple ``(offset, length)`` for glyph_id. The length is the
        distance to the offset of the following glyph id.
        '''
        begin = self.offset_map[glyph_id]
        return begin, self.offset_map[glyph_id+1] - begin
|
||||
|
58
src/calibre/utils/fonts/sfnt/maxp.py
Normal file
58
src/calibre/utils/fonts/sfnt/maxp.py
Normal file
@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from itertools import izip
|
||||
from struct import unpack_from, pack
|
||||
|
||||
from calibre.utils.fonts.sfnt import UnknownTable
|
||||
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
|
||||
|
||||
class MaxpTable(UnknownTable):

    '''
    The maxp table of a font. The version and number of glyphs are always
    read. For tables of version 0x10000 or later, the remaining fields named
    in ``self.fields`` are read into attributes of the same name as well.
    '''

    def __init__(self, *args, **kwargs):
        super(MaxpTable, self).__init__(*args, **kwargs)

        self._fmt = b'>LH'
        self._version, self.num_glyphs = unpack_from(self._fmt, self.raw)
        self.fields = ('_version', 'num_glyphs')

        if self._version >= 0x10000:
            # Switch to the full list of fields, then read them all. This
            # also restores the version actually stored in the table.
            self.version = 0x10000
            vals = unpack_from(self._fmt, self.raw)
            for f, val in zip(self.fields, vals):
                setattr(self, f, val)

    @property
    def version(self):
        '''
        The version of the table. Setting it to 0x5000 or 0x10000 also
        selects the list of fields and the struct format that go with that
        version.
        '''
        return self._version

    @version.setter
    def version(self, val):
        if val == 0x5000:
            self._fmt = b'>LH'
            self.fields = ('_version', 'num_glyphs')
        elif val == 0x10000:
            self.fields = ('_version', 'num_glyphs', 'max_points',
                    'max_contours', 'max_composite_points',
                    'max_composite_contours', 'max_zones',
                    'max_twilight_points', 'max_storage', 'max_function_defs',
                    'max_instruction_defs', 'max_stack_elements',
                    'max_size_of_instructions', 'max_component_elements',
                    'max_component_depth')
            # Everything after the version and number of glyphs is an
            # unsigned short
            self._fmt = b'>LH' + b'H'*(len(self.fields)-2)
        self._version = val

    def update(self):
        '''
        Rebuild ``self.raw`` from the current values of the field attributes.

        :raises UnsupportedFont: If the version of the table is greater than
                                 0x10000.
        '''
        if self._version > 0x10000:
            raise UnsupportedFont('maxp table with version > 0x10000 not modifiable')
        # The list of fields is kept in self.fields for every version, so
        # that is the one place to read it from
        vals = [getattr(self, f) for f in self.fields]
        self.raw = pack(self._fmt, *vals)
|
||||
|
||||
|
||||
|
50
src/calibre/utils/fonts/sfnt/subset.py
Normal file
50
src/calibre/utils/fonts/sfnt/subset.py
Normal file
@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.utils.fonts.sfnt.container import Sfnt
|
||||
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
|
||||
|
||||
def subset_truetype(sfnt, character_map):
    '''
    Load the glyph offsets of the loca table of sfnt, using its head and
    maxp tables. character_map is not used yet.

    :raises UnsupportedFont: If sfnt has no head or no maxp table.
    '''
    loca = sfnt[b'loca']
    try:
        head = sfnt[b'head']
        maxp = sfnt[b'maxp']
    except KeyError:
        raise UnsupportedFont('This font does not contain head and/or maxp tables')
    loca.load_offsets(head, maxp)
|
||||
|
||||
def subset(raw, individual_chars, ranges=()):
    '''
    Start subsetting the font in raw to the specified characters: the cmap
    table is restricted to them and, for fonts with TrueType outlines, the
    glyph offsets are loaded.

    :param raw: The font data.
    :param individual_chars: Iterable of single characters to keep.
    :param ranges: Iterable of ranges of characters to keep. The first two
                   items of every range are its first and last character
                   (inclusive).
    :raises UnsupportedFont: If the font has no cmap table, has PostScript
                             outlines or has no outlines at all.
    '''
    chars = [ord(c) for c in individual_chars]
    for r in ranges:
        chars.extend(xrange(ord(r[0]), ord(r[1])+1))

    sfnt = Sfnt(raw)
    # The Digital Signature table is of no use in a subset font, so get rid
    # of it
    sfnt.pop(b'DSIG', None)

    try:
        cmap = sfnt[b'cmap']
    except KeyError:
        raise UnsupportedFont('This font has no cmap table')

    # Look up the glyph ids of all the specified chars
    character_map = cmap.get_character_map(chars)
    # Rebuild the cmap table so that it only has entries for those chars
    cmap.set_character_map(character_map)

    has_truetype_outlines = b'loca' in sfnt and b'glyf' in sfnt
    if not has_truetype_outlines:
        if b'CFF ' in sfnt:
            raise UnsupportedFont('This font contains PostScript outlines, '
                    'subsetting not supported')
        raise UnsupportedFont('This font does not contain TrueType '
                'or PostScript outlines')
    subset_truetype(sfnt, character_map)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user