from datetime import datetime, timedelta


class UnknownTable(object):
    '''
    Generic wrapper around the raw bytes of an sfnt table. Calling the
    instance returns the bytes unchanged.
    '''

    def __init__(self, raw):
        self.raw = raw

    def __call__(self):
        return self.raw


class DateTimeProperty(object):
    '''
    Descriptor that exposes an integer attribute, holding a number of seconds
    counted from 1904-01-01, as a :class:`datetime.datetime`.

    :param name: Name of the instance attribute that stores the integer.
    '''

    # Reference date from which the stored number of seconds is counted
    _EPOCH = datetime(1904, 1, 1)

    def __init__(self, name):
        self.name = name

    def __get__(self, obj, type=None):
        if obj is None:
            # Accessed on the class rather than on an instance
            return self
        return self._EPOCH + timedelta(seconds=getattr(obj, self.name))

    def __set__(self, obj, val):
        td = val - self._EPOCH
        setattr(obj, self.name, int(td.total_seconds()))


class FixedProperty(object):
    '''
    Descriptor that exposes an integer attribute holding a 16.16 fixed point
    number as a float.

    :param name: Name of the instance attribute that stores the integer.
    '''

    def __init__(self, name):
        self.name = name

    def __get__(self, obj, type=None):
        if obj is None:
            # Accessed on the class rather than on an instance
            return self
        val = getattr(obj, self.name)
        return val * (2**-16)

    def __set__(self, obj, val):
        # The return value of __set__ is ignored by Python, the converted
        # value has to be stored on the instance explicitly.
        setattr(obj, self.name, int(round(val*(2**16))))


def max_power_of_two(x):
    """
    Return the highest exponent of two, so that
    (2 ** exponent) <= x

    Returns 0 when no such exponent exists (x <= 0).
    """
    if x <= 0:
        # Repeatedly right shifting a negative integer never reaches zero
        return 0
    return x.bit_length() - 1
# The code for creating a BMP table (cmap format 4) is taken, with thanks,
# from the fonttools project (BSD licensed).

from struct import unpack_from, calcsize, pack
from collections import OrderedDict

from calibre.utils.fonts.utils import get_bmp_glyph_ids
from calibre.utils.fonts.sfnt import UnknownTable, max_power_of_two
from calibre.utils.fonts.sfnt.errors import UnsupportedFont


def split_range(start_code, end_code, cmap):  # {{{
    '''
    Try to split a range of character codes into subranges with consecutive
    glyph IDs in such a way that the cmap4 subtable can be stored "most"
    efficiently.

    :param start_code: First character code of the range (inclusive).
    :param end_code: Last character code of the range (inclusive).
    :param cmap: Mapping of character code to glyph id. Must contain every
                 code from start_code to end_code.
    :return: A tuple ``(start, end)`` of lists of segment start and end
             codes. The start of the first segment is omitted, so ``start``
             is always one item shorter than ``end``.
    '''
    if start_code == end_code:
        return [], [end_code]

    last_id = cmap[start_code]
    last_code = start_code
    in_order = None
    ordered_begin = None
    sub_ranges = []

    # Gather subranges in which the glyph IDs are consecutive.
    for code in range(start_code + 1, end_code + 1):
        glyph_id = cmap[code]

        if glyph_id - 1 == last_id:
            if in_order is None or not in_order:
                in_order = 1
                ordered_begin = last_code
        else:
            if in_order:
                in_order = 0
                sub_ranges.append((ordered_begin, last_code))
                ordered_begin = None

        last_id = glyph_id
        last_code = code

    if in_order:
        sub_ranges.append((ordered_begin, last_code))
    assert last_code == end_code

    # Now filter out those new subranges that would only make the data bigger.
    # A new segment costs 8 bytes, not using a new segment costs 2 bytes per
    # character.
    new_ranges = []
    for b, e in sub_ranges:
        if b == start_code and e == end_code:
            break  # the whole range, we're fine
        if b == start_code or e == end_code:
            threshold = 4  # split costs one more segment
        else:
            threshold = 8  # split costs two more segments
        if (e - b + 1) > threshold:
            new_ranges.append((b, e))
    sub_ranges = new_ranges

    if not sub_ranges:
        return [], [end_code]

    if sub_ranges[0][0] != start_code:
        sub_ranges.insert(0, (start_code, sub_ranges[0][0] - 1))
    if sub_ranges[-1][1] != end_code:
        sub_ranges.append((sub_ranges[-1][1] + 1, end_code))

    # Fill the "holes" in the segments list -- those are the segments in which
    # the glyph IDs are _not_ consecutive.
    i = 1
    while i < len(sub_ranges):
        if sub_ranges[i-1][1] + 1 != sub_ranges[i][0]:
            sub_ranges.insert(i, (sub_ranges[i-1][1] + 1, sub_ranges[i][0] - 1))
            i = i + 1
        i = i + 1

    # Transform the ranges into start_code/end_code lists.
    start = []
    end = []
    for b, e in sub_ranges:
        start.append(b)
        end.append(e)
    start.pop(0)

    assert len(start) + 1 == len(end)
    return start, end
# }}}


def set_id_delta(id_delta):  # {{{
    '''
    Wrap ``id_delta`` into the range of a signed 16 bit integer.

    id_delta is stored as a short, so it must lie between -32768 and 32767.
    The start code of a segment can be between 0 and 64K-1 and the first
    glyph id between 1 and 64K-1, so the difference between them can be
    anywhere between -(64K-2) and 64K-1. Since the final glyph id is
    reconstructed modulo 0x10000, adding or subtracting 0x10000 does not
    change the glyph id that results, but brings the value into the range of
    a short.

    :param id_delta: Difference between a glyph id and its character code.
    :return: An equivalent (modulo 0x10000) value that fits in a short.
    '''
    if id_delta > 0x7FFF:
        id_delta = id_delta - 0x10000
    elif id_delta < -0x8000:
        # -0x8000 itself is a valid short and must be left alone, wrapping it
        # would give +0x8000, which cannot be packed as a short.
        id_delta = id_delta + 0x10000

    return id_delta
# }}}


class CmapTable(UnknownTable):
    '''
    The cmap table. Only the Windows BMP subtable (platform 3, encoding 1,
    format 4) is used, it is stored in ``self.bmp_table`` (None if the font
    does not have one).
    '''

    def __init__(self, *args, **kwargs):
        super(CmapTable, self).__init__(*args, **kwargs)

        self.version, self.num_tables = unpack_from(b'>HH', self.raw)

        self.tables = {}

        # Read the encoding records that follow the four byte header
        offset = 4
        sz = calcsize(b'>HHL')
        recs = []
        for i in xrange(self.num_tables):
            platform, encoding, table_offset = unpack_from(b'>HHL', self.raw,
                    offset)
            offset += sz
            recs.append((platform, encoding, table_offset))

        self.bmp_table = None

        for i in xrange(len(recs)):
            platform, encoding, offset = recs[i]
            # A subtable is taken to extend up to the offset of the next
            # record (or to the end of the data for the last record).
            # NOTE(review): this assumes the records are ordered by offset
            try:
                next_offset = recs[i+1][-1]
            except IndexError:
                next_offset = len(self.raw)
            table = self.raw[offset:next_offset]
            fmt = unpack_from(b'>H', table)[0]
            if platform == 3 and encoding == 1 and fmt == 4:
                self.bmp_table = table

    def get_character_map(self, chars):
        '''
        Get a mapping of character codes to glyph ids in the font.

        :param chars: Iterable of character codes. Duplicates are ignored.
        :return: An OrderedDict, ordered by character code, containing only
                 those codes that map to a glyph id greater than zero.
        :raises UnsupportedFont: If the font has no Windows BMP subtable.
        '''
        if self.bmp_table is None:
            raise UnsupportedFont('This font has no Windows BMP cmap subtable.'
                    ' Most likely a special purpose font.')
        chars = list(set(chars))
        chars.sort()
        ans = OrderedDict()
        for i, glyph_id in enumerate(get_bmp_glyph_ids(self.bmp_table, 0,
            chars)):
            if glyph_id > 0:
                ans[chars[i]] = glyph_id
        return ans

    def set_character_map(self, cmap):
        '''
        Replace the contents of this table with a single Windows BMP
        (format 4) subtable built from ``cmap``. Updates ``self.bmp_table``
        and ``self.raw``.

        :param cmap: Mapping of character code to glyph id.
        '''
        self.version, self.num_tables = 0, 1
        codes = sorted(cmap)

        if not codes:
            start_code = [0xffff]
            end_code = [0xffff]
        else:
            last_code = codes[0]
            end_code = []
            start_code = [last_code]

            # Group the codes into runs of consecutive character codes
            for code in codes[1:]:
                if code == last_code + 1:
                    last_code = code
                    continue
                start, end = split_range(start_code[-1], last_code, cmap)
                start_code.extend(start)
                end_code.extend(end)
                start_code.append(code)
                last_code = code
            end_code.append(last_code)
            start_code.append(0xffff)
            end_code.append(0xffff)

        id_delta = []
        id_range_offset = []
        glyph_index_array = []
        for i in xrange(len(end_code)-1):  # skip the closing codes (0xffff)
            indices = []
            for char_code in xrange(start_code[i], end_code[i] + 1):
                indices.append(cmap[char_code])
            # The expected run must be a list, a list never compares equal to
            # an xrange object.
            if indices == list(xrange(indices[0], indices[0] + len(indices))):
                # Consecutive glyph ids, a single delta describes the segment
                id_delta_temp = set_id_delta(indices[0] - start_code[i])
                id_delta.append(id_delta_temp)
                id_range_offset.append(0)
            else:
                # Glyph ids have to be listed individually
                id_delta.append(0)
                id_range_offset.append(2 * (len(end_code) +
                    len(glyph_index_array) - i))
                glyph_index_array.extend(indices)
        id_delta.append(1)  # 0xffff + 1 == 0. So this end code maps to .notdef
        id_range_offset.append(0)

        seg_count = len(end_code)
        max_exponent = max_power_of_two(seg_count)
        search_range = 2 * (2 ** max_exponent)
        entry_selector = max_exponent
        range_shift = 2 * seg_count - search_range

        # The zero between the end codes and the start codes is a pad value
        char_code_array = end_code + [0] + start_code
        char_code_array = pack(b'>%dH'%len(char_code_array), *char_code_array)
        id_delta_array = pack(b'>%dh'%len(id_delta), *id_delta)
        rest_array = id_range_offset + glyph_index_array
        rest_array = pack(b'>%dH'%len(rest_array), *rest_array)
        data = char_code_array + id_delta_array + rest_array

        fmt = b'>7H'
        length = calcsize(fmt) + len(data)
        header = pack(fmt, 4, length, 0,
                2*seg_count, search_range, entry_selector, range_shift)
        self.bmp_table = header + data

        # Table header followed by a single (platform 3, encoding 1) record
        fmt = b'>4HL'
        offset = calcsize(fmt)
        self.raw = pack(fmt, self.version, self.num_tables, 3, 1, offset) + \
                self.bmp_table
from itertools import izip
from struct import calcsize, pack, unpack_from

from calibre.utils.fonts.sfnt import UnknownTable, DateTimeProperty


class HeadTable(UnknownTable):
    '''
    The head table. Every field of the table is unpacked into an attribute
    of the same name, call :meth:`update` to pack the attributes back into
    ``self.raw``.
    '''

    created = DateTimeProperty('_created')
    modified = DateTimeProperty('_modified')

    def __init__(self, *args, **kwargs):
        super(HeadTable, self).__init__(*args, **kwargs)

        # Alternating attribute name and struct format character
        field_types = (
            'version_number'      , 'L',
            'font_revision'       , 'L',
            'checksum_adjustment' , 'L',
            'magic_number'        , 'L',
            'flags'               , 'H',
            'units_per_em'        , 'H',
            '_created'            , 'q',
            '_modified'           , 'q',
            # The bounding box is made of signed 16 bit values, glyphs can
            # extend to the left of/below the origin.
            'x_min'               , 'h',
            'y_min'               , 'h',
            'x_max'               , 'h',
            'y_max'               , 'h',
            'mac_style'           , 'H',
            'lowest_rec_ppem'     , 'H',
            'font_direction_hint' , 'h',
            'index_to_loc_format' , 'h',
            'glyph_data_format'   , 'h'
        )

        self._fmt = ('>%s'%(''.join(field_types[1::2]))).encode('ascii')
        self._fields = field_types[0::2]

        for f, val in izip(self._fields, unpack_from(self._fmt, self.raw)):
            setattr(self, f, val)

    def update(self):
        ''' Rebuild ``self.raw`` from the current values of the fields. '''
        vals = [getattr(self, f) for f in self._fields]
        self.raw = pack(self._fmt, *vals)


class LocaTable(UnknownTable):
    '''
    The loca table. :meth:`load_offsets` must be called before
    :meth:`glyph_location` can be used.
    '''

    def load_offsets(self, head_table, maxp_table):
        '''
        Read the offsets into ``self.offset_map``.

        :param head_table: Object whose ``index_to_loc_format`` attribute
                           selects 16 bit (0) or 32 bit (anything else)
                           entries.
        :param maxp_table: Object whose ``num_glyphs`` attribute gives the
                           number of glyphs in the font.
        '''
        fmt = 'H' if head_table.index_to_loc_format == 0 else 'L'
        num_glyphs = maxp_table.num_glyphs
        sz = calcsize(('>%s'%fmt).encode('ascii'))
        num = len(self.raw)//sz
        self.offset_map = unpack_from(('>%d%s'%(num, fmt)).encode('ascii'),
                self.raw)
        # One more entry than there are glyphs, the extra entry is needed to
        # compute the length of the last glyph.
        self.offset_map = self.offset_map[:num_glyphs+1]
        if fmt == 'H':
            # Entries in the 16 bit format are doubled to get the offset
            self.offset_map = [2*i for i in self.offset_map]

    def glyph_location(self, glyph_id):
        '''
        Return the tuple ``(offset, length)`` for the glyph ``glyph_id``,
        the length being the distance to the next offset.
        '''
        offset = self.offset_map[glyph_id]
        next_offset = self.offset_map[glyph_id+1]
        return offset, next_offset - offset
from itertools import izip
from struct import unpack_from, pack

from calibre.utils.fonts.sfnt import UnknownTable
from calibre.utils.fonts.sfnt.errors import UnsupportedFont


class MaxpTable(UnknownTable):
    '''
    The maxp table. The fields of the table are available as attributes,
    their names are listed in ``self.fields``. Call :meth:`update` to pack
    the attributes back into ``self.raw``.
    '''

    def __init__(self, *args, **kwargs):
        super(MaxpTable, self).__init__(*args, **kwargs)

        self._fmt = b'>LH'
        self._version, self.num_glyphs = unpack_from(self._fmt, self.raw)
        # update() reads _fields, so both names must always be set together
        self.fields = self._fields = ('_version', 'num_glyphs')

        if self._version >= 0x10000:
            # Switch to the full field layout and re-read all the fields.
            # This also restores _version to the value stored in the table.
            self.version = 0x10000
            vals = unpack_from(self._fmt, self.raw)
            for f, val in izip(self.fields, vals):
                setattr(self, f, val)

    @property
    def version(self):
        '''
        The table version. Setting it to 0x5000 or 0x10000 also switches the
        list of fields and the struct format to the matching layout.
        '''
        return self._version

    @version.setter
    def version(self, val):
        if val == 0x5000:
            self._fmt = b'>LH'
            self.fields = self._fields = ('_version', 'num_glyphs')
        elif val == 0x10000:
            self.fields = self._fields = (
                    '_version', 'num_glyphs', 'max_points',
                    'max_contours', 'max_composite_points',
                    'max_composite_contours', 'max_zones',
                    'max_twilight_points', 'max_storage', 'max_function_defs',
                    'max_instruction_defs', 'max_stack_elements',
                    'max_size_of_instructions', 'max_component_elements',
                    'max_component_depth')
            self._fmt = b'>LH' + b'H'*(len(self.fields)-2)
        self._version = val

    def update(self):
        '''
        Rebuild ``self.raw`` from the current values of the fields.

        :raises UnsupportedFont: If the table version is greater than 0x10000.
        '''
        if self._version > 0x10000:
            raise UnsupportedFont('maxp table with version > 0x10000 not modifiable')
        vals = [getattr(self, f) for f in self._fields]
        self.raw = pack(self._fmt, *vals)
from calibre.utils.fonts.sfnt.container import Sfnt
from calibre.utils.fonts.sfnt.errors import UnsupportedFont


def subset_truetype(sfnt, character_map):
    '''
    Load the glyph offsets of a font with TrueType outlines.

    :param sfnt: Font container, must contain a loca table.
    :param character_map: Mapping of character code to glyph id (not used
                          yet).
    :raises UnsupportedFont: If the head or the maxp table is missing.
    '''
    loca = sfnt[b'loca']
    try:
        head = sfnt[b'head']
        maxp = sfnt[b'maxp']
    except KeyError:
        raise UnsupportedFont('This font does not contain head and/or maxp tables')
    loca.load_offsets(head, maxp)


def subset(raw, individual_chars, ranges=()):
    '''
    Restrict the font in ``raw`` to the specified characters.

    :param raw: The font data.
    :param individual_chars: Iterable of single characters to keep.
    :param ranges: Iterable of ``(first, last)`` pairs of characters, all
                   characters from first to last (inclusive) are kept.
    :raises UnsupportedFont: If the font has no cmap table or no TrueType
                             outlines.
    '''
    chars = [ord(ch) for ch in individual_chars]
    for r in ranges:
        chars.extend(xrange(ord(r[0]), ord(r[1])+1))

    sfnt = Sfnt(raw)
    # A digital signature is useless in a subset font, drop the DSIG table
    sfnt.pop(b'DSIG', None)

    try:
        cmap = sfnt[b'cmap']
    except KeyError:
        raise UnsupportedFont('This font has no cmap table')

    # Map the requested chars to glyph ids, then shrink the cmap table so
    # that it only has entries for those chars
    character_map = cmap.get_character_map(chars)
    cmap.set_character_map(character_map)

    has_truetype_outlines = b'loca' in sfnt and b'glyf' in sfnt
    if has_truetype_outlines:
        subset_truetype(sfnt, character_map)
        return

    if b'CFF ' in sfnt:
        raise UnsupportedFont('This font contains PostScript outlines, '
                'subsetting not supported')
    raise UnsupportedFont('This font does not contain TrueType '
            'or PostScript outlines')