class Unpackable(object):

    '''Sequential reader of big-endian binary data.

    ``offset`` is the read cursor and is advanced by every call to
    :meth:`unpack`. ``start_pos`` keeps the position at which reading began,
    so callers can resolve offsets that are stored relative to the start of
    the structure being parsed.
    '''

    def __init__(self, raw, offset):
        self.raw, self.offset = raw, offset
        self.start_pos = offset

    def unpack(self, fmt, single_special=True):
        '''Decode ``fmt`` at the cursor and advance the cursor past it.

        :param fmt: A :mod:`struct` format *without* a byte-order prefix
            (text or bytes); it is always decoded as big-endian.
        :param single_special: When True (the default) a format that yields
            exactly one value returns that value instead of a 1-tuple. Pass
            False when the number of items is variable and a tuple is always
            wanted.
        :return: The decoded value, or a tuple of decoded values.
        '''
        if not isinstance(fmt, bytes):
            fmt = fmt.encode('ascii')
        # The size must be computed from the very same format that is used
        # for decoding. Without the '>' prefix calcsize() reports the native
        # size including alignment padding, which moves the cursor too far
        # for formats such as 'HL'.
        full_fmt = b'>' + fmt
        ans = unpack_from(full_fmt, self.raw, self.offset)
        self.offset += calcsize(full_fmt)
        if single_special and len(ans) == 1:
            ans = ans[0]
        return ans
class SimpleListTable(list):

    '''A table that contains a list of subtables.

    The constructor reads, in order: an optional extra header, a uint16
    count, ``count`` uint16 offsets relative to the start of this table and
    an optional extra footer. Every offset becomes a ``child_class``
    instance appended to this list.
    '''

    # Callable taking (raw, absolute_offset); set by subclasses
    child_class = None

    def __init__(self, raw, offset):
        list.__init__(self)

        data = Unpackable(raw, offset)
        self.read_extra_header(data)

        count = data.unpack('H')
        # single_special=False: always get a tuple, even for one offset
        child_offsets = data.unpack('%dH' % count, single_special=False)
        for child_offset in child_offsets:
            self.append(self.child_class(raw, data.start_pos + child_offset))
        self.read_extra_footer(data)

    def read_extra_header(self, data):
        'Hook: consume fields that precede the count. Does nothing here.'
        pass

    def read_extra_footer(self, data):
        'Hook: consume fields that follow the offsets. Does nothing here.'
        pass


class ListTable(OrderedDict):

    '''A table that contains an ordered mapping of table tag to subtable.

    After an optional extra header the constructor reads a uint16 count
    followed by ``count`` records of (4 byte tag, uint16 offset). Offsets
    are relative to the start of this table.
    '''

    child_class = None

    def __init__(self, raw, offset):
        OrderedDict.__init__(self)

        data = Unpackable(raw, offset)
        self.read_extra_header(data)

        count = data.unpack('H')
        for _ in range(count):
            tag, coffset = data.unpack('4sH')
            self[tag] = self.child_class(raw, data.start_pos + coffset)

        self.read_extra_footer(data)

    def read_extra_header(self, data):
        'Hook: consume fields that precede the count. Does nothing here.'
        pass

    def read_extra_footer(self, data):
        'Hook: consume fields that follow the records. Does nothing here.'
        pass

    def dump(self, prefix=''):
        'Print this table and, recursively, its children (debugging aid).'
        print(prefix, self.__class__.__name__, sep='')
        prefix += ' '
        for tag, child in self.items():
            print(prefix, tag, sep='')
            # ScriptTable stores None under b'default' when the script has
            # no default language system; there is nothing to dump then.
            if child is not None:
                child.dump(prefix=prefix+' ')


class IndexTable(list):

    '''A list of uint16 values preceded by an optional header and a uint16
    count.'''

    def __init__(self, raw, offset):
        list.__init__(self)
        data = Unpackable(raw, offset)
        self.read_extra_header(data)

        count = data.unpack('H')
        self.extend(data.unpack('%dH' % count, single_special=False))

    def read_extra_header(self, data):
        'Hook: consume fields that precede the count. Does nothing here.'
        pass

    def dump(self, prefix=''):
        'Print the class name only (debugging aid).'
        print(prefix, self.__class__.__name__, sep='')


class LanguageSystemTable(IndexTable):

    '''IndexTable whose header holds ``lookup_order`` and
    ``required_feature_index``.'''

    def read_extra_header(self, data):
        self.lookup_order, self.required_feature_index = data.unpack('2H')
        if self.lookup_order != 0:
            raise UnsupportedFont('This LanguageSystemTable has an unknown'
                    ' lookup order: 0x%x'%self.lookup_order)


class ScriptTable(ListTable):

    '''Mapping of language tag to LanguageSystemTable. The extra key
    ``b'default'`` holds the default language system, or None when the
    stored offset is zero.'''

    child_class = LanguageSystemTable

    def read_extra_header(self, data):
        start_pos = data.offset
        default_offset = data.unpack('H')
        self[b'default'] = (LanguageSystemTable(data.raw, start_pos +
            default_offset) if default_offset else None)


class ScriptListTable(ListTable):

    child_class = ScriptTable


class FeatureTable(IndexTable):

    'IndexTable whose header is a single uint16, ``feature_params``.'

    def read_extra_header(self, data):
        # A non NULL value is deliberately tolerated: Source code pro sets
        # this to non NULL.
        self.feature_params = data.unpack('H')


class FeatureListTable(ListTable):

    child_class = FeatureTable


class LookupTable(SimpleListTable):

    '''List of lookup subtables. Subclasses choose the subtable class in
    :meth:`set_child_class`, based on ``lookup_type``.'''

    # Only present in the data when bit 0x0010 of lookup_flag is set
    mark_filtering_set = None

    def read_extra_header(self, data):
        self.lookup_type, self.lookup_flag = data.unpack('2H')
        self.set_child_class()

    def set_child_class(self):
        'Set ``self.child_class`` for ``self.lookup_type``.'
        raise NotImplementedError()

    def read_extra_footer(self, data):
        if self.lookup_flag & 0x0010:
            self.mark_filtering_set = data.unpack('H')


def ExtensionSubstitution(raw, offset, subtable_map=None):
    '''Parse an extension subtable and return the subtable it points to.

    :param raw: The raw table data.
    :param offset: Absolute offset of the extension subtable in ``raw``.
    :param subtable_map: Mapping of lookup type to a callable taking
        ``(raw, absolute_offset)``.
    :raises UnsupportedFont: If the extension format is not 1.
    :raises KeyError: If the lookup type is not in ``subtable_map``.
    '''
    if subtable_map is None:
        subtable_map = {}
    data = Unpackable(raw, offset)
    subst_format, extension_lookup_type, extension_offset = data.unpack('2HL')
    if subst_format != 1:
        raise UnsupportedFont('ExtensionSubstitution has unknown format: 0x%x'%subst_format)
    # The stored offset is relative to the start of the extension subtable
    return subtable_map[extension_lookup_type](raw,
            extension_offset + data.start_pos)


CoverageRange = namedtuple('CoverageRange', 'start end start_coverage_index')


class Coverage(object):

    '''A coverage table: maps glyph ids to coverage indices.

    Format 1 stores an explicit list of glyph ids (``glyph_ids`` and
    ``glyph_ids_map``). Format 2 stores ``ranges``, a list of
    :class:`CoverageRange`.
    '''

    def __init__(self, raw, offset, parent_table_name):
        data = Unpackable(raw, offset)
        self.format, count = data.unpack('2H')

        if self.format not in {1, 2}:
            raise UnsupportedFont('Unknown Coverage format: 0x%x in %s'%(
                self.format, parent_table_name))
        if self.format == 1:
            self.glyph_ids = data.unpack('%dH'%count, single_special=False)
            self.glyph_ids_map = {gid:i for i, gid in
                    enumerate(self.glyph_ids)}
        else:
            # Each range is three consecutive uint16 values
            flat = data.unpack('%dH'%(3*count), single_special=False)
            self.ranges = [CoverageRange(*flat[i:i+3])
                    for i in range(0, 3*count, 3)]

    def coverage_indices(self, glyph_ids):
        '''Return map of glyph_id -> coverage index. Map contains only those
        glyph_ids that are covered by this table and that are present in
        glyph_ids. The map preserves the iteration order of glyph_ids.'''
        ans = OrderedDict()
        if self.format == 1:
            lookup = self.glyph_ids_map.get
            for gid in glyph_ids:
                idx = lookup(gid)
                if idx is not None:
                    ans[gid] = idx
            return ans
        for gid in glyph_ids:
            for start, end, start_coverage_index in self.ranges:
                if start <= gid <= end:
                    ans[gid] = start_coverage_index + (gid-start)
                    # Ranges are expected not to overlap, so the first hit
                    # is the only one
                    break
        return ans


class UnknownLookupSubTable(object):

    '''Base class for lookup subtables.

    The constructor reads the uint16 format, validates it against
    ``formats``, reads the coverage table if ``has_initial_coverage`` is
    True and then hands the reader to :meth:`initialize`.
    '''

    # Set of format numbers supported by the subclass
    formats = {}

    def __init__(self, raw, offset):
        data = Unpackable(raw, offset)
        self.format = data.unpack('H')
        if self.format not in self.formats:
            raise UnsupportedFont('Unknown format for Lookup Subtable %s: 0x%x'%(
                self.__class__.__name__, self.format))
        if self.has_initial_coverage:
            coverage_offset = data.unpack('H') + data.start_pos
            self.coverage = Coverage(raw, coverage_offset, self.__class__.__name__)
        self.initialize(data)

    @property
    def has_initial_coverage(self):
        'True if a coverage offset directly follows the format field.'
        return True

    def initialize(self, data):
        'Read the format specific part of the subtable from ``data``.'
        raise NotImplementedError()

    def all_substitutions(self, glyph_ids):
        ''' Return a set of all glyph ids that could be substituted for any
        subset of the specified glyph ids (which must be a set)'''
        raise NotImplementedError()

    def read_sets(self, data, read_item=None, set_is_index=False):
        '''Read a uint16 count followed by that many offsets to "sets".

        Every set is itself a uint16 count followed by that many uint16
        values. With ``set_is_index`` the values are returned as they are,
        otherwise they are treated as offsets relative to the start of the
        set and ``read_item(data)`` is called with the reader positioned at
        each of them.

        :return: A list with one list of items per set, in file order (so
            it can be indexed by coverage index).
        '''
        count = data.unpack('H')
        set_offsets = data.unpack('%dH'%count, single_special=False)
        coverage_to_items_map = []
        for set_offset in set_offsets:
            # Read items in the set
            data.offset = set_start = set_offset + data.start_pos
            item_count = data.unpack('H')
            values = data.unpack('%dH'%item_count, single_special=False)
            if set_is_index:
                items = list(values)
            else:
                items = []
                for item_offset in values:
                    data.offset = item_offset + set_start
                    items.append(read_item(data))
            coverage_to_items_map.append(items)
        return coverage_to_items_map
class SingleSubstitution(UnknownLookupSubTable):

    '''Replaces one glyph with one glyph. Format 1 adds ``delta`` to the
    glyph id, format 2 looks the substitute up by coverage index.'''

    formats = {1, 2}

    def initialize(self, data):
        if self.format == 1:
            self.delta = data.unpack('h')
        else:
            count = data.unpack('H')
            self.substitutes = data.unpack('%dH'%count, single_special=False)

    def all_substitutions(self, glyph_ids):
        gid_index_map = self.coverage.coverage_indices(glyph_ids)
        if self.format == 1:
            # Glyph ids are 16 bit: the addition of the (signed) delta wraps
            # around modulo 65536
            return {(gid + self.delta) & 0xFFFF for gid in gid_index_map}
        return {self.substitutes[i] for i in gid_index_map.values()}


class MultipleSubstitution(UnknownLookupSubTable):

    'Replaces one glyph with a sequence of glyphs.'

    formats = {1}

    def initialize(self, data):
        # One list of substitute glyph ids per coverage index
        self.coverage_to_subs_map = self.read_sets(data, set_is_index=True)

    def all_substitutions(self, glyph_ids):
        gid_index_map = self.coverage.coverage_indices(glyph_ids)
        ans = set()
        for index in gid_index_map.values():
            ans.update(self.coverage_to_subs_map[index])
        return ans


class AlternateSubstitution(MultipleSubstitution):
    # Parsed exactly like MultipleSubstitution
    pass


class LigatureSubstitution(UnknownLookupSubTable):

    'Replaces a sequence of glyphs with a single ligature glyph.'

    formats = {1}

    def initialize(self, data):
        # One list of (ligature glyph id, components) per coverage index
        self.coverage_to_lig_map = self.read_sets(data, self.read_ligature)

    def read_ligature(self, data):
        '''Read one ligature record at the cursor of ``data``.

        :return: ``(ligature_glyph_id, components)`` where components are
            the glyph ids that must follow the first (covered) glyph.
        '''
        lig_glyph, count = data.unpack('HH')
        # The component count includes the first glyph, which is given by
        # the coverage table and is not stored in the component array, so
        # only count - 1 glyph ids follow.
        components = data.unpack('%dH'%max(count - 1, 0),
                single_special=False)
        return (lig_glyph, components)

    def all_substitutions(self, glyph_ids):
        gid_index_map = self.coverage.coverage_indices(glyph_ids)
        ans = set()
        for index in gid_index_map.values():
            for glyph_id, components in self.coverage_to_lig_map[index]:
                # The ligature can only form if all its remaining components
                # are available as well
                if set(components).issubset(glyph_ids):
                    ans.add(glyph_id)
        return ans


class ContexttualSubstitution(UnknownLookupSubTable):

    formats = {1, 2, 3}

    @property
    def has_initial_coverage(self):
        # Format 3 does not start with a coverage offset
        return self.format != 3

    def initialize(self, data):
        pass  # TODO

    def all_substitutions(self, glyph_ids):
        # This table only defined substitution in terms of other tables
        return set()


class ChainingContextualSubstitution(UnknownLookupSubTable):

    formats = {1, 2, 3}

    @property
    def has_initial_coverage(self):
        # Format 3 does not start with a coverage offset
        return self.format != 3

    def initialize(self, data):
        pass  # TODO

    def all_substitutions(self, glyph_ids):
        # This table only defined substitution in terms of other tables
        return set()


class ReverseChainSingleSubstitution(UnknownLookupSubTable):

    formats = {1}

    def initialize(self, data):
        backtrack_count = data.unpack('H')
        backtrack_offsets = data.unpack('%dH'%backtrack_count,
                single_special=False)
        lookahead_count = data.unpack('H')
        lookahead_offsets = data.unpack('%dH'%lookahead_count,
                single_special=False)
        # Absolute offsets of the backtrack/lookahead coverage tables.
        # TODO: Use these
        self.backtrack_offsets = [data.start_pos + x for x in backtrack_offsets]
        self.lookahead_offsets = [data.start_pos + x for x in lookahead_offsets]
        count = data.unpack('H')
        # single_special=False: substitutes must stay indexable even when
        # there is exactly one of them
        self.substitutes = data.unpack('%dH'%count, single_special=False)

    def all_substitutions(self, glyph_ids):
        gid_index_map = self.coverage.coverage_indices(glyph_ids)
        return {self.substitutes[i] for i in gid_index_map.values()}


# Lookup type -> subtable class. Type 7 (extension) is handled separately in
# GSUBLookupTable.set_child_class
subtable_map = {
        1: SingleSubstitution,
        2: MultipleSubstitution,
        3: AlternateSubstitution,
        4: LigatureSubstitution,
        5: ContexttualSubstitution,
        6: ChainingContextualSubstitution,
        8: ReverseChainSingleSubstitution,
}


class GSUBLookupTable(LookupTable):

    def set_child_class(self):
        if self.lookup_type == 7:
            # Extension lookups wrap one of the other subtable types
            self.child_class = partial(ExtensionSubstitution,
                    subtable_map=subtable_map)
        else:
            self.child_class = subtable_map[self.lookup_type]


class LookupListTable(SimpleListTable):

    child_class = GSUBLookupTable


class GSUBTable(UnknownTable):

    'The glyph substitution (GSUB) table.'

    version = FixedProperty('_version')

    def decompile(self):
        '''Parse the header and the script, feature and lookup lists.

        :raises UnsupportedFont: If the table version is not 0x10000 or a
            sub-table uses an unsupported format.
        '''
        (self._version, self.scriptlist_offset, self.featurelist_offset,
                self.lookuplist_offset) = unpack_from(b'>L3H', self.raw)
        if self._version != 0x10000:
            raise UnsupportedFont('The GSUB table has unknown version: 0x%x'%
                    self._version)

        self.script_list_table = ScriptListTable(self.raw,
                self.scriptlist_offset)
        # self.script_list_table.dump()

        self.feature_list_table = FeatureListTable(self.raw,
                self.featurelist_offset)
        # self.feature_list_table.dump()

        self.lookup_list_table = LookupListTable(self.raw,
                self.lookuplist_offset)

    def all_substitutions(self, glyph_ids):
        '''Return the set of all glyph ids that any lookup subtable could
        substitute for (subsets of) ``glyph_ids``. decompile() must have
        been called first.'''
        ans = set()
        glyph_ids = frozenset(glyph_ids)
        for lookup_table in self.lookup_list_table:
            for subtable in lookup_table:
                ans |= subtable.all_substitutions(glyph_ids)
        return ans
def subset_postscript(sfnt, character_map, extra_glyphs):
    '''Subset the PostScript (CFF) outlines of ``sfnt`` in place.

    The ``CFF `` table is decompiled and then asked to subset itself to the
    glyphs of ``character_map`` plus ``extra_glyphs``.
    '''
    cff_table = sfnt[b'CFF ']
    cff_table.decompile()
    cff_table.subset(character_map, extra_glyphs)


def do_warn(warnings, *args):
    '''Report warning messages, one output line per line of text.

    Every argument is split into lines. If ``warnings`` is None the lines
    are printed, otherwise they are appended to ``warnings``. An empty line
    is emitted at the end as a separator.
    '''
    emit = print if warnings is None else warnings.append
    for message in args:
        for text in message.splitlines():
            emit(text)
    emit('')
ensure that glyphs that can be + # substituted for the specified set of glyphs are not removed + gsub = sfnt[b'GSUB'] + try: + gsub.decompile() + extra_glyphs = gsub.all_substitutions(character_map.itervalues()) + except UnsupportedFont as e: + warn('Usupported GSUB table: %s'%e) + except Exception as e: + warn('Failed to decompile GSUB table:', traceback.format_exc()) + if b'loca' in sfnt and b'glyf' in sfnt: # TrueType Outlines - subset_truetype(sfnt, character_map) + subset_truetype(sfnt, character_map, extra_glyphs) elif b'CFF ' in sfnt: # PostScript Outlines - subset_postscript(sfnt, character_map) + subset_postscript(sfnt, character_map, extra_glyphs) else: raise UnsupportedFont('This font does not contain TrueType ' 'or PostScript outlines') @@ -130,11 +149,10 @@ def subset(raw, individual_chars, ranges=()): try: sfnt[b'kern'].restrict_to_glyphs(frozenset(character_map.itervalues())) except UnsupportedFont as e: - print ('Subsetting of kern table failed, ignoring: %s'%e) + warn('kern table unsupported, ignoring: %s'%e) except Exception as e: - print ('Subsetting of kern table failed, ignoring') - import traceback - traceback.print_exc() + warn('Subsetting of kern table failed, ignoring:', + traceback.format_exc()) raw, new_sizes = sfnt() return raw, old_sizes, new_sizes @@ -264,14 +282,20 @@ def all(): from calibre.utils.fonts.scanner import font_scanner failed = [] unsupported = [] + warnings = {} total = 0 + averages = [] for family in font_scanner.find_font_families(): for font in font_scanner.fonts_for_family(family): raw = font_scanner.get_font_data(font) print ('Subsetting', font['full_name'], end='\t') total += 1 try: - sf, old_stats, new_stats = subset(raw, set(('a', 'b', 'c')), ()) + w = [] + sf, old_stats, new_stats = subset(raw, set(('a', 'b', 'c')), + (), w) + if w: + warnings[font['full_name'] + ' (%s)'%font['path']] = w except NoGlyphs: print('No glyphs!') continue @@ -283,22 +307,29 @@ def all(): print ('Failed!') 
failed.append((font['full_name'], font['path'], unicode(e))) else: - print ('Reduced to:', '%.1f'%( - sum(new_stats.itervalues())/sum(old_stats.itervalues()) - * 100), '%') + averages.append(sum(new_stats.itervalues())/sum(old_stats.itervalues()) + * 100) + print ('Reduced to:', '%.1f'%averages[-1] , '%') if unsupported: print ('\n\nUnsupported:') for name, path, err in unsupported: print (name, path, err) print() + if warnings: + print ('\n\nWarnings:') + for name, w in warnings.iteritems(): + if w: + print (name) + print('', '\n\t'.join(w), sep='\t') if failed: print ('\n\nFailures:') for name, path, err in failed: print (name, path, err) print() + print ('Average reduction to: %.1f%%'%( sum(averages)/len(averages))) print('Total:', total, 'Unsupported:', len(unsupported), 'Failed:', - len(failed)) + len(failed), 'Warnings:', len(warnings)) # }}}