From b4a49e5cdde70a7284ea54e7a664e925ddd8429c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 10 Nov 2012 17:32:13 +0530
Subject: [PATCH] Font subsetting: Parse the GSUB table for glyph substitution
 rules and do not remove any glyphs that could act as substitutes. Keep
 zero length glyphs like the glyphs for non printable characters when
 subsetting TrueType outlines.

---
 src/calibre/utils/fonts/sfnt/cff/table.py |   5 +-
 src/calibre/utils/fonts/sfnt/common.py    | 240 ++++++++++++++++++++++
 src/calibre/utils/fonts/sfnt/container.py |   2 +
 src/calibre/utils/fonts/sfnt/gsub.py      | 180 ++++++++++++++++
 src/calibre/utils/fonts/sfnt/subset.py    |  99 ++++++---
 5 files changed, 491 insertions(+), 35 deletions(-)
 create mode 100644 src/calibre/utils/fonts/sfnt/common.py
 create mode 100644 src/calibre/utils/fonts/sfnt/gsub.py
diff --git a/src/calibre/utils/fonts/sfnt/cff/table.py b/src/calibre/utils/fonts/sfnt/cff/table.py
index 63d85b65ec..fa3a5207a0 100644
--- a/src/calibre/utils/fonts/sfnt/cff/table.py
+++ b/src/calibre/utils/fonts/sfnt/cff/table.py
@@ -186,7 +186,7 @@ class CFFTable(UnknownTable):
     def decompile(self):
         self.cff = CFF(self.raw)
 
-    def subset(self, character_map):
+    def subset(self, character_map, extra_glyphs):
         from calibre.utils.fonts.sfnt.cff.writer import Subset
         # Map codes from the cmap table to glyph names, this will be used to
         # reconstruct character_map for the subset font
@@ -196,6 +196,9 @@ class CFFTable(UnknownTable):
         charset.discard(None)
         if not charset:
             raise NoGlyphs('This font has no glyphs for the specified characters')
+        charset |= {
+            self.cff.charset.safe_lookup(glyph_id) for glyph_id in extra_glyphs}
+        charset.discard(None)
         s = Subset(self.cff, charset)
 
         # Rebuild character_map with the glyph ids from the subset font
diff --git a/src/calibre/utils/fonts/sfnt/common.py b/src/calibre/utils/fonts/sfnt/common.py
new file mode 100644
index 0000000000..49ba77a28e
--- /dev/null
+++ b/src/calibre/utils/fonts/sfnt/common.py
@@ -0,0 +1,240 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from struct import unpack_from, calcsize
+from collections import OrderedDict, namedtuple
+
+from calibre.utils.fonts.sfnt.errors import UnsupportedFont
+
+class Unpackable(object):
+
+    def __init__(self, raw, offset):
+        self.raw, self.offset = raw, offset
+        self.start_pos = offset
+
+    def unpack(self, fmt, single_special=True):
+        fmt = fmt.encode('ascii') if not isinstance(fmt, bytes) else fmt
+        ans = unpack_from(b'>'+fmt, self.raw, self.offset)
+        if single_special and len(ans) == 1:
+            ans = ans[0]
+        self.offset += calcsize(fmt)
+        return ans
+
+class SimpleListTable(list):
+
+    'A table that contains a list of subtables'
+
+    child_class = None
+
+    def __init__(self, raw, offset):
+        list.__init__(self)
+
+        data = Unpackable(raw, offset)
+        self.read_extra_header(data)
+
+        count = data.unpack('H')
+        for i in xrange(count):
+            offset = data.unpack('H')
+            self.append(self.child_class(raw, data.start_pos + offset))
+        self.read_extra_footer(data)
+
+    def read_extra_header(self, data):
+        pass
+
+    def read_extra_footer(self, data):
+        pass
+
+class ListTable(OrderedDict):
+
+    'A table that contains an ordered mapping of table tag to subtable'
+
+    child_class = None
+
+    def __init__(self, raw, offset):
+        OrderedDict.__init__(self)
+
+        data = Unpackable(raw, offset)
+        self.read_extra_header(data)
+
+        count = data.unpack('H')
+        for i in xrange(count):
+            tag, coffset = data.unpack('4sH')
+            self[tag] = self.child_class(raw, data.start_pos + coffset)
+
+        self.read_extra_footer(data)
+
+    def read_extra_header(self, data):
+        pass
+
+    def read_extra_footer(self, data):
+        pass
+
+    def dump(self, prefix=''):
+        print (prefix, self.__class__.__name__, sep='')
+        prefix += '  '
+        for tag, child in self.iteritems():
+            print (prefix, tag, sep='')
+            child.dump(prefix=prefix+'  ')
+
+
+class IndexTable(list):
+
+    def __init__(self, raw, offset):
+        data = Unpackable(raw, offset)
+        self.read_extra_header(data)
+
+        count = data.unpack('H')
+        for i in xrange(count):
+            self.append(data.unpack('H'))
+
+    def read_extra_header(self, data):
+        pass
+
+    def dump(self, prefix=''):
+        print(prefix, self.__class__.__name__, sep='')
+
+class LanguageSystemTable(IndexTable):
+
+    def read_extra_header(self, data):
+        self.lookup_order, self.required_feature_index = data.unpack('2H')
+        if self.lookup_order != 0:
+            raise UnsupportedFont('This LanguageSystemTable has an unknown'
+                    ' lookup order: 0x%x'%self.lookup_order)
+
+class ScriptTable(ListTable):
+
+    child_class = LanguageSystemTable
+
+    def __init__(self, raw, offset):
+        ListTable.__init__(self, raw, offset)
+
+    def read_extra_header(self, data):
+        start_pos = data.offset
+        default_offset = data.unpack('H')
+        self[b'default'] = (LanguageSystemTable(data.raw, start_pos +
+            default_offset) if default_offset else None)
+
+class ScriptListTable(ListTable):
+
+    child_class = ScriptTable
+
+class FeatureTable(IndexTable):
+
+    def read_extra_header(self, data):
+        self.feature_params = data.unpack('H')
+        if False and self.feature_params != 0:
+            # Source code pro sets this to non NULL
+            raise UnsupportedFont(
+                'This FeatureTable has non NULL FeatureParams: 0x%x'%self.feature_params)
+
+class FeatureListTable(ListTable):
+
+    child_class = FeatureTable
+
+class LookupTable(SimpleListTable):
+
+    def read_extra_header(self, data):
+        self.lookup_type, self.lookup_flag = data.unpack('2H')
+        self.set_child_class()
+
+    def set_child_class(self):
+        raise NotImplementedError()
+
+    def read_extra_footer(self, data):
+        if self.lookup_flag & 0x0010:
+            self.mark_filtering_set = data.unpack('H')
+
+def ExtensionSubstitution(raw, offset, subtable_map={}):
+    data = Unpackable(raw, offset)
+    subst_format, extension_lookup_type, offset = data.unpack('2HL')
+    if subst_format != 1:
+        raise UnsupportedFont('ExtensionSubstitution has unknown format: 0x%x'%subst_format)
+    return subtable_map[extension_lookup_type](raw, offset+data.start_pos)
+
+CoverageRange = namedtuple('CoverageRange', 'start end start_coverage_index')
+
+class Coverage(object):
+
+    def __init__(self, raw, offset, parent_table_name):
+        data = Unpackable(raw, offset)
+        self.format, count = data.unpack('2H')
+
+        if self.format not in {1, 2}:
+            raise UnsupportedFont('Unknown Coverage format: 0x%x in %s'%(
+                self.format, parent_table_name))
+        if self.format == 1:
+            self.glyph_ids = data.unpack('%dH'%count, single_special=False)
+            self.glyph_ids_map = {gid:i for i, gid in
+                    enumerate(self.glyph_ids)}
+        else:
+            self.ranges = []
+            ranges = data.unpack('%dH'%(3*count), single_special=False)
+            for i in xrange(count):
+                start, end, start_coverage_index = ranges[i*3:(i+1)*3]
+                self.ranges.append(CoverageRange(start, end, start_coverage_index))
+
+    def coverage_indices(self, glyph_ids):
+        '''Return map of glyph_id -> coverage index. Map contains only those
+        glyph_ids that are covered by this table and that are present in
+        glyph_ids.'''
+        ans = OrderedDict()
+        for gid in glyph_ids:
+            if self.format == 1:
+                idx = self.glyph_ids_map.get(gid, None)
+                if idx is not None:
+                    ans[gid] = idx
+            else:
+                for start, end, start_coverage_index in self.ranges:
+                    if start <= gid <= end:
+                        ans[gid] = start_coverage_index + (gid-start)
+        return ans
+
+class UnknownLookupSubTable(object):
+
+    formats = {}
+
+    def __init__(self, raw, offset):
+        data = Unpackable(raw, offset)
+        self.format = data.unpack('H')
+        if self.format not in self.formats:
+            raise UnsupportedFont('Unknown format for Lookup Subtable %s: 0x%x'%(
+                self.__class__.__name__, self.format))
+        if self.has_initial_coverage:
+            coverage_offset = data.unpack('H') + data.start_pos
+            self.coverage = Coverage(raw, coverage_offset, self.__class__.__name__)
+        self.initialize(data)
+
+    @property
+    def has_initial_coverage(self):
+        return True
+
+    def all_substitutions(self, glyph_ids):
+        ''' Return a set of all glyph ids that could be substituted for any
+        subset of the specified glyph ids (which must be a set)'''
+        raise NotImplementedError()
+
+    def read_sets(self, data, read_item=None, set_is_index=False):
+        count = data.unpack('H')
+        sets = data.unpack('%dH'%count, single_special=False)
+        coverage_to_items_map = []
+        for offset in sets:
+            # Read items in the set
+            data.offset = start_pos = offset + data.start_pos
+            count = data.unpack('H')
+            item_offsets = data.unpack('%dH'%count, single_special=False)
+            items = []
+            for offset in item_offsets:
+                data.offset = offset + start_pos
+                if set_is_index:
+                    items.append(offset)
+                else:
+                    items.append(read_item(data))
+            coverage_to_items_map.append(items)
+        return coverage_to_items_map
+
diff --git a/src/calibre/utils/fonts/sfnt/container.py b/src/calibre/utils/fonts/sfnt/container.py
index cf207d0bd1..92246fe1a9 100644
--- a/src/calibre/utils/fonts/sfnt/container.py
+++ b/src/calibre/utils/fonts/sfnt/container.py
@@ -22,6 +22,7 @@ from calibre.utils.fonts.sfnt.loca import LocaTable
 from calibre.utils.fonts.sfnt.glyf import GlyfTable
 from calibre.utils.fonts.sfnt.cmap import CmapTable
 from calibre.utils.fonts.sfnt.kern import KernTable
+from calibre.utils.fonts.sfnt.gsub import GSUBTable
 from calibre.utils.fonts.sfnt.cff.table import CFFTable
 
 # OpenType spec: http://www.microsoft.com/typography/otspec/otff.htm
@@ -46,6 +47,7 @@ class Sfnt(object):
                     b'cmap' : CmapTable,
                     b'CFF ' : CFFTable,
                     b'kern' : KernTable,
+                    b'GSUB' : GSUBTable,
                     }.get(table_tag, UnknownTable)(table)
 
     def __getitem__(self, key):
diff --git a/src/calibre/utils/fonts/sfnt/gsub.py b/src/calibre/utils/fonts/sfnt/gsub.py
new file mode 100644
index 0000000000..77d7db8519
--- /dev/null
+++ b/src/calibre/utils/fonts/sfnt/gsub.py
@@ -0,0 +1,180 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from struct import unpack_from
+from functools import partial
+
+from calibre.utils.fonts.sfnt import UnknownTable, FixedProperty
+from calibre.utils.fonts.sfnt.errors import UnsupportedFont
+from calibre.utils.fonts.sfnt.common import (ScriptListTable, FeatureListTable,
+        SimpleListTable, LookupTable, ExtensionSubstitution,
+        UnknownLookupSubTable)
+
+class SingleSubstitution(UnknownLookupSubTable):
+    'Lookup type 1: maps each covered glyph to exactly one substitute glyph'
+    formats = {1, 2}
+
+    def initialize(self, data):
+        if self.format == 1:
+            self.delta = data.unpack('h')  # signed offset added to the glyph id
+        else:
+            count = data.unpack('H')
+            self.substitutes = data.unpack('%dH'%count, single_special=False)
+
+    def all_substitutions(self, glyph_ids):
+        gid_index_map = self.coverage.coverage_indices(glyph_ids)
+        if self.format == 1:  # glyph ids are 16 bit, the addition is modulo 65536
+            return {(gid + self.delta) & 0xffff for gid in gid_index_map}
+        return {self.substitutes[i] for i in gid_index_map.itervalues()}
+
+class MultipleSubstitution(UnknownLookupSubTable):
+
+    formats = {1}
+
+    def initialize(self, data):
+        self.coverage_to_subs_map = self.read_sets(data, set_is_index=True)
+
+    def all_substitutions(self, glyph_ids):
+        gid_index_map = self.coverage.coverage_indices(glyph_ids)
+        ans = set()
+        for index in gid_index_map.itervalues():
+            glyphs = set(self.coverage_to_subs_map[index])
+            ans |= glyphs
+        return ans
+
+class AlternateSubstitution(MultipleSubstitution):
+    pass
+
+class LigatureSubstitution(UnknownLookupSubTable):
+    'Lookup type 4: maps a sequence of component glyphs to one ligature glyph'
+    formats = {1}
+
+    def initialize(self, data):
+        self.coverage_to_lig_map = self.read_sets(data, self.read_ligature)
+
+    def read_ligature(self, data):
+        # The component count includes the first glyph, which is matched via the
+        # coverage table, so only count-1 glyph ids are stored in the record
+        lig_glyph, count = data.unpack('HH')
+        components = data.unpack('%dH'%max(0, count-1), single_special=False)
+        return (lig_glyph, components)
+
+    def all_substitutions(self, glyph_ids):
+        # Keep a ligature only if all its remaining components are in glyph_ids
+        gid_index_map = self.coverage.coverage_indices(glyph_ids)
+        return {lig_glyph for index in gid_index_map.itervalues()
+                for lig_glyph, components in self.coverage_to_lig_map[index]
+                if set(components).issubset(glyph_ids)}
+
+class ContexttualSubstitution(UnknownLookupSubTable):
+
+    formats = {1, 2, 3}
+
+    @property
+    def has_initial_coverage(self):
+        return self.format != 3
+
+    def initialize(self, data):
+        pass # TODO
+
+    def all_substitutions(self, glyph_ids):
+        # This table only defined substitution in terms of other tables
+        return set()
+
+
+class ChainingContextualSubstitution(UnknownLookupSubTable):
+
+    formats = {1, 2, 3}
+
+    @property
+    def has_initial_coverage(self):
+        return self.format != 3
+
+    def initialize(self, data):
+        pass # TODO
+
+    def all_substitutions(self, glyph_ids):
+        # This table only defined substitution in terms of other tables
+        return set()
+
+class ReverseChainSingleSubstitution(UnknownLookupSubTable):
+    'Lookup type 8: maps each covered glyph to one substitute (context is ignored)'
+    formats = {1}
+
+    def initialize(self, data):
+        backtrack_count = data.unpack('H')
+        backtrack_offsets = data.unpack('%dH'%backtrack_count, single_special=False)
+        lookahead_count = data.unpack('H')
+        lookahead_offsets = data.unpack('%dH'%lookahead_count, single_special=False)
+        backtrack_offsets = [data.start_pos + x for x in backtrack_offsets]
+        lookahead_offsets = [data.start_pos + x for x in lookahead_offsets]
+        backtrack_offsets, lookahead_offsets # TODO: Use these
+        count = data.unpack('H')
+        # single_special=False keeps this a tuple even when count is 1, otherwise
+        # unpack() returns a bare int that all_substitutions() cannot index
+        self.substitutes = data.unpack('%dH'%count, single_special=False)
+
+    def all_substitutions(self, glyph_ids):
+        gid_index_map = self.coverage.coverage_indices(glyph_ids)
+        return {self.substitutes[i] for i in gid_index_map.itervalues()}
+
+subtable_map = {
+        1: SingleSubstitution,
+        2: MultipleSubstitution,
+        3: AlternateSubstitution,
+        4: LigatureSubstitution,
+        5: ContexttualSubstitution,
+        6: ChainingContextualSubstitution,
+        8: ReverseChainSingleSubstitution,
+}
+
+class GSUBLookupTable(LookupTable):
+
+    def set_child_class(self):
+        if self.lookup_type == 7:
+            self.child_class = partial(ExtensionSubstitution,
+                    subtable_map=subtable_map)
+        else:
+            self.child_class = subtable_map[self.lookup_type]
+
+class LookupListTable(SimpleListTable):
+
+    child_class = GSUBLookupTable
+
+class GSUBTable(UnknownTable):
+
+    version = FixedProperty('_version')
+
+    def decompile(self):
+        (self._version, self.scriptlist_offset, self.featurelist_offset,
+                self.lookuplist_offset) = unpack_from(b'>L3H', self.raw)
+        if self._version != 0x10000:
+            raise UnsupportedFont('The GSUB table has unknown version: 0x%x'%
+                    self._version)
+
+        self.script_list_table = ScriptListTable(self.raw,
+                self.scriptlist_offset)
+        # self.script_list_table.dump()
+
+        self.feature_list_table = FeatureListTable(self.raw,
+                self.featurelist_offset)
+        # self.feature_list_table.dump()
+
+        self.lookup_list_table = LookupListTable(self.raw,
+                self.lookuplist_offset)
+
+    def all_substitutions(self, glyph_ids):
+        ans = set()
+        glyph_ids = frozenset(glyph_ids)
+        for lookup_table in self.lookup_list_table:
+            for subtable in lookup_table:
+                gids = subtable.all_substitutions(glyph_ids)
+                ans |= gids
+        return ans
+
diff --git a/src/calibre/utils/fonts/sfnt/subset.py b/src/calibre/utils/fonts/sfnt/subset.py
index 829963700e..1b4a351dab 100644
--- a/src/calibre/utils/fonts/sfnt/subset.py
+++ b/src/calibre/utils/fonts/sfnt/subset.py
@@ -7,16 +7,18 @@ __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
+import traceback
 from collections import OrderedDict
 from operator import itemgetter
+from functools import partial
 
 from calibre.utils.fonts.sfnt.container import Sfnt
 from calibre.utils.fonts.sfnt.errors import UnsupportedFont, NoGlyphs
 
 # TrueType outlines {{{
 
-def resolve_glyphs(loca, glyf, character_map):
-    unresolved_glyphs = set(character_map.itervalues())
+def resolve_glyphs(loca, glyf, character_map, extra_glyphs):
+    unresolved_glyphs = set(character_map.itervalues()) | extra_glyphs
     unresolved_glyphs.add(0) # We always want the .notdef glyph
     resolved_glyphs = {}
 
@@ -26,11 +28,7 @@ def resolve_glyphs(loca, glyf, character_map):
             offset, length = loca.glyph_location(glyph_id)
         except (IndexError, ValueError, KeyError, TypeError):
             continue
-        if length < 1:
-            continue
         glyph = glyf.glyph_data(offset, length)
-        if len(glyph) == 0:
-            continue
         resolved_glyphs[glyph_id] = glyph
         for gid in glyph.glyph_indices:
             if gid not in resolved_glyphs:
@@ -38,7 +36,7 @@ def resolve_glyphs(loca, glyf, character_map):
 
     return OrderedDict(sorted(resolved_glyphs.iteritems(), key=itemgetter(0)))
 
-def subset_truetype(sfnt, character_map):
+def subset_truetype(sfnt, character_map, extra_glyphs):
     loca = sfnt[b'loca']
     glyf = sfnt[b'glyf']
 
@@ -48,7 +46,7 @@ def subset_truetype(sfnt, character_map):
         raise UnsupportedFont('This font does not contain head and/or maxp tables')
     loca.load_offsets(head, maxp)
 
-    resolved_glyphs = resolve_glyphs(loca, glyf, character_map)
+    resolved_glyphs = resolve_glyphs(loca, glyf, character_map, extra_glyphs)
     if not resolved_glyphs or set(resolved_glyphs) == {0}:
         raise NoGlyphs('This font has no glyphs for the specified character '
                 'set, subsetting it is pointless')
@@ -66,26 +64,33 @@ def subset_truetype(sfnt, character_map):
 
 # }}}
 
-def subset_postscript(sfnt, character_map):
+def subset_postscript(sfnt, character_map, extra_glyphs):
     cff = sfnt[b'CFF ']
     cff.decompile()
-    cff.subset(character_map)
+    cff.subset(character_map, extra_glyphs)
 
-def subset(raw, individual_chars, ranges=()):
-    chars = list(map(ord, individual_chars))
+def do_warn(warnings, *args):
+    for arg in args:
+        for line in arg.splitlines():
+            if warnings is None:
+                print(line)
+            else:
+                warnings.append(line)
+    if warnings is None:
+        print()
+    else:
+        warnings.append('')
+
+def subset(raw, individual_chars, ranges=(), warnings=None):
+    warn = partial(do_warn, warnings)
+
+    chars = set(map(ord, individual_chars))
     for r in ranges:
-        chars += list(xrange(ord(r[0]), ord(r[1])+1))
+        chars |= set(xrange(ord(r[0]), ord(r[1])+1))
 
-    # Hack pending parsing of the GSUB table, manually add in a few common
-    # ligatures
-    ligatures = {'AE':'Æ', 'ae':'æ', 'OE':'Œ', 'IJ':'Ĳ', 'ij':'ĳ', 'ue':'ᵫ',
-            'ff':'ﬀ', 'fi':'ﬁ', 'fl':'ﬂ', 'ffi':'ﬃ', 'ffl':'ﬄ', 'st':'ﬆ'}
-    all_chars = set(chars)
-    for ichars, lig in ligatures.iteritems():
-        ichars = frozenset(map(ord, ichars))
-        if ichars.issubset(all_chars) and ord(lig) not in all_chars:
-            all_chars.add(ord(lig))
-            chars.append(ord(lig))
+    # Always add the space character for ease of use from the command line
+    if ord(' ') not in chars:
+        chars.add(ord(' '))
 
     sfnt = Sfnt(raw)
     old_sizes = sfnt.sizes()
@@ -113,12 +118,26 @@ def subset(raw, individual_chars, ranges=()):
     # Get mapping of chars to glyph ids for all specified chars
     character_map = cmap.get_character_map(chars)
 
+    extra_glyphs = set()
+
+    if b'GSUB' in sfnt:
+        # Parse all substitution rules to ensure that glyphs that can be
+        # substituted for the specified set of glyphs are not removed
+        gsub = sfnt[b'GSUB']
+        try:
+            gsub.decompile()
+            extra_glyphs = gsub.all_substitutions(character_map.itervalues())
+        except UnsupportedFont as e:
+            warn('Usupported GSUB table: %s'%e)
+        except Exception as e:
+            warn('Failed to decompile GSUB table:', traceback.format_exc())
+
     if b'loca' in sfnt and b'glyf' in sfnt:
         # TrueType Outlines
-        subset_truetype(sfnt, character_map)
+        subset_truetype(sfnt, character_map, extra_glyphs)
     elif b'CFF ' in sfnt:
         # PostScript Outlines
-        subset_postscript(sfnt, character_map)
+        subset_postscript(sfnt, character_map, extra_glyphs)
     else:
         raise UnsupportedFont('This font does not contain TrueType '
                 'or PostScript outlines')
@@ -130,11 +149,10 @@ def subset(raw, individual_chars, ranges=()):
         try:
             sfnt[b'kern'].restrict_to_glyphs(frozenset(character_map.itervalues()))
         except UnsupportedFont as e:
-            print ('Subsetting of kern table failed, ignoring: %s'%e)
+            warn('kern table unsupported, ignoring: %s'%e)
         except Exception as e:
-            print ('Subsetting of kern table failed, ignoring')
-            import traceback
-            traceback.print_exc()
+            warn('Subsetting of kern table failed, ignoring:',
+                    traceback.format_exc())
 
     raw, new_sizes = sfnt()
     return raw, old_sizes, new_sizes
@@ -264,14 +282,20 @@ def all():
     from calibre.utils.fonts.scanner import font_scanner
     failed = []
     unsupported = []
+    warnings = {}
     total = 0
+    averages = []
     for family in font_scanner.find_font_families():
         for font in font_scanner.fonts_for_family(family):
             raw = font_scanner.get_font_data(font)
             print ('Subsetting', font['full_name'], end='\t')
             total += 1
             try:
-                sf, old_stats, new_stats = subset(raw, set(('a', 'b', 'c')), ())
+                w = []
+                sf, old_stats, new_stats = subset(raw, set(('a', 'b', 'c')),
+                        (), w)
+                if w:
+                    warnings[font['full_name'] + ' (%s)'%font['path']] = w
             except NoGlyphs:
                 print('No glyphs!')
                 continue
@@ -283,22 +307,29 @@ def all():
                 print ('Failed!')
                 failed.append((font['full_name'], font['path'], unicode(e)))
             else:
-                print ('Reduced to:', '%.1f'%(
-                        sum(new_stats.itervalues())/sum(old_stats.itervalues())
-                        * 100), '%')
+                averages.append(sum(new_stats.itervalues())/sum(old_stats.itervalues())
+                        * 100)
+                print ('Reduced to:', '%.1f'%averages[-1] , '%')
     if unsupported:
         print ('\n\nUnsupported:')
         for name, path, err in unsupported:
             print (name, path, err)
             print()
+    if warnings:
+        print ('\n\nWarnings:')
+    for name, w in warnings.iteritems():
+        if w:
+            print (name)
+            print('', '\n\t'.join(w), sep='\t')
     if failed:
         print ('\n\nFailures:')
         for name, path, err in failed:
             print (name, path, err)
             print()
 
+    print ('Average reduction to: %.1f%%'%( sum(averages)/len(averages)))
     print('Total:', total, 'Unsupported:', len(unsupported), 'Failed:',
-            len(failed))
+            len(failed), 'Warnings:', len(warnings))
 
 
 # }}}