Font subsetting: Add support for WOFF format fonts and CID-keyed fonts. Also further reduce font file sizes when subsetting.

This comes courtesy of switching to fonttools as the subsetting engine. It now supports adding glyphs from the GSUB/GPOS tables, so it should be much more robust than the last time I looked into fonttools and decided to write my own subsetting code instead.
2025-07-07 10:14:46 -04:00 · 2023-04-23 20:35:33 +05:30 · 2023-04-23 20:35:33 +05:30 · 81c9c9c112
commit 81c9c9c112
parent ee553442b7
2 changed files with 31 additions and 29 deletions
--- a/src/calibre/ebooks/oeb/polish/subset.py
+++ b/src/calibre/ebooks/oeb/polish/subset.py
@ -5,15 +5,16 @@ __license__   = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
-import os, sys
+import os
 import sys
 from io import BytesIO
-from calibre import prints, as_unicode
+from calibre import as_unicode, prints
-from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XPath, css_text
+from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, XPath, css_text
-from calibre.ebooks.oeb.polish.utils import guess_type, OEB_FONTS
+from calibre.ebooks.oeb.polish.utils import OEB_FONTS
-from calibre.utils.fonts.sfnt.subset import subset
+from calibre.utils.fonts.subset import subset
 from calibre.utils.fonts.sfnt.errors import UnsupportedFont
 from calibre.utils.fonts.utils import get_font_names
-from polyglot.builtins import iteritems, itervalues
+from polyglot.builtins import iteritems
 def remove_font_face_rules(container, sheet, remove_names, base):
@ -33,9 +34,8 @@ def remove_font_face_rules(container, sheet, remove_names, base):
 def iter_subsettable_fonts(container):
    woff_font_types = guess_type('a.woff'), guess_type('a.woff2')
    for name, mt in iteritems(container.mime_map):
-        if (mt in OEB_FONTS or name.rpartition('.')[-1].lower() in {'otf', 'ttf'}) and mt not in woff_font_types:
+        if (mt in OEB_FONTS or name.rpartition('.')[-1].lower() in {'otf', 'ttf'}):
            yield name, mt
@ -63,20 +63,22 @@ def subset_all_fonts(container, font_stats, report):
                continue
            warnings = []
            report('Subsetting font: %s'%(font_name or name))
            font_type = os.path.splitext(name)[1][1:].lower()
            output = BytesIO()
            try:
-                nraw, old_sizes, new_sizes = subset(raw, chars,
+                warnings = subset(BytesIO(raw), output, font_type, chars)
-                                                warnings=warnings)
+            except Exception as e:
            except UnsupportedFont as e:
                report(
                    'Unsupported font: %s, ignoring. Error: %s'%(
                        name, as_unicode(e)))
                continue
            nraw = output.getvalue()
            total_old += font_size
            for w in warnings:
                report(w)
-            olen = sum(itervalues(old_sizes))
+            olen = len(raw)
-            nlen = sum(itervalues(new_sizes))
+            nlen = len(nraw)
            total_new += len(nraw)
            if nlen == olen:
                report(_('The font %s was already subset')%font_name)
--- a/src/calibre/ebooks/oeb/transforms/subset.py
+++ b/src/calibre/ebooks/oeb/transforms/subset.py
@ -5,11 +5,13 @@ __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import os
 from collections import defaultdict
 from io import BytesIO
-from calibre.ebooks.oeb.base import urlnormalize, css_text
+from calibre.ebooks.oeb.base import css_text, urlnormalize
-from calibre.utils.fonts.sfnt.subset import subset, NoGlyphs, UnsupportedFont
+from calibre.utils.fonts.subset import subset
-from polyglot.builtins import iteritems, itervalues
+from polyglot.builtins import iteritems
 from tinycss.fonts3 import parse_font_family
@ -150,27 +152,25 @@ class SubsetFonts:
            else:
                fonts[item.href] = font
-        for font in itervalues(fonts):
+        for font in fonts.values():
            if not font['chars']:
                self.log('The font %s is unused. Removing it.'%font['src'])
                remove(font)
                continue
            old_raw = font['item'].data
            output = BytesIO()
            font_type = os.path.splitext(font['item'].href)[1][1:].lower()
            try:
-                raw, old_stats, new_stats = subset(font['item'].data, font['chars'])
+                subset(BytesIO(old_raw), output, font_type, font['chars'])
-            except NoGlyphs:
+            except Exception as e:
-                self.log('The font %s has no used glyphs. Removing it.'%font['src'])
+                self.log.warn('The font %s is unsupported for subsetting. %s'%(font['src'], e))
                remove(font)
                continue
            except UnsupportedFont as e:
                self.log.warn('The font %s is unsupported for subsetting. %s'%(
                    font['src'], e))
                sz = len(font['item'].data)
                totals[0] += sz
                totals[1] += sz
            else:
-                font['item'].data = raw
+                font['item'].data = output.getvalue()
-                nlen = sum(itervalues(new_stats))
+                nlen = len(font['item'].data)
-                olen = sum(itervalues(old_stats))
+                olen = len(old_raw)
                self.log('Decreased the font %s to %.1f%% of its original size'%
                        (font['src'], nlen/olen *100))
                totals[0] += nlen