Font subsetting: Add support for WOFF format fonts and CID-keyed fonts. Also further reduce font file sizes when subsetting.

This comes courtesy of switching to fonttools as the subsetting engine. fonttools now supports adding glyphs from GSUB/GPOS tables, so it should be much more robust than it was the last time I looked into it and decided to write my own subsetting code instead.
2025-11-10 00:33:28 -05:00 · 2023-04-23 20:35:33 +05:30 · 2023-04-23 20:35:33 +05:30 · 81c9c9c112
commit 81c9c9c112
parent ee553442b7
2 changed files with 31 additions and 29 deletions
--- a/src/calibre/ebooks/oeb/polish/subset.py
+++ b/src/calibre/ebooks/oeb/polish/subset.py
@ -5,15 +5,16 @@ __license__   = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os, sys
+import os
+import sys
+from io import BytesIO

-from calibre import prints, as_unicode
-from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XPath, css_text
-from calibre.ebooks.oeb.polish.utils import guess_type, OEB_FONTS
-from calibre.utils.fonts.sfnt.subset import subset
-from calibre.utils.fonts.sfnt.errors import UnsupportedFont
+from calibre import as_unicode, prints
+from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, XPath, css_text
+from calibre.ebooks.oeb.polish.utils import OEB_FONTS
+from calibre.utils.fonts.subset import subset
 from calibre.utils.fonts.utils import get_font_names
-from polyglot.builtins import iteritems, itervalues
+from polyglot.builtins import iteritems


 def remove_font_face_rules(container, sheet, remove_names, base):
@ -33,9 +34,8 @@ def remove_font_face_rules(container, sheet, remove_names, base):


 def iter_subsettable_fonts(container):
-    woff_font_types = guess_type('a.woff'), guess_type('a.woff2')
    for name, mt in iteritems(container.mime_map):
-        if (mt in OEB_FONTS or name.rpartition('.')[-1].lower() in {'otf', 'ttf'}) and mt not in woff_font_types:
+        if (mt in OEB_FONTS or name.rpartition('.')[-1].lower() in {'otf', 'ttf'}):
            yield name, mt


@ -63,20 +63,22 @@ def subset_all_fonts(container, font_stats, report):
                continue
            warnings = []
            report('Subsetting font: %s'%(font_name or name))
+            font_type = os.path.splitext(name)[1][1:].lower()
+            output = BytesIO()
            try:
-                nraw, old_sizes, new_sizes = subset(raw, chars,
-                                                warnings=warnings)
-            except UnsupportedFont as e:
+                warnings = subset(BytesIO(raw), output, font_type, chars)
+            except Exception as e:
                report(
                    'Unsupported font: %s, ignoring. Error: %s'%(
                        name, as_unicode(e)))
                continue
+            nraw = output.getvalue()
            total_old += font_size

            for w in warnings:
                report(w)
-            olen = sum(itervalues(old_sizes))
-            nlen = sum(itervalues(new_sizes))
+            olen = len(raw)
+            nlen = len(nraw)
            total_new += len(nraw)
            if nlen == olen:
                report(_('The font %s was already subset')%font_name)
--- a/src/calibre/ebooks/oeb/transforms/subset.py
+++ b/src/calibre/ebooks/oeb/transforms/subset.py
@ -5,11 +5,13 @@ __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

+import os
 from collections import defaultdict
+from io import BytesIO

-from calibre.ebooks.oeb.base import urlnormalize, css_text
-from calibre.utils.fonts.sfnt.subset import subset, NoGlyphs, UnsupportedFont
-from polyglot.builtins import iteritems, itervalues
+from calibre.ebooks.oeb.base import css_text, urlnormalize
+from calibre.utils.fonts.subset import subset
+from polyglot.builtins import iteritems
 from tinycss.fonts3 import parse_font_family


@ -150,27 +152,25 @@ class SubsetFonts:
            else:
                fonts[item.href] = font

-        for font in itervalues(fonts):
+        for font in fonts.values():
            if not font['chars']:
                self.log('The font %s is unused. Removing it.'%font['src'])
                remove(font)
                continue
+            old_raw = font['item'].data
+            output = BytesIO()
+            font_type = os.path.splitext(font['item'].href)[1][1:].lower()
            try:
-                raw, old_stats, new_stats = subset(font['item'].data, font['chars'])
-            except NoGlyphs:
-                self.log('The font %s has no used glyphs. Removing it.'%font['src'])
-                remove(font)
-                continue
-            except UnsupportedFont as e:
-                self.log.warn('The font %s is unsupported for subsetting. %s'%(
-                    font['src'], e))
+                subset(BytesIO(old_raw), output, font_type, font['chars'])
+            except Exception as e:
+                self.log.warn('The font %s is unsupported for subsetting. %s'%(font['src'], e))
                sz = len(font['item'].data)
                totals[0] += sz
                totals[1] += sz
            else:
-                font['item'].data = raw
-                nlen = sum(itervalues(new_stats))
-                olen = sum(itervalues(old_stats))
+                font['item'].data = output.getvalue()
+                nlen = len(font['item'].data)
+                olen = len(old_raw)
                self.log('Decreased the font %s to %.1f%% of its original size'%
                        (font['src'], nlen/olen *100))
                totals[0] += nlen