Font subsetting: Add support for WOFF format fonts and CID keyed fonts. Also further reduce font file sizes when subsetting.

This comes courtesy of switching to fonttools as the subsetting engine.
fonttools now supports adding glyphs from GSUB/GPOS tables, so it should
be much more robust than it was the last time I looked into it and
decided to write my own subsetting code instead.
This commit is contained in:
Kovid Goyal 2023-04-23 20:35:33 +05:30
parent ee553442b7
commit 81c9c9c112
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 31 additions and 29 deletions

View File

@ -5,15 +5,16 @@ __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, sys import os
import sys
from io import BytesIO
from calibre import prints, as_unicode from calibre import as_unicode, prints
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XPath, css_text from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, XPath, css_text
from calibre.ebooks.oeb.polish.utils import guess_type, OEB_FONTS from calibre.ebooks.oeb.polish.utils import OEB_FONTS
from calibre.utils.fonts.sfnt.subset import subset from calibre.utils.fonts.subset import subset
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
from calibre.utils.fonts.utils import get_font_names from calibre.utils.fonts.utils import get_font_names
from polyglot.builtins import iteritems, itervalues from polyglot.builtins import iteritems
def remove_font_face_rules(container, sheet, remove_names, base): def remove_font_face_rules(container, sheet, remove_names, base):
@ -33,9 +34,8 @@ def remove_font_face_rules(container, sheet, remove_names, base):
def iter_subsettable_fonts(container): def iter_subsettable_fonts(container):
woff_font_types = guess_type('a.woff'), guess_type('a.woff2')
for name, mt in iteritems(container.mime_map): for name, mt in iteritems(container.mime_map):
if (mt in OEB_FONTS or name.rpartition('.')[-1].lower() in {'otf', 'ttf'}) and mt not in woff_font_types: if (mt in OEB_FONTS or name.rpartition('.')[-1].lower() in {'otf', 'ttf'}):
yield name, mt yield name, mt
@ -63,20 +63,22 @@ def subset_all_fonts(container, font_stats, report):
continue continue
warnings = [] warnings = []
report('Subsetting font: %s'%(font_name or name)) report('Subsetting font: %s'%(font_name or name))
font_type = os.path.splitext(name)[1][1:].lower()
output = BytesIO()
try: try:
nraw, old_sizes, new_sizes = subset(raw, chars, warnings = subset(BytesIO(raw), output, font_type, chars)
warnings=warnings) except Exception as e:
except UnsupportedFont as e:
report( report(
'Unsupported font: %s, ignoring. Error: %s'%( 'Unsupported font: %s, ignoring. Error: %s'%(
name, as_unicode(e))) name, as_unicode(e)))
continue continue
nraw = output.getvalue()
total_old += font_size total_old += font_size
for w in warnings: for w in warnings:
report(w) report(w)
olen = sum(itervalues(old_sizes)) olen = len(raw)
nlen = sum(itervalues(new_sizes)) nlen = len(nraw)
total_new += len(nraw) total_new += len(nraw)
if nlen == olen: if nlen == olen:
report(_('The font %s was already subset')%font_name) report(_('The font %s was already subset')%font_name)

View File

@ -5,11 +5,13 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os
from collections import defaultdict from collections import defaultdict
from io import BytesIO
from calibre.ebooks.oeb.base import urlnormalize, css_text from calibre.ebooks.oeb.base import css_text, urlnormalize
from calibre.utils.fonts.sfnt.subset import subset, NoGlyphs, UnsupportedFont from calibre.utils.fonts.subset import subset
from polyglot.builtins import iteritems, itervalues from polyglot.builtins import iteritems
from tinycss.fonts3 import parse_font_family from tinycss.fonts3 import parse_font_family
@ -150,27 +152,25 @@ class SubsetFonts:
else: else:
fonts[item.href] = font fonts[item.href] = font
for font in itervalues(fonts): for font in fonts.values():
if not font['chars']: if not font['chars']:
self.log('The font %s is unused. Removing it.'%font['src']) self.log('The font %s is unused. Removing it.'%font['src'])
remove(font) remove(font)
continue continue
old_raw = font['item'].data
output = BytesIO()
font_type = os.path.splitext(font['item'].href)[1][1:].lower()
try: try:
raw, old_stats, new_stats = subset(font['item'].data, font['chars']) subset(BytesIO(old_raw), output, font_type, font['chars'])
except NoGlyphs: except Exception as e:
self.log('The font %s has no used glyphs. Removing it.'%font['src']) self.log.warn('The font %s is unsupported for subsetting. %s'%(font['src'], e))
remove(font)
continue
except UnsupportedFont as e:
self.log.warn('The font %s is unsupported for subsetting. %s'%(
font['src'], e))
sz = len(font['item'].data) sz = len(font['item'].data)
totals[0] += sz totals[0] += sz
totals[1] += sz totals[1] += sz
else: else:
font['item'].data = raw font['item'].data = output.getvalue()
nlen = sum(itervalues(new_stats)) nlen = len(font['item'].data)
olen = sum(itervalues(old_stats)) olen = len(old_raw)
self.log('Decreased the font %s to %.1f%% of its original size'% self.log('Decreased the font %s to %.1f%% of its original size'%
(font['src'], nlen/olen *100)) (font['src'], nlen/olen *100))
totals[0] += nlen totals[0] += nlen