Font subsetting: Add support for WOFF format fonts and CID-keyed fonts. Also further reduce font file sizes when subsetting.

This comes courtesy of switching to fonttools as the subsetting engine.
fonttools now supports adding glyphs from GSUB/GPOS tables, so it should
be much more robust than the last time I looked into it and decided to
write my own subsetting code instead.
This commit is contained in:
Kovid Goyal 2023-04-23 20:35:33 +05:30
parent ee553442b7
commit 81c9c9c112
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 31 additions and 29 deletions

View File

@@ -5,15 +5,16 @@ __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, sys
import os
import sys
from io import BytesIO
from calibre import prints, as_unicode
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XPath, css_text
from calibre.ebooks.oeb.polish.utils import guess_type, OEB_FONTS
from calibre.utils.fonts.sfnt.subset import subset
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
from calibre import as_unicode, prints
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, XPath, css_text
from calibre.ebooks.oeb.polish.utils import OEB_FONTS
from calibre.utils.fonts.subset import subset
from calibre.utils.fonts.utils import get_font_names
from polyglot.builtins import iteritems, itervalues
from polyglot.builtins import iteritems
def remove_font_face_rules(container, sheet, remove_names, base):
@@ -33,9 +34,8 @@ def remove_font_face_rules(container, sheet, remove_names, base):
def iter_subsettable_fonts(container):
woff_font_types = guess_type('a.woff'), guess_type('a.woff2')
for name, mt in iteritems(container.mime_map):
if (mt in OEB_FONTS or name.rpartition('.')[-1].lower() in {'otf', 'ttf'}) and mt not in woff_font_types:
if (mt in OEB_FONTS or name.rpartition('.')[-1].lower() in {'otf', 'ttf'}):
yield name, mt
@@ -63,20 +63,22 @@ def subset_all_fonts(container, font_stats, report):
continue
warnings = []
report('Subsetting font: %s'%(font_name or name))
font_type = os.path.splitext(name)[1][1:].lower()
output = BytesIO()
try:
nraw, old_sizes, new_sizes = subset(raw, chars,
warnings=warnings)
except UnsupportedFont as e:
warnings = subset(BytesIO(raw), output, font_type, chars)
except Exception as e:
report(
'Unsupported font: %s, ignoring. Error: %s'%(
name, as_unicode(e)))
continue
nraw = output.getvalue()
total_old += font_size
for w in warnings:
report(w)
olen = sum(itervalues(old_sizes))
nlen = sum(itervalues(new_sizes))
olen = len(raw)
nlen = len(nraw)
total_new += len(nraw)
if nlen == olen:
report(_('The font %s was already subset')%font_name)

View File

@@ -5,11 +5,13 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from collections import defaultdict
from io import BytesIO
from calibre.ebooks.oeb.base import urlnormalize, css_text
from calibre.utils.fonts.sfnt.subset import subset, NoGlyphs, UnsupportedFont
from polyglot.builtins import iteritems, itervalues
from calibre.ebooks.oeb.base import css_text, urlnormalize
from calibre.utils.fonts.subset import subset
from polyglot.builtins import iteritems
from tinycss.fonts3 import parse_font_family
@@ -150,27 +152,25 @@ class SubsetFonts:
else:
fonts[item.href] = font
for font in itervalues(fonts):
for font in fonts.values():
if not font['chars']:
self.log('The font %s is unused. Removing it.'%font['src'])
remove(font)
continue
old_raw = font['item'].data
output = BytesIO()
font_type = os.path.splitext(font['item'].href)[1][1:].lower()
try:
raw, old_stats, new_stats = subset(font['item'].data, font['chars'])
except NoGlyphs:
self.log('The font %s has no used glyphs. Removing it.'%font['src'])
remove(font)
continue
except UnsupportedFont as e:
self.log.warn('The font %s is unsupported for subsetting. %s'%(
font['src'], e))
subset(BytesIO(old_raw), output, font_type, font['chars'])
except Exception as e:
self.log.warn('The font %s is unsupported for subsetting. %s'%(font['src'], e))
sz = len(font['item'].data)
totals[0] += sz
totals[1] += sz
else:
font['item'].data = raw
nlen = sum(itervalues(new_stats))
olen = sum(itervalues(old_stats))
font['item'].data = output.getvalue()
nlen = len(font['item'].data)
olen = len(old_raw)
self.log('Decreased the font %s to %.1f%% of its original size'%
(font['src'], nlen/olen *100))
totals[0] += nlen