PDF Output: Implement font subsetting

2025-07-09 03:04:10 -04:00 · 2012-12-20 21:11:01 +05:30 · 2012-12-20 21:11:01 +05:30 · 8436bab0b6
commit 8436bab0b6
parent 5504db04a9
4 changed files with 24 additions and 5 deletions
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -1125,7 +1125,7 @@ OptionRecommendation(name='search_replace',
        RemoveFakeMargins()(self.oeb, self.log, self.opts)
        RemoveAdobeMargins()(self.oeb, self.log, self.opts)

-        if self.opts.subset_embedded_fonts:
+        if self.opts.subset_embedded_fonts and self.output_plugin.file_type != 'pdf':
            from calibre.ebooks.oeb.transforms.subset import SubsetFonts
            SubsetFonts()(self.oeb, self.log, self.opts)

--- a/src/calibre/ebooks/pdf/render/fonts.py
+++ b/src/calibre/ebooks/pdf/render/fonts.py
@ -15,6 +15,7 @@ from future_builtins import map

 from calibre.ebooks.pdf.render.common import (Array, String, Stream,
    Dictionary, Name)
+from calibre.utils.fonts.sfnt.subset import pdf_subset

 STANDARD_FONTS = {
    'Times-Roman', 'Helvetica', 'Courier', 'Symbol', 'Times-Bold',
@ -152,12 +153,13 @@ class Font(object):
        # TODO: Subsetting and OpenType
        self.font_descriptor['FontFile2'] = objects.add(self.font_stream)
        self.write_widths(objects)
-        self.write_to_unicode(objects)
+        glyph_map = self.metrics.sfnt['cmap'].get_char_codes(self.used_glyphs)
+        self.write_to_unicode(objects, glyph_map)
+        pdf_subset(self.metrics.sfnt, set(glyph_map))
        self.metrics.os2.zero_fstype()
        self.metrics.sfnt(self.font_stream)

-    def write_to_unicode(self, objects):
-        glyph_map = self.metrics.sfnt['cmap'].get_char_codes(self.used_glyphs)
+    def write_to_unicode(self, objects, glyph_map):
        glyph_map = {k:unicodedata.normalize('NFKC', unichr(v)) for k, v in
                     glyph_map.iteritems()}
        cmap = CMap(self.metrics.postscript_name, glyph_map, compress=self.compress)
--- a/src/calibre/utils/fonts/sfnt/cff/table.py
+++ b/src/calibre/utils/fonts/sfnt/cff/table.py
@ -194,7 +194,7 @@ class CFFTable(UnknownTable):
                glyph_id in character_map.iteritems()}
        charset = set(charset_map.itervalues())
        charset.discard(None)
-        if not charset:
+        if not charset and character_map:
            raise NoGlyphs('This font has no glyphs for the specified characters')
        charset |= {
            self.cff.charset.safe_lookup(glyph_id) for glyph_id in extra_glyphs}
--- a/src/calibre/utils/fonts/sfnt/subset.py
+++ b/src/calibre/utils/fonts/sfnt/subset.py
@ -81,6 +81,23 @@ def do_warn(warnings, *args):
    else:
        warnings.append('')

+def pdf_subset(sfnt, glyphs):
+    ''' Drop the tables PDF rendering does not use from sfnt, then subset its
+    outlines to glyphs. Raises UnsupportedFont if it has no glyf/CFF outlines. '''
+    for tag in tuple(sfnt.tables):  # snapshot, since tables are deleted below
+        # sfnt tags are four bytes, space padded: b'cvt ', just like b'CFF '
+        if tag not in {b'hhea', b'head', b'hmtx', b'maxp', b'name',
+                       b'OS/2', b'post', b'cvt ', b'fpgm', b'glyf', b'loca',
+                       b'prep', b'CFF ', b'VORG'}:
+            del sfnt[tag]  # non-core table, unused in PDF rendering
+    if b'loca' in sfnt and b'glyf' in sfnt:  # TrueType outlines
+        subset_truetype(sfnt, {}, glyphs)
+    elif b'CFF ' in sfnt:  # PostScript outlines
+        subset_postscript(sfnt, {}, glyphs)
+    else:
+        raise UnsupportedFont('This font does not contain TrueType '
+                'or PostScript outlines')
+
 def subset(raw, individual_chars, ranges=(), warnings=None):
    warn = partial(do_warn, warnings)