PDF Output: Fix incorrect rendering when the same font is used at multiple sizes in the document

Chromium produces width arrays in the font descriptors that contain different values for the same glyph in different font subsets, so rather than merging the arrays, use the actual width values from the font. As per the PDF spec the values in these arrays must match the underlying font anyway.
2025-06-23 15:30:45 -04:00 · 2019-09-07 12:50:59 +05:30 · 2019-09-07 12:50:59 +05:30 · f10e278d74
commit f10e278d74
parent 7366d8f57a
5 changed files with 126 additions and 35 deletions
--- a/src/calibre/ebooks/pdf/html_writer.py
+++ b/src/calibre/ebooks/pdf/html_writer.py
@ -657,6 +657,22 @@ class Range(object):
        return len(self.widths) == 1
 def all_glyph_ids_in_w_arrays(arrays):
    """Return the sorted list of every glyph id mentioned in the given W arrays.

    Each array is walked as a flat sequence of entries in one of two forms:

    * ``first [w1 w2 ... wn]`` -- covers ids ``first`` .. ``first + n - 1``
    * ``first last w``         -- covers ids ``first`` .. ``last`` inclusive

    Only the ids are collected; the width values themselves are ignored.
    A trailing element that has no successor is skipped.
    """
    glyph_ids = set()
    for w_array in arrays:
        pos, limit = 0, len(w_array) - 1
        while pos < limit:
            first, second = w_array[pos], w_array[pos + 1]
            if isinstance(second, list):
                # list form: one width per consecutive id, entry is 2 items long
                stop, step = first + len(second), 2
            else:
                # range form: inclusive upper bound, entry is 3 items long
                stop, step = second + 1, 3
            glyph_ids.update(range(first, stop))
            pos += step
    return sorted(glyph_ids)
 def merge_w_arrays(arrays):
    ranges = []
    for w in arrays:
@ -822,10 +838,18 @@ def merge_font(fonts):
    cmaps = list(filter(None, (f['ToUnicode'] for f in t0_fonts)))
    if cmaps:
        t0_font['ToUnicode'] = as_bytes(merge_cmaps(cmaps))
-    for key in ('W', 'W2'):
+    base_font['sfnt'], width_for_glyph_id, height_for_glyph_id = merge_truetype_fonts_for_pdf(*(f['sfnt'] for f in descendant_fonts))
-        arrays = tuple(filter(None, (f[key] for f in descendant_fonts)))
+    widths = []
-        base_font[key] = merge_w_arrays(arrays)
+    arrays = tuple(filter(None, (f['W'] for f in descendant_fonts)))
-    base_font['sfnt'] = merge_truetype_fonts_for_pdf(*(f['sfnt'] for f in descendant_fonts))
+    if arrays:
        for gid in all_glyph_ids_in_w_arrays(arrays):
            widths.append(gid), widths.append(gid), widths.append(1000*width_for_glyph_id(gid))
        base_font['W'] = merge_w_arrays((widths,))
    arrays = tuple(filter(None, (f['W2'] for f in descendant_fonts)))
    if arrays:
        for gid in all_glyph_ids_in_w_arrays(arrays):
            widths.append(gid), widths.append(gid), widths.append(1000*height_for_glyph_id(gid))
        base_font['W2'] = merge_w_arrays((widths,))
    return t0_font, base_font, references_to_drop
--- a/src/calibre/utils/fonts/sfnt/container.py
+++ b/src/calibre/utils/fonts/sfnt/container.py
@ -1,29 +1,25 @@
 #!/usr/bin/env python2
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
 # License: GPLv3 Copyright: 2012, Kovid Goyal <kovid at kovidgoyal.net>
 from __future__ import absolute_import, division, print_function, unicode_literals
 __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 from struct import pack, calcsize
 from io import BytesIO
 from collections import OrderedDict
 from io import BytesIO
 from struct import calcsize, pack
-from calibre.utils.fonts.utils import (get_tables, checksum_of_block,
+from calibre.utils.fonts.sfnt import UnknownTable, align_block, max_power_of_two
        verify_checksums)
 from calibre.utils.fonts.sfnt import align_block, UnknownTable, max_power_of_two
 from calibre.utils.fonts.sfnt.errors import UnsupportedFont
 from calibre.utils.fonts.sfnt.head import (HeadTable, HorizontalHeader,
                                           OS2Table, PostTable)
 from calibre.utils.fonts.sfnt.maxp import MaxpTable
 from calibre.utils.fonts.sfnt.loca import LocaTable
 from calibre.utils.fonts.sfnt.glyf import GlyfTable
 from calibre.utils.fonts.sfnt.cmap import CmapTable
 from calibre.utils.fonts.sfnt.kern import KernTable
 from calibre.utils.fonts.sfnt.gsub import GSUBTable
 from calibre.utils.fonts.sfnt.cff.table import CFFTable
 from calibre.utils.fonts.sfnt.cmap import CmapTable
 from calibre.utils.fonts.sfnt.errors import UnsupportedFont
 from calibre.utils.fonts.sfnt.glyf import GlyfTable
 from calibre.utils.fonts.sfnt.gsub import GSUBTable
 from calibre.utils.fonts.sfnt.head import (
    HeadTable, HorizontalHeader, OS2Table, PostTable
 )
 from calibre.utils.fonts.sfnt.kern import KernTable
 from calibre.utils.fonts.sfnt.loca import LocaTable
 from calibre.utils.fonts.sfnt.maxp import MaxpTable
 from calibre.utils.fonts.utils import checksum_of_block, get_tables, verify_checksums
 # OpenType spec: http://www.microsoft.com/typography/otspec/otff.htm
@ -100,6 +96,9 @@ class Sfnt(object):
    def pop(self, key, default=None):
        """Remove ``key`` from ``self.tables`` and return its value.

        ``default`` is returned instead when ``key`` is not present.
        """
        tables = self.tables
        return tables.pop(key, default)
    def get(self, key, default=None):
        """Look up ``key`` in ``self.tables`` without raising.

        ``default`` is returned when ``key`` is not present; ``self.tables``
        is left unmodified.
        """
        tables = self.tables
        return tables.get(key, default)
    def sizes(self):
        ans = OrderedDict()
        for tag in self:
--- a/src/calibre/utils/fonts/sfnt/head.py
+++ b/src/calibre/utils/fonts/sfnt/head.py
@ -10,6 +10,7 @@ from struct import unpack_from, pack, calcsize
 from calibre.utils.fonts.sfnt import UnknownTable, DateTimeProperty, FixedProperty
 from calibre.utils.fonts.sfnt.errors import UnsupportedFont
 from calibre.utils.fonts.sfnt.loca import read_array
 from polyglot.builtins import zip
@ -67,7 +68,7 @@ class HorizontalHeader(UnknownTable):
            'descender', 'h',
            'line_gap', 'h',
            'advance_width_max', 'H',
-            'min_left_size_bearing', 'h',
+            'min_left_side_bearing', 'h',
            'min_right_side_bearing', 'h',
            'x_max_extent', 'h',
            'caret_slope_rise', 'h',
@ -92,12 +93,55 @@ class HorizontalHeader(UnknownTable):
        if len(raw) < 4*num:
            raise UnsupportedFont('The hmtx table has insufficient data')
        long_hor_metric = raw[:4*num]
-        fmt = '>%dH'%(2*num)
+        a = read_array(long_hor_metric)
-        entries = unpack_from(fmt.encode('ascii'), long_hor_metric)
+        self.advance_widths = a[0::2]
-        self.advance_widths = entries[0::2]
+        a = read_array(long_hor_metric, 'h')
-        fmt = '>%dh'%(2*num)
+        self.left_side_bearings = a[1::2]
-        entries = unpack_from(fmt.encode('ascii'), long_hor_metric)
+
-        self.left_side_bearings = entries[1::2]
+
 class VericalHeader(UnknownTable):
    '''
    Vertical header table, parsed together with its companion ``vmtx``
    metrics table by :meth:`read_data`.

    After :meth:`read_data` has run, the instance carries one attribute per
    header field plus ``advance_heights`` and ``top_side_bearings``.
    '''

    version_number = FixedProperty('_version_number')

    def read_data(self, vmtx):
        '''
        Unpack the header fields from ``self.raw`` and the per-glyph vertical
        metrics from ``vmtx.raw``.

        Calling this again on an already parsed instance is a no-op.

        :param vmtx: table object whose ``raw`` attribute holds the vmtx data
        :raises UnsupportedFont: if ``vmtx.raw`` is shorter than the
            ``number_of_v_metrics`` four-byte records the header announces
        '''
        if hasattr(self, 'ascender'):
            # Header fields are already set, nothing to do
            return
        # Flat sequence of (attribute name, struct type code) pairs, in the
        # order the fields are unpacked from self.raw.
        # NOTE(review): 'caret_slop_run' looks like a typo for
        # 'caret_slope_run'; it is left alone because it is the attribute
        # name set on the instance.
        field_types = (
            '_version_number' , 'l',
            'ascender', 'h',
            'descender', 'h',
            'line_gap', 'h',
            'advance_height_max', 'H',
            'min_top_side_bearing', 'h',
            'min_bottom_side_bearing', 'h',
            'y_max_extent', 'h',
            'caret_slope_rise', 'h',
            'caret_slop_run', 'h',
            'caret_offset', 'h',
            'r1', 'h',
            'r2', 'h',
            'r3', 'h',
            'r4', 'h',
            'metric_data_format', 'h',
            'number_of_v_metrics', 'H',
        )
        self._fmt = ('>%s'%(''.join(field_types[1::2]))).encode('ascii')
        self._fields = field_types[0::2]
        for f, val in zip(self._fields, unpack_from(self._fmt, self.raw)):
            setattr(self, f, val)

        raw = vmtx.raw
        # The count field parsed above is number_of_v_metrics; the horizontal
        # name number_of_h_metrics is never set on this class.
        num = self.number_of_v_metrics
        if len(raw) < 4*num:
            raise UnsupportedFont('The vmtx table has insufficient data')
        # Each record is 4 bytes: a 16-bit advance height followed by a
        # 16-bit top side bearing.
        long_ver_metric = raw[:4*num]
        # Read the records as unsigned 16-bit values and keep the even slots
        a = read_array(long_ver_metric)
        self.advance_heights = a[0::2]
        # Read the same bytes as signed 16-bit values and keep the odd slots
        a = read_array(long_ver_metric, 'h')
        self.top_side_bearings = a[1::2]
 class OS2Table(UnknownTable):
--- a/src/calibre/utils/fonts/sfnt/loca.py
+++ b/src/calibre/utils/fonts/sfnt/loca.py
@ -21,14 +21,18 @@ def four_byte_type_code():
            return c
 def read_array(data, fmt='H'):
    """Build an ``array.array`` of type code ``fmt`` from the raw bytes ``data``.

    The bytes are interpreted as big-endian values: on hosts whose native
    byte order is not big-endian the items are byte-swapped after loading.
    """
    values = array.array(fmt, data)
    if sys.byteorder == 'big':
        # Native order already matches the data, nothing to swap
        return values
    values.byteswap()
    return values
 class LocaTable(UnknownTable):
    def load_offsets(self, head_table, maxp_table):
        fmt = 'H' if head_table.index_to_loc_format == 0 else four_byte_type_code()
-        locs = array.array(fmt)
+        locs = read_array(self.raw, fmt)
        locs.fromstring(self.raw)
        if sys.byteorder != "big":
            locs.byteswap()
        self.offset_map = locs.tolist()
        if fmt == 'H':
            self.offset_map = [2*i for i in self.offset_map]
--- a/src/calibre/utils/fonts/sfnt/merge.py
+++ b/src/calibre/utils/fonts/sfnt/merge.py
@ -30,6 +30,26 @@ def merge_truetype_fonts_for_pdf(*fonts):
    head = ans[b'head']
    loca = ans[b'loca']
    maxp = ans[b'maxp']
    advance_widths = advance_heights = (0,)
    hhea = ans.get(b'hhea')
    if hhea is not None:
        hhea.read_data(ans[b'hmtx'])
        advance_widths = tuple(x/head.units_per_em for x in hhea.advance_widths)
    vhea = ans.get(b'vhea')
    if vhea is not None:
        vhea.read_data(ans[b'vmtx'])
        advance_heights = tuple(x/head.units_per_em for x in hhea.advance_heights)
    def width_for_glyph_id(gid):
        """Return the entry of ``advance_widths`` for glyph id ``gid``.

        Glyph ids at or beyond the end of ``advance_widths`` get the last
        entry instead of raising.
        """
        # presumably mirrors the font rule that trailing glyphs share the
        # final advance width -- TODO confirm
        index = -1 if gid >= len(advance_widths) else gid
        return advance_widths[index]
    def height_for_glyph_id(gid):
        """Return the entry of ``advance_heights`` for glyph id ``gid``.

        Glyph ids at or beyond the end of ``advance_heights`` get the last
        entry instead of raising.
        """
        # Bounds-check against the table that is actually indexed. Checking
        # len(advance_widths) here raised IndexError or clamped valid ids
        # whenever the two tables had different lengths.
        # NOTE(review): the enclosing function appears to fill
        # advance_heights from hhea.advance_heights rather than vhea --
        # confirm that is intended.
        if gid >= len(advance_heights):
            gid = -1
        return advance_heights[gid]
    gmap = OrderedDict()
    for glyph_id in sorted(all_glyphs):
        gmap[glyph_id] = partial(all_glyphs.__getitem__, glyph_id)
@ -39,4 +59,4 @@ def merge_truetype_fonts_for_pdf(*fonts):
    head.update()
    maxp.num_glyphs = len(loca.offset_map) - 1
    maxp.update()
-    return ans
+    return ans, width_for_glyph_id, height_for_glyph_id