Fix cmap table generation not handling contiguous indices

Also fix tab indentation in code imported from fonttools.
2025-07-08 02:34:06 -04:00 · 2013-09-19 12:03:12 +05:30 · 2013-09-19 12:03:12 +05:30 · 85d09338d7
commit 85d09338d7
parent 46ed78b892
1 changed files with 105 additions and 109 deletions
--- a/src/calibre/utils/fonts/sfnt/cmap.py
+++ b/src/calibre/utils/fonts/sfnt/cmap.py
@@ -17,85 +17,85 @@ from calibre.utils.fonts.utils import read_bmp_prefix
 from calibre.utils.fonts.sfnt import UnknownTable, max_power_of_two
 from calibre.utils.fonts.sfnt.errors import UnsupportedFont
-def split_range(start_code, end_code, cmap): # {{{
+def split_range(start_code, end_code, cmap):  # {{{
-	# Try to split a range of character codes into subranges with consecutive
+    # Try to split a range of character codes into subranges with consecutive
-	# glyph IDs in such a way that the cmap4 subtable can be stored "most"
+    # glyph IDs in such a way that the cmap4 subtable can be stored "most"
-	# efficiently.
+    # efficiently.
-	if start_code == end_code:
+    if start_code == end_code:
-		return [], [end_code]
+        return [], [end_code]
-	last_id = cmap[start_code]
+    last_id = cmap[start_code]
-	last_code = start_code
+    last_code = start_code
-	in_order = None
+    in_order = None
-	ordered_begin = None
+    ordered_begin = None
-	sub_ranges = []
+    sub_ranges = []
-	# Gather subranges in which the glyph IDs are consecutive.
+    # Gather subranges in which the glyph IDs are consecutive.
-	for code in range(start_code + 1, end_code + 1):
+    for code in range(start_code + 1, end_code + 1):
-		glyph_id = cmap[code]
+        glyph_id = cmap[code]
-		if glyph_id - 1 == last_id:
+        if glyph_id - 1 == last_id:
-			if in_order is None or not in_order:
+            if in_order is None or not in_order:
-				in_order = 1
+                in_order = 1
-				ordered_begin = last_code
+                ordered_begin = last_code
-		else:
+        else:
-			if in_order:
+            if in_order:
-				in_order = 0
+                in_order = 0
-				sub_ranges.append((ordered_begin, last_code))
+                sub_ranges.append((ordered_begin, last_code))
-				ordered_begin = None
+                ordered_begin = None
-		last_id = glyph_id
+        last_id = glyph_id
-		last_code = code
+        last_code = code
-	if in_order:
+    if in_order:
-		sub_ranges.append((ordered_begin, last_code))
+        sub_ranges.append((ordered_begin, last_code))
-	assert last_code == end_code
+    assert last_code == end_code
-	# Now filter out those new subranges that would only make the data bigger.
+    # Now filter out those new subranges that would only make the data bigger.
-	# A new segment cost 8 bytes, not using a new segment costs 2 bytes per
+    # A new segment cost 8 bytes, not using a new segment costs 2 bytes per
-	# character.
+    # character.
-	new_ranges = []
+    new_ranges = []
-	for b, e in sub_ranges:
+    for b, e in sub_ranges:
-		if b == start_code and e == end_code:
+        if b == start_code and e == end_code:
-			break  # the whole range, we're fine
+            break  # the whole range, we're fine
-		if b == start_code or e == end_code:
+        if b == start_code or e == end_code:
-			threshold = 4  # split costs one more segment
+            threshold = 4  # split costs one more segment
-		else:
+        else:
-			threshold = 8  # split costs two more segments
+            threshold = 8  # split costs two more segments
-		if (e - b + 1) > threshold:
+        if (e - b + 1) > threshold:
-			new_ranges.append((b, e))
+            new_ranges.append((b, e))
-	sub_ranges = new_ranges
+    sub_ranges = new_ranges
-	if not sub_ranges:
+    if not sub_ranges:
-		return [], [end_code]
+        return [], [end_code]
-	if sub_ranges[0][0] != start_code:
+    if sub_ranges[0][0] != start_code:
-		sub_ranges.insert(0, (start_code, sub_ranges[0][0] - 1))
+        sub_ranges.insert(0, (start_code, sub_ranges[0][0] - 1))
-	if sub_ranges[-1][1] != end_code:
+    if sub_ranges[-1][1] != end_code:
-		sub_ranges.append((sub_ranges[-1][1] + 1, end_code))
+        sub_ranges.append((sub_ranges[-1][1] + 1, end_code))
-	# Fill the "holes" in the segments list -- those are the segments in which
+    # Fill the "holes" in the segments list -- those are the segments in which
-	# the glyph IDs are _not_ consecutive.
+    # the glyph IDs are _not_ consecutive.
-	i = 1
+    i = 1
-	while i < len(sub_ranges):
+    while i < len(sub_ranges):
-		if sub_ranges[i-1][1] + 1 != sub_ranges[i][0]:
+        if sub_ranges[i-1][1] + 1 != sub_ranges[i][0]:
-			sub_ranges.insert(i, (sub_ranges[i-1][1] + 1, sub_ranges[i][0] - 1))
+            sub_ranges.insert(i, (sub_ranges[i-1][1] + 1, sub_ranges[i][0] - 1))
-			i = i + 1
+            i = i + 1
-		i = i + 1
+        i = i + 1
-	# Transform the ranges into start_code/end_code lists.
+    # Transform the ranges into start_code/end_code lists.
-	start = []
+    start = []
-	end = []
+    end = []
-	for b, e in sub_ranges:
+    for b, e in sub_ranges:
-		start.append(b)
+        start.append(b)
-		end.append(e)
+        end.append(e)
-	start.pop(0)
+    start.pop(0)
-	assert len(start) + 1 == len(end)
+    assert len(start) + 1 == len(end)
-	return start, end
+    return start, end
 # }}}
-def set_id_delta(id_delta): # {{{
+def set_id_delta(id_delta):  # {{{
    # The lowest gid in glyphIndexArray, after subtracting id_delta, must be 1.
    # id_delta is a short, and must be between -32K and 32K
    # startCode can be between 0 and 64K-1, and the first glyph index can be between 1 and 64K-1
@@ -237,57 +237,53 @@ class CmapTable(UnknownTable):
            start_code = [last_code]
            for code in codes[1:]:
-				if code == last_code + 1:
+                if code == last_code + 1:
-					last_code = code
+                    last_code = code
-					continue
+                    continue
-				start, end = split_range(start_code[-1], last_code, cmap)
+                start, end = split_range(start_code[-1], last_code, cmap)
-				start_code.extend(start)
+                start_code.extend(start)
-				end_code.extend(end)
+                end_code.extend(end)
-				start_code.append(code)
+                start_code.append(code)
-				last_code = code
+                last_code = code
-			end_code.append(last_code)
+            end_code.append(last_code)
-			start_code.append(0xffff)
+            start_code.append(0xffff)
-			end_code.append(0xffff)
+            end_code.append(0xffff)
-		id_delta = []
+        id_delta = []
-		id_range_offset = []
+        id_range_offset = []
-		glyph_index_array = []
+        glyph_index_array = []
-		for i in xrange(len(end_code)-1):  # skip the closing codes (0xffff)
+        for i in xrange(len(end_code)-1):  # skip the closing codes (0xffff)
-			indices = []
+            indices = list(cmap[char_code] for char_code in xrange(start_code[i], end_code[i] + 1))
-			for char_code in xrange(start_code[i], end_code[i] + 1):
+            if indices == list(xrange(indices[0], indices[0] + len(indices))):
-				indices.append(cmap[char_code])
+                # indices is a contiguous list
-			if  (indices == xrange(indices[0], indices[0] + len(indices))):
+                id_delta_temp = set_id_delta(indices[0] - start_code[i])
-				id_delta_temp = set_id_delta(indices[0] - start_code[i])
+                id_delta.append(id_delta_temp)
-				id_delta.append(id_delta_temp)
+                id_range_offset.append(0)
-				id_range_offset.append(0)
+            else:
-			else:
+                id_delta.append(0)
-				id_delta.append(0)
+                id_range_offset.append(2 * (len(end_code) + len(glyph_index_array) - i))
-				id_range_offset.append(2 * (len(end_code) +
+                glyph_index_array.extend(indices)
-                    len(glyph_index_array) - i))
+        id_delta.append(1)  # 0xffff + 1 == 0. So this end code maps to .notdef
-				glyph_index_array.extend(indices)
+        id_range_offset.append(0)
 		id_delta.append(1)  # 0xffff + 1 == 0. So this end code maps to .notdef
 		id_range_offset.append(0)
-		seg_count = len(end_code)
+        seg_count = len(end_code)
-		max_exponent = max_power_of_two(seg_count)
+        max_exponent = max_power_of_two(seg_count)
-		search_range = 2 * (2 ** max_exponent)
+        search_range = 2 * (2 ** max_exponent)
-		entry_selector = max_exponent
+        entry_selector = max_exponent
-		range_shift = 2 * seg_count - search_range
+        range_shift = 2 * seg_count - search_range
        char_code_array = end_code + [0] + start_code
-		char_code_array = pack(b'>%dH'%len(char_code_array), *char_code_array)
+        char_code_array = pack(b'>%dH'%len(char_code_array), *char_code_array)
-		id_delta_array = pack(b'>%dh'%len(id_delta), *id_delta)
+        id_delta_array = pack(b'>%dh'%len(id_delta), *id_delta)
        rest_array = id_range_offset + glyph_index_array
        rest_array = pack(b'>%dH'%len(rest_array), *rest_array)
-		data = char_code_array + id_delta_array + rest_array
+        data = char_code_array + id_delta_array + rest_array
-		length = calcsize(fmt) + len(data)
+        length = calcsize(fmt) + len(data)
-		header = pack(fmt, 4, length, 0,
+        header = pack(fmt, 4, length, 0, 2*seg_count, search_range, entry_selector, range_shift)
-				2*seg_count, search_range, entry_selector, range_shift)
+        self.bmp_table = header + data
 		self.bmp_table = header + data
        fmt = b'>4HL'
        offset = calcsize(fmt)
-        self.raw = pack(fmt, self.version, self.num_tables, 3, 1, offset) + \
+        self.raw = pack(fmt, self.version, self.num_tables, 3, 1, offset) + self.bmp_table
                self.bmp_table