Fix cmap table generation not handling contiguous indices

Also fix tab indentation in code imported from fonttools.
This commit is contained in:
Kovid Goyal 2013-09-19 12:03:12 +05:30
parent 46ed78b892
commit 85d09338d7

View File

@ -17,85 +17,85 @@ from calibre.utils.fonts.utils import read_bmp_prefix
from calibre.utils.fonts.sfnt import UnknownTable, max_power_of_two from calibre.utils.fonts.sfnt import UnknownTable, max_power_of_two
from calibre.utils.fonts.sfnt.errors import UnsupportedFont from calibre.utils.fonts.sfnt.errors import UnsupportedFont
def split_range(start_code, end_code, cmap):  # {{{
    """Split a run of contiguous character codes into cmap format 4 segments.

    Try to split the range of character codes ``start_code..end_code``
    (inclusive) into subranges with consecutive glyph IDs in such a way that
    the cmap4 subtable can be stored "most" efficiently.

    :param start_code: First character code of the range.
    :param end_code: Last character code of the range (inclusive).
    :param cmap: Mapping of character code to glyph ID; it is indexed for
        every code in ``start_code..end_code``.
    :return: A ``(start, end)`` tuple of lists. ``end`` holds the end code of
        every resulting segment. ``start`` holds the start code of every
        segment except the first one (which is ``start_code`` and is omitted),
        so ``len(start) + 1 == len(end)``.
    """
    if start_code == end_code:
        return [], [end_code]

    last_id = cmap[start_code]
    last_code = start_code
    in_order = False
    ordered_begin = None
    sub_ranges = []

    # Gather subranges in which the glyph IDs are consecutive.
    for code in range(start_code + 1, end_code + 1):
        glyph_id = cmap[code]
        if glyph_id - 1 == last_id:
            if not in_order:
                # A new consecutive run starts at the previous code.
                in_order = True
                ordered_begin = last_code
        elif in_order:
            # The consecutive run ended at the previous code.
            in_order = False
            sub_ranges.append((ordered_begin, last_code))
            ordered_begin = None
        last_id = glyph_id
        last_code = code

    if in_order:
        sub_ranges.append((ordered_begin, last_code))
    assert last_code == end_code

    # Now filter out those new subranges that would only make the data bigger.
    # A new segment costs 8 bytes, not using a new segment costs 2 bytes per
    # character.
    new_ranges = []
    for b, e in sub_ranges:
        if b == start_code and e == end_code:
            break  # the whole range, we're fine
        if b == start_code or e == end_code:
            threshold = 4  # split costs one more segment
        else:
            threshold = 8  # split costs two more segments
        if (e - b + 1) > threshold:
            new_ranges.append((b, e))
    sub_ranges = new_ranges

    if not sub_ranges:
        return [], [end_code]

    # Make sure the kept subranges are bracketed by segments that reach the
    # boundaries of the requested range.
    if sub_ranges[0][0] != start_code:
        sub_ranges.insert(0, (start_code, sub_ranges[0][0] - 1))
    if sub_ranges[-1][1] != end_code:
        sub_ranges.append((sub_ranges[-1][1] + 1, end_code))

    # Fill the "holes" in the segments list -- those are the segments in which
    # the glyph IDs are _not_ consecutive.
    i = 1
    while i < len(sub_ranges):
        if sub_ranges[i - 1][1] + 1 != sub_ranges[i][0]:
            sub_ranges.insert(i, (sub_ranges[i - 1][1] + 1, sub_ranges[i][0] - 1))
            i += 1  # step over the segment that was just inserted
        i += 1

    # Transform the ranges into start_code/end_code lists.
    start = [b for b, _ in sub_ranges]
    end = [e for _, e in sub_ranges]
    # The first start code is always start_code itself; the caller already
    # has it, so it is dropped.
    start.pop(0)

    assert len(start) + 1 == len(end)
    return start, end
# }}}
def set_id_delta(id_delta): # {{{ def set_id_delta(id_delta): # {{{
# The lowest gid in glyphIndexArray, after subtracting id_delta, must be 1. # The lowest gid in glyphIndexArray, after subtracting id_delta, must be 1.
# id_delta is a short, and must be between -32K and 32K # id_delta is a short, and must be between -32K and 32K
# startCode can be between 0 and 64K-1, and the first glyph index can be between 1 and 64K-1 # startCode can be between 0 and 64K-1, and the first glyph index can be between 1 and 64K-1
@ -237,57 +237,53 @@ class CmapTable(UnknownTable):
start_code = [last_code] start_code = [last_code]
for code in codes[1:]: for code in codes[1:]:
if code == last_code + 1: if code == last_code + 1:
last_code = code last_code = code
continue continue
start, end = split_range(start_code[-1], last_code, cmap) start, end = split_range(start_code[-1], last_code, cmap)
start_code.extend(start) start_code.extend(start)
end_code.extend(end) end_code.extend(end)
start_code.append(code) start_code.append(code)
last_code = code last_code = code
end_code.append(last_code) end_code.append(last_code)
start_code.append(0xffff) start_code.append(0xffff)
end_code.append(0xffff) end_code.append(0xffff)
id_delta = [] id_delta = []
id_range_offset = [] id_range_offset = []
glyph_index_array = [] glyph_index_array = []
for i in xrange(len(end_code)-1): # skip the closing codes (0xffff) for i in xrange(len(end_code)-1): # skip the closing codes (0xffff)
indices = [] indices = list(cmap[char_code] for char_code in xrange(start_code[i], end_code[i] + 1))
for char_code in xrange(start_code[i], end_code[i] + 1): if indices == list(xrange(indices[0], indices[0] + len(indices))):
indices.append(cmap[char_code]) # indices is a contiguous list
if (indices == xrange(indices[0], indices[0] + len(indices))): id_delta_temp = set_id_delta(indices[0] - start_code[i])
id_delta_temp = set_id_delta(indices[0] - start_code[i]) id_delta.append(id_delta_temp)
id_delta.append(id_delta_temp) id_range_offset.append(0)
id_range_offset.append(0) else:
else: id_delta.append(0)
id_delta.append(0) id_range_offset.append(2 * (len(end_code) + len(glyph_index_array) - i))
id_range_offset.append(2 * (len(end_code) + glyph_index_array.extend(indices)
len(glyph_index_array) - i)) id_delta.append(1) # 0xffff + 1 == 0. So this end code maps to .notdef
glyph_index_array.extend(indices) id_range_offset.append(0)
id_delta.append(1) # 0xffff + 1 == 0. So this end code maps to .notdef
id_range_offset.append(0)
seg_count = len(end_code) seg_count = len(end_code)
max_exponent = max_power_of_two(seg_count) max_exponent = max_power_of_two(seg_count)
search_range = 2 * (2 ** max_exponent) search_range = 2 * (2 ** max_exponent)
entry_selector = max_exponent entry_selector = max_exponent
range_shift = 2 * seg_count - search_range range_shift = 2 * seg_count - search_range
char_code_array = end_code + [0] + start_code char_code_array = end_code + [0] + start_code
char_code_array = pack(b'>%dH'%len(char_code_array), *char_code_array) char_code_array = pack(b'>%dH'%len(char_code_array), *char_code_array)
id_delta_array = pack(b'>%dh'%len(id_delta), *id_delta) id_delta_array = pack(b'>%dh'%len(id_delta), *id_delta)
rest_array = id_range_offset + glyph_index_array rest_array = id_range_offset + glyph_index_array
rest_array = pack(b'>%dH'%len(rest_array), *rest_array) rest_array = pack(b'>%dH'%len(rest_array), *rest_array)
data = char_code_array + id_delta_array + rest_array data = char_code_array + id_delta_array + rest_array
length = calcsize(fmt) + len(data) length = calcsize(fmt) + len(data)
header = pack(fmt, 4, length, 0, header = pack(fmt, 4, length, 0, 2*seg_count, search_range, entry_selector, range_shift)
2*seg_count, search_range, entry_selector, range_shift) self.bmp_table = header + data
self.bmp_table = header + data
fmt = b'>4HL' fmt = b'>4HL'
offset = calcsize(fmt) offset = calcsize(fmt)
self.raw = pack(fmt, self.version, self.num_tables, 3, 1, offset) + \ self.raw = pack(fmt, self.version, self.num_tables, 3, 1, offset) + self.bmp_table
self.bmp_table