mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add a ToUnicode map when embedding fonts
This commit is contained in:
parent
992bf4b423
commit
5504db04a9
@ -493,7 +493,7 @@ if __name__ == '__main__':
|
|||||||
# p.scale(2, 2)
|
# p.scale(2, 2)
|
||||||
# p.rotate(45)
|
# p.rotate(45)
|
||||||
# p.setPen(QColor(0, 0, 255))
|
# p.setPen(QColor(0, 0, 255))
|
||||||
p.drawText(QPoint(100, 300), 'Some text ū --- Д AV')
|
p.drawText(QPoint(100, 300), 'Some text ū --- Д AV ff ff')
|
||||||
finally:
|
finally:
|
||||||
p.end()
|
p.end()
|
||||||
if dev.engine.errors:
|
if dev.engine.errors:
|
||||||
|
@ -7,10 +7,10 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re
|
import re, unicodedata
|
||||||
from itertools import izip, groupby
|
from itertools import izip, groupby
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
from collections import Counter
|
from collections import Counter, OrderedDict
|
||||||
from future_builtins import map
|
from future_builtins import map
|
||||||
|
|
||||||
from calibre.ebooks.pdf.render.common import (Array, String, Stream,
|
from calibre.ebooks.pdf.render.common import (Array, String, Stream,
|
||||||
@ -43,10 +43,12 @@ first. Each number gets mapped to a glyph id equal to itself by the
|
|||||||
|
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
import textwrap
|
||||||
|
|
||||||
class FontStream(Stream):
|
class FontStream(Stream):
|
||||||
|
|
||||||
def __init__(self, is_otf):
|
def __init__(self, is_otf, compress=False):
|
||||||
Stream.__init__(self)
|
Stream.__init__(self, compress=compress)
|
||||||
self.is_otf = is_otf
|
self.is_otf = is_otf
|
||||||
|
|
||||||
def add_extra_keys(self, d):
|
def add_extra_keys(self, d):
|
||||||
@ -54,13 +56,62 @@ class FontStream(Stream):
|
|||||||
if self.is_otf:
|
if self.is_otf:
|
||||||
d['Subtype'] = Name('OpenType')
|
d['Subtype'] = Name('OpenType')
|
||||||
|
|
||||||
|
def to_hex_string(c):
    '''
    Return the integer ``c`` as a lowercase hexadecimal unicode string,
    zero-padded on the left to at least four digits. Values that need more
    than four digits are returned in full, not truncated.
    '''
    # Format the number directly instead of slicing the output of hex():
    # hex() appends an 'L' suffix for Python 2 longs, which would otherwise
    # end up inside the hex digits.
    return (b'%04x' % c).decode('ascii')
|
||||||
|
|
||||||
|
class CMap(Stream):
    '''
    A stream whose content is a CMap program built from :attr:`skeleton`,
    mapping glyph ids to the text each glyph represents.
    '''

    # Template for the stream content; {name} and {mapping} are filled in by
    # __init__ via str.format().
    skeleton = textwrap.dedent('''\
        /CIDInit /ProcSet findresource begin
        12 dict begin
        begincmap
        /CMapName {name}-cmap def
        /CMapType 2 def
        /CIDSystemInfo <<
        /Registry (Adobe)
        /Ordering (UCS)
        /Supplement 0
        >> def
        1 begincodespacerange
        <0000> <FFFF>
        endcodespacerange
        {mapping}
        endcmap
        CMapName currentdict /CMap defineresource pop
        end
        end
        ''')

    def __init__(self, name, glyph_map, compress=False):
        '''
        :param name: Inserted into the ``/CMapName`` entry of the skeleton.
        :param glyph_map: Mapping of glyph id (integer) to the string of
            characters that glyph stands for.
        :param compress: Passed through to ``Stream.__init__``.
        '''
        Stream.__init__(self, compress)

        # One "<glyph id> <character codes>" pair per glyph, in ascending
        # glyph id order.
        entries = []
        for gid in sorted(glyph_map):
            target = ''.join(to_hex_string(ord(ch)) for ch in glyph_map[gid])
            entries.append(('<%s>' % to_hex_string(gid), '<%s>' % target))

        # Emit the pairs in bfchar sections holding at most 100 entries each.
        sections = []
        for start in range(0, len(entries), 100):
            chunk = entries[start:start + 100]
            body = '\n'.join('%s %s' % pair for pair in chunk)
            sections.append(
                '%d beginbfchar\n%s\nendbfchar' % (len(chunk), body))

        self.write(self.skeleton.format(name=name,
                                        mapping='\n'.join(sections)))
|
||||||
|
|
||||||
class Font(object):
|
class Font(object):
|
||||||
|
|
||||||
def __init__(self, metrics, num, objects):
|
def __init__(self, metrics, num, objects, compress):
|
||||||
self.metrics = metrics
|
self.metrics, self.compress = metrics, compress
|
||||||
self.subset_tag = bytes(re.sub('.', lambda m: chr(int(m.group())+ord('A')),
|
self.subset_tag = bytes(re.sub('.', lambda m: chr(int(m.group())+ord('A')),
|
||||||
oct(num))).rjust(6, b'A').decode('ascii')
|
oct(num))).rjust(6, b'A').decode('ascii')
|
||||||
self.font_stream = FontStream(metrics.is_otf)
|
self.font_stream = FontStream(metrics.is_otf, compress=compress)
|
||||||
self.font_descriptor = Dictionary({
|
self.font_descriptor = Dictionary({
|
||||||
'Type': Name('FontDescriptor'),
|
'Type': Name('FontDescriptor'),
|
||||||
'FontName': Name(metrics.postscript_name),
|
'FontName': Name(metrics.postscript_name),
|
||||||
@ -101,9 +152,17 @@ class Font(object):
|
|||||||
# TODO: Subsetting and OpenType
|
# TODO: Subsetting and OpenType
|
||||||
self.font_descriptor['FontFile2'] = objects.add(self.font_stream)
|
self.font_descriptor['FontFile2'] = objects.add(self.font_stream)
|
||||||
self.write_widths(objects)
|
self.write_widths(objects)
|
||||||
|
self.write_to_unicode(objects)
|
||||||
self.metrics.os2.zero_fstype()
|
self.metrics.os2.zero_fstype()
|
||||||
self.metrics.sfnt(self.font_stream)
|
self.metrics.sfnt(self.font_stream)
|
||||||
|
|
||||||
|
def write_to_unicode(self, objects):
    '''
    Build a CMap for the glyphs in ``self.used_glyphs``, add it to
    ``objects`` and store the value returned by ``objects.add`` under the
    ``ToUnicode`` key of ``self.font_dict``.
    '''
    # glyph id -> character code, as reported by the font's cmap table
    char_codes = self.metrics.sfnt['cmap'].get_char_codes(self.used_glyphs)

    # NFKC normalization expands compatibility characters, so a single glyph
    # may end up mapped to a string of several characters.
    glyph_map = {}
    for gid, code in char_codes.iteritems():
        glyph_map[gid] = unicodedata.normalize('NFKC', unichr(code))

    to_unicode = CMap(self.metrics.postscript_name, glyph_map,
                      compress=self.compress)
    self.font_dict['ToUnicode'] = objects.add(to_unicode)
|
||||||
|
|
||||||
def write_widths(self, objects):
|
def write_widths(self, objects):
|
||||||
glyphs = sorted(self.used_glyphs|{0})
|
glyphs = sorted(self.used_glyphs|{0})
|
||||||
widths = {g:self.metrics.pdf_scale(w) for g, w in izip(glyphs,
|
widths = {g:self.metrics.pdf_scale(w) for g, w in izip(glyphs,
|
||||||
@ -129,8 +188,9 @@ class Font(object):
|
|||||||
|
|
||||||
class FontManager(object):
|
class FontManager(object):
|
||||||
|
|
||||||
def __init__(self, objects):
|
def __init__(self, objects, compress):
|
||||||
self.objects = objects
|
self.objects = objects
|
||||||
|
self.compress = compress
|
||||||
self.std_map = {}
|
self.std_map = {}
|
||||||
self.font_map = {}
|
self.font_map = {}
|
||||||
self.fonts = []
|
self.fonts = []
|
||||||
@ -138,7 +198,7 @@ class FontManager(object):
|
|||||||
def add_font(self, font_metrics, glyph_ids):
|
def add_font(self, font_metrics, glyph_ids):
|
||||||
if font_metrics not in self.font_map:
|
if font_metrics not in self.font_map:
|
||||||
self.fonts.append(Font(font_metrics, len(self.fonts),
|
self.fonts.append(Font(font_metrics, len(self.fonts),
|
||||||
self.objects))
|
self.objects, self.compress))
|
||||||
d = self.objects.add(self.fonts[-1].font_dict)
|
d = self.objects.add(self.fonts[-1].font_dict)
|
||||||
self.font_map[font_metrics] = (d, self.fonts[-1])
|
self.font_map[font_metrics] = (d, self.fonts[-1])
|
||||||
|
|
||||||
|
@ -252,7 +252,7 @@ class PDFStream(object):
|
|||||||
self.info = Dictionary({'Creator':String(creator),
|
self.info = Dictionary({'Creator':String(creator),
|
||||||
'Producer':String(creator)})
|
'Producer':String(creator)})
|
||||||
self.stroke_opacities, self.fill_opacities = {}, {}
|
self.stroke_opacities, self.fill_opacities = {}, {}
|
||||||
self.font_manager = FontManager(self.objects)
|
self.font_manager = FontManager(self.objects, self.compress)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def page_tree(self):
|
def page_tree(self):
|
||||||
|
@ -13,7 +13,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
from struct import unpack_from, calcsize, pack
|
from struct import unpack_from, calcsize, pack
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
|
||||||
from calibre.utils.fonts.utils import get_bmp_glyph_ids
|
from calibre.utils.fonts.utils import get_bmp_glyph_ids, read_bmp_prefix
|
||||||
from calibre.utils.fonts.sfnt import UnknownTable, max_power_of_two
|
from calibre.utils.fonts.sfnt import UnknownTable, max_power_of_two
|
||||||
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
|
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
|
||||||
|
|
||||||
@ -165,6 +165,33 @@ class CmapTable(UnknownTable):
|
|||||||
ans[chars[i]] = glyph_id
|
ans[chars[i]] = glyph_id
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
def get_char_codes(self, glyph_ids):
    '''
    Return a dict mapping each glyph id in ``glyph_ids`` to a character
    code that the BMP cmap subtable maps to that glyph. Glyph ids that no
    character code maps to are absent from the result. If several codes map
    to the same glyph, the last one encountered wins.

    :raises UnsupportedFont: if ``self.bmp_table`` is ``None``.
    '''
    if self.bmp_table is None:
        raise UnsupportedFont('This font has no Windows BMP cmap subtable.'
                ' Most likely a special purpose font.')
    ans = {}
    (start_count, end_count, range_offset, id_delta, glyph_id_len,
     glyph_id_map, array_len) = read_bmp_prefix(self.bmp_table, 0)

    # Set membership is tested once per character code below
    glyph_ids = frozenset(glyph_ids)

    for i, ec in enumerate(end_count):
        sc = start_count[i]
        ro = range_offset[i]
        for code in xrange(sc, ec+1):
            if ro == 0:
                glyph_id = id_delta[i] + code
            else:
                # Same index computation as the code -> glyph id lookup:
                # range_offset is a byte offset relative to its own slot in
                # the range_offset array, hence the "+ i - array_len".
                idx = ro//2 + (code - sc) + i - array_len
                glyph_id = glyph_id_map[idx]
                if glyph_id != 0:
                    glyph_id += id_delta[i]
            # Glyph ids are 16-bit values, so the delta arithmetic wraps
            # modulo 65536 (0x10000). Wrapping at 0x1000 would corrupt every
            # glyph id >= 4096.
            glyph_id %= 0x10000
            if glyph_id in glyph_ids:
                ans[glyph_id] = code

    return ans
|
||||||
|
|
||||||
def set_character_map(self, cmap):
|
def set_character_map(self, cmap):
|
||||||
self.version, self.num_tables = 0, 1
|
self.version, self.num_tables = 0, 1
|
||||||
fmt = b'>7H'
|
fmt = b'>7H'
|
||||||
|
@ -306,7 +306,7 @@ def remove_embed_restriction(raw):
|
|||||||
verify_checksums(raw)
|
verify_checksums(raw)
|
||||||
return raw
|
return raw
|
||||||
|
|
||||||
def get_bmp_glyph_ids(table, bmp, codes):
|
def read_bmp_prefix(table, bmp):
|
||||||
length, language, segcount = struct.unpack_from(b'>3H', table, bmp+2)
|
length, language, segcount = struct.unpack_from(b'>3H', table, bmp+2)
|
||||||
array_len = segcount //2
|
array_len = segcount //2
|
||||||
offset = bmp + 7*2
|
offset = bmp + 7*2
|
||||||
@ -324,6 +324,12 @@ def get_bmp_glyph_ids(table, bmp, codes):
|
|||||||
glyph_id_len = (length + bmp - (offset + array_sz))//2
|
glyph_id_len = (length + bmp - (offset + array_sz))//2
|
||||||
glyph_id_map = struct.unpack_from(b'>%dH'%glyph_id_len, table, offset +
|
glyph_id_map = struct.unpack_from(b'>%dH'%glyph_id_len, table, offset +
|
||||||
array_sz)
|
array_sz)
|
||||||
|
return (start_count, end_count, range_offset, id_delta, glyph_id_len,
|
||||||
|
glyph_id_map, array_len)
|
||||||
|
|
||||||
|
def get_bmp_glyph_ids(table, bmp, codes):
|
||||||
|
(start_count, end_count, range_offset, id_delta, glyph_id_len,
|
||||||
|
glyph_id_map, array_len) = read_bmp_prefix(table, bmp)
|
||||||
|
|
||||||
for code in codes:
|
for code in codes:
|
||||||
found = False
|
found = False
|
||||||
|
Loading…
x
Reference in New Issue
Block a user