diff --git a/src/calibre/ebooks/pdf/render/engine.py b/src/calibre/ebooks/pdf/render/engine.py index 4dea75241a..f429d876ac 100644 --- a/src/calibre/ebooks/pdf/render/engine.py +++ b/src/calibre/ebooks/pdf/render/engine.py @@ -493,7 +493,7 @@ if __name__ == '__main__': # p.scale(2, 2) # p.rotate(45) # p.setPen(QColor(0, 0, 255)) - p.drawText(QPoint(100, 300), 'Some text ū --- Д AV') + p.drawText(QPoint(100, 300), 'Some text ū --- Д AV ff ff') finally: p.end() if dev.engine.errors: diff --git a/src/calibre/ebooks/pdf/render/fonts.py b/src/calibre/ebooks/pdf/render/fonts.py index dee7748e82..3de1f9c86e 100644 --- a/src/calibre/ebooks/pdf/render/fonts.py +++ b/src/calibre/ebooks/pdf/render/fonts.py @@ -7,10 +7,10 @@ __license__ = 'GPL v3' __copyright__ = '2012, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import re +import re, unicodedata from itertools import izip, groupby from operator import itemgetter -from collections import Counter +from collections import Counter, OrderedDict from future_builtins import map from calibre.ebooks.pdf.render.common import (Array, String, Stream, @@ -43,10 +43,12 @@ first. 
def to_hex_string(c):
    '''
    Return the integer ``c`` as a lower case hexadecimal unicode string,
    zero padded on the left to at least four digits (``0x41`` -> ``0041``).
    '''
    # %-formatting, unlike hex(), never appends the 'L' suffix that python 2
    # uses for long integers, so the result is always pure hex digits.
    return (b'%04x' % c).decode('ascii')


def _utf16_code_units(text):
    '''
    Yield the UTF-16 code units of the unicode string ``text``.

    Characters above U+FFFF are split into a surrogate pair so that every
    yielded value fits in four hex digits. On interpreters that already
    iterate unicode strings as UTF-16 code units the values pass through
    unchanged.
    '''
    for ch in text:
        code_point = ord(ch)
        if code_point > 0xFFFF:
            code_point -= 0x10000
            yield 0xD800 | (code_point >> 10)
            yield 0xDC00 | (code_point & 0x3FF)
        else:
            yield code_point


class CMap(Stream):

    '''
    A stream containing a CMap that maps glyph ids to the unicode text they
    represent. It is meant to be used as the ToUnicode entry of a font
    dictionary.

    :param name: Used to build the CMapName (``name`` + ``-cmap``)
    :param glyph_map: Mapping of integer glyph id to the unicode string
                      that glyph stands for
    :param compress: Passed through to :class:`Stream`
    '''

    skeleton = textwrap.dedent('''\
        /CIDInit /ProcSet findresource begin
        12 dict begin
        begincmap
        /CMapName /{name}-cmap def
        /CMapType 2 def
        /CIDSystemInfo <<
        /Registry (Adobe)
        /Ordering (UCS)
        /Supplement 0
        >> def
        1 begincodespacerange
        <0000> <FFFF>
        endcodespacerange
        {mapping}
        endcmap
        CMapName currentdict /CMap defineresource pop
        end
        end
        ''')

    # Mappings are emitted in bfchar sections of at most this many entries
    # (presumably the per-section limit of the Adobe CMap format -- confirm
    # against the specification before changing).
    max_section_size = 100

    def __init__(self, name, glyph_map, compress=False):
        Stream.__init__(self, compress=compress)

        # One "<glyph id> <utf-16be text>" line per glyph, ordered by glyph id
        entries = []
        for glyph_id in sorted(glyph_map):
            text = ''.join(to_hex_string(unit) for unit in
                    _utf16_code_units(glyph_map[glyph_id]))
            entries.append('<%s> <%s>'%(to_hex_string(glyph_id), text))

        sections = []
        for start in xrange(0, len(entries), self.max_section_size):
            chunk = entries[start:start + self.max_section_size]
            sections.append('%d beginbfchar\n%s\nendbfchar'%(
                len(chunk), '\n'.join(chunk)))

        self.write(self.skeleton.format(name=name,
            mapping='\n'.join(sections)))
def write_to_unicode(self, objects):
    '''
    Build a ToUnicode CMap for the glyphs used from this font, add it to
    ``objects`` and store the result of that call in the font dictionary
    under the ``ToUnicode`` key.

    Every character code reported by the font's cmap table is NFKC
    normalized before being stored, so a single glyph may map to more than
    one character (for example a ligature glyph mapping to its component
    letters).
    '''
    cmap_table = self.metrics.sfnt['cmap']
    glyph_to_text = {}
    for glyph_id, char_code in cmap_table.get_char_codes(
            self.used_glyphs).iteritems():
        glyph_to_text[glyph_id] = unicodedata.normalize(
                'NFKC', unichr(char_code))
    to_unicode = CMap(self.metrics.postscript_name, glyph_to_text,
            compress=self.compress)
    self.font_dict['ToUnicode'] = objects.add(to_unicode)
def get_char_codes(self, glyph_ids):
    '''
    Return a dictionary mapping glyph id to character code for every glyph
    id in ``glyph_ids`` that is reachable from the BMP cmap subtable. This
    is the reverse of the usual character code -> glyph id lookup.

    Glyph ids that no character code maps to are absent from the result.
    If several character codes map to the same glyph id, the highest code
    wins, as segments and codes are visited in the order they are stored.

    :param glyph_ids: Iterable of integer glyph ids
    :raises UnsupportedFont: If this table has no BMP subtable
    '''
    if self.bmp_table is None:
        raise UnsupportedFont('This font has no Windows BMP cmap subtable.'
                ' Most likely a special purpose font.')
    ans = {}
    (start_count, end_count, range_offset, id_delta, glyph_id_len,
     glyph_id_map, array_len) = read_bmp_prefix(self.bmp_table, 0)

    glyph_ids = frozenset(glyph_ids)
    num_mapped = len(glyph_id_map)

    for i, ec in enumerate(end_count):
        sc, ro, delta = start_count[i], range_offset[i], id_delta[i]
        for code in xrange(sc, ec+1):
            if ro == 0:
                glyph_id = delta + code
            else:
                idx = ro//2 + (code - sc) + i - array_len
                if not 0 <= idx < num_mapped:
                    # Malformed segment pointing outside the glyph id
                    # array, treat the character as unmapped instead of
                    # failing or reading a wrapped around entry
                    continue
                glyph_id = glyph_id_map[idx]
                if glyph_id != 0:
                    glyph_id += delta
            # Glyph ids are 16 bit values, so the arithmetic above is
            # modulo 65536 (0x10000). A smaller modulus corrupts every
            # glyph id >= that modulus.
            glyph_id %= 0x10000
            if glyph_id in glyph_ids:
                ans[glyph_id] = code

    return ans
+324,12 @@ def get_bmp_glyph_ids(table, bmp, codes): glyph_id_len = (length + bmp - (offset + array_sz))//2 glyph_id_map = struct.unpack_from(b'>%dH'%glyph_id_len, table, offset + array_sz) + return (start_count, end_count, range_offset, id_delta, glyph_id_len, + glyph_id_map, array_len) + +def get_bmp_glyph_ids(table, bmp, codes): + (start_count, end_count, range_offset, id_delta, glyph_id_len, + glyph_id_map, array_len) = read_bmp_prefix(table, bmp) for code in codes: found = False