PDF Output: Implement embedding of TrueType fonts (without subsetting or ToUnicode maps)

This commit is contained in:
Kovid Goyal 2012-12-20 15:31:16 +05:30
parent 8a0417c72b
commit ea50efb4f5
8 changed files with 329 additions and 101 deletions

View File

@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en'
import codecs, zlib
from io import BytesIO
from struct import pack
EOL = b'\n'
@ -49,7 +50,7 @@ def serialize(o, stream):
o.pdf_serialize(stream)
elif isinstance(o, bool):
stream.write(b'true' if o else b'false')
elif isinstance(o, (int, float)):
elif isinstance(o, (int, long, float)):
stream.write(type(u'')(o).encode('ascii'))
elif o is None:
stream.write(b'null')
@ -78,14 +79,30 @@ class String(unicode):
raw = codecs.BOM_UTF16_BE + s.encode('utf-16-be')
stream.write(b'('+raw+b')')
class GlyphIndex(object):
    '''
    A single two byte glyph id, serialized as a PDF string operand.

    :param code: The glyph id. It must fit in an unsigned 16 bit integer,
                 otherwise ``struct.pack`` raises ``struct.error`` when the
                 object is serialized.
    :param compress: If True the id is written as a binary literal string
                     ``(..)``, otherwise as a hexadecimal string ``<....>``.
    '''

    # Bytes that must not appear unescaped inside a PDF literal string: the
    # two delimiters, the escape character itself and CR (an unescaped CR is
    # read back as LF, which would silently change the glyph id).
    LITERAL_ESCAPES = {0x28: b'\\(', 0x29: b'\\)', 0x5c: b'\\\\', 0x0d: b'\\r'}

    def __init__(self, code, compress):
        self.code = code
        self.compress = compress

    def pdf_serialize(self, stream):
        'Write this glyph id to stream (a binary file like object)'
        byts = bytearray(pack(b'>H', self.code))
        if self.compress:
            escapes = self.LITERAL_ESCAPES
            body = b''.join(
                escapes[b] if b in escapes else bytes(bytearray((b,)))
                for b in byts)
            stream.write(b'(' + body + b')')
        else:
            # Two lowercase hex digits per byte, high order byte first
            stream.write(b'<' + b''.join(
                ('%02x' % b).encode('ascii') for b in byts) + b'>')
class Dictionary(dict):
def pdf_serialize(self, stream):
stream.write(b'<<' + EOL)
for k, v in self.iteritems():
sorted_keys = sorted(self.iterkeys(),
key=lambda x:((' ' if x == 'Type' else '')+x))
for k in sorted_keys:
serialize(Name(k), stream)
stream.write(b' ')
serialize(v, stream)
serialize(self[k], stream)
stream.write(EOL)
stream.write(b'>>' + EOL)
@ -116,6 +133,9 @@ class Stream(BytesIO):
BytesIO.__init__(self)
self.compress = compress
# Hook for subclasses. pdf_serialize() calls this with the stream dictionary
# d (already holding the 'Length' and 'DL' keys) before the dictionary is
# written, so that extra keys can be added. The base class adds nothing.
def add_extra_keys(self, d):
pass
def pdf_serialize(self, stream):
raw = self.getvalue()
dl = len(raw)
@ -125,6 +145,7 @@ class Stream(BytesIO):
raw = zlib.compress(raw)
d = InlineDictionary({'Length':len(raw), 'DL':dl})
self.add_extra_keys(d)
if filters:
d['Filter'] = filters
serialize(d, stream)

View File

@ -13,10 +13,10 @@ from collections import namedtuple
from functools import wraps
from PyQt4.Qt import (QPaintEngine, QPaintDevice, Qt, QApplication, QPainter,
QTransform, QPainterPath, QRawFont)
QTransform, QPainterPath, QTextOption, QTextLayout)
from calibre.constants import DEBUG
from calibre.ebooks.pdf.render.serialize import (Color, PDFStream, Path, Text)
from calibre.ebooks.pdf.render.serialize import (Color, PDFStream, Path)
from calibre.ebooks.pdf.render.common import inch, A4
from calibre.utils.fonts.sfnt.container import Sfnt
from calibre.utils.fonts.sfnt.metrics import FontMetrics
@ -236,6 +236,9 @@ class PdfEngine(QPaintEngine):
self.xscale, self.yscale = sx, sy
self.graphics_state = GraphicsState()
self.errors = []
self.text_option = QTextOption()
self.text_option.setWrapMode(QTextOption.NoWrap)
self.fonts = {}
def init_page(self):
self.pdf.transform(self.pdf_system)
@ -353,44 +356,41 @@ class PdfEngine(QPaintEngine):
@store_error
def drawTextItem(self, point, text_item):
# super(PdfEngine, self).drawTextItem(point+QPoint(0, 300), text_item)
f = text_item.font()
px, pt = f.pixelSize(), f.pointSizeF()
if px == -1:
sz = pt/self.yscale
else:
sz = px
# super(PdfEngine, self).drawTextItem(point+QPoint(0, 0), text_item)
text = type(u'')(text_item.text()).replace('\n', ' ')
tl = QTextLayout(text, text_item.font(), self.paintDevice())
self.text_option.setTextDirection(Qt.RightToLeft if
text_item.renderFlags() & text_item.RightToLeft else Qt.LeftToRight)
tl.setTextOption(self.text_option)
tl.setPosition(point)
tl.beginLayout()
line = tl.createLine()
if not line.isValid():
tl.endLayout()
return
line.setLineWidth(int(1e12))
tl.endLayout()
for run in tl.glyphRuns():
rf = run.rawFont()
name = hash(bytes(rf.fontTable('name')))
if name not in self.fonts:
self.fonts[name] = FontMetrics(Sfnt(rf))
metrics = self.fonts[name]
indices = run.glyphIndexes()
glyphs = []
pdf_pos = point
first_baseline = None
for i, pos in enumerate(run.positions()):
if first_baseline is None:
first_baseline = pos.y()
glyph_pos = point + pos
delta = glyph_pos - pdf_pos
glyphs.append((delta.x(), pos.y()-first_baseline, indices[i]))
pdf_pos = glyph_pos
self.pdf.draw_glyph_run([1, 0, 0, -1, point.x(),
point.y()], rf.pixelSize(), metrics, glyphs)
r = QRawFont.fromFont(f)
metrics = FontMetrics(Sfnt(r))
to = Text()
to.size = sz
to.set_transform(1, 0, 0, -1, point.x(), point.y())
stretch = f.stretch()
if stretch != 100:
to.horizontal_scale = stretch
ws = f.wordSpacing()
if ws != 0:
to.word_spacing = ws
spacing = f.letterSpacing()
st = f.letterSpacingType()
text = type(u'')(text_item.text())
if st == f.AbsoluteSpacing and spacing != 0:
to.char_space = spacing/self.scale
if st == f.PercentageSpacing and spacing not in {100, 0}:
# TODO: Figure out why the results from uncommenting the super
# class call above differ. The advance widths are the same as those
# reported by QRawfont, so presumably, Qt use some other
# algorithm, I can't be bothered to track it down. This behavior is
# correct as per the Qt docs' description of PercentageSpacing
widths = [w*-1 for w in metrics.advance_widths(text,
sz, f.stretch()/100.)]
to.glyph_adjust = ((spacing-100)/100., widths)
to.text = text
with self:
self.graphics_state.apply_fill(self.graphics_state.current_state['stroke'],
self, self.pdf)
self.pdf.draw_text(to)
@store_error
def drawPolygon(self, points, mode):
@ -460,40 +460,40 @@ if __name__ == '__main__':
xmax, ymax = p.viewport().width(), p.viewport().height()
try:
p.drawRect(0, 0, xmax, ymax)
p.drawPolyline(QPoint(0, 0), QPoint(xmax, 0), QPoint(xmax, ymax),
QPoint(0, ymax), QPoint(0, 0))
pp = QPainterPath()
pp.addRect(0, 0, xmax, ymax)
p.drawPath(pp)
p.save()
for i in xrange(3):
col = [0, 0, 0, 200]
col[i] = 255
p.setOpacity(0.3)
p.setBrush(QBrush(QColor(*col)))
p.drawRect(0, 0, xmax/10, xmax/10)
p.translate(xmax/10, xmax/10)
p.scale(1, 1.5)
p.restore()
# p.drawPolyline(QPoint(0, 0), QPoint(xmax, 0), QPoint(xmax, ymax),
# QPoint(0, ymax), QPoint(0, 0))
# pp = QPainterPath()
# pp.addRect(0, 0, xmax, ymax)
# p.drawPath(pp)
# p.save()
# for i in xrange(3):
# col = [0, 0, 0, 200]
# col[i] = 255
# p.setOpacity(0.3)
# p.setBrush(QBrush(QColor(*col)))
# p.drawRect(0, 0, xmax/10, xmax/10)
# p.translate(xmax/10, xmax/10)
# p.scale(1, 1.5)
# p.restore()
p.save()
p.drawLine(0, 0, 5000, 0)
p.rotate(45)
p.drawLine(0, 0, 5000, 0)
p.restore()
# p.save()
# p.drawLine(0, 0, 5000, 0)
# p.rotate(45)
# p.drawLine(0, 0, 5000, 0)
# p.restore()
f = p.font()
f.setPointSize(48)
f.setLetterSpacing(f.PercentageSpacing, 200)
f.setPointSize(24)
# f.setLetterSpacing(f.PercentageSpacing, 200)
# f.setUnderline(True)
# f.setOverline(True)
# f.setStrikeOut(True)
f.setFamily('Times New Roman')
f.setFamily('Calibri')
p.setFont(f)
# p.scale(2, 2)
# p.rotate(45)
p.setPen(QColor(0, 0, 255))
p.drawText(QPoint(100, 300), 'Some text')
# p.setPen(QColor(0, 0, 255))
p.drawText(QPoint(100, 300), 'Some text ū --- Д AV')
finally:
p.end()
if dev.engine.errors:

View File

@ -7,7 +7,13 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.pdf.render.common import (
import re
from itertools import izip, groupby
from operator import itemgetter
from collections import Counter
from future_builtins import map
from calibre.ebooks.pdf.render.common import (Array, String, Stream,
Dictionary, Name)
STANDARD_FONTS = {
@ -16,11 +22,129 @@ STANDARD_FONTS = {
'Helvetica-Oblique', 'Courier-Oblique', 'Times-BoldItalic',
'Helvetica-BoldOblique', 'Courier-BoldOblique', }
'''
Notes
=======
We must use Type 0 CID keyed fonts to represent unicode text.
For TrueType
--------------
The mapping from the text strings to glyph ids is defined by two things:
The /Encoding key of the Type-0 font dictionary
The /CIDToGIDMap key of the descendant font dictionary (for TrueType fonts)
We set /Encoding to /Identity-H and /CIDToGIDMap to /Identity. This means that
text strings are interpreted as a sequence of two-byte numbers, high order byte
first. Each number gets mapped to a glyph id equal to itself by the
/CIDToGIDMap.
'''
class FontStream(Stream):
    '''
    The stream object that holds an embedded font program. It contributes
    the font specific keys to the stream dictionary when it is serialized.

    :param is_otf: True if the font is an OpenType (CFF) font
    '''

    def __init__(self, is_otf):
        super(FontStream, self).__init__()
        self.is_otf = is_otf

    def add_extra_keys(self, d):
        'Add /Length1 (and /Subtype for OpenType fonts) to the dictionary d'
        # Length1 mirrors the DL entry already present in the dictionary
        length_before_filters = d['DL']
        d['Length1'] = length_before_filters
        if not self.is_otf:
            return
        d['Subtype'] = Name('OpenType')
class Font(object):
    '''
    The PDF objects needed to embed one font as a Type 0 (composite) font
    with a single CID keyed descendant font.

    :param metrics: Font metrics object. Read for ``is_otf``,
                    ``postscript_name``, ``post.italic_angle`` and the
                    ``pdf_*`` values that go into the font descriptor.
    :param num: Number from which ``subset_tag`` is derived.
    :param objects: Object store. Its ``add()`` method is called to register
                    the dictionaries created here, the return value is what
                    gets stored wherever a reference to them is needed.
    '''

    def __init__(self, metrics, num, objects):
        self.metrics = metrics
        # Six letters: the octal digits of num mapped onto A-H and left
        # padded with A
        self.subset_tag = bytes(re.sub('.', lambda m: chr(int(m.group())+ord('A')),
                                       oct(num))).rjust(6, b'A').decode('ascii')
        self.font_stream = FontStream(metrics.is_otf)
        self.font_descriptor = Dictionary({
            'Type': Name('FontDescriptor'),
            'FontName': Name(metrics.postscript_name),
            'Flags': 0b100,  # Symbolic font
            'FontBBox': Array(metrics.pdf_bbox),
            'ItalicAngle': metrics.post.italic_angle,
            'Ascent': metrics.pdf_ascent,
            'Descent': metrics.pdf_descent,
            'CapHeight': metrics.pdf_capheight,
            'AvgWidth': metrics.pdf_avg_width,
            'StemV': metrics.pdf_stemv,
        })
        self.descendant_font = Dictionary({
            'Type':Name('Font'),
            'Subtype':Name('CIDFontType' + ('0' if metrics.is_otf else '2')),
            'BaseFont': self.font_descriptor['FontName'],
            'FontDescriptor':objects.add(self.font_descriptor),
            'CIDSystemInfo':Dictionary({
                'Registry':String('Adobe'),
                'Ordering':String('Identity'),
                'Supplement':0,
            }),
        })
        if not metrics.is_otf:
            # Only TrueType descendant fonts get a CID to glyph id map
            self.descendant_font['CIDToGIDMap'] = Name('Identity')

        self.font_dict = Dictionary({
            'Type':Name('Font'),
            'Subtype':Name('Type0'),
            'Encoding':Name('Identity-H'),
            'BaseFont':self.descendant_font['BaseFont'],
            'DescendantFonts':Array([objects.add(self.descendant_font)]),
        })

        # Glyph ids actually used in the document, filled in by the
        # FontManager as text is drawn
        self.used_glyphs = set()

    def embed(self, objects):
        '''
        Register the font program stream and the glyph widths with objects
        and write the font data into the font stream.
        '''
        # TODO: Subsetting and OpenType
        self.font_descriptor['FontFile2'] = objects.add(self.font_stream)
        self.write_widths(objects)
        # The OS/2 fs_type field is zeroed before the font data is written
        self.metrics.os2.zero_fstype()
        self.metrics.sfnt(self.font_stream)

    def write_widths(self, objects):
        '''
        Set the /DW and /W entries of the descendant font from the widths of
        the used glyphs (glyph 0 is always included). The most common width
        becomes /DW, all other widths are listed in /W, either as
        ``first last width`` for a run of consecutive glyph ids that share a
        width or as ``first [w1 w2 ...]`` otherwise.
        '''
        glyphs = sorted(self.used_glyphs|{0})
        widths = {g:self.metrics.pdf_scale(w) for g, w in izip(glyphs,
                                        self.metrics.glyph_widths(glyphs))}
        most_common = Counter(widths.values()).most_common(1)[0][0]
        self.descendant_font['DW'] = most_common
        widths = {g:w for g, w in widths.items() if w != most_common}

        groups = Array()
        # Runs of consecutive glyph ids have a constant (index - glyph id).
        # The ids must be visited in sorted order for this to find maximal
        # runs and to make the output deterministic, dictionary iteration
        # order is arbitrary.
        for k, run in groupby(enumerate(sorted(widths)),
                              lambda pair: pair[0] - pair[1]):
            group = [gid for idx, gid in run]
            gwidths = [widths[gid] for gid in group]
            if len(set(gwidths)) == 1 and len(group) > 1:
                w = (group[0], group[-1], gwidths[0])
            else:
                w = (group[0], Array(gwidths))
            groups.extend(w)
        self.descendant_font['W'] = objects.add(groups)
class FontManager(object):
def __init__(self, objects):
self.objects = objects
self.std_map = {}
self.font_map = {}
self.fonts = []
def add_font(self, font_metrics, glyph_ids):
    '''
    Record that the glyphs glyph_ids (a set) of the font described by
    font_metrics are used, creating the Font object for it on first use.
    Returns the reference to the font dictionary.
    '''
    try:
        fontref, font = self.font_map[font_metrics]
    except KeyError:
        # First time this font is seen
        self.fonts.append(Font(font_metrics, len(self.fonts),
            self.objects))
        font = self.fonts[-1]
        fontref = self.objects.add(font.font_dict)
        self.font_map[font_metrics] = (fontref, font)
    font.used_glyphs |= glyph_ids
    return fontref
def add_standard_font(self, name):
if name not in STANDARD_FONTS:
@ -33,3 +157,7 @@ class FontManager(object):
}))
return self.std_map[name]
def embed_fonts(self):
    'Call embed() on every font added via add_font(), in order of addition'
    store = self.objects
    for embedded_font in self.fonts:
        embedded_font.embed(store)

View File

@ -14,7 +14,8 @@ from collections import namedtuple
from calibre.constants import (__appname__, __version__)
from calibre.ebooks.pdf.render.common import (
Reference, EOL, serialize, Stream, Dictionary, String, Name, Array)
Reference, EOL, serialize, Stream, Dictionary, String, Name, Array,
GlyphIndex)
from calibre.ebooks.pdf.render.fonts import FontManager
PDFVER = b'%PDF-1.6'
@ -357,9 +358,24 @@ class PDFStream(object):
name = self.current_page.add_font(fontref)
text_object.pdf_serialize(self.current_page, name)
def draw_glyph_run(self, transform, size, font_metrics, glyphs):
    '''
    Write a text object (BT ... ET) that draws a run of glyphs from a single
    font onto the current page.

    :param transform: The six numbers written as the operands of Tm
    :param size: The font size, written as the operand of Tf
    :param font_metrics: Metrics of the font, handed to the font manager
                         which returns the reference for the font
    :param glyphs: Sequence of (dx, dy, glyph_id) tuples. dx and dy are
                   written as the operands of a Td preceding each glyph.
                   It is iterated twice, so it must not be a generator.
    '''
    def num(x):
        # PDF numbers cannot use exponent notation, which both %g and
        # unicode(float) produce for very small or very large values (for
        # example floating point noise in a baseline delta). Use fixed
        # point notation and drop the trailing zeros instead.
        ans = ('%.6f' % x).rstrip('0').rstrip('.')
        return '0' if ans == '-0' else ans

    glyph_ids = {x[-1] for x in glyphs}
    fontref = self.font_manager.add_font(font_metrics, glyph_ids)
    name = self.current_page.add_font(fontref)
    page = self.current_page
    page.write(b'BT ')
    serialize(Name(name), page)
    page.write(' %s Tf ' % num(size))
    page.write('%s Tm ' % ' '.join(map(num, transform)))
    for x, y, glyph_id in glyphs:
        page.write('%s %s Td ' % (num(x), num(y)))
        serialize(GlyphIndex(glyph_id, self.compress), page)
        page.write(' Tj ')
    page.write_line(b' ET')
def end(self):
if self.current_page.getvalue():
self.end_page()
self.font_manager.embed_fonts()
inforef = self.objects.add(self.info)
self.objects.pdf_serialize(self.stream)
self.write_line()

View File

@ -66,6 +66,8 @@ class Sfnt(object):
if table:
self.tables[table_tag] = self.TABLE_MAP.get(
table_tag, UnknownTable)(table)
self.sfnt_version = (b'\0\x01\0\0' if b'glyf' in self.tables
else b'OTTO')
def __getitem__(self, key):
return self.tables[key]
@ -102,8 +104,8 @@ class Sfnt(object):
ans[tag] = len(self[tag])
return ans
def __call__(self):
stream = BytesIO()
def __call__(self, stream=None):
stream = BytesIO() if stream is None else stream
def spack(*args):
stream.write(pack(*args))

View File

@ -8,7 +8,7 @@ __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from itertools import izip
from struct import unpack_from, pack
from struct import unpack_from, pack, calcsize
from calibre.utils.fonts.sfnt import UnknownTable, DateTimeProperty, FixedProperty
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
@ -32,10 +32,10 @@ class HeadTable(UnknownTable):
'units_per_em' , 'H',
'_created' , 'q',
'_modified' , 'q',
'x_min' , 'H',
'y_min' , 'H',
'x_max' , 'H',
'y_max' , 'H',
'x_min' , 'h',
'y_min' , 'h',
'x_max' , 'h',
'y_max' , 'h',
'mac_style' , 'H',
'lowest_rec_ppem' , 'H',
'font_direction_hint' , 'h',
@ -99,21 +99,58 @@ class HorizontalHeader(UnknownTable):
class OS2Table(UnknownTable):
version_number = FixedProperty('_version')
def read_data(self):
if hasattr(self, 'char_width'): return
from calibre.utils.fonts.utils import get_font_characteristics
vals = get_font_characteristics(self.raw, raw_is_table=True,
return_all=True)
for i, attr in enumerate((
'_version', 'char_width', 'weight', 'width', 'fs_type',
'subscript_x_size', 'subscript_y_size', 'subscript_x_offset',
'subscript_y_offset', 'superscript_x_size', 'superscript_y_size',
'superscript_x_offset', 'superscript_y_offset', 'strikeout_size',
'strikeout_position', 'family_class', 'panose', 'selection',
'is_italic', 'is_bold', 'is_regular')):
setattr(self, attr, vals[i])
ver, = unpack_from(b'>H', self.raw)
field_types = [
'version' , 'H',
'average_char_width', 'h',
'weight_class', 'H',
'width_class', 'H',
'fs_type', 'H',
'subscript_x_size', 'h',
'subscript_y_size', 'h',
'subscript_x_offset', 'h',
'subscript_y_offset', 'h',
'superscript_x_size', 'h',
'superscript_y_size', 'h',
'superscript_x_offset', 'h',
'superscript_y_offset', 'h',
'strikeout_size', 'h',
'strikeout_position', 'h',
'family_class', 'h',
'panose', '10s',
'ranges', '16s',
'vendor_id', '4s',
'selection', 'H',
'first_char_index', 'H',
'last_char_index', 'H',
'typo_ascender', 'h',
'typo_descender', 'h',
'typo_line_gap', 'h',
'win_ascent', 'H',
'win_descent', 'H',
]
if ver > 1:
field_types += [
'code_page_range', '8s',
'x_height', 'h',
'cap_height', 'h',
'default_char', 'H',
'break_char', 'H',
'max_context', 'H',
]
self._fmt = ('>%s'%(''.join(field_types[1::2]))).encode('ascii')
self._fields = field_types[0::2]
for f, val in izip(self._fields, unpack_from(self._fmt, self.raw)):
setattr(self, f, val)
def zero_fstype(self):
    'Set the fs_type field to zero, both in the raw table data and on self'
    # fs_type is the two byte field that follows the four fields described
    # by the format string below
    start = calcsize(b'>HhHH')
    end = start + 2
    raw = self.raw
    self.fs_type = 0
    self.raw = raw[:start] + b'\0\0' + raw[end:]
class PostTable(UnknownTable):

View File

@ -21,25 +21,42 @@ class FontMetrics(object):
def __init__(self, sfnt):
self.sfnt = sfnt
self.head = self.sfnt[b'head']
hhea = self.sfnt[b'hhea']
hhea.read_data(self.sfnt[b'hmtx'])
self.ascent = hhea.ascender
self.descent = hhea.descender
self.bbox = ( self.head.x_min, self.head.y_min, self.head.x_max,
self.head.y_max )
self._advance_widths = hhea.advance_widths
self.cmap = self.sfnt[b'cmap']
self.head = self.sfnt[b'head']
self.units_per_em = self.head.units_per_em
self.os2 = self.sfnt[b'OS/2']
self.os2.read_data()
self.post = self.sfnt[b'post']
self.post.read_data()
self.names = get_all_font_names(self.sfnt[b'name'].raw, raw_is_table=True)
self.is_otf = 'CFF ' in self.sfnt.tables
self._sig = hash(self.sfnt[b'name'].raw)
# Metrics for embedding in PDF
pdf_scale = self.pdf_scale = lambda x:int(round(x*1000./self.units_per_em))
self.pdf_ascent, self.pdf_descent = map(pdf_scale,
(self.os2.typo_ascender, self.os2.typo_descender))
self.pdf_bbox = tuple(map(pdf_scale, self.bbox))
self.pdf_capheight = pdf_scale(getattr(self.os2, 'cap_height',
self.os2.typo_ascender))
self.pdf_avg_width = pdf_scale(self.os2.average_char_width)
self.pdf_stemv = 50 + int((self.os2.weight_class / 65.0) ** 2)
# The hash is self._sig, which __init__ sets to the hash of the raw data of
# the font's 'name' table.
def __hash__(self):
return self._sig
@property
def postscript_name(self):
if 'postscript_name' in self.names:
return self.names['postscript_name']
return self.names['full_name'].replace(' ', '')
return self.names['postscript_name'].replace(' ', '-')
return self.names['full_name'].replace(' ', '-')
def underline_thickness(self, pixel_size=12.0):
'Thickness for lines (in pixels) at the specified size'
@ -74,10 +91,14 @@ class FontMetrics(object):
chars = tuple(map(ord, string))
cmap = self.cmap.get_character_map(chars)
glyph_ids = (cmap[c] for c in chars)
last = len(self._advance_widths)
pixel_size_x = stretch * pixel_size
xscale = pixel_size_x / self.units_per_em
return tuple(self._advance_widths[i if i < last else -1]*xscale for i in glyph_ids)
return tuple(i*xscale for i in self.glyph_widths(glyph_ids))
def glyph_widths(self, glyph_ids):
    '''
    The entries of self._advance_widths for the specified glyph ids, as a
    tuple (no scaling is applied). A glyph id that is past the end of the
    table gets the last entry in the table.
    '''
    table = self._advance_widths
    count = len(table)
    ans = []
    for gid in glyph_ids:
        if gid < count:
            ans.append(table[gid])
        else:
            ans.append(table[-1])
    return tuple(ans)
def width(self, string, pixel_size=12.0, stretch=1.0):
'The width of the string at the specified pixel size and stretch, in pixels'
@ -86,9 +107,15 @@ class FontMetrics(object):
if __name__ == '__main__':
import sys
from calibre.utils.fonts.sfnt.container import Sfnt
with open(sys.argv[-2], 'rb') as f:
with open(sys.argv[-1], 'rb') as f:
raw = f.read()
sfnt = Sfnt(raw)
m = FontMetrics(sfnt)
print (m.advance_widths(sys.argv[-1]))
print ('Ascent:', m.pdf_ascent)
print ('Descent:', m.pdf_descent)
print ('PDF BBox:', m.pdf_bbox)
print ('CapHeight:', m.pdf_capheight)
print ('AvgWidth:', m.pdf_avg_width)
print ('ItalicAngle', m.post.italic_angle)
print ('StemV', m.pdf_stemv)

View File

@ -64,11 +64,8 @@ def get_font_characteristics(raw, raw_is_table=False, return_all=False):
offset = struct.calcsize(common_fields)
panose = struct.unpack_from(b'>10B', os2_table, offset)
offset += 10
(range1,) = struct.unpack_from(b'>L', os2_table, offset)
offset += struct.calcsize(b'>L')
if version > 0:
range2, range3, range4 = struct.unpack_from(b'>3L', os2_table, offset)
offset += struct.calcsize(b'>3L')
(range1, range2, range3, range4) = struct.unpack_from(b'>4L', os2_table, offset)
offset += struct.calcsize(b'>4L')
vendor_id = os2_table[offset:offset+4]
vendor_id
offset += 4