diff --git a/src/calibre/ebooks/pdf/html_writer.py b/src/calibre/ebooks/pdf/html_writer.py
index 43608fff8c..20fc490735 100644
--- a/src/calibre/ebooks/pdf/html_writer.py
+++ b/src/calibre/ebooks/pdf/html_writer.py
@@ -32,6 +32,7 @@ from calibre.ebooks.pdf.render.serialize import PDFStream
from calibre.gui2 import setup_unix_signals
from calibre.gui2.webengine import secure_webengine
from calibre.utils.fonts.sfnt.container import Sfnt, UnsupportedFont
+from calibre.utils.fonts.sfnt.merge import merge_truetype_fonts_for_pdf
from calibre.utils.logging import default_log
from calibre.utils.podofo import (
get_podofo, remove_unused_fonts, set_metadata_implementation
@@ -600,17 +601,19 @@ def merge_w_arrays(arrays):
def merge_font(fonts):
+    # Merge a group of PDF font dictionaries into a single base font.
+    # fonts is sorted in place. Returns (t0_font, base_font, references_to_drop).
- # TODO: Check if the ToUnicode entry in the Type) dict needs to be merged
+ # TODO: Check if the ToUnicode entry in the Type0 dict needs to be merged
# choose the largest font as the base font
fonts.sort(key=lambda f: len(f['Data'] or b''), reverse=True)
base_font = fonts[0]
+    # the font (expected to be the Type0 wrapper) whose DescendantFont entry is the base font
t0_font = next(f for f in fonts if f['DescendantFont'] == base_font['Reference'])
- descendant_fonts = [f for f in fonts if f['Subtype'] != 'Type0' and f is not base_font]
+ descendant_fonts = [f for f in fonts if f['Subtype'] != 'Type0']
for key in ('W', 'W2'):
- arrays = tuple(filter(True, (f[key] for f in descendant_fonts)))
+ arrays = tuple(filter(None, (f[key] for f in descendant_fonts)))
base_font[key] = merge_w_arrays(arrays)
- t0_font
+    # merge the glyf/loca data of every non-Type0 font, the base font included
+ base_font['sfnt'] = merge_truetype_fonts_for_pdf(*(f['sfnt'] for f in descendant_fonts))
+    # every font other than the base font and t0_font is reported for removal
+ references_to_drop = tuple(f['Reference'] for f in fonts if f is not base_font and f is not t0_font)
+ return t0_font, base_font, references_to_drop
def merge_fonts(pdf_doc):
@@ -639,9 +642,16 @@ def merge_fonts(pdf_doc):
for f in all_fonts:
base_font_map.setdefault(f['BaseFont'], []).append(f)
+ replacements = {}
+ items = []
for name, fonts in iteritems(base_font_map):
if mergeable(fonts):
- merge_font(fonts)
+ t0_font, base_font, references_to_drop = merge_font(fonts)
+ for ref in references_to_drop:
+ replacements[ref] = t0_font['Reference']
+ data = base_font['sfnt']()[0]
+ items.append((base_font['Reference'], base_font['W'] or [], base_font['W2'] or [], data))
+ pdf_doc.merge_fonts(tuple(items), replacements)
def test_merge_fonts():
diff --git a/src/calibre/utils/fonts/sfnt/glyf.py b/src/calibre/utils/fonts/sfnt/glyf.py
index 6a403afb4f..ce28b6e16c 100644
--- a/src/calibre/utils/fonts/sfnt/glyf.py
+++ b/src/calibre/utils/fonts/sfnt/glyf.py
@@ -70,8 +70,10 @@ class CompositeGlyph(SimpleGlyph):
class GlyfTable(UnknownTable):
- def glyph_data(self, offset, length):
+ def glyph_data(self, offset, length, as_raw=False):
raw = self.raw[offset:offset+length]
+ if as_raw:
+ return raw
num_of_countours = unpack_from(b'>h', raw)[0] if raw else 0
if num_of_countours >= 0:
return SimpleGlyph(num_of_countours, raw)
diff --git a/src/calibre/utils/fonts/sfnt/loca.py b/src/calibre/utils/fonts/sfnt/loca.py
index 3d4c8ee94e..0c9bf617d0 100644
--- a/src/calibre/utils/fonts/sfnt/loca.py
+++ b/src/calibre/utils/fonts/sfnt/loca.py
@@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
from struct import calcsize, unpack_from, pack
from operator import itemgetter
+from itertools import repeat
from calibre.utils.fonts.sfnt import UnknownTable
from polyglot.builtins import iteritems, range
@@ -32,12 +33,13 @@ class LocaTable(UnknownTable):
next_offset = self.offset_map[glyph_id+1]
return offset, next_offset - offset
- def subset(self, resolved_glyph_map):
+ def update(self, resolved_glyph_map):
'''
Update this table to contain pointers only to the glyphs in
resolved_glyph_map which must be a map of glyph_ids to (offset, sz)
'''
- self.offset_map = [0 for i in self.offset_map]
+ max_glyph_id = max(resolved_glyph_map or (0,))
+ self.offset_map = list(repeat(0, max_glyph_id + 2))
glyphs = [(glyph_id, x[0], x[1]) for glyph_id, x in
iteritems(resolved_glyph_map)]
glyphs.sort(key=itemgetter(1))
@@ -55,6 +57,7 @@ class LocaTable(UnknownTable):
vals = [i//2 for i in self.offset_map]
self.raw = pack(('>%d%s'%(len(vals), self.fmt)).encode('ascii'), *vals)
+ subset = update
def dump_glyphs(self, sfnt):
if not hasattr(self, 'offset_map'):
diff --git a/src/calibre/utils/fonts/sfnt/merge.py b/src/calibre/utils/fonts/sfnt/merge.py
new file mode 100644
index 0000000000..6ef28eeda8
--- /dev/null
+++ b/src/calibre/utils/fonts/sfnt/merge.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+# License: GPL v3 Copyright: 2019, Kovid Goyal
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from collections import OrderedDict
+from functools import partial
+
+
+def merge_truetype_fonts_for_pdf(*fonts):
+    '''
+    Merge the glyf and loca tables of fonts into the first font and return
+    it, ignoring all other tables. For a glyph id present in several fonts,
+    the first non-empty glyph data encountered is kept.
+    '''
+    merged = {}  # glyph id -> raw glyph bytes
+    for sfnt in fonts:
+        loca_table, glyf_table = sfnt[b'loca'], sfnt[b'glyf']
+        loca_table.load_offsets(sfnt[b'head'], sfnt[b'maxp'])
+        for gid in range(len(loca_table.offset_map) - 1):
+            if gid in merged:
+                continue
+            start, size = loca_table.glyph_location(gid)
+            if size > 0:
+                merged[gid] = glyf_table.glyph_data(start, size, as_raw=True)
+
+    target = fonts[0]
+    # values are zero-argument callables; entries are ordered by glyph id
+    glyph_map = OrderedDict((gid, partial(merged.__getitem__, gid)) for gid in sorted(merged))
+    target[b'loca'].update(target[b'glyf'].update(glyph_map))
+    return target
diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp
index 84f57edb65..058fba489a 100644
--- a/src/calibre/utils/podofo/doc.cpp
+++ b/src/calibre/utils/podofo/doc.cpp
@@ -738,6 +738,9 @@ static PyMethodDef PDFDoc_methods[] = {
{"remove_fonts", (PyCFunction)remove_fonts, METH_VARARGS,
"remove_fonts() -> Remove the specified font objects."
},
+ {"merge_fonts", (PyCFunction)merge_fonts, METH_VARARGS,
+ "merge_fonts() -> Merge the specified fonts."
+ },
{"delete_pages", (PyCFunction)PDFDoc_delete_pages, METH_VARARGS,
"delete_page(page_num, count=1) -> Delete the specified pages from the pdf."
},
diff --git a/src/calibre/utils/podofo/fonts.cpp b/src/calibre/utils/podofo/fonts.cpp
index 967f46092a..fb629b10f9 100644
--- a/src/calibre/utils/podofo/fonts.cpp
+++ b/src/calibre/utils/podofo/fonts.cpp
@@ -8,6 +8,7 @@
#include "global.h"
#include
#include
+#include <unordered_map>
using namespace pdf;
@@ -17,7 +18,7 @@ ref_as_tuple(const PdfReference &ref) {
return Py_BuildValue("kk", num, generation);
}
-static inline const PdfObject*
+static inline PdfObject*
get_font_file(const PdfObject *descriptor) {
PdfObject *ff = descriptor->GetIndirectKey("FontFile");
if (!ff) ff = descriptor->GetIndirectKey("FontFile2");
@@ -25,7 +26,7 @@ get_font_file(const PdfObject *descriptor) {
return ff;
}
-static void
+static inline void
remove_font(PdfVecObjects &objects, PdfObject *font) {
PdfObject *descriptor = font->GetIndirectKey("FontDescriptor");
if (descriptor) {
@@ -36,6 +37,40 @@ remove_font(PdfVecObjects &objects, PdfObject *font) {
delete objects.RemoveObject(font->Reference());
}
+// Pack a PDF object reference into a single 64-bit integer: the generation
+// number is shifted left by 32 bits and OR-ed with the object number.
+static inline uint64_t
+ref_as_integer(pdf_objnum num, pdf_gennum gen) {
+    return static_cast<uint64_t>(num) | (static_cast<uint64_t>(gen) << 32);
+}
+
+// Overload that packs the object and generation numbers of a PdfReference.
+static inline uint64_t
+ref_as_integer(const PdfReference &ref) { return ref_as_integer(ref.ObjectNumber(), ref.GenerationNumber()); }
+
+
+// Rewrite the Font resource dictionary of every page so that entries which
+// refer to a font listed in ref_map refer to its replacement instead.
+// Both keys (old font) and values (replacement font) of ref_map are
+// references packed with ref_as_integer().
+static inline void
+replace_font_references(PDFDoc *self, std::unordered_map<uint64_t, uint64_t> &ref_map) {
+    int num_pages = self->doc->GetPageCount();
+    for (int i = 0; i < num_pages; i++) {
+        PdfPage *page = self->doc->GetPage(i);
+        PdfDictionary &resources = page->GetResources()->GetDictionary();
+        PdfObject* f = resources.GetKey("Font");
+        // NOTE(review): a Font entry that is not itself a dictionary (for
+        // example an indirect reference) is skipped here - confirm intended
+        if (f && f->IsDictionary()) {
+            const PdfDictionary &font = f->GetDictionary();
+            // Edit a copy so the dictionary being iterated is not modified
+            PdfDictionary new_font = PdfDictionary(font);
+            for (auto &k : font.GetKeys()) {
+                if (k.second->IsReference()) {
+                    auto found = ref_map.find(ref_as_integer(k.second->GetReference()));
+                    if (found == ref_map.end()) continue;  // this font is not being replaced
+                    const uint64_t r = found->second;
+                    // Undo the packing done by ref_as_integer()
+                    PdfReference new_ref(static_cast<pdf_objnum>(r & 0xffffffff), static_cast<pdf_gennum>(r >> 32));
+                    new_font.AddKey(k.first.GetName(), new_ref);
+                }
+            }
+            resources.AddKey("Font", new_font);
+        }
+    }
+}
+
static bool
used_fonts_in_page(PdfPage *page, int page_num, PyObject *ans) {
PdfContentsTokenizer tokenizer(page);
@@ -91,6 +126,34 @@ convert_w_array(const PdfArray &w) {
return ans.release();
}
+// py_as_long_long(x): convert a Python integer object to a C long long.
+// On Python 3 this is PyLong_AsLongLong itself; on Python 2 objects of the
+// int type are handled as well as those of the long type.
+#if PY_MAJOR_VERSION > 2
+#define py_as_long_long PyLong_AsLongLong
+#else
+static inline long long
+py_as_long_long(const PyObject *x) {
+    if (PyInt_Check(x)) return PyInt_AS_LONG(x);
+    // PyLong_AsLongLong takes a non-const PyObject*, so the const must be
+    // cast away explicitly for this to be valid C++
+    return PyLong_AsLongLong(const_cast<PyObject*>(x));
+}
+#endif
+
+// Append each item of the Python list src to dest. Floats are stored as
+// doubles, nested lists are converted recursively into nested arrays and any
+// other item is stored as an integer. An item that cannot be converted to an
+// integer has its error printed via PyErr_Print() and is left out.
+static void
+convert_w_array(PyObject *src, PdfArray &dest) {
+    Py_ssize_t idx = 0;
+    while (idx < PyList_GET_SIZE(src)) {
+        PyObject *entry = PyList_GET_ITEM(src, idx);
+        idx++;
+        if (PyFloat_Check(entry)) {
+            dest.push_back(PdfObject(PyFloat_AS_DOUBLE(entry)));
+            continue;
+        }
+        if (PyList_Check(entry)) {
+            PdfArray nested;
+            convert_w_array(entry, nested);
+            dest.push_back(nested);
+            continue;
+        }
+        pdf_int64 number = py_as_long_long(entry);
+        // -1 is also a legitimate value, so the error indicator decides
+        if (number == -1 && PyErr_Occurred()) PyErr_Print();
+        else dest.push_back(PdfObject(number));
+    }
+}
+
extern "C" {
PyObject*
list_fonts(PDFDoc *self, PyObject *args) {
@@ -195,4 +258,57 @@ remove_fonts(PDFDoc *self, PyObject *args) {
Py_RETURN_NONE;
}
+// merge_fonts(items, replacements): apply the result of a font merge to the
+// document.
+//
+// replacements is a dict whose keys and values are (object number,
+// generation) tuples. The font object named by each key is removed from the
+// document, and references to it in the Font resource dictionary of every
+// page are redirected to the font named by the corresponding value.
+//
+// items is a tuple of ((object number, generation), W, W2, data) entries,
+// where W and W2 are lists. For each entry whose font object exists,
+// non-empty W/W2 lists overwrite the font's W/W2 keys and data replaces the
+// contents of the font file stream found via its FontDescriptor.
+//
+// Returns None, or NULL with a Python exception set if an argument cannot be
+// parsed.
+PyObject*
+merge_fonts(PDFDoc *self, PyObject *args) {
+    PyObject *items, *replacements;
+    if (!PyArg_ParseTuple(args, "O!O!", &PyTuple_Type, &items, &PyDict_Type, &replacements)) return NULL;
+    // packed old reference -> packed replacement reference
+    std::unordered_map<uint64_t, uint64_t> ref_map;
+    PdfVecObjects &objects = self->doc->GetObjects();
+    PyObject *key, *value;
+    Py_ssize_t pos = 0;
+    size_t c = 0;
+    while (PyDict_Next(replacements, &pos, &key, &value)) {
+        c++;
+        unsigned long num, gen;
+        if (!PyArg_ParseTuple(key, "kk", &num, &gen)) return NULL;
+        uint64_t k = ref_as_integer(num, gen);
+        PdfReference ref(num, gen);
+        PdfObject *font = objects.GetObject(ref);
+        if (font) remove_font(objects, font);
+        if (!PyArg_ParseTuple(value, "kk", &num, &gen)) return NULL;
+        uint64_t v = ref_as_integer(num, gen);
+        ref_map[k] = v;
+    }
+    if (c > 0) replace_font_references(self, ref_map);
+
+    for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(items); i++) {
+        long num, gen;
+        PyObject *W, *W2;
+        const char *data;
+        Py_ssize_t sz;
+        if (!PyArg_ParseTuple(PyTuple_GET_ITEM(items, i), "(ll)O!O!s#", &num, &gen, &PyList_Type, &W, &PyList_Type, &W2, &data, &sz)) return NULL;
+        PdfReference ref(num, gen);
+        PdfObject *font = objects.GetObject(ref);
+        if (font) {
+            if (PyObject_IsTrue(W)) {
+                PdfArray w;
+                convert_w_array(W, w);
+                font->GetDictionary().AddKey("W", w);
+            }
+            if (PyObject_IsTrue(W2)) {
+                PdfArray w;
+                convert_w_array(W2, w);
+                font->GetDictionary().AddKey("W2", w);
+            }
+            const PdfObject *descriptor = font->GetIndirectKey("FontDescriptor");
+            if (descriptor) {
+                PdfObject *ff = get_font_file(descriptor);
+                // The descriptor may not reference any font file, in which
+                // case there is no stream to overwrite
+                if (ff) {
+                    PdfStream *stream = ff->GetStream();
+                    stream->Set(data, sz);
+                }
+            }
+        }
+    }
+    Py_RETURN_NONE;
+}
+
}
diff --git a/src/calibre/utils/podofo/global.h b/src/calibre/utils/podofo/global.h
index 2063d67ed6..7925ae73be 100644
--- a/src/calibre/utils/podofo/global.h
+++ b/src/calibre/utils/podofo/global.h
@@ -88,5 +88,6 @@ extern "C" {
PyObject* list_fonts(PDFDoc*, PyObject*);
PyObject* used_fonts_in_page_range(PDFDoc *self, PyObject *args);
PyObject* remove_fonts(PDFDoc *self, PyObject *args);
+PyObject* merge_fonts(PDFDoc *self, PyObject *args);
}
}