mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merging fonts now works for truetype fonts
This commit is contained in:
parent
c383a2ce25
commit
b573c33d1c
@ -32,6 +32,7 @@ from calibre.ebooks.pdf.render.serialize import PDFStream
|
||||
from calibre.gui2 import setup_unix_signals
|
||||
from calibre.gui2.webengine import secure_webengine
|
||||
from calibre.utils.fonts.sfnt.container import Sfnt, UnsupportedFont
|
||||
from calibre.utils.fonts.sfnt.merge import merge_truetype_fonts_for_pdf
|
||||
from calibre.utils.logging import default_log
|
||||
from calibre.utils.podofo import (
|
||||
get_podofo, remove_unused_fonts, set_metadata_implementation
|
||||
@ -600,17 +601,19 @@ def merge_w_arrays(arrays):
|
||||
|
||||
|
||||
def merge_font(fonts):
    '''
    Merge a group of font dictionaries that share a base font into one font.

    The entry with the largest ``Data`` is chosen as the base font. The merged
    ``W``/``W2`` arrays and the merged sfnt of all non-Type0 fonts in the group
    are stored on it. ``fonts`` is sorted in place and the base font entry is
    modified.

    Returns ``(t0_font, base_font, references_to_drop)``. ``t0_font`` is the
    entry whose ``DescendantFont`` is the base font's ``Reference``.
    ``references_to_drop`` holds the ``Reference`` of every other entry in
    ``fonts``.
    '''
    # TODO: Check if the ToUnicode entry in the Type0 dict needs to be merged

    # choose the largest font as the base font
    fonts.sort(key=lambda f: len(f['Data'] or b''), reverse=True)
    base_font = fonts[0]
    t0_font = next(f for f in fonts if f['DescendantFont'] == base_font['Reference'])
    # The base font stays in this list so that its own widths and glyphs take
    # part in the merge
    descendant_fonts = [f for f in fonts if f['Subtype'] != 'Type0']
    for key in ('W', 'W2'):
        # filter(None, ...) drops missing/empty arrays; the first argument of
        # filter() must be a callable or None
        arrays = tuple(filter(None, (f[key] for f in descendant_fonts)))
        base_font[key] = merge_w_arrays(arrays)
    base_font['sfnt'] = merge_truetype_fonts_for_pdf(*(f['sfnt'] for f in descendant_fonts))
    references_to_drop = tuple(f['Reference'] for f in fonts if f is not base_font and f is not t0_font)
    return t0_font, base_font, references_to_drop
|
||||
|
||||
|
||||
def merge_fonts(pdf_doc):
|
||||
@ -639,9 +642,16 @@ def merge_fonts(pdf_doc):
|
||||
|
||||
for f in all_fonts:
|
||||
base_font_map.setdefault(f['BaseFont'], []).append(f)
|
||||
replacements = {}
|
||||
items = []
|
||||
for name, fonts in iteritems(base_font_map):
|
||||
if mergeable(fonts):
|
||||
merge_font(fonts)
|
||||
t0_font, base_font, references_to_drop = merge_font(fonts)
|
||||
for ref in references_to_drop:
|
||||
replacements[ref] = t0_font['Reference']
|
||||
data = base_font['sfnt']()[0]
|
||||
items.append((base_font['Reference'], base_font['W'] or [], base_font['W2'] or [], data))
|
||||
pdf_doc.merge_fonts(tuple(items), replacements)
|
||||
|
||||
|
||||
def test_merge_fonts():
|
||||
|
@ -70,8 +70,10 @@ class CompositeGlyph(SimpleGlyph):
|
||||
|
||||
class GlyfTable(UnknownTable):
|
||||
|
||||
def glyph_data(self, offset, length):
|
||||
def glyph_data(self, offset, length, as_raw=False):
|
||||
raw = self.raw[offset:offset+length]
|
||||
if as_raw:
|
||||
return raw
|
||||
num_of_countours = unpack_from(b'>h', raw)[0] if raw else 0
|
||||
if num_of_countours >= 0:
|
||||
return SimpleGlyph(num_of_countours, raw)
|
||||
|
@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
from struct import calcsize, unpack_from, pack
|
||||
from operator import itemgetter
|
||||
from itertools import repeat
|
||||
|
||||
from calibre.utils.fonts.sfnt import UnknownTable
|
||||
from polyglot.builtins import iteritems, range
|
||||
@ -32,12 +33,13 @@ class LocaTable(UnknownTable):
|
||||
next_offset = self.offset_map[glyph_id+1]
|
||||
return offset, next_offset - offset
|
||||
|
||||
def subset(self, resolved_glyph_map):
|
||||
def update(self, resolved_glyph_map):
|
||||
'''
|
||||
Update this table to contain pointers only to the glyphs in
|
||||
resolved_glyph_map which must be a map of glyph_ids to (offset, sz)
|
||||
'''
|
||||
self.offset_map = [0 for i in self.offset_map]
|
||||
max_glyph_id = max(resolved_glyph_map or (0,))
|
||||
self.offset_map = list(repeat(0, max_glyph_id + 2))
|
||||
glyphs = [(glyph_id, x[0], x[1]) for glyph_id, x in
|
||||
iteritems(resolved_glyph_map)]
|
||||
glyphs.sort(key=itemgetter(1))
|
||||
@ -55,6 +57,7 @@ class LocaTable(UnknownTable):
|
||||
vals = [i//2 for i in self.offset_map]
|
||||
|
||||
self.raw = pack(('>%d%s'%(len(vals), self.fmt)).encode('ascii'), *vals)
|
||||
subset = update
|
||||
|
||||
def dump_glyphs(self, sfnt):
|
||||
if not hasattr(self, 'offset_map'):
|
||||
|
32
src/calibre/utils/fonts/sfnt/merge.py
Normal file
32
src/calibre/utils/fonts/sfnt/merge.py
Normal file
@ -0,0 +1,32 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
# License: GPL v3 Copyright: 2019, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from collections import OrderedDict
|
||||
from functools import partial
|
||||
|
||||
|
||||
def merge_truetype_fonts_for_pdf(*fonts):
    '''
    Merge the glyphs of several TrueType fonts into the first font.

    Only the glyf and loca tables are merged, all other tables are ignored.
    When a glyph id is present in more than one font, the data from the
    earliest font with a non-empty entry for that id is used. Returns the
    first font, after updating its glyf and loca tables.
    '''
    glyph_store = {}
    for sfnt in fonts:
        loca_table, glyf_table = sfnt[b'loca'], sfnt[b'glyf']
        loca_table.load_offsets(sfnt[b'head'], sfnt[b'maxp'])
        glyph_count = len(loca_table.offset_map) - 1
        for gid in range(glyph_count):
            if gid in glyph_store:
                # an earlier font already supplied this glyph
                continue
            start, size = loca_table.glyph_location(gid)
            if size > 0:
                glyph_store[gid] = glyf_table.glyph_data(start, size, as_raw=True)

    merged = fonts[0]
    merged_loca = merged[b'loca']
    merged_glyf = merged[b'glyf']
    fetch = glyph_store.__getitem__
    # glyph id -> zero-argument callable returning that glyph's raw data,
    # in ascending glyph id order
    glyph_map = OrderedDict(
        (gid, partial(fetch, gid)) for gid in sorted(glyph_store))
    new_offsets = merged_glyf.update(glyph_map)
    merged_loca.update(new_offsets)
    return merged
|
@ -738,6 +738,9 @@ static PyMethodDef PDFDoc_methods[] = {
|
||||
{"remove_fonts", (PyCFunction)remove_fonts, METH_VARARGS,
|
||||
"remove_fonts() -> Remove the specified font objects."
|
||||
},
|
||||
{"merge_fonts", (PyCFunction)merge_fonts, METH_VARARGS,
|
||||
"merge_fonts() -> Merge the specified fonts."
|
||||
},
|
||||
{"delete_pages", (PyCFunction)PDFDoc_delete_pages, METH_VARARGS,
|
||||
"delete_page(page_num, count=1) -> Delete the specified pages from the pdf."
|
||||
},
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "global.h"
|
||||
#include <iostream>
|
||||
#include <stack>
|
||||
#include <unordered_map>
|
||||
|
||||
using namespace pdf;
|
||||
|
||||
@ -17,7 +18,7 @@ ref_as_tuple(const PdfReference &ref) {
|
||||
return Py_BuildValue("kk", num, generation);
|
||||
}
|
||||
|
||||
static inline const PdfObject*
|
||||
static inline PdfObject*
|
||||
get_font_file(const PdfObject *descriptor) {
|
||||
PdfObject *ff = descriptor->GetIndirectKey("FontFile");
|
||||
if (!ff) ff = descriptor->GetIndirectKey("FontFile2");
|
||||
@ -25,7 +26,7 @@ get_font_file(const PdfObject *descriptor) {
|
||||
return ff;
|
||||
}
|
||||
|
||||
static void
|
||||
static inline void
|
||||
remove_font(PdfVecObjects &objects, PdfObject *font) {
|
||||
PdfObject *descriptor = font->GetIndirectKey("FontDescriptor");
|
||||
if (descriptor) {
|
||||
@ -36,6 +37,40 @@ remove_font(PdfVecObjects &objects, PdfObject *font) {
|
||||
delete objects.RemoveObject(font->Reference());
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
ref_as_integer(pdf_objnum num, pdf_gennum gen) {
|
||||
return static_cast<uint64_t>(num) | (static_cast<uint64_t>(gen) << 32);
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
ref_as_integer(const PdfReference &ref) { return ref_as_integer(ref.ObjectNumber(), ref.GenerationNumber()); }
|
||||
|
||||
|
||||
static inline void
|
||||
replace_font_references(PDFDoc *self, std::unordered_map<uint64_t, uint64_t> &ref_map) {
|
||||
int num_pages = self->doc->GetPageCount();
|
||||
for (int i = 0; i < num_pages; i++) {
|
||||
PdfPage *page = self->doc->GetPage(i);
|
||||
PdfDictionary &resources = page->GetResources()->GetDictionary();
|
||||
PdfObject* f = resources.GetKey("Font");
|
||||
if (f && f->IsDictionary()) {
|
||||
const PdfDictionary &font = f->GetDictionary();
|
||||
PdfDictionary new_font = PdfDictionary(font);
|
||||
for (auto &k : font.GetKeys()) {
|
||||
if (k.second->IsReference()) {
|
||||
uint64_t key = ref_as_integer(k.second->GetReference()), r;
|
||||
try {
|
||||
r = ref_map.at(key);
|
||||
} catch (const std::out_of_range &err) { continue; }
|
||||
PdfReference new_ref(static_cast<uint32_t>(r & 0xffffffff), r >> 32);
|
||||
new_font.AddKey(k.first.GetName(), new_ref);
|
||||
}
|
||||
}
|
||||
resources.AddKey("Font", new_font);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
used_fonts_in_page(PdfPage *page, int page_num, PyObject *ans) {
|
||||
PdfContentsTokenizer tokenizer(page);
|
||||
@ -91,6 +126,34 @@ convert_w_array(const PdfArray &w) {
|
||||
return ans.release();
|
||||
}
|
||||
|
||||
#if PY_MAJOR_VERSION > 2
|
||||
#define py_as_long_long PyLong_AsLongLong
|
||||
#else
|
||||
static inline long long
|
||||
py_as_long_long(const PyObject *x) {
|
||||
if (PyInt_Check(x)) return PyInt_AS_LONG(x);
|
||||
return PyLong_AsLongLong(x);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
convert_w_array(PyObject *src, PdfArray &dest) {
|
||||
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(src); i++) {
|
||||
PyObject *item = PyList_GET_ITEM(src, i);
|
||||
if (PyFloat_Check(item)) {
|
||||
dest.push_back(PdfObject(PyFloat_AS_DOUBLE(item)));
|
||||
} else if (PyList_Check(item)) {
|
||||
PdfArray sub;
|
||||
convert_w_array(item, sub);
|
||||
dest.push_back(sub);
|
||||
} else {
|
||||
pdf_int64 val = py_as_long_long(item);
|
||||
if (val == -1 && PyErr_Occurred()) { PyErr_Print(); continue; }
|
||||
dest.push_back(PdfObject(val));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
PyObject*
|
||||
list_fonts(PDFDoc *self, PyObject *args) {
|
||||
@ -195,4 +258,57 @@ remove_fonts(PDFDoc *self, PyObject *args) {
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
// merge_fonts(items, replacements) -> None
//
// replacements: dict mapping the (num, generation) reference of each font to
//   drop to the (num, generation) reference of the font that replaces it.
//   Every dropped font is removed from the document and the pages' font
//   resources are redirected to the replacement.
// items: tuple of ((num, generation), W, W2, data) entries. For each font
//   that exists in the document, non-empty W / W2 lists are written to its
//   "W" / "W2" keys, and data replaces the contents of the font file stream
//   found via the font's FontDescriptor.
//
// Returns NULL with a Python exception set if an argument has the wrong shape.
// NOTE(review): no PoDoFo exception is caught in this function; confirm
// whether the calls below can throw and how callers expect that to be handled.
PyObject*
merge_fonts(PDFDoc *self, PyObject *args) {
    PyObject *items, *replacements;
    if (!PyArg_ParseTuple(args, "O!O!", &PyTuple_Type, &items, &PyDict_Type, &replacements)) return NULL;
    std::unordered_map<uint64_t, uint64_t> ref_map;
    PdfVecObjects &objects = self->doc->GetObjects();
    PyObject *key, *value;
    Py_ssize_t pos = 0;
    while (PyDict_Next(replacements, &pos, &key, &value)) {
        unsigned long num, gen, new_num, new_gen;
        // Parse both halves of the entry before touching the document, so a
        // malformed value cannot leave a font removed without a replacement
        if (!PyArg_ParseTuple(key, "kk", &num, &gen)) return NULL;
        if (!PyArg_ParseTuple(value, "kk", &new_num, &new_gen)) return NULL;
        PdfReference ref(num, gen);
        PdfObject *font = objects.GetObject(ref);
        if (font) remove_font(objects, font);
        ref_map[ref_as_integer(num, gen)] = ref_as_integer(new_num, new_gen);
    }
    if (!ref_map.empty()) replace_font_references(self, ref_map);

    for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(items); i++) {
        long num, gen;
        PyObject *W, *W2;
        const char *data;
        Py_ssize_t sz;
        if (!PyArg_ParseTuple(PyTuple_GET_ITEM(items, i), "(ll)O!O!s#", &num, &gen, &PyList_Type, &W, &PyList_Type, &W2, &data, &sz)) return NULL;
        PdfReference ref(num, gen);
        PdfObject *font = objects.GetObject(ref);
        if (!font) continue;
        if (PyObject_IsTrue(W)) {
            PdfArray w;
            convert_w_array(W, w);
            font->GetDictionary().AddKey("W", w);
        }
        if (PyObject_IsTrue(W2)) {
            PdfArray w;
            convert_w_array(W2, w);
            font->GetDictionary().AddKey("W2", w);
        }
        const PdfObject *descriptor = font->GetIndirectKey("FontDescriptor");
        if (!descriptor) continue;
        PdfObject *ff = get_font_file(descriptor);
        // A descriptor without an embedded font file has nothing to replace
        if (!ff) continue;
        PdfStream *stream = ff->GetStream();
        if (stream) stream->Set(data, sz);
    }
    Py_RETURN_NONE;
}
|
||||
|
||||
}
|
||||
|
@ -88,5 +88,6 @@ extern "C" {
|
||||
PyObject* list_fonts(PDFDoc*, PyObject*);
|
||||
PyObject* used_fonts_in_page_range(PDFDoc *self, PyObject *args);
|
||||
PyObject* remove_fonts(PDFDoc *self, PyObject *args);
|
||||
PyObject* merge_fonts(PDFDoc *self, PyObject *args);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user