mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merging fonts now works for truetype fonts
This commit is contained in:
parent
c383a2ce25
commit
b573c33d1c
@ -32,6 +32,7 @@ from calibre.ebooks.pdf.render.serialize import PDFStream
|
|||||||
from calibre.gui2 import setup_unix_signals
|
from calibre.gui2 import setup_unix_signals
|
||||||
from calibre.gui2.webengine import secure_webengine
|
from calibre.gui2.webengine import secure_webengine
|
||||||
from calibre.utils.fonts.sfnt.container import Sfnt, UnsupportedFont
|
from calibre.utils.fonts.sfnt.container import Sfnt, UnsupportedFont
|
||||||
|
from calibre.utils.fonts.sfnt.merge import merge_truetype_fonts_for_pdf
|
||||||
from calibre.utils.logging import default_log
|
from calibre.utils.logging import default_log
|
||||||
from calibre.utils.podofo import (
|
from calibre.utils.podofo import (
|
||||||
get_podofo, remove_unused_fonts, set_metadata_implementation
|
get_podofo, remove_unused_fonts, set_metadata_implementation
|
||||||
@ -600,17 +601,19 @@ def merge_w_arrays(arrays):
|
|||||||
|
|
||||||
|
|
||||||
def merge_font(fonts):
|
def merge_font(fonts):
|
||||||
# TODO: Check if the ToUnicode entry in the Type) dict needs to be merged
|
# TODO: Check if the ToUnicode entry in the Type0 dict needs to be merged
|
||||||
|
|
||||||
# choose the largest font as the base font
|
# choose the largest font as the base font
|
||||||
fonts.sort(key=lambda f: len(f['Data'] or b''), reverse=True)
|
fonts.sort(key=lambda f: len(f['Data'] or b''), reverse=True)
|
||||||
base_font = fonts[0]
|
base_font = fonts[0]
|
||||||
t0_font = next(f for f in fonts if f['DescendantFont'] == base_font['Reference'])
|
t0_font = next(f for f in fonts if f['DescendantFont'] == base_font['Reference'])
|
||||||
descendant_fonts = [f for f in fonts if f['Subtype'] != 'Type0' and f is not base_font]
|
descendant_fonts = [f for f in fonts if f['Subtype'] != 'Type0']
|
||||||
for key in ('W', 'W2'):
|
for key in ('W', 'W2'):
|
||||||
arrays = tuple(filter(True, (f[key] for f in descendant_fonts)))
|
arrays = tuple(filter(None, (f[key] for f in descendant_fonts)))
|
||||||
base_font[key] = merge_w_arrays(arrays)
|
base_font[key] = merge_w_arrays(arrays)
|
||||||
t0_font
|
base_font['sfnt'] = merge_truetype_fonts_for_pdf(*(f['sfnt'] for f in descendant_fonts))
|
||||||
|
references_to_drop = tuple(f['Reference'] for f in fonts if f is not base_font and f is not t0_font)
|
||||||
|
return t0_font, base_font, references_to_drop
|
||||||
|
|
||||||
|
|
||||||
def merge_fonts(pdf_doc):
|
def merge_fonts(pdf_doc):
|
||||||
@ -639,9 +642,16 @@ def merge_fonts(pdf_doc):
|
|||||||
|
|
||||||
for f in all_fonts:
|
for f in all_fonts:
|
||||||
base_font_map.setdefault(f['BaseFont'], []).append(f)
|
base_font_map.setdefault(f['BaseFont'], []).append(f)
|
||||||
|
replacements = {}
|
||||||
|
items = []
|
||||||
for name, fonts in iteritems(base_font_map):
|
for name, fonts in iteritems(base_font_map):
|
||||||
if mergeable(fonts):
|
if mergeable(fonts):
|
||||||
merge_font(fonts)
|
t0_font, base_font, references_to_drop = merge_font(fonts)
|
||||||
|
for ref in references_to_drop:
|
||||||
|
replacements[ref] = t0_font['Reference']
|
||||||
|
data = base_font['sfnt']()[0]
|
||||||
|
items.append((base_font['Reference'], base_font['W'] or [], base_font['W2'] or [], data))
|
||||||
|
pdf_doc.merge_fonts(tuple(items), replacements)
|
||||||
|
|
||||||
|
|
||||||
def test_merge_fonts():
|
def test_merge_fonts():
|
||||||
|
@ -70,8 +70,10 @@ class CompositeGlyph(SimpleGlyph):
|
|||||||
|
|
||||||
class GlyfTable(UnknownTable):
|
class GlyfTable(UnknownTable):
|
||||||
|
|
||||||
def glyph_data(self, offset, length):
|
def glyph_data(self, offset, length, as_raw=False):
|
||||||
raw = self.raw[offset:offset+length]
|
raw = self.raw[offset:offset+length]
|
||||||
|
if as_raw:
|
||||||
|
return raw
|
||||||
num_of_countours = unpack_from(b'>h', raw)[0] if raw else 0
|
num_of_countours = unpack_from(b'>h', raw)[0] if raw else 0
|
||||||
if num_of_countours >= 0:
|
if num_of_countours >= 0:
|
||||||
return SimpleGlyph(num_of_countours, raw)
|
return SimpleGlyph(num_of_countours, raw)
|
||||||
|
@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
from struct import calcsize, unpack_from, pack
|
from struct import calcsize, unpack_from, pack
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
|
from itertools import repeat
|
||||||
|
|
||||||
from calibre.utils.fonts.sfnt import UnknownTable
|
from calibre.utils.fonts.sfnt import UnknownTable
|
||||||
from polyglot.builtins import iteritems, range
|
from polyglot.builtins import iteritems, range
|
||||||
@ -32,12 +33,13 @@ class LocaTable(UnknownTable):
|
|||||||
next_offset = self.offset_map[glyph_id+1]
|
next_offset = self.offset_map[glyph_id+1]
|
||||||
return offset, next_offset - offset
|
return offset, next_offset - offset
|
||||||
|
|
||||||
def subset(self, resolved_glyph_map):
|
def update(self, resolved_glyph_map):
|
||||||
'''
|
'''
|
||||||
Update this table to contain pointers only to the glyphs in
|
Update this table to contain pointers only to the glyphs in
|
||||||
resolved_glyph_map which must be a map of glyph_ids to (offset, sz)
|
resolved_glyph_map which must be a map of glyph_ids to (offset, sz)
|
||||||
'''
|
'''
|
||||||
self.offset_map = [0 for i in self.offset_map]
|
max_glyph_id = max(resolved_glyph_map or (0,))
|
||||||
|
self.offset_map = list(repeat(0, max_glyph_id + 2))
|
||||||
glyphs = [(glyph_id, x[0], x[1]) for glyph_id, x in
|
glyphs = [(glyph_id, x[0], x[1]) for glyph_id, x in
|
||||||
iteritems(resolved_glyph_map)]
|
iteritems(resolved_glyph_map)]
|
||||||
glyphs.sort(key=itemgetter(1))
|
glyphs.sort(key=itemgetter(1))
|
||||||
@ -55,6 +57,7 @@ class LocaTable(UnknownTable):
|
|||||||
vals = [i//2 for i in self.offset_map]
|
vals = [i//2 for i in self.offset_map]
|
||||||
|
|
||||||
self.raw = pack(('>%d%s'%(len(vals), self.fmt)).encode('ascii'), *vals)
|
self.raw = pack(('>%d%s'%(len(vals), self.fmt)).encode('ascii'), *vals)
|
||||||
|
subset = update
|
||||||
|
|
||||||
def dump_glyphs(self, sfnt):
|
def dump_glyphs(self, sfnt):
|
||||||
if not hasattr(self, 'offset_map'):
|
if not hasattr(self, 'offset_map'):
|
||||||
|
32
src/calibre/utils/fonts/sfnt/merge.py
Normal file
32
src/calibre/utils/fonts/sfnt/merge.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
# License: GPL v3 Copyright: 2019, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
from collections import OrderedDict
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
|
|
||||||
|
def merge_truetype_fonts_for_pdf(*fonts):
    '''
    Merge the glyphs of all ``fonts`` into the first font and return it.

    Every font must be indexable by the table tags ``b'loca'``, ``b'glyf'``,
    ``b'head'`` and ``b'maxp'``. Glyphs are keyed by glyph id: when several
    fonts carry a non-empty glyph for the same id, the one from the
    earliest font in the argument list is kept. Zero-length glyphs are
    ignored.

    The first font is modified in place: its glyf table is rebuilt from the
    merged glyphs and its loca table is updated with whatever the glyf
    table's ``update()`` returns.
    '''
    # only merges the glyf and loca tables, ignoring all other tables
    raw_glyphs = {}
    for font in fonts:
        loca_table, glyf_table = font[b'loca'], font[b'glyf']
        loca_table.load_offsets(font[b'head'], font[b'maxp'])
        # offset_map has one more entry than there are glyphs
        for gid in range(len(loca_table.offset_map) - 1):
            if gid in raw_glyphs:
                # an earlier font already supplied this glyph
                continue
            offset, size = loca_table.glyph_location(gid)
            if size > 0:
                raw_glyphs[gid] = glyf_table.glyph_data(offset, size, as_raw=True)

    merged = fonts[0]
    loca_table = merged[b'loca']
    glyf_table = merged[b'glyf']
    # Map glyph id -> zero-argument callable returning the raw glyph bytes,
    # in ascending glyph id order.
    glyph_map = OrderedDict(
        (gid, partial(raw_glyphs.__getitem__, gid)) for gid in sorted(raw_glyphs)
    )
    new_offsets = glyf_table.update(glyph_map)
    loca_table.update(new_offsets)
    return merged
|
@ -738,6 +738,9 @@ static PyMethodDef PDFDoc_methods[] = {
|
|||||||
{"remove_fonts", (PyCFunction)remove_fonts, METH_VARARGS,
|
{"remove_fonts", (PyCFunction)remove_fonts, METH_VARARGS,
|
||||||
"remove_fonts() -> Remove the specified font objects."
|
"remove_fonts() -> Remove the specified font objects."
|
||||||
},
|
},
|
||||||
|
{"merge_fonts", (PyCFunction)merge_fonts, METH_VARARGS,
|
||||||
|
"merge_fonts() -> Merge the specified fonts."
|
||||||
|
},
|
||||||
{"delete_pages", (PyCFunction)PDFDoc_delete_pages, METH_VARARGS,
|
{"delete_pages", (PyCFunction)PDFDoc_delete_pages, METH_VARARGS,
|
||||||
"delete_page(page_num, count=1) -> Delete the specified pages from the pdf."
|
"delete_page(page_num, count=1) -> Delete the specified pages from the pdf."
|
||||||
},
|
},
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
#include "global.h"
|
#include "global.h"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <stack>
|
#include <stack>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
using namespace pdf;
|
using namespace pdf;
|
||||||
|
|
||||||
@ -17,7 +18,7 @@ ref_as_tuple(const PdfReference &ref) {
|
|||||||
return Py_BuildValue("kk", num, generation);
|
return Py_BuildValue("kk", num, generation);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline const PdfObject*
|
static inline PdfObject*
|
||||||
get_font_file(const PdfObject *descriptor) {
|
get_font_file(const PdfObject *descriptor) {
|
||||||
PdfObject *ff = descriptor->GetIndirectKey("FontFile");
|
PdfObject *ff = descriptor->GetIndirectKey("FontFile");
|
||||||
if (!ff) ff = descriptor->GetIndirectKey("FontFile2");
|
if (!ff) ff = descriptor->GetIndirectKey("FontFile2");
|
||||||
@ -25,7 +26,7 @@ get_font_file(const PdfObject *descriptor) {
|
|||||||
return ff;
|
return ff;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static inline void
|
||||||
remove_font(PdfVecObjects &objects, PdfObject *font) {
|
remove_font(PdfVecObjects &objects, PdfObject *font) {
|
||||||
PdfObject *descriptor = font->GetIndirectKey("FontDescriptor");
|
PdfObject *descriptor = font->GetIndirectKey("FontDescriptor");
|
||||||
if (descriptor) {
|
if (descriptor) {
|
||||||
@ -36,6 +37,40 @@ remove_font(PdfVecObjects &objects, PdfObject *font) {
|
|||||||
delete objects.RemoveObject(font->Reference());
|
delete objects.RemoveObject(font->Reference());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline uint64_t
|
||||||
|
ref_as_integer(pdf_objnum num, pdf_gennum gen) {
|
||||||
|
return static_cast<uint64_t>(num) | (static_cast<uint64_t>(gen) << 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uint64_t
|
||||||
|
ref_as_integer(const PdfReference &ref) { return ref_as_integer(ref.ObjectNumber(), ref.GenerationNumber()); }
|
||||||
|
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
replace_font_references(PDFDoc *self, std::unordered_map<uint64_t, uint64_t> &ref_map) {
|
||||||
|
int num_pages = self->doc->GetPageCount();
|
||||||
|
for (int i = 0; i < num_pages; i++) {
|
||||||
|
PdfPage *page = self->doc->GetPage(i);
|
||||||
|
PdfDictionary &resources = page->GetResources()->GetDictionary();
|
||||||
|
PdfObject* f = resources.GetKey("Font");
|
||||||
|
if (f && f->IsDictionary()) {
|
||||||
|
const PdfDictionary &font = f->GetDictionary();
|
||||||
|
PdfDictionary new_font = PdfDictionary(font);
|
||||||
|
for (auto &k : font.GetKeys()) {
|
||||||
|
if (k.second->IsReference()) {
|
||||||
|
uint64_t key = ref_as_integer(k.second->GetReference()), r;
|
||||||
|
try {
|
||||||
|
r = ref_map.at(key);
|
||||||
|
} catch (const std::out_of_range &err) { continue; }
|
||||||
|
PdfReference new_ref(static_cast<uint32_t>(r & 0xffffffff), r >> 32);
|
||||||
|
new_font.AddKey(k.first.GetName(), new_ref);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resources.AddKey("Font", new_font);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
used_fonts_in_page(PdfPage *page, int page_num, PyObject *ans) {
|
used_fonts_in_page(PdfPage *page, int page_num, PyObject *ans) {
|
||||||
PdfContentsTokenizer tokenizer(page);
|
PdfContentsTokenizer tokenizer(page);
|
||||||
@ -91,6 +126,34 @@ convert_w_array(const PdfArray &w) {
|
|||||||
return ans.release();
|
return ans.release();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if PY_MAJOR_VERSION > 2
|
||||||
|
#define py_as_long_long PyLong_AsLongLong
|
||||||
|
#else
|
||||||
|
static inline long long
|
||||||
|
py_as_long_long(const PyObject *x) {
|
||||||
|
if (PyInt_Check(x)) return PyInt_AS_LONG(x);
|
||||||
|
return PyLong_AsLongLong(x);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static void
|
||||||
|
convert_w_array(PyObject *src, PdfArray &dest) {
|
||||||
|
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(src); i++) {
|
||||||
|
PyObject *item = PyList_GET_ITEM(src, i);
|
||||||
|
if (PyFloat_Check(item)) {
|
||||||
|
dest.push_back(PdfObject(PyFloat_AS_DOUBLE(item)));
|
||||||
|
} else if (PyList_Check(item)) {
|
||||||
|
PdfArray sub;
|
||||||
|
convert_w_array(item, sub);
|
||||||
|
dest.push_back(sub);
|
||||||
|
} else {
|
||||||
|
pdf_int64 val = py_as_long_long(item);
|
||||||
|
if (val == -1 && PyErr_Occurred()) { PyErr_Print(); continue; }
|
||||||
|
dest.push_back(PdfObject(val));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
PyObject*
|
PyObject*
|
||||||
list_fonts(PDFDoc *self, PyObject *args) {
|
list_fonts(PDFDoc *self, PyObject *args) {
|
||||||
@ -195,4 +258,57 @@ remove_fonts(PDFDoc *self, PyObject *args) {
|
|||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// merge_fonts(items, replacements) -> None
//
// replacements: dict mapping the (object number, generation) tuple of every
//     font that should disappear to the (object number, generation) tuple of
//     the font that replaces it. Each font named by a key is removed from the
//     document and the pages' /Font resources are re-pointed at the
//     replacement.
// items: tuple of ((num, gen), W, W2, data) entries, one per merged font.
//     W and W2 are lists; when non-empty they overwrite the font's /W and /W2
//     arrays. data is the new font program and replaces the contents of the
//     font file stream found via the font's /FontDescriptor.
//
// Fonts, descriptors or font file streams that cannot be found are skipped.
// Returns NULL with a Python exception set if an argument is malformed.
PyObject*
merge_fonts(PDFDoc *self, PyObject *args) {
    PyObject *items, *replacements;
    if (!PyArg_ParseTuple(args, "O!O!", &PyTuple_Type, &items, &PyDict_Type, &replacements)) return NULL;
    std::unordered_map<uint64_t, uint64_t> ref_map;
    PdfVecObjects &objects = self->doc->GetObjects();
    PyObject *key, *value;
    Py_ssize_t pos = 0;
    while (PyDict_Next(replacements, &pos, &key, &value)) {
        unsigned long num, gen, new_num, new_gen;
        // Parse both halves of the entry before touching the document, so a
        // malformed value cannot leave a font removed but not remapped.
        if (!PyArg_ParseTuple(key, "kk", &num, &gen)) return NULL;
        if (!PyArg_ParseTuple(value, "kk", &new_num, &new_gen)) return NULL;
        ref_map[ref_as_integer(num, gen)] = ref_as_integer(new_num, new_gen);
        PdfReference ref(num, gen);
        PdfObject *font = objects.GetObject(ref);
        if (font) remove_font(objects, font);
    }
    if (!ref_map.empty()) replace_font_references(self, ref_map);

    for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(items); i++) {
        long num, gen;
        PyObject *W, *W2;
        const char *data;
        Py_ssize_t sz;
        if (!PyArg_ParseTuple(PyTuple_GET_ITEM(items, i), "(ll)O!O!s#", &num, &gen, &PyList_Type, &W, &PyList_Type, &W2, &data, &sz)) return NULL;
        PdfReference ref(num, gen);
        PdfObject *font = objects.GetObject(ref);
        if (!font) continue;
        if (PyObject_IsTrue(W)) {
            PdfArray w;
            convert_w_array(W, w);
            font->GetDictionary().AddKey("W", w);
        }
        if (PyObject_IsTrue(W2)) {
            PdfArray w;
            convert_w_array(W2, w);
            font->GetDictionary().AddKey("W2", w);
        }
        const PdfObject *descriptor = font->GetIndirectKey("FontDescriptor");
        if (!descriptor) continue;
        // The descriptor need not carry any font file entry, in which case
        // there is no stream to replace; do not dereference a null pointer.
        PdfObject *ff = get_font_file(descriptor);
        if (!ff) continue;
        PdfStream *stream = ff->GetStream();
        if (stream) stream->Set(data, sz);
    }
    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -88,5 +88,6 @@ extern "C" {
|
|||||||
PyObject* list_fonts(PDFDoc*, PyObject*);
|
PyObject* list_fonts(PDFDoc*, PyObject*);
|
||||||
PyObject* used_fonts_in_page_range(PDFDoc *self, PyObject *args);
|
PyObject* used_fonts_in_page_range(PDFDoc *self, PyObject *args);
|
||||||
PyObject* remove_fonts(PDFDoc *self, PyObject *args);
|
PyObject* remove_fonts(PDFDoc *self, PyObject *args);
|
||||||
|
PyObject* merge_fonts(PDFDoc *self, PyObject *args);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user