mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Remove unused fonts from generated PDF
This commit is contained in:
parent
24a344546e
commit
6caf7cf77a
@ -31,7 +31,9 @@ from calibre.ebooks.pdf.render.serialize import PDFStream
|
||||
from calibre.gui2 import setup_unix_signals
|
||||
from calibre.gui2.webengine import secure_webengine
|
||||
from calibre.utils.logging import default_log
|
||||
from calibre.utils.podofo import get_podofo, set_metadata_implementation
|
||||
from calibre.utils.podofo import (
|
||||
get_podofo, remove_unused_fonts, set_metadata_implementation
|
||||
)
|
||||
from calibre.utils.short_uuid import uuid4
|
||||
from polyglot.builtins import iteritems, map, range, unicode_type
|
||||
from polyglot.urllib import urlparse
|
||||
@ -563,11 +565,14 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
|
||||
add_toc(PDFOutlineRoot(pdf_doc), toc)
|
||||
report_progress(0.75, _('Added links to PDF content'))
|
||||
|
||||
# TODO: Remove unused fonts
|
||||
# TODO: Remove duplicate fonts
|
||||
# TODO: Subset and embed fonts before rendering PDF
|
||||
# TODO: Support for mathematics
|
||||
|
||||
num_removed = remove_unused_fonts(pdf_doc)
|
||||
if num_removed:
|
||||
log('Removed', num_removed, 'unused fonts')
|
||||
|
||||
if cover_data:
|
||||
add_cover(pdf_doc, cover_data, page_layout, opts)
|
||||
|
||||
|
@ -10,7 +10,7 @@ from calibre.constants import plugins, preferred_encoding
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.utils.ipc.simple_worker import WorkerError, fork_job
|
||||
from polyglot.builtins import range, unicode_type
|
||||
from polyglot.builtins import range, unicode_type, iteritems
|
||||
|
||||
|
||||
def get_podofo():
|
||||
@ -154,6 +154,23 @@ def list_fonts(pdf_doc):
|
||||
return ref_map
|
||||
|
||||
|
||||
def remove_unused_fonts(pdf_doc):
    """Remove the fonts that ``list_fonts()`` reports as unused from *pdf_doc*.

    Every reference whose entry in the map returned by ``list_fonts(pdf_doc)``
    has a falsy ``'used'`` value is passed to ``pdf_doc.remove_fonts()``.

    Returns the number of those removed references whose entry has a truthy
    ``'StreamRef'`` (presumably fonts that carried an embedded font file --
    confirm against ``list_fonts``), not the total number of references removed.
    """
    fonts_by_ref = list_fonts(pdf_doc)
    unused = tuple(ref for ref, font in iteritems(fonts_by_ref) if not font['used'])
    pdf_doc.remove_fonts(unused)
    # Only entries with a stream reference are counted in the result.
    return sum(1 for ref in unused if fonts_by_ref[ref]['StreamRef'])
|
||||
|
||||
|
||||
def test_remove_unused_fonts(src):
    """Manual check: strip unused fonts from the PDF at *src* and save a copy.

    The document is opened with PoDoFo, passed through
    ``remove_unused_fonts()`` and written next to the input as
    ``<src without extension>-removed.pdf``. The output path is printed.
    """
    # Function-scope import so this helper does not depend on the module's
    # top-level import list.
    import os

    podofo = get_podofo()
    p = podofo.PDFDoc()
    p.open(src)
    remove_unused_fonts(p)
    # splitext() only strips an extension from the final path component, so a
    # path without an extension (or with a dot only in a directory name) still
    # yields a sensible destination instead of a truncated one.
    dest = os.path.splitext(src)[0] + '-removed.pdf'
    p.save(dest)
    print('Modified pdf saved to:', dest)
|
||||
|
||||
|
||||
def test_list_fonts(src):
|
||||
podofo = get_podofo()
|
||||
p = podofo.PDFDoc()
|
||||
|
@ -715,12 +715,15 @@ static PyMethodDef PDFDoc_methods[] = {
|
||||
{"alter_links", (PyCFunction)PDFDoc_alter_links, METH_VARARGS,
|
||||
"alter_links() -> Change links in the document."
|
||||
},
|
||||
{"list_fonts", (PyCFunction)list_fonts, METH_VARARGS,
|
||||
{"list_fonts", (PyCFunction)list_fonts, METH_NOARGS,
|
||||
"list_fonts() -> Get list of fonts in document"
|
||||
},
|
||||
{"used_fonts_in_page_range", (PyCFunction)used_fonts_in_page_range, METH_VARARGS,
|
||||
"used_fonts_in_page_range() -> Get list of references to fonts used in the specified pages"
|
||||
},
|
||||
{"remove_fonts", (PyCFunction)remove_fonts, METH_VARARGS,
|
||||
"remove_fonts() -> Remove the specified font objects."
|
||||
},
|
||||
{"delete_page", (PyCFunction)PDFDoc_delete_page, METH_VARARGS,
|
||||
"delete_page(page_num) -> Delete the specified page from the pdf (0 is the first page)."
|
||||
},
|
||||
|
@ -17,9 +17,27 @@ ref_as_tuple(const PdfReference &ref) {
|
||||
return Py_BuildValue("kk", num, generation);
|
||||
}
|
||||
|
||||
// Return the object referenced by the first of the /FontFile, /FontFile2 or
// /FontFile3 keys (tried in that order) that resolves on the given font
// descriptor, or NULL when none of them does.
static inline const PdfObject*
get_font_file(const PdfObject *descriptor) {
    const char *const font_file_keys[] = {"FontFile", "FontFile2", "FontFile3"};
    for (const char *key : font_file_keys) {
        PdfObject *candidate = descriptor->GetIndirectKey(key);
        if (candidate) return candidate;
    }
    return NULL;
}
|
||||
|
||||
static void
|
||||
remove_font(PdfVecObjects &objects, PdfObject *font) {
|
||||
PdfObject *descriptor = font->GetIndirectKey("FontDescriptor");
|
||||
if (descriptor) {
|
||||
const PdfObject *ff = get_font_file(descriptor);
|
||||
if (ff) delete objects.RemoveObject(ff->Reference());
|
||||
delete objects.RemoveObject(descriptor->Reference());
|
||||
}
|
||||
delete objects.RemoveObject(font->Reference());
|
||||
}
|
||||
|
||||
static bool
|
||||
used_fonts_in_page(PdfPage *page, PyObject *ans) {
|
||||
used_fonts_in_page(PdfPage *page, int page_num, PyObject *ans) {
|
||||
PdfContentsTokenizer tokenizer(page);
|
||||
bool in_text_block = false;
|
||||
const char* token = NULL;
|
||||
@ -73,9 +91,7 @@ list_fonts(PDFDoc *self, PyObject *args) {
|
||||
long long stream_len = 0;
|
||||
pyunique_ptr descendant_font, stream_ref;
|
||||
if (descriptor) {
|
||||
const PdfObject *ff = descriptor->GetIndirectKey("FontFile");
|
||||
if (!ff) ff = descriptor->GetIndirectKey("FontFile2");
|
||||
if (!ff) ff = descriptor->GetIndirectKey("FontFile3");
|
||||
const PdfObject *ff = get_font_file(descriptor);
|
||||
if (ff) {
|
||||
stream_ref.reset(ref_as_tuple(ff->Reference()));
|
||||
if (!stream_ref) return NULL;
|
||||
@ -119,10 +135,27 @@ used_fonts_in_page_range(PDFDoc *self, PyObject *args) {
|
||||
for (int i = first - 1; i < last; i++) {
|
||||
try {
|
||||
PdfPage *page = self->doc->GetPage(i);
|
||||
if (!used_fonts_in_page(page, ans.get())) return NULL;
|
||||
if (!used_fonts_in_page(page, i, ans.get())) return NULL;
|
||||
} catch (const PdfError &err) { continue; }
|
||||
}
|
||||
return ans.release();
|
||||
}
|
||||
|
||||
// remove_fonts(fonts) -> None
// `fonts` must be a tuple whose items are (object number, generation) pairs.
// Every referenced object that exists in the document is removed via
// remove_font(); references that do not resolve are skipped silently.
// Returns NULL (with a Python exception set by PyArg_ParseTuple) when the
// arguments cannot be parsed.
PyObject*
remove_fonts(PDFDoc *self, PyObject *args) {
    PyObject *fonts;
    if (!PyArg_ParseTuple(args, "O!", &PyTuple_Type, &fonts)) return NULL;

    PdfVecObjects &objects = self->doc->GetObjects();
    const Py_ssize_t count = PyTuple_GET_SIZE(fonts);
    for (Py_ssize_t idx = 0; idx < count; idx++) {
        unsigned long num, gen;
        if (!PyArg_ParseTuple(PyTuple_GET_ITEM(fonts, idx), "kk", &num, &gen)) return NULL;
        PdfObject *font = objects.GetObject(PdfReference(num, gen));
        if (!font) continue;  // nothing in the document under this reference
        remove_font(objects, font);
    }
    Py_RETURN_NONE;
}
|
||||
|
||||
}
|
||||
|
@ -61,5 +61,6 @@ dictionary_has_key_name(const PdfDictionary &d, T key, const char *name) {
|
||||
extern "C" {
|
||||
PyObject* list_fonts(PDFDoc*, PyObject*);
|
||||
PyObject* used_fonts_in_page_range(PDFDoc *self, PyObject *args);
|
||||
PyObject* remove_fonts(PDFDoc *self, PyObject *args);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user