mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Remove unused fonts from generated PDF
This commit is contained in:
parent
24a344546e
commit
6caf7cf77a
@ -31,7 +31,9 @@ from calibre.ebooks.pdf.render.serialize import PDFStream
|
|||||||
from calibre.gui2 import setup_unix_signals
|
from calibre.gui2 import setup_unix_signals
|
||||||
from calibre.gui2.webengine import secure_webengine
|
from calibre.gui2.webengine import secure_webengine
|
||||||
from calibre.utils.logging import default_log
|
from calibre.utils.logging import default_log
|
||||||
from calibre.utils.podofo import get_podofo, set_metadata_implementation
|
from calibre.utils.podofo import (
|
||||||
|
get_podofo, remove_unused_fonts, set_metadata_implementation
|
||||||
|
)
|
||||||
from calibre.utils.short_uuid import uuid4
|
from calibre.utils.short_uuid import uuid4
|
||||||
from polyglot.builtins import iteritems, map, range, unicode_type
|
from polyglot.builtins import iteritems, map, range, unicode_type
|
||||||
from polyglot.urllib import urlparse
|
from polyglot.urllib import urlparse
|
||||||
@ -563,11 +565,14 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
|
|||||||
add_toc(PDFOutlineRoot(pdf_doc), toc)
|
add_toc(PDFOutlineRoot(pdf_doc), toc)
|
||||||
report_progress(0.75, _('Added links to PDF content'))
|
report_progress(0.75, _('Added links to PDF content'))
|
||||||
|
|
||||||
# TODO: Remove unused fonts
|
|
||||||
# TODO: Remove duplicate fonts
|
# TODO: Remove duplicate fonts
|
||||||
# TODO: Subset and embed fonts before rendering PDF
|
# TODO: Subset and embed fonts before rendering PDF
|
||||||
# TODO: Support for mathematics
|
# TODO: Support for mathematics
|
||||||
|
|
||||||
|
num_removed = remove_unused_fonts(pdf_doc)
|
||||||
|
if num_removed:
|
||||||
|
log('Removed', num_removed, 'unused fonts')
|
||||||
|
|
||||||
if cover_data:
|
if cover_data:
|
||||||
add_cover(pdf_doc, cover_data, page_layout, opts)
|
add_cover(pdf_doc, cover_data, page_layout, opts)
|
||||||
|
|
||||||
|
@ -10,7 +10,7 @@ from calibre.constants import plugins, preferred_encoding
|
|||||||
from calibre.ebooks.metadata import authors_to_string
|
from calibre.ebooks.metadata import authors_to_string
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre.utils.ipc.simple_worker import WorkerError, fork_job
|
from calibre.utils.ipc.simple_worker import WorkerError, fork_job
|
||||||
from polyglot.builtins import range, unicode_type
|
from polyglot.builtins import range, unicode_type, iteritems
|
||||||
|
|
||||||
|
|
||||||
def get_podofo():
|
def get_podofo():
|
||||||
@ -154,6 +154,23 @@ def list_fonts(pdf_doc):
|
|||||||
return ref_map
|
return ref_map
|
||||||
|
|
||||||
|
|
||||||
|
def remove_unused_fonts(pdf_doc):
    """Remove all fonts in ``pdf_doc`` that are not marked as used.

    The fonts are enumerated with ``list_fonts()``; every entry whose
    ``'used'`` value is falsy is handed to ``pdf_doc.remove_fonts()``.

    :param pdf_doc: the PDF document object (must provide ``remove_fonts()``)
    :return: the number of removed fonts that had a truthy ``'StreamRef'``.
        Unused fonts without a ``'StreamRef'`` are removed as well, but are
        not included in the returned count.
    """
    font_ref_map = list_fonts(pdf_doc)
    unused = tuple(ref for ref, font in iteritems(font_ref_map) if not font['used'])
    if not unused:
        # Nothing to do, avoid a pointless call into the native module
        return 0
    pdf_doc.remove_fonts(unused)
    # Count lazily instead of materializing a throwaway tuple
    return sum(1 for ref in unused if font_ref_map[ref]['StreamRef'])
|
||||||
|
|
||||||
|
|
||||||
|
def test_remove_unused_fonts(src):
    """Manual test helper: remove the unused fonts from the PDF at ``src``.

    The modified document is saved alongside the input, with ``-removed.pdf``
    appended to the input path minus its extension, and the destination path
    is printed.

    :param src: path to the PDF file to process
    """
    import os

    podofo = get_podofo()
    p = podofo.PDFDoc()
    p.open(src)
    remove_unused_fonts(p)
    # splitext() only strips an extension from the final path component, so
    # paths without an extension, or with a dot in a directory name, still
    # produce a destination next to the source file. rpartition('.') yielded
    # '-removed.pdf' or a truncated path in those cases.
    dest = os.path.splitext(src)[0] + '-removed.pdf'
    p.save(dest)
    print('Modified pdf saved to:', dest)
|
||||||
|
|
||||||
|
|
||||||
def test_list_fonts(src):
|
def test_list_fonts(src):
|
||||||
podofo = get_podofo()
|
podofo = get_podofo()
|
||||||
p = podofo.PDFDoc()
|
p = podofo.PDFDoc()
|
||||||
|
@ -715,12 +715,15 @@ static PyMethodDef PDFDoc_methods[] = {
|
|||||||
{"alter_links", (PyCFunction)PDFDoc_alter_links, METH_VARARGS,
|
{"alter_links", (PyCFunction)PDFDoc_alter_links, METH_VARARGS,
|
||||||
"alter_links() -> Change links in the document."
|
"alter_links() -> Change links in the document."
|
||||||
},
|
},
|
||||||
{"list_fonts", (PyCFunction)list_fonts, METH_VARARGS,
|
{"list_fonts", (PyCFunction)list_fonts, METH_NOARGS,
|
||||||
"list_fonts() -> Get list of fonts in document"
|
"list_fonts() -> Get list of fonts in document"
|
||||||
},
|
},
|
||||||
{"used_fonts_in_page_range", (PyCFunction)used_fonts_in_page_range, METH_VARARGS,
|
{"used_fonts_in_page_range", (PyCFunction)used_fonts_in_page_range, METH_VARARGS,
|
||||||
"used_fonts_in_page_range() -> Get list of references to fonts used in the specified pages"
|
"used_fonts_in_page_range() -> Get list of references to fonts used in the specified pages"
|
||||||
},
|
},
|
||||||
|
{"remove_fonts", (PyCFunction)remove_fonts, METH_VARARGS,
|
||||||
|
"remove_fonts() -> Remove the specified font objects."
|
||||||
|
},
|
||||||
{"delete_page", (PyCFunction)PDFDoc_delete_page, METH_VARARGS,
|
{"delete_page", (PyCFunction)PDFDoc_delete_page, METH_VARARGS,
|
||||||
"delete_page(page_num) -> Delete the specified page from the pdf (0 is the first page)."
|
"delete_page(page_num) -> Delete the specified page from the pdf (0 is the first page)."
|
||||||
},
|
},
|
||||||
|
@ -17,9 +17,27 @@ ref_as_tuple(const PdfReference &ref) {
|
|||||||
return Py_BuildValue("kk", num, generation);
|
return Py_BuildValue("kk", num, generation);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Look up the font file object referenced by a font descriptor.
// The keys FontFile, FontFile2 and FontFile3 are tried in that order and the
// first one that resolves is returned; NULL is returned when none is present.
static inline const PdfObject*
get_font_file(const PdfObject *descriptor) {
    static const char* const font_file_keys[] = {"FontFile", "FontFile2", "FontFile3"};
    const size_t num_keys = sizeof(font_file_keys) / sizeof(font_file_keys[0]);
    for (size_t idx = 0; idx < num_keys; idx++) {
        PdfObject *candidate = descriptor->GetIndirectKey(font_file_keys[idx]);
        if (candidate != NULL) return candidate;
    }
    return NULL;
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
remove_font(PdfVecObjects &objects, PdfObject *font) {
|
||||||
|
PdfObject *descriptor = font->GetIndirectKey("FontDescriptor");
|
||||||
|
if (descriptor) {
|
||||||
|
const PdfObject *ff = get_font_file(descriptor);
|
||||||
|
if (ff) delete objects.RemoveObject(ff->Reference());
|
||||||
|
delete objects.RemoveObject(descriptor->Reference());
|
||||||
|
}
|
||||||
|
delete objects.RemoveObject(font->Reference());
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
used_fonts_in_page(PdfPage *page, PyObject *ans) {
|
used_fonts_in_page(PdfPage *page, int page_num, PyObject *ans) {
|
||||||
PdfContentsTokenizer tokenizer(page);
|
PdfContentsTokenizer tokenizer(page);
|
||||||
bool in_text_block = false;
|
bool in_text_block = false;
|
||||||
const char* token = NULL;
|
const char* token = NULL;
|
||||||
@ -73,9 +91,7 @@ list_fonts(PDFDoc *self, PyObject *args) {
|
|||||||
long long stream_len = 0;
|
long long stream_len = 0;
|
||||||
pyunique_ptr descendant_font, stream_ref;
|
pyunique_ptr descendant_font, stream_ref;
|
||||||
if (descriptor) {
|
if (descriptor) {
|
||||||
const PdfObject *ff = descriptor->GetIndirectKey("FontFile");
|
const PdfObject *ff = get_font_file(descriptor);
|
||||||
if (!ff) ff = descriptor->GetIndirectKey("FontFile2");
|
|
||||||
if (!ff) ff = descriptor->GetIndirectKey("FontFile3");
|
|
||||||
if (ff) {
|
if (ff) {
|
||||||
stream_ref.reset(ref_as_tuple(ff->Reference()));
|
stream_ref.reset(ref_as_tuple(ff->Reference()));
|
||||||
if (!stream_ref) return NULL;
|
if (!stream_ref) return NULL;
|
||||||
@ -119,10 +135,27 @@ used_fonts_in_page_range(PDFDoc *self, PyObject *args) {
|
|||||||
for (int i = first - 1; i < last; i++) {
|
for (int i = first - 1; i < last; i++) {
|
||||||
try {
|
try {
|
||||||
PdfPage *page = self->doc->GetPage(i);
|
PdfPage *page = self->doc->GetPage(i);
|
||||||
if (!used_fonts_in_page(page, ans.get())) return NULL;
|
if (!used_fonts_in_page(page, i, ans.get())) return NULL;
|
||||||
} catch (const PdfError &err) { continue; }
|
} catch (const PdfError &err) { continue; }
|
||||||
}
|
}
|
||||||
return ans.release();
|
return ans.release();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// remove_fonts(fonts) -> None
//
// `fonts` must be a tuple of (object number, generation number) tuples, each
// identifying a font object. Every reference that resolves to an object in
// the document is removed via remove_font(); references that do not resolve
// are silently skipped.
//
// Returns None on success. On a malformed argument, or if PoDoFo raises a
// PdfError, a Python exception is set and NULL is returned.
PyObject*
remove_fonts(PDFDoc *self, PyObject *args) {
    PyObject *fonts;
    if (!PyArg_ParseTuple(args, "O!", &PyTuple_Type, &fonts)) return NULL;
    const Py_ssize_t count = PyTuple_GET_SIZE(fonts);
    try {
        PdfVecObjects &objects = self->doc->GetObjects();
        for (Py_ssize_t i = 0; i < count; i++) {
            PyObject *item = PyTuple_GET_ITEM(fonts, i);
            // PyArg_ParseTuple() requires a tuple; anything else would
            // surface as an opaque SystemError, so report it clearly instead.
            if (!PyTuple_Check(item)) {
                PyErr_SetString(PyExc_TypeError, "font references must be (num, generation) tuples");
                return NULL;
            }
            unsigned long num, gen;
            if (!PyArg_ParseTuple(item, "kk", &num, &gen)) return NULL;
            PdfReference ref(num, gen);
            PdfObject *font = objects.GetObject(ref);
            if (font) {
                remove_font(objects, font);
            }
        }
    } catch (const PdfError &) {
        // Never let a C++ exception propagate into the Python interpreter
        PyErr_SetString(PyExc_RuntimeError, "PoDoFo error while removing fonts");
        return NULL;
    }
    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -61,5 +61,6 @@ dictionary_has_key_name(const PdfDictionary &d, T key, const char *name) {
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
PyObject* list_fonts(PDFDoc*, PyObject*);
|
PyObject* list_fonts(PDFDoc*, PyObject*);
|
||||||
PyObject* used_fonts_in_page_range(PDFDoc *self, PyObject *args);
|
PyObject* used_fonts_in_page_range(PDFDoc *self, PyObject *args);
|
||||||
|
PyObject* remove_fonts(PDFDoc *self, PyObject *args);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user