diff --git a/src/calibre/ebooks/pdf/html_writer.py b/src/calibre/ebooks/pdf/html_writer.py
index b68352041b..6f2ed6726f 100644
--- a/src/calibre/ebooks/pdf/html_writer.py
+++ b/src/calibre/ebooks/pdf/html_writer.py
@@ -31,7 +31,9 @@ from calibre.ebooks.pdf.render.serialize import PDFStream
from calibre.gui2 import setup_unix_signals
from calibre.gui2.webengine import secure_webengine
from calibre.utils.logging import default_log
-from calibre.utils.podofo import get_podofo, set_metadata_implementation
+from calibre.utils.podofo import (
+ get_podofo, remove_unused_fonts, set_metadata_implementation
+)
from calibre.utils.short_uuid import uuid4
from polyglot.builtins import iteritems, map, range, unicode_type
from polyglot.urllib import urlparse
@@ -563,11 +565,14 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
add_toc(PDFOutlineRoot(pdf_doc), toc)
report_progress(0.75, _('Added links to PDF content'))
- # TODO: Remove unused fonts
# TODO: Remove duplicate fonts
# TODO: Subset and embed fonts before rendering PDF
# TODO: Support for mathematics
+ num_removed = remove_unused_fonts(pdf_doc)
+ if num_removed:
+ log('Removed', num_removed, 'unused fonts')
+
if cover_data:
add_cover(pdf_doc, cover_data, page_layout, opts)
diff --git a/src/calibre/utils/podofo/__init__.py b/src/calibre/utils/podofo/__init__.py
index a53d7123f8..a57d5c60ff 100644
--- a/src/calibre/utils/podofo/__init__.py
+++ b/src/calibre/utils/podofo/__init__.py
@@ -10,7 +10,7 @@ from calibre.constants import plugins, preferred_encoding
from calibre.ebooks.metadata import authors_to_string
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.ipc.simple_worker import WorkerError, fork_job
-from polyglot.builtins import range, unicode_type
+from polyglot.builtins import range, unicode_type, iteritems
def get_podofo():
@@ -154,6 +154,23 @@ def list_fonts(pdf_doc):
return ref_map
+def remove_unused_fonts(pdf_doc):
+    '''
+    Remove the fonts that list_fonts() reports as not used from pdf_doc.
+
+    The references of all fonts whose 'used' entry is falsy are handed to
+    pdf_doc.remove_fonts() in a single call.
+
+    Returns the number of those fonts whose 'StreamRef' entry is truthy;
+    unused fonts without a 'StreamRef' are passed for removal as well but
+    are not included in the count.
+    '''
+    fonts = list_fonts(pdf_doc)
+    to_remove = tuple(key for key, info in iteritems(fonts) if not info['used'])
+    pdf_doc.remove_fonts(to_remove)
+    # Only fonts that carry a stream reference contribute to the result
+    return sum(1 for key in to_remove if fonts[key]['StreamRef'])
+
+
+def test_remove_unused_fonts(src):
+    '''
+    Manual test helper: open the PDF at src, run remove_unused_fonts() on it
+    and save the result alongside it as '<src without extension>-removed.pdf'.
+    The destination path is printed.
+    '''
+    # Local import keeps this helper self-contained
+    import os
+    podofo = get_podofo()
+    p = podofo.PDFDoc()
+    p.open(src)
+    remove_unused_fonts(p)
+    # splitext() only strips an extension from the last path component, so
+    # names without an extension and directories containing a dot keep their
+    # full path (rpartition('.') would yield '' or cut the path short).
+    dest = os.path.splitext(src)[0] + '-removed.pdf'
+    p.save(dest)
+    print('Modified pdf saved to:', dest)
+
+
def test_list_fonts(src):
podofo = get_podofo()
p = podofo.PDFDoc()
diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp
index 3cd4dc9fc9..1e99abfb62 100644
--- a/src/calibre/utils/podofo/doc.cpp
+++ b/src/calibre/utils/podofo/doc.cpp
@@ -715,12 +715,15 @@ static PyMethodDef PDFDoc_methods[] = {
{"alter_links", (PyCFunction)PDFDoc_alter_links, METH_VARARGS,
"alter_links() -> Change links in the document."
},
- {"list_fonts", (PyCFunction)list_fonts, METH_VARARGS,
+ {"list_fonts", (PyCFunction)list_fonts, METH_NOARGS,
"list_fonts() -> Get list of fonts in document"
},
{"used_fonts_in_page_range", (PyCFunction)used_fonts_in_page_range, METH_VARARGS,
"used_fonts_in_page_range() -> Get list of references to fonts used in the specified pages"
},
+ {"remove_fonts", (PyCFunction)remove_fonts, METH_VARARGS,
+ "remove_fonts() -> Remove the specified font objects."
+ },
{"delete_page", (PyCFunction)PDFDoc_delete_page, METH_VARARGS,
"delete_page(page_num) -> Delete the specified page from the pdf (0 is the first page)."
},
diff --git a/src/calibre/utils/podofo/fonts.cpp b/src/calibre/utils/podofo/fonts.cpp
index 0217d993d0..62b125b204 100644
--- a/src/calibre/utils/podofo/fonts.cpp
+++ b/src/calibre/utils/podofo/fonts.cpp
@@ -17,9 +17,27 @@ ref_as_tuple(const PdfReference &ref) {
return Py_BuildValue("kk", num, generation);
}
+static inline const PdfObject*
+get_font_file(const PdfObject *descriptor) {
+    // Return the object stored under the first of the keys FontFile,
+    // FontFile2, FontFile3 (checked in that order) that is present in the
+    // descriptor, or NULL when none of them is.
+    static const char* const keys[] = {"FontFile", "FontFile2", "FontFile3"};
+    for (unsigned int i = 0; i < sizeof(keys) / sizeof(keys[0]); i++) {
+        PdfObject *candidate = descriptor->GetIndirectKey(keys[i]);
+        if (candidate) return candidate;
+    }
+    return NULL;
+}
+
+static void
+remove_font(PdfVecObjects &objects, PdfObject *font) {
+    // Remove a font object from `objects` together with its FontDescriptor
+    // and the font file the descriptor points to, when those exist.
+    // NOTE(review): assumes the descriptor and font file are not shared with
+    // another font that remains in the document -- confirm with callers.
+
+    // Look up the dependent objects first: they are reached through the font
+    // and its descriptor, so this must happen before anything is deleted.
+    PdfObject *font_descriptor = font->GetIndirectKey("FontDescriptor");
+    const PdfObject *font_file = font_descriptor ? get_font_file(font_descriptor) : NULL;
+
+    // Delete leaf-first: font file, then descriptor, then the font itself.
+    if (font_file) delete objects.RemoveObject(font_file->Reference());
+    if (font_descriptor) delete objects.RemoveObject(font_descriptor->Reference());
+    delete objects.RemoveObject(font->Reference());
+}
static bool
-used_fonts_in_page(PdfPage *page, PyObject *ans) {
+used_fonts_in_page(PdfPage *page, int page_num, PyObject *ans) {
PdfContentsTokenizer tokenizer(page);
bool in_text_block = false;
const char* token = NULL;
@@ -73,9 +91,7 @@ list_fonts(PDFDoc *self, PyObject *args) {
long long stream_len = 0;
pyunique_ptr descendant_font, stream_ref;
if (descriptor) {
- const PdfObject *ff = descriptor->GetIndirectKey("FontFile");
- if (!ff) ff = descriptor->GetIndirectKey("FontFile2");
- if (!ff) ff = descriptor->GetIndirectKey("FontFile3");
+ const PdfObject *ff = get_font_file(descriptor);
if (ff) {
stream_ref.reset(ref_as_tuple(ff->Reference()));
if (!stream_ref) return NULL;
@@ -119,10 +135,27 @@ used_fonts_in_page_range(PDFDoc *self, PyObject *args) {
for (int i = first - 1; i < last; i++) {
try {
PdfPage *page = self->doc->GetPage(i);
- if (!used_fonts_in_page(page, ans.get())) return NULL;
+ if (!used_fonts_in_page(page, i, ans.get())) return NULL;
} catch (const PdfError &err) { continue; }
}
return ans.release();
}
+PyObject*
+remove_fonts(PDFDoc *self, PyObject *args) {
+    // Python signature: remove_fonts(fonts) -> None
+    // `fonts` must be a tuple whose items are (object number, generation)
+    // pairs. Every pair that resolves to an object in the document is removed
+    // via remove_font(); pairs that do not resolve are skipped.
+    // Returns NULL with a Python exception set on bad arguments or when
+    // PoDoFo reports an error.
+    PyObject *fonts;
+    if (!PyArg_ParseTuple(args, "O!", &PyTuple_Type, &fonts)) return NULL;
+    try {
+        PdfVecObjects &objects = self->doc->GetObjects();
+        for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(fonts); i++) {
+            unsigned long num, gen;
+            if (!PyArg_ParseTuple(PyTuple_GET_ITEM(fonts, i), "kk", &num, &gen)) return NULL;
+            PdfReference ref(num, gen);
+            PdfObject *font = objects.GetObject(ref);
+            if (font) {
+                remove_font(objects, font);
+            }
+        }
+    } catch (const PdfError &err) {
+        // A C++ exception must not propagate out of this extern "C" entry
+        // point into the interpreter; turn it into a Python exception.
+        const char *msg = err.what();
+        PyErr_SetString(PyExc_ValueError, msg ? msg : "PoDoFo error while removing fonts");
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
}
diff --git a/src/calibre/utils/podofo/global.h b/src/calibre/utils/podofo/global.h
index 3a36fea3ca..ee69492a05 100644
--- a/src/calibre/utils/podofo/global.h
+++ b/src/calibre/utils/podofo/global.h
@@ -61,5 +61,6 @@ dictionary_has_key_name(const PdfDictionary &d, T key, const char *name) {
extern "C" {
PyObject* list_fonts(PDFDoc*, PyObject*);
PyObject* used_fonts_in_page_range(PDFDoc *self, PyObject *args);
+PyObject* remove_fonts(PDFDoc *self, PyObject *args);
}
}