diff --git a/src/calibre/ebooks/pdf/html_writer.py b/src/calibre/ebooks/pdf/html_writer.py
index de1139ee6c..1c03a9f742 100644
--- a/src/calibre/ebooks/pdf/html_writer.py
+++ b/src/calibre/ebooks/pdf/html_writer.py
@@ -23,7 +23,7 @@ from PyQt5.Qt import (
from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInterceptor
from PyQt5.QtWebEngineWidgets import QWebEnginePage, QWebEngineProfile
-from calibre import detect_ncpus, prepare_string_for_xml
+from calibre import detect_ncpus, prepare_string_for_xml, human_readable
from calibre.constants import __version__, iswindows
from calibre.ebooks.metadata.xmp import metadata_to_xmp_packet
from calibre.ebooks.oeb.base import XHTML, XPath
@@ -38,6 +38,7 @@ from calibre.gui2.webengine import secure_webengine
from calibre.srv.render_book import check_for_maths
from calibre.utils.fonts.sfnt.container import Sfnt, UnsupportedFont
from calibre.utils.fonts.sfnt.merge import merge_truetype_fonts_for_pdf
+from calibre.utils.fonts.sfnt.subset import pdf_subset
from calibre.utils.logging import default_log
from calibre.utils.monotonic import monotonic
from calibre.utils.podofo import (
@@ -752,7 +753,7 @@ class Range(object):
return len(self.widths) == 1
-def all_glyph_ids_in_w_arrays(arrays):
+def all_glyph_ids_in_w_arrays(arrays, as_set=False):
ans = set()
for w in arrays:
i = 0
@@ -765,7 +766,7 @@ def all_glyph_ids_in_w_arrays(arrays):
else:
ans |= set(range(elem, next_elem + 1))
i += 3
- return sorted(ans)
+ return ans if as_set else sorted(ans)
def merge_w_arrays(arrays):
@@ -994,8 +995,25 @@ def test_merge_fonts():
merge_fonts(pdf_doc)
out = path.rpartition('.')[0] + '-merged.pdf'
pdf_doc.save(out)
- print('Merged PDF writted to', out)
+ print('Merged PDF written to', out)
+
+def subset_fonts(pdf_doc, log):
+ all_fonts = pdf_doc.list_fonts(True)
+ for font in all_fonts:
+ if font['Subtype'] != 'Type0' and font['Data']:
+ try:
+ sfnt = Sfnt(font['Data'])
+ except UnsupportedFont:
+ continue
+ if b'glyf' not in sfnt:
+ continue
+ num, gen = font['Reference']
+ glyphs = all_glyph_ids_in_w_arrays((font['W'] or (), font['W2'] or ()), as_set=True)
+ pdf_subset(sfnt, glyphs)
+ data = sfnt()[0]
+ log('Subset embedded font from: {} to {}'.format(human_readable(len(font['Data'])), human_readable(len(data))))
+ pdf_doc.replace_font_data(data, num, gen)
# }}}
@@ -1293,6 +1311,9 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
if num_removed:
log('Removed', num_removed, 'unused fonts')
+ # Subset the embedded fonts ourselves, needed because of https://bugreports.qt.io/browse/QTBUG-88976
+ subset_fonts(pdf_doc, log)
+
num_removed = pdf_doc.dedup_images()
if num_removed:
log('Removed', num_removed, 'duplicate images')
diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp
index 6ace6c9f26..2799522a65 100644
--- a/src/calibre/utils/podofo/doc.cpp
+++ b/src/calibre/utils/podofo/doc.cpp
@@ -775,6 +775,9 @@ static PyMethodDef PDFDoc_methods[] = {
{"merge_fonts", (PyCFunction)py_merge_fonts, METH_VARARGS,
"merge_fonts() -> Merge the specified fonts."
},
+ {"replace_font_data", (PyCFunction)py_replace_font_data, METH_VARARGS,
+ "replace_font_data() -> Replace the data stream for the specified font."
+ },
{"dedup_type3_fonts", (PyCFunction)py_dedup_type3_fonts, METH_VARARGS,
"dedup_type3_fonts() -> De-duplicate repeated glyphs in Type3 fonts"
},
diff --git a/src/calibre/utils/podofo/fonts.cpp b/src/calibre/utils/podofo/fonts.cpp
index 72cbe1444f..4ba18c34ac 100644
--- a/src/calibre/utils/podofo/fonts.cpp
+++ b/src/calibre/utils/podofo/fonts.cpp
@@ -302,6 +302,22 @@ remove_unused_fonts(PDFDoc *self, PyObject *args) {
return Py_BuildValue("k", count);
}
+// Replace the embedded font file stream of the font object identified by the
+// (num, gen) reference with the supplied bytes.
+// Python arguments: (data: bytes-like, num: int, gen: int); returns None.
+// Sets KeyError when no object with that reference exists and ValueError when
+// the font has no descriptor or no embedded font file to replace.
+PyObject*
+replace_font_data(PDFDoc *self, PyObject *args) {
+    const char *data; Py_ssize_t sz;
+    unsigned long num, gen;
+    if (!PyArg_ParseTuple(args, "y#kk", &data, &sz, &num, &gen)) return NULL;
+    const PdfVecObjects &objects = self->doc->GetObjects();
+    // "k" parses into unsigned long; narrow the generation number explicitly
+    // for the PdfReference constructor.
+    PdfObject *font = objects.GetObject(PdfReference(num, static_cast<uint16_t>(gen)));
+    if (!font) { PyErr_SetString(PyExc_KeyError, "No font with the specified reference found"); return NULL; }
+    const PdfObject *descriptor = font->GetIndirectKey("FontDescriptor");
+    if (!descriptor) { PyErr_SetString(PyExc_ValueError, "Font does not have a descriptor"); return NULL; }
+    PdfObject *ff = get_font_file(descriptor);
+    // A font without an embedded font file has nothing to replace; report it
+    // instead of dereferencing a NULL pointer.
+    if (!ff) { PyErr_SetString(PyExc_ValueError, "Font does not have an embedded font file"); return NULL; }
+    PdfStream *stream = ff->GetStream();
+    // NOTE(review): defensive check, GetStream() may never return NULL here - confirm
+    if (!stream) { PyErr_SetString(PyExc_ValueError, "Font file object does not have a stream"); return NULL; }
+    stream->Set(data, sz);
+    Py_RETURN_NONE;
+}
+
PyObject*
merge_fonts(PDFDoc *self, PyObject *args) {
PyObject *items, *replacements;
@@ -462,3 +478,4 @@ PYWRAP(list_fonts)
PYWRAP(merge_fonts)
PYWRAP(remove_unused_fonts)
PYWRAP(dedup_type3_fonts)
+PYWRAP(replace_font_data)
diff --git a/src/calibre/utils/podofo/global.h b/src/calibre/utils/podofo/global.h
index 09b8f1b02b..b723ef8504 100644
--- a/src/calibre/utils/podofo/global.h
+++ b/src/calibre/utils/podofo/global.h
@@ -99,6 +99,7 @@ extern "C" {
PyObject* py_list_fonts(PDFDoc*, PyObject*);
PyObject* py_remove_unused_fonts(PDFDoc *self, PyObject *args);
PyObject* py_merge_fonts(PDFDoc *self, PyObject *args);
+PyObject* py_replace_font_data(PDFDoc *self, PyObject *args);
PyObject* py_dedup_type3_fonts(PDFDoc *self, PyObject *args);
PyObject* py_impose(PDFDoc *self, PyObject *args);
PyObject* py_dedup_images(PDFDoc *self, PyObject *args);