diff --git a/src/calibre/ebooks/pdf/html_writer.py b/src/calibre/ebooks/pdf/html_writer.py
index de1139ee6c..1c03a9f742 100644
--- a/src/calibre/ebooks/pdf/html_writer.py
+++ b/src/calibre/ebooks/pdf/html_writer.py
@@ -23,7 +23,7 @@ from PyQt5.Qt import (
 from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInterceptor
 from PyQt5.QtWebEngineWidgets import QWebEnginePage, QWebEngineProfile
 
-from calibre import detect_ncpus, prepare_string_for_xml
+from calibre import detect_ncpus, prepare_string_for_xml, human_readable
 from calibre.constants import __version__, iswindows
 from calibre.ebooks.metadata.xmp import metadata_to_xmp_packet
 from calibre.ebooks.oeb.base import XHTML, XPath
@@ -38,6 +38,7 @@ from calibre.gui2.webengine import secure_webengine
 from calibre.srv.render_book import check_for_maths
 from calibre.utils.fonts.sfnt.container import Sfnt, UnsupportedFont
 from calibre.utils.fonts.sfnt.merge import merge_truetype_fonts_for_pdf
+from calibre.utils.fonts.sfnt.subset import pdf_subset
 from calibre.utils.logging import default_log
 from calibre.utils.monotonic import monotonic
 from calibre.utils.podofo import (
@@ -752,7 +753,7 @@ class Range(object):
         return len(self.widths) == 1
 
 
-def all_glyph_ids_in_w_arrays(arrays):
+def all_glyph_ids_in_w_arrays(arrays, as_set=False):
     ans = set()
     for w in arrays:
         i = 0
@@ -765,7 +766,7 @@ def all_glyph_ids_in_w_arrays(arrays):
             else:
                 ans |= set(range(elem, next_elem + 1))
                 i += 3
-    return sorted(ans)
+    return ans if as_set else sorted(ans)
 
 
 def merge_w_arrays(arrays):
@@ -994,8 +995,43 @@ def test_merge_fonts():
     merge_fonts(pdf_doc)
     out = path.rpartition('.')[0] + '-merged.pdf'
     pdf_doc.save(out)
-    print('Merged PDF writted to', out)
+    print('Merged PDF written to', out)
 
+
+def subset_fonts(pdf_doc, log):
+    '''
+    Subset the fonts embedded in pdf_doc.
+
+    For every font that is not Type0, has embedded data and contains a glyf
+    table, pdf_subset() is called with the glyph ids collected from the
+    font's W and W2 arrays and the result is written back using
+    pdf_doc.replace_font_data(). Fonts that Sfnt rejects as unsupported and
+    fonts with no glyph ids in W/W2 are left untouched.
+
+    :param pdf_doc: Document object providing list_fonts() and
+        replace_font_data()
+    :param log: Callable used to report the font data size before and after
+    '''
+    for font in pdf_doc.list_fonts(True):
+        if font['Subtype'] == 'Type0' or not font['Data']:
+            continue
+        try:
+            sfnt = Sfnt(font['Data'])
+        except UnsupportedFont:
+            continue
+        if b'glyf' not in sfnt:
+            continue
+        glyphs = all_glyph_ids_in_w_arrays((font['W'] or (), font['W2'] or ()), as_set=True)
+        if not glyphs:
+            # Without any glyph ids from W/W2 there is nothing to base the
+            # subset on, so leave the font as it is rather than calling
+            # pdf_subset() with an empty set
+            continue
+        num, gen = font['Reference']
+        pdf_subset(sfnt, glyphs)
+        data = sfnt()[0]
+        log('Subset embedded font from: {} to {}'.format(human_readable(len(font['Data'])), human_readable(len(data))))
+        pdf_doc.replace_font_data(data, num, gen)
 
 # }}}
 
@@ -1293,6 +1329,9 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
     if num_removed:
         log('Removed', num_removed, 'unused fonts')
 
+    # Needed because of https://bugreports.qt.io/browse/QTBUG-88976
+    subset_fonts(pdf_doc, log)
+
     num_removed = pdf_doc.dedup_images()
     if num_removed:
         log('Removed', num_removed, 'duplicate images')
diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp
index 6ace6c9f26..2799522a65 100644
--- a/src/calibre/utils/podofo/doc.cpp
+++ b/src/calibre/utils/podofo/doc.cpp
@@ -775,6 +775,9 @@ static PyMethodDef PDFDoc_methods[] = {
     {"merge_fonts", (PyCFunction)py_merge_fonts, METH_VARARGS,
      "merge_fonts() -> Merge the specified fonts."
     },
+    {"replace_font_data", (PyCFunction)py_replace_font_data, METH_VARARGS,
+     "replace_font_data() -> Replace the data stream for the specified font."
+    },
     {"dedup_type3_fonts", (PyCFunction)py_dedup_type3_fonts, METH_VARARGS,
      "dedup_type3_fonts() -> De-duplicate repeated glyphs in Type3 fonts"
     },
diff --git a/src/calibre/utils/podofo/fonts.cpp b/src/calibre/utils/podofo/fonts.cpp
index 72cbe1444f..4ba18c34ac 100644
--- a/src/calibre/utils/podofo/fonts.cpp
+++ b/src/calibre/utils/podofo/fonts.cpp
@@ -302,6 +302,30 @@ remove_unused_fonts(PDFDoc *self, PyObject *args) {
     return Py_BuildValue("k", count);
 }
 
+// Replace the contents of the embedded font file stream of a font.
+// Python arguments: (data, num, gen) where data is a bytes-like object and
+// (num, gen) is the reference of the font object.
+// Raises KeyError if the object is not found and ValueError if the font has
+// no FontDescriptor or the descriptor has no font file.
+PyObject*
+replace_font_data(PDFDoc *self, PyObject *args) {
+    const char *data; Py_ssize_t sz;
+    unsigned long num, gen;
+    if (!PyArg_ParseTuple(args, "y#kk", &data, &sz, &num, &gen)) return NULL;
+    const PdfVecObjects &objects = self->doc->GetObjects();
+    PdfObject *font = objects.GetObject(PdfReference(num, static_cast<pdf_gennum>(gen)));
+    if (!font) { PyErr_SetString(PyExc_KeyError, "No font with the specified reference found"); return NULL; }
+    const PdfObject *descriptor = font->GetIndirectKey("FontDescriptor");
+    if (!descriptor) { PyErr_SetString(PyExc_ValueError, "Font does not have a descriptor"); return NULL; }
+    PdfObject *ff = get_font_file(descriptor);
+    // get_font_file() is defined elsewhere in this file; guard against it
+    // returning NULL so a missing font file raises instead of crashing
+    if (!ff) { PyErr_SetString(PyExc_ValueError, "Font descriptor does not have an embedded font file"); return NULL; }
+    PdfStream *stream = ff->GetStream();
+    stream->Set(data, sz);
+    Py_RETURN_NONE;
+}
+
 PyObject*
 merge_fonts(PDFDoc *self, PyObject *args) {
     PyObject *items, *replacements;
@@ -462,3 +486,4 @@ PYWRAP(list_fonts)
 PYWRAP(merge_fonts)
 PYWRAP(remove_unused_fonts)
 PYWRAP(dedup_type3_fonts)
+PYWRAP(replace_font_data)
diff --git a/src/calibre/utils/podofo/global.h b/src/calibre/utils/podofo/global.h
index 09b8f1b02b..b723ef8504 100644
--- a/src/calibre/utils/podofo/global.h
+++ b/src/calibre/utils/podofo/global.h
@@ -99,6 +99,7 @@ extern "C" {
 PyObject* py_list_fonts(PDFDoc*, PyObject*);
 PyObject* py_remove_unused_fonts(PDFDoc *self, PyObject *args);
 PyObject* py_merge_fonts(PDFDoc *self, PyObject *args);
+PyObject* py_replace_font_data(PDFDoc *self, PyObject *args);
 PyObject* py_dedup_type3_fonts(PDFDoc *self, PyObject *args);
 PyObject* py_impose(PDFDoc *self, PyObject *args);
 PyObject* py_dedup_images(PDFDoc *self, PyObject *args);