PDF Output: Manually subset fonts to work around https://bugreports.qt.io/browse/QTBUG-88976

This commit is contained in:
Kovid Goyal 2020-11-28 11:26:45 +05:30
parent 4fb58f192e
commit 5e12d32755
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 46 additions and 4 deletions

View File

@ -23,7 +23,7 @@ from PyQt5.Qt import (
from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInterceptor
from PyQt5.QtWebEngineWidgets import QWebEnginePage, QWebEngineProfile
from calibre import detect_ncpus, prepare_string_for_xml
from calibre import detect_ncpus, prepare_string_for_xml, human_readable
from calibre.constants import __version__, iswindows
from calibre.ebooks.metadata.xmp import metadata_to_xmp_packet
from calibre.ebooks.oeb.base import XHTML, XPath
@ -38,6 +38,7 @@ from calibre.gui2.webengine import secure_webengine
from calibre.srv.render_book import check_for_maths
from calibre.utils.fonts.sfnt.container import Sfnt, UnsupportedFont
from calibre.utils.fonts.sfnt.merge import merge_truetype_fonts_for_pdf
from calibre.utils.fonts.sfnt.subset import pdf_subset
from calibre.utils.logging import default_log
from calibre.utils.monotonic import monotonic
from calibre.utils.podofo import (
@ -752,7 +753,7 @@ class Range(object):
return len(self.widths) == 1
def all_glyph_ids_in_w_arrays(arrays):
def all_glyph_ids_in_w_arrays(arrays, as_set=False):
ans = set()
for w in arrays:
i = 0
@ -765,7 +766,7 @@ def all_glyph_ids_in_w_arrays(arrays):
else:
ans |= set(range(elem, next_elem + 1))
i += 3
return sorted(ans)
return ans if as_set else sorted(ans)
def merge_w_arrays(arrays):
@ -994,8 +995,25 @@ def test_merge_fonts():
merge_fonts(pdf_doc)
out = path.rpartition('.')[0] + '-merged.pdf'
pdf_doc.save(out)
print('Merged PDF writted to', out)
print('Merged PDF written to', out)
def subset_fonts(pdf_doc, log):
    """Shrink embedded TrueType font programs to the glyphs listed in their width arrays.

    Every font reported by ``pdf_doc.list_fonts(True)`` that is not a
    ``Type0`` font and carries embedded data is parsed as an sfnt
    container. Fonts that cannot be parsed, or that have no ``glyf``
    table, are left untouched. For the rest, the glyph ids found in the
    font's ``W`` and ``W2`` arrays are passed to ``pdf_subset`` and the
    re-serialized font replaces the original data in the document.

    :param pdf_doc: document object providing ``list_fonts()`` and
        ``replace_font_data()``
    :param log: callable used to report the size before and after subsetting
    """
    for font in pdf_doc.list_fonts(True):
        # Skip fonts of subtype Type0 and fonts with no embedded data.
        if font['Subtype'] == 'Type0' or not font['Data']:
            continue
        original = font['Data']
        try:
            container = Sfnt(original)
        except UnsupportedFont:
            continue
        # Only fonts that contain a glyf table are subset here.
        if b'glyf' not in container:
            continue
        num, gen = font['Reference']
        width_arrays = (font['W'] or (), font['W2'] or ())
        used_glyphs = all_glyph_ids_in_w_arrays(width_arrays, as_set=True)
        pdf_subset(container, used_glyphs)
        subsetted = container()[0]
        before = human_readable(len(original))
        after = human_readable(len(subsetted))
        log('Subset embedded font from: {} to {}'.format(before, after))
        pdf_doc.replace_font_data(subsetted, num, gen)
# }}}
@ -1293,6 +1311,9 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
if num_removed:
log('Removed', num_removed, 'unused fonts')
# Needed because of https://bugreports.qt.io/browse/QTBUG-88976
subset_fonts(pdf_doc, log)
num_removed = pdf_doc.dedup_images()
if num_removed:
log('Removed', num_removed, 'duplicate images')

View File

@ -775,6 +775,9 @@ static PyMethodDef PDFDoc_methods[] = {
{"merge_fonts", (PyCFunction)py_merge_fonts, METH_VARARGS,
"merge_fonts() -> Merge the specified fonts."
},
{"replace_font_data", (PyCFunction)py_replace_font_data, METH_VARARGS,
"replace_font_data() -> Replace the data stream for the specified font."
},
{"dedup_type3_fonts", (PyCFunction)py_dedup_type3_fonts, METH_VARARGS,
"dedup_type3_fonts() -> De-duplicate repeated glyphs in Type3 fonts"
},

View File

@ -302,6 +302,22 @@ remove_unused_fonts(PDFDoc *self, PyObject *args) {
return Py_BuildValue("k", count);
}
// Replace the embedded font program of a font object with new data.
//
// Python arguments: (data, num, gen)
//   data - bytes-like buffer holding the new font program
//   num  - object number of the font's indirect reference
//   gen  - generation number of the font's indirect reference
//
// Returns None on success. On failure returns NULL with a Python exception
// set: KeyError when no object exists for the reference, ValueError when the
// font has no FontDescriptor, no embedded font file, or no data stream.
PyObject*
replace_font_data(PDFDoc *self, PyObject *args) {
    const char *data; Py_ssize_t sz;
    unsigned long num, gen;
    if (!PyArg_ParseTuple(args, "y#kk", &data, &sz, &num, &gen)) return NULL;

    const PdfVecObjects &objects = self->doc->GetObjects();
    PdfObject *font = objects.GetObject(PdfReference(num, static_cast<pdf_gennum>(gen)));
    if (!font) { PyErr_SetString(PyExc_KeyError, "No font with the specified reference found"); return NULL; }

    const PdfObject *descriptor = font->GetIndirectKey("FontDescriptor");
    if (!descriptor) { PyErr_SetString(PyExc_ValueError, "Font does not have a descriptor"); return NULL; }

    // get_font_file() is defined elsewhere in this file; its result is
    // checked here so that a descriptor without an embedded font program
    // raises a Python exception instead of dereferencing a NULL pointer.
    PdfObject *ff = get_font_file(descriptor);
    if (!ff) { PyErr_SetString(PyExc_ValueError, "Font does not have an embedded font file"); return NULL; }

    PdfStream *stream = ff->GetStream();
    if (!stream) { PyErr_SetString(PyExc_ValueError, "Font file does not have a data stream"); return NULL; }

    // Overwrite the stream contents with the caller supplied font program.
    stream->Set(data, sz);
    Py_RETURN_NONE;
}
PyObject*
merge_fonts(PDFDoc *self, PyObject *args) {
PyObject *items, *replacements;
@ -462,3 +478,4 @@ PYWRAP(list_fonts)
PYWRAP(merge_fonts)
PYWRAP(remove_unused_fonts)
PYWRAP(dedup_type3_fonts)
PYWRAP(replace_font_data)

View File

@ -99,6 +99,7 @@ extern "C" {
PyObject* py_list_fonts(PDFDoc*, PyObject*);
PyObject* py_remove_unused_fonts(PDFDoc *self, PyObject *args);
PyObject* py_merge_fonts(PDFDoc *self, PyObject *args);
PyObject* py_replace_font_data(PDFDoc *self, PyObject *args);
PyObject* py_dedup_type3_fonts(PDFDoc *self, PyObject *args);
PyObject* py_impose(PDFDoc *self, PyObject *args);
PyObject* py_dedup_images(PDFDoc *self, PyObject *args);