mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Work on merging fonts
This commit is contained in:
parent
929f65ecf2
commit
d994cf7895
@ -9,8 +9,10 @@ import copy
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import signal
|
import signal
|
||||||
|
import sys
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
from operator import attrgetter
|
||||||
|
|
||||||
from PyQt5.Qt import (
|
from PyQt5.Qt import (
|
||||||
QApplication, QMarginsF, QObject, QPageLayout, QTimer, QUrl, pyqtSignal
|
QApplication, QMarginsF, QObject, QPageLayout, QTimer, QUrl, pyqtSignal
|
||||||
@ -29,12 +31,13 @@ from calibre.ebooks.pdf.image_writer import (
|
|||||||
from calibre.ebooks.pdf.render.serialize import PDFStream
|
from calibre.ebooks.pdf.render.serialize import PDFStream
|
||||||
from calibre.gui2 import setup_unix_signals
|
from calibre.gui2 import setup_unix_signals
|
||||||
from calibre.gui2.webengine import secure_webengine
|
from calibre.gui2.webengine import secure_webengine
|
||||||
|
from calibre.utils.fonts.sfnt.container import Sfnt, UnsupportedFont
|
||||||
from calibre.utils.logging import default_log
|
from calibre.utils.logging import default_log
|
||||||
from calibre.utils.podofo import (
|
from calibre.utils.podofo import (
|
||||||
get_podofo, remove_unused_fonts, set_metadata_implementation
|
get_podofo, remove_unused_fonts, set_metadata_implementation
|
||||||
)
|
)
|
||||||
from calibre.utils.short_uuid import uuid4
|
from calibre.utils.short_uuid import uuid4
|
||||||
from polyglot.builtins import iteritems, map, range, unicode_type
|
from polyglot.builtins import filter, iteritems, map, range, unicode_type
|
||||||
from polyglot.urllib import urlparse
|
from polyglot.urllib import urlparse
|
||||||
|
|
||||||
OK, KILL_SIGNAL = range(0, 2)
|
OK, KILL_SIGNAL = range(0, 2)
|
||||||
@ -512,6 +515,104 @@ def add_pagenum_toc(root, toc, opts, page_number_display_map):
|
|||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
|
# Fonts {{{
|
||||||
|
|
||||||
|
|
||||||
|
class Range(object):
    """A run of character codes ``first..last`` (inclusive) sharing one ``width``.

    NOTE(review): merge_w_arrays() calls a ``merge`` method on instances of
    this class, but no such method is defined in the visible code.
    """

    # sort_order has to be listed here: a class with __slots__ has no
    # instance __dict__, so assigning an attribute that is not a slot raises
    # AttributeError.
    __slots__ = ('first', 'last', 'width', 'sort_order')

    def __init__(self, first, last, width):
        self.first, self.last, self.width = first, last, width
        # Sort by first with larger ranges coming before smaller ones
        self.sort_order = self.first, -self.last
|
||||||
|
|
||||||
|
|
||||||
|
def merge_w_arrays(arrays):
    """Merge several PDF glyph width (W) arrays into a single W array.

    Two entry forms are understood in the input arrays:
    ``c [w1 w2 ...]`` (widths for consecutive codes starting at ``c``) and
    ``c_first c_last w`` (one width for a whole range of codes). A trailing
    incomplete entry is ignored.

    :param arrays: iterable of W arrays (lists of numbers and nested lists)
    :return: the merged W array as a list; ``[]`` if there was nothing to merge
    """
    ranges = []
    for w in arrays:
        i = 0
        while i + 1 < len(w):
            elem = w[i]
            next_elem = w[i+1]
            if isinstance(next_elem, list):
                # c [w1 w2 ...]: one single-code range per listed width
                ranges.extend(
                    Range(elem + offset, elem + offset, width)
                    for offset, width in enumerate(next_elem))
                i += 2
            elif i + 2 < len(w):
                # c_first c_last w
                ranges.append(Range(elem, next_elem, w[i+2]))
                i += 3
            else:
                break
    ranges.sort(key=attrgetter('sort_order'))
    merged_ranges = ranges[:1]
    for r in ranges[1:]:
        prev_range = merged_ranges[-1]
        # NOTE(review): Range.merge is not defined in the visible code. It is
        # used here as returning None when r needs no entry of its own and
        # otherwise the range that must be kept -- confirm once implemented.
        left_over = prev_range.merge(r)
        if left_over is not None:
            merged_ranges.append(left_over)
    if not merged_ranges:
        return []

    # combine consecutive single value ranges
    ans = []
    run = []  # pending single-code ranges whose codes are consecutive

    def flush_run():
        if len(run) > 1:
            # c [w1 w2 ...]
            ans.extend((run[0].first, [x.width for x in run]))
        elif run:
            ans.extend((run[0].first, run[0].last, run[0].width))
        del run[:]

    for r in merged_ranges:
        is_single = r.first == r.last
        if run and not (is_single and r.first == run[-1].last + 1):
            flush_run()
        if is_single:
            run.append(r)
        else:
            ans.extend((r.first, r.last, r.width))
    flush_run()
    return ans
|
||||||
|
|
||||||
|
|
||||||
|
def merge_font(fonts):
    """Merge the glyph width arrays of a group of fonts into one base font.

    :param fonts: list of font description dicts; the keys 'Data',
        'Reference', 'DescendantFont', 'Subtype', 'W' and 'W2' are read.
        The list is sorted in place and the base font's 'W' and 'W2'
        entries are overwritten.
    :raises StopIteration: if no font in the group has the base font as its
        'DescendantFont'
    """
    # TODO: Check if the ToUnicode entry in the Type0 dict needs to be merged

    # choose the largest font as the base font
    fonts.sort(key=lambda f: len(f['Data'] or b''), reverse=True)
    base_font = fonts[0]
    t0_font = next(f for f in fonts if f['DescendantFont'] == base_font['Reference'])
    # NOTE(review): base_font itself is excluded here, so its own W/W2
    # entries do not take part in the merge below and are replaced by the
    # merged result -- confirm this is intended.
    descendant_fonts = [f for f in fonts if f['Subtype'] != 'Type0' and f is not base_font]
    for key in ('W', 'W2'):
        # filter(None, ...) keeps only truthy values, i.e. it drops fonts
        # that have no (or an empty) array for this key. The predicate must
        # be None or a callable; passing True raises TypeError.
        arrays = tuple(filter(None, (f[key] for f in descendant_fonts)))
        base_font[key] = merge_w_arrays(arrays)
    # Not used yet; left as a placeholder for the unfinished merge work
    t0_font
|
||||||
|
|
||||||
|
|
||||||
|
def merge_fonts(pdf_doc):
    """Group the fonts of ``pdf_doc`` by their 'BaseFont' name and merge every
    group that qualifies.

    :param pdf_doc: document object providing ``list_fonts()``; it is called
        with ``True`` (presumably to also fetch the embedded font data --
        verify against the podofo extension)
    """

    def mergeable(fonts):
        # A group qualifies only if it contains at least one Type0 font, all
        # its Type0 fonts use an Identity-* encoding, and every other font
        # carries data that parses as an sfnt with a glyf table.
        found_type0 = False
        for font in fonts:
            if font['Subtype'] == 'Type0':
                found_type0 = True
                if not (font['Encoding'] and font['Encoding'].startswith('Identity-')):
                    return False
                continue
            if not font['Data']:
                return False
            try:
                parsed = Sfnt(font['Data'])
            except UnsupportedFont:
                return False
            # Keep the parsed font on the dict (stored before the glyf check)
            font['sfnt'] = parsed
            if b'glyf' not in parsed:
                # TODO: Add support for merging CFF tables
                return False
        return found_type0

    groups = {}
    for font in pdf_doc.list_fonts(True):
        groups.setdefault(font['BaseFont'], []).append(font)
    for _name, group in iteritems(groups):
        if mergeable(group):
            merge_font(group)
|
||||||
|
|
||||||
|
|
||||||
|
def test_merge_fonts():
    """Developer helper: run merge_fonts() on the PDF named by the last
    command line argument and save the result as ``<name>-merged.pdf``.
    """
    path = sys.argv[-1]
    podofo = get_podofo()
    pdf_doc = podofo.PDFDoc()
    pdf_doc.open(path)
    merge_fonts(pdf_doc)
    # splitext() only strips an extension from the last path component, so
    # paths without an extension or with dots in a directory name keep
    # their full name.
    out = os.path.splitext(path)[0] + '-merged.pdf'
    pdf_doc.save(out)
    print('Merged PDF written to', out)
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, cover_data=None, report_progress=lambda x, y: None):
|
def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, cover_data=None, report_progress=lambda x, y: None):
|
||||||
container = Container(opf_path, log)
|
container = Container(opf_path, log)
|
||||||
report_progress(0.05, _('Parsed all content for markup transformation'))
|
report_progress(0.05, _('Parsed all content for markup transformation'))
|
||||||
@ -567,8 +668,8 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
|
|||||||
add_toc(PDFOutlineRoot(pdf_doc), toc)
|
add_toc(PDFOutlineRoot(pdf_doc), toc)
|
||||||
report_progress(0.75, _('Added links to PDF content'))
|
report_progress(0.75, _('Added links to PDF content'))
|
||||||
|
|
||||||
# TODO: Remove duplicate fonts
|
merge_fonts(pdf_doc)
|
||||||
# TODO: Subset and embed fonts before rendering PDF
|
|
||||||
# TODO: Support for mathematics
|
# TODO: Support for mathematics
|
||||||
|
|
||||||
num_removed = remove_unused_fonts(pdf_doc)
|
num_removed = remove_unused_fonts(pdf_doc)
|
||||||
|
@ -729,7 +729,7 @@ static PyMethodDef PDFDoc_methods[] = {
|
|||||||
{"alter_links", (PyCFunction)PDFDoc_alter_links, METH_VARARGS,
|
{"alter_links", (PyCFunction)PDFDoc_alter_links, METH_VARARGS,
|
||||||
"alter_links() -> Change links in the document."
|
"alter_links() -> Change links in the document."
|
||||||
},
|
},
|
||||||
{"list_fonts", (PyCFunction)list_fonts, METH_NOARGS,
|
{"list_fonts", (PyCFunction)list_fonts, METH_VARARGS,
|
||||||
"list_fonts() -> Get list of fonts in document"
|
"list_fonts() -> Get list of fonts in document"
|
||||||
},
|
},
|
||||||
{"used_fonts_in_page_range", (PyCFunction)used_fonts_in_page_range, METH_VARARGS,
|
{"used_fonts_in_page_range", (PyCFunction)used_fonts_in_page_range, METH_VARARGS,
|
||||||
|
@ -72,6 +72,25 @@ used_fonts_in_page(PdfPage *page, int page_num, PyObject *ans) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Convert a PDF width array into a new Python list. Nested arrays become
// nested lists (recursively), integer entries become Python ints and real
// entries become Python floats. Returns NULL on failure; an entry of any
// other type sets ValueError.
static PyObject*
convert_w_array(const PdfArray &w) {
    pyunique_ptr result(PyList_New(0));
    if (!result) return NULL;
    for (PdfArray::const_iterator entry = w.begin(); entry != w.end(); entry++) {
        pyunique_ptr converted;
        if (entry->IsArray()) {
            converted.reset(convert_w_array(entry->GetArray()));
        } else if (entry->IsNumber()) {
            converted.reset(PyLong_FromLongLong((long long)entry->GetNumber()));
        } else if (entry->IsReal()) {
            converted.reset(PyFloat_FromDouble(entry->GetReal()));
        } else {
            PyErr_SetString(PyExc_ValueError, "Unknown datatype in w array");
        }
        // Still empty if the conversion failed or the entry type is unsupported
        if (!converted) return NULL;
        if (PyList_Append(result.get(), converted.get()) != 0) return NULL;
    }
    return result.release();
}
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
PyObject*
|
PyObject*
|
||||||
list_fonts(PDFDoc *self, PyObject *args) {
|
list_fonts(PDFDoc *self, PyObject *args) {
|
||||||
@ -90,10 +109,19 @@ list_fonts(PDFDoc *self, PyObject *args) {
|
|||||||
const PdfReference &ref = (*it)->Reference();
|
const PdfReference &ref = (*it)->Reference();
|
||||||
unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber();
|
unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber();
|
||||||
const PdfObject *descriptor = (*it)->GetIndirectKey("FontDescriptor");
|
const PdfObject *descriptor = (*it)->GetIndirectKey("FontDescriptor");
|
||||||
pyunique_ptr descendant_font, stream_ref, encoding;
|
pyunique_ptr descendant_font, stream_ref, encoding, w, w2;
|
||||||
PyBytesOutputStream stream_data;
|
PyBytesOutputStream stream_data;
|
||||||
|
if (dict.HasKey("W")) {
|
||||||
|
w.reset(convert_w_array(dict.GetKey("W")->GetArray()));
|
||||||
|
if (!w) return NULL;
|
||||||
|
}
|
||||||
|
if (dict.HasKey("W2")) {
|
||||||
|
w2.reset(convert_w_array(dict.GetKey("W2")->GetArray()));
|
||||||
|
if (!w2) return NULL;
|
||||||
|
}
|
||||||
if (dict.HasKey("Encoding") && dict.GetKey("Encoding")->IsName()) {
|
if (dict.HasKey("Encoding") && dict.GetKey("Encoding")->IsName()) {
|
||||||
encoding.reset(PyUnicode_FromString(dict.GetKey("Encoding")->GetName().GetName().c_str()));
|
encoding.reset(PyUnicode_FromString(dict.GetKey("Encoding")->GetName().GetName().c_str()));
|
||||||
|
if (!encoding) return NULL;
|
||||||
}
|
}
|
||||||
if (descriptor) {
|
if (descriptor) {
|
||||||
const PdfObject *ff = get_font_file(descriptor);
|
const PdfObject *ff = get_font_file(descriptor);
|
||||||
@ -102,7 +130,7 @@ list_fonts(PDFDoc *self, PyObject *args) {
|
|||||||
if (!stream_ref) return NULL;
|
if (!stream_ref) return NULL;
|
||||||
const PdfStream *stream = ff->GetStream();
|
const PdfStream *stream = ff->GetStream();
|
||||||
if (stream && get_font_data) {
|
if (stream && get_font_data) {
|
||||||
stream->GetCopy(&stream_data);
|
stream->GetFilteredCopy(&stream_data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (dict.HasKey("DescendantFonts")) {
|
} else if (dict.HasKey("DescendantFonts")) {
|
||||||
@ -112,14 +140,15 @@ list_fonts(PDFDoc *self, PyObject *args) {
|
|||||||
}
|
}
|
||||||
#define V(x) (x ? x.get() : Py_None)
|
#define V(x) (x ? x.get() : Py_None)
|
||||||
pyunique_ptr d(Py_BuildValue(
|
pyunique_ptr d(Py_BuildValue(
|
||||||
"{ss ss s(kk) sO sO sO sO}",
|
"{ss ss s(kk) sO sO sO sO sO sO}",
|
||||||
"BaseFont", name.c_str(),
|
"BaseFont", name.c_str(),
|
||||||
"Subtype", subtype.c_str(),
|
"Subtype", subtype.c_str(),
|
||||||
"Reference", num, generation,
|
"Reference", num, generation,
|
||||||
"Data", V(stream_data),
|
"Data", V(stream_data),
|
||||||
"DescendantFont", V(descendant_font),
|
"DescendantFont", V(descendant_font),
|
||||||
"StreamRef", V(stream_ref),
|
"StreamRef", V(stream_ref),
|
||||||
"Encoding", V(encoding)
|
"Encoding", V(encoding),
|
||||||
|
"W", V(w), "W2", V(w2)
|
||||||
));
|
));
|
||||||
#undef V
|
#undef V
|
||||||
if (!d) { return NULL; }
|
if (!d) { return NULL; }
|
||||||
|
Loading…
x
Reference in New Issue
Block a user