mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Deduplicate Type3 glyph definitions
This commit is contained in:
parent
32d483b3ce
commit
66b7037cd2
@ -36,7 +36,7 @@ from calibre.utils.fonts.sfnt.container import Sfnt, UnsupportedFont
|
||||
from calibre.utils.fonts.sfnt.merge import merge_truetype_fonts_for_pdf
|
||||
from calibre.utils.logging import default_log
|
||||
from calibre.utils.podofo import (
|
||||
get_podofo, remove_unused_fonts, set_metadata_implementation
|
||||
dedup_type3_fonts, get_podofo, remove_unused_fonts, set_metadata_implementation
|
||||
)
|
||||
from calibre.utils.short_uuid import uuid4
|
||||
from polyglot.builtins import as_bytes, filter, iteritems, map, range, unicode_type
|
||||
@ -838,6 +838,9 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
|
||||
report_progress(0.75, _('Added links to PDF content'))
|
||||
|
||||
merge_fonts(pdf_doc)
|
||||
num_removed = dedup_type3_fonts(pdf_doc)
|
||||
if num_removed:
|
||||
log('Removed', num_removed, 'unused Type3 glyphs')
|
||||
|
||||
# TODO: Support for mathematics
|
||||
|
||||
|
@ -144,6 +144,20 @@ def test_remove_unused_fonts(src):
|
||||
print('Modified pdf saved to:', dest)
|
||||
|
||||
|
||||
def dedup_type3_fonts(pdf_doc):
    '''
    De-duplicate repeated glyph definitions in the Type3 fonts of ``pdf_doc``.

    :param pdf_doc: An object exposing a ``dedup_type3_fonts()`` method
                    (the callers in this module pass a podofo ``PDFDoc``).
    :return: Whatever ``pdf_doc.dedup_type3_fonts()`` returns; callers use it
             as the number of glyphs that were removed.
    '''
    return pdf_doc.dedup_type3_fonts()
|
||||
|
||||
|
||||
def test_dedup_type3_fonts(src):
    '''
    Manual test helper: open the PDF at ``src``, de-duplicate its Type3 glyph
    definitions and save the result next to it as ``<stem>-removed.pdf``.

    :param src: Path of the PDF file to process.
    '''
    # Local import so this helper does not depend on the module's import block.
    import os

    podofo = get_podofo()
    p = podofo.PDFDoc()
    p.open(src)
    num = dedup_type3_fonts(p)
    # splitext() only strips an extension from the final path component, so
    # extension-less names and directories containing dots keep their stem
    # (rpartition('.') would yield '' or cut the path at the directory's dot).
    dest = os.path.splitext(src)[0] + '-removed.pdf'
    p.save(dest)
    print('Modified pdf with {} glyphs removed saved to:'.format(num), dest)
|
||||
|
||||
|
||||
def test_list_fonts(src):
|
||||
podofo = get_podofo()
|
||||
p = podofo.PDFDoc()
|
||||
|
@ -753,6 +753,9 @@ static PyMethodDef PDFDoc_methods[] = {
|
||||
{"merge_fonts", (PyCFunction)py_merge_fonts, METH_VARARGS,
|
||||
"merge_fonts() -> Merge the specified fonts."
|
||||
},
|
||||
{"dedup_type3_fonts", (PyCFunction)py_dedup_type3_fonts, METH_VARARGS,
|
||||
"dedup_type3_fonts() -> De-duplicate repeated glyphs in Type3 fonts"
|
||||
},
|
||||
{"delete_pages", (PyCFunction)PDFDoc_delete_pages, METH_VARARGS,
|
||||
"delete_page(page_num, count=1) -> Delete the specified pages from the pdf."
|
||||
},
|
||||
|
@ -356,6 +356,103 @@ merge_fonts(PDFDoc *self, PyObject *args) {
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
class CharProc {
|
||||
char *buf; pdf_long sz;
|
||||
PdfReference ref;
|
||||
std::size_t precomputed_hash;
|
||||
CharProc( const CharProc & ) ;
|
||||
CharProc & operator=( const CharProc & ) ;
|
||||
|
||||
public:
|
||||
CharProc(const PdfReference &reference, const PdfObject *o) : buf(NULL), sz(0), ref(reference), precomputed_hash(0) {
|
||||
const PdfStream *stream = o->GetStream();
|
||||
stream->GetFilteredCopy(&buf, &sz);
|
||||
precomputed_hash = std::hash<pdf_long>()(sz);
|
||||
}
|
||||
CharProc(CharProc &&other) noexcept :
|
||||
buf(other.buf), sz(other.sz), ref(other.ref), precomputed_hash(other.precomputed_hash) {
|
||||
other.buf = NULL;
|
||||
}
|
||||
CharProc& operator=(CharProc &&other) noexcept {
|
||||
if (buf) podofo_free(buf);
|
||||
buf = other.buf; other.buf = NULL; sz = other.sz; ref = other.ref; precomputed_hash = other.precomputed_hash;
|
||||
return *this;
|
||||
}
|
||||
~CharProc() noexcept { if (buf) podofo_free(buf); buf = NULL; }
|
||||
bool operator==(const CharProc &other) const noexcept {
|
||||
return other.sz == sz && memcmp(buf, other.buf, sz) == 0;
|
||||
}
|
||||
std::size_t hash() const noexcept { return precomputed_hash; }
|
||||
const PdfReference& reference() const noexcept { return ref; }
|
||||
};
|
||||
|
||||
struct CharProcHasher {
|
||||
std::size_t operator()(const CharProc& k) const { return k.hash(); }
|
||||
};
|
||||
|
||||
typedef std::unordered_map<CharProc, std::vector<PdfReference>, CharProcHasher> char_proc_reference_map;
|
||||
|
||||
/*
 * De-duplicate glyph procedures across all Type3 fonts in the document.
 *
 * Pass 1: walk every indirect object, pick the Type3 font dictionaries and
 *         group the streams referenced from their /CharProcs dictionaries by
 *         content. The first stream seen with a given content is the
 *         canonical one; later references with equal content are recorded.
 * Pass 2: remove every non-canonical stream object from the document and
 *         remember which canonical reference replaces it.
 * Pass 3: rewrite the /CharProcs dictionary of each Type3 font so that
 *         removed references point at their canonical replacement.
 *
 * args is unused. Returns a Python integer: the number of stream objects
 * that were removed. Malformed font entries (missing or wrongly typed
 * /Subtype or /CharProcs, non-reference glyph entries, objects without a
 * stream) are skipped rather than treated as errors.
 */
static PyObject*
dedup_type3_fonts(PDFDoc *self, PyObject *args) {
    unsigned long count = 0;
    unordered_reference_set all_type3_fonts;
    char_proc_reference_map cp_map;

    PdfVecObjects &objects = self->doc->GetObjects();
    for (auto &k : objects) {
        // Indirect objects may be arrays, numbers, etc.; only dictionaries can be fonts.
        if (!k->IsDictionary()) continue;
        const PdfDictionary &dict = k->GetDictionary();
        if (!dictionary_has_key_name(dict, PdfName::KeyType, "Font")) continue;
        const PdfObject *subtype = dict.GetKey(PdfName::KeySubtype);
        if (!subtype || !subtype->IsName() || subtype->GetName().GetName() != "Type3") continue;
        // GetIndirectKey() follows a reference, matching the lookup used when
        // the fonts are rewritten below.
        const PdfObject *char_procs = k->GetIndirectKey("CharProcs");
        if (!char_procs || !char_procs->IsDictionary()) continue;
        all_type3_fonts.insert(k->Reference());
        for (auto &x : char_procs->GetDictionary().GetKeys()) {
            if (!x.second || !x.second->IsReference()) continue;
            const PdfReference &ref = x.second->GetReference();
            const PdfObject *cpobj = objects.GetObject(ref);
            if (!cpobj || !cpobj->HasStream()) continue;
            CharProc cp(ref, cpobj);
            auto it = cp_map.find(cp);
            if (it == cp_map.end()) {
                std::vector<PdfReference> vals;
                cp_map.insert(std::make_pair(std::move(cp), std::move(vals)));
            } else (*it).second.push_back(ref);
        }
    }

    std::unordered_map<PdfReference, PdfReference, PdfReferenceHasher> ref_map;
    for (auto &x : cp_map) {
        const PdfReference &canonical_ref = x.first.reference();
        for (auto &ref : x.second) {
            // The canonical stream itself can show up again when several fonts share it.
            if (ref != canonical_ref) {
                // A duplicate shared by several fonts is listed once per use;
                // remove and count it only the first time.
                if (ref_map.insert(std::make_pair(ref, canonical_ref)).second) {
                    delete objects.RemoveObject(ref);
                    count++;
                }
            }
        }
    }

    if (count > 0) {
        for (auto &ref : all_type3_fonts) {
            PdfObject *font = objects.GetObject(ref);
            if (!font) continue;
            const PdfObject *char_procs = font->GetIndirectKey("CharProcs");
            if (!char_procs || !char_procs->IsDictionary()) continue;
            const PdfDictionary &dict = char_procs->GetDictionary();
            // Edit a copy: the dictionary must not be modified while its keys are iterated.
            PdfDictionary new_dict = PdfDictionary(dict);
            bool changed = false;
            for (auto &k : dict.GetKeys()) {
                if (!k.second || !k.second->IsReference()) continue;
                auto it = ref_map.find(k.second->GetReference());
                if (it != ref_map.end()) {
                    new_dict.AddKey(k.first, (*it).second);
                    changed = true;
                }
            }
            if (changed) font->GetDictionary().AddKey("CharProcs", new_dict);
        }
    }
    return Py_BuildValue("k", count);
}
|
||||
|
||||
PYWRAP(list_fonts)
|
||||
PYWRAP(merge_fonts)
|
||||
PYWRAP(remove_unused_fonts)
|
||||
PYWRAP(dedup_type3_fonts)
|
||||
|
@ -99,5 +99,6 @@ extern "C" {
|
||||
PyObject* py_list_fonts(PDFDoc*, PyObject*);
|
||||
PyObject* py_remove_unused_fonts(PDFDoc *self, PyObject *args);
|
||||
PyObject* py_merge_fonts(PDFDoc *self, PyObject *args);
|
||||
PyObject* py_dedup_type3_fonts(PDFDoc *self, PyObject *args);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user