mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add support for removing unused Type3 fonts as well
This commit is contained in:
parent
470193f222
commit
7f3bd476d3
@ -10,7 +10,7 @@ from calibre.constants import plugins, preferred_encoding
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.utils.ipc.simple_worker import WorkerError, fork_job
|
||||
from polyglot.builtins import unicode_type, iteritems
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
def get_podofo():
|
||||
@ -127,20 +127,11 @@ def get_image_count(path):
|
||||
def list_fonts(pdf_doc):
    '''
    Build a map from font reference to font dictionary for ``pdf_doc``.

    Every dictionary returned by ``pdf_doc.list_fonts()`` gains a boolean
    ``used`` entry. A font is marked used when its reference is returned by
    ``pdf_doc.used_fonts_in_page_range()``, or when it is the
    ``DescendantFont`` of a font that is itself marked used.

    :param pdf_doc: object providing ``list_fonts()`` and
        ``used_fonts_in_page_range()``
    :return: dict mapping each font's ``Reference`` to its (mutated) dictionary
    '''
    fonts = pdf_doc.list_fonts()
    ref_map = {f['Reference']: f for f in fonts}
    for ref in pdf_doc.used_fonts_in_page_range():
        font = ref_map.get(ref)
        # If a page references a font that list_fonts() did not report there
        # is nothing to flag; ignore it instead of failing with a KeyError.
        if font is not None:
            font['used'] = True
    for font in fonts:
        font['used'] = font.get('used', False)
        if font['used'] and font['DescendantFont']:
            # Same tolerance for a descendant that was not reported.
            descendant = ref_map.get(font['DescendantFont'])
            if descendant is not None:
                descendant['used'] = True
    return ref_map
|
||||
|
||||
|
||||
def remove_unused_fonts(pdf_doc):
    '''
    Remove the fonts of ``pdf_doc`` that no page uses.

    The work is delegated to the document's native ``remove_unused_fonts()``
    method.

    :param pdf_doc: document object exposing ``remove_unused_fonts()``
    :return: the value returned by the native method (the number of font
        objects it removed)
    '''
    return pdf_doc.remove_unused_fonts()
|
||||
|
||||
|
||||
def test_remove_unused_fonts(src):
|
||||
|
@ -747,12 +747,12 @@ static PyMethodDef PDFDoc_methods[] = {
|
||||
{"list_fonts", (PyCFunction)list_fonts, METH_VARARGS,
|
||||
"list_fonts() -> Get list of fonts in document"
|
||||
},
|
||||
{"used_fonts_in_page_range", (PyCFunction)used_fonts_in_page_range, METH_VARARGS,
|
||||
"used_fonts_in_page_range() -> Get list of references to fonts used in the specified pages"
|
||||
},
|
||||
{"remove_fonts", (PyCFunction)remove_fonts, METH_VARARGS,
|
||||
"remove_fonts() -> Remove the specified font objects."
|
||||
},
|
||||
{"remove_unused_fonts", (PyCFunction)remove_unused_fonts, METH_NOARGS,
|
||||
"remove_unused_fonts() -> Remove unused font objects."
|
||||
},
|
||||
{"merge_fonts", (PyCFunction)merge_fonts, METH_VARARGS,
|
||||
"merge_fonts() -> Merge the specified fonts."
|
||||
},
|
||||
|
@ -8,7 +8,6 @@
|
||||
#include "global.h"
|
||||
#include <iostream>
|
||||
#include <stack>
|
||||
#include <unordered_map>
|
||||
|
||||
using namespace pdf;
|
||||
|
||||
@ -72,7 +71,7 @@ replace_font_references(PDFDoc *self, std::unordered_map<uint64_t, uint64_t> &re
|
||||
}
|
||||
|
||||
static bool
|
||||
used_fonts_in_page(PdfPage *page, int page_num, PyObject *ans) {
|
||||
used_fonts_in_page(PdfPage *page, unordered_reference_set &ans) {
|
||||
PdfContentsTokenizer tokenizer(page);
|
||||
bool in_text_block = false;
|
||||
const char* token = NULL;
|
||||
@ -96,11 +95,7 @@ used_fonts_in_page(PdfPage *page, int page_num, PyObject *ans) {
|
||||
if (stack.size() > 0 && stack.top().IsName()) {
|
||||
const PdfName &reference_name = stack.top().GetName();
|
||||
PdfObject* font = page->GetFromResources("Font", reference_name);
|
||||
if (font) {
|
||||
pyunique_ptr r(ref_as_tuple(font->Reference()));
|
||||
if (!r) return false;
|
||||
if (PySet_Add(ans, r.get()) != 0) return false;
|
||||
}
|
||||
if (font) ans.insert(font->Reference());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -235,21 +230,6 @@ list_fonts(PDFDoc *self, PyObject *args) {
|
||||
return ans.release();
|
||||
}
|
||||
|
||||
// Collect the references of all fonts used on a range of pages.
//
// Python signature: used_fonts_in_page_range([first[, last]]) -> set
// `first` and `last` are 1-based and inclusive; they default to the first page
// and to the document's page count. Pages for which PoDoFo raises a PdfError
// are skipped. Returns a new set, or NULL with a Python exception set when
// argument parsing, set creation or the per-page scan fails.
PyObject*
used_fonts_in_page_range(PDFDoc *self, PyObject *args) {
    int first_page = 1;
    int last_page = self->doc->GetPageCount();
    if (!PyArg_ParseTuple(args, "|ii", &first_page, &last_page)) return NULL;

    pyunique_ptr font_refs(PySet_New(NULL));
    if (!font_refs) return NULL;

    // Page numbers are 1-based for callers, page indices are 0-based.
    for (int page_index = first_page - 1; page_index < last_page; page_index++) {
        try {
            PdfPage *page = self->doc->GetPage(page_index);
            const bool ok = used_fonts_in_page(page, page_index, font_refs.get());
            if (!ok) return NULL;
        } catch (const PdfError &) {
            // Best effort: ignore this page and carry on with the next one.
        }
    }
    return font_refs.release();
}
|
||||
|
||||
PyObject*
|
||||
remove_fonts(PDFDoc *self, PyObject *args) {
|
||||
PyObject *fonts;
|
||||
@ -267,6 +247,63 @@ remove_fonts(PDFDoc *self, PyObject *args) {
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
typedef std::unordered_map<PdfReference, unsigned long, PdfReferenceHasher> charprocs_usage_map;
|
||||
|
||||
PyObject*
|
||||
remove_unused_fonts(PDFDoc *self, PyObject *args) {
|
||||
unordered_reference_set used_fonts;
|
||||
for (int i = 0; i < self->doc->GetPageCount(); i++) {
|
||||
PdfPage *page = self->doc->GetPage(i);
|
||||
if (page) used_fonts_in_page(page, used_fonts);
|
||||
}
|
||||
unordered_reference_set all_fonts;
|
||||
unordered_reference_set type3_fonts;
|
||||
charprocs_usage_map charprocs_usage;
|
||||
PdfVecObjects &objects = self->doc->GetObjects();
|
||||
for (TCIVecObjects it = objects.begin(); it != objects.end(); it++) {
|
||||
if ((*it)->IsDictionary()) {
|
||||
const PdfDictionary &dict = (*it)->GetDictionary();
|
||||
if (dictionary_has_key_name(dict, PdfName::KeyType, "Font")) {
|
||||
const std::string &font_type = dict.GetKey(PdfName::KeySubtype)->GetName().GetName();
|
||||
if (font_type == "Type0") {
|
||||
all_fonts.insert((*it)->Reference());
|
||||
} else if (font_type == "Type3") {
|
||||
all_fonts.insert((*it)->Reference());
|
||||
type3_fonts.insert((*it)->Reference());
|
||||
for (auto &x : dict.GetKey("CharProcs")->GetDictionary().GetKeys()) {
|
||||
const PdfReference &ref = x.second->GetReference();
|
||||
if (charprocs_usage.find(ref) == charprocs_usage.end()) charprocs_usage[ref] = 1;
|
||||
else charprocs_usage[ref] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned long count = 0;
|
||||
for (auto &ref : all_fonts) {
|
||||
if (used_fonts.find(ref) == used_fonts.end()) {
|
||||
PdfObject *font = objects.GetObject(ref);
|
||||
if (font) {
|
||||
count++;
|
||||
if (type3_fonts.find(ref) != type3_fonts.end()) {
|
||||
for (auto &x : font->GetIndirectKey("CharProcs")->GetDictionary().GetKeys()) {
|
||||
charprocs_usage[x.second->GetReference()] -= 1;
|
||||
}
|
||||
}
|
||||
remove_font(objects, font);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &x : charprocs_usage) {
|
||||
if (x.second == 0u) {
|
||||
delete objects.RemoveObject(x.first);
|
||||
}
|
||||
}
|
||||
return Py_BuildValue("k", count);
|
||||
}
|
||||
|
||||
PyObject*
|
||||
merge_fonts(PDFDoc *self, PyObject *args) {
|
||||
PyObject *items, *replacements;
|
||||
|
@ -12,6 +12,8 @@
|
||||
|
||||
#define USING_SHARED_PODOFO
|
||||
#include <podofo.h>
|
||||
#include <unordered_set>
|
||||
#include <unordered_map>
|
||||
using namespace PoDoFo;
|
||||
|
||||
namespace pdf {
|
||||
@ -84,10 +86,19 @@ dictionary_has_key_name(const PdfDictionary &d, T key, const char *name) {
|
||||
return false;
|
||||
}
|
||||
|
||||
class PdfReferenceHasher {
|
||||
public:
|
||||
size_t operator()(const PdfReference & obj) const {
|
||||
return std::hash<pdf_objnum>()(obj.ObjectNumber());
|
||||
}
|
||||
};
|
||||
typedef std::unordered_set<PdfReference, PdfReferenceHasher> unordered_reference_set;
|
||||
|
||||
|
||||
extern "C" {
|
||||
PyObject* list_fonts(PDFDoc*, PyObject*);
|
||||
PyObject* used_fonts_in_page_range(PDFDoc *self, PyObject *args);
|
||||
PyObject* remove_fonts(PDFDoc *self, PyObject *args);
|
||||
PyObject* remove_unused_fonts(PDFDoc *self, PyObject *args);
|
||||
PyObject* merge_fonts(PDFDoc *self, PyObject *args);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user