mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
When listing fonts check if they are used
This commit is contained in:
parent
dfc09ede98
commit
1e9517f40f
@ -142,6 +142,18 @@ def get_image_count(path):
|
|||||||
return p.image_count()
|
return p.image_count()
|
||||||
|
|
||||||
|
|
||||||
|
def list_fonts(pdf_doc):
    """Return the document's fonts keyed by reference, each flagged as used or not.

    Every font dictionary returned by ``pdf_doc.list_fonts()`` gains a boolean
    ``'used'`` entry. A font is marked used when its reference is reported by
    ``pdf_doc.used_fonts_in_page_range()``, or when it is the
    ``'DescendantFont'`` of a font that is itself used.

    :param pdf_doc: object providing ``list_fonts()`` and
        ``used_fonts_in_page_range()``
    :return: dict mapping each font's ``'Reference'`` to its font dictionary
    """
    fonts = pdf_doc.list_fonts()
    ref_map = {f['Reference']: f for f in fonts}

    for ref in pdf_doc.used_fonts_in_page_range():
        font = ref_map.get(ref)
        # A page may reference an object that the font listing did not
        # report; skip it instead of failing with a KeyError.
        if font is not None:
            font['used'] = True

    for font in fonts:
        font.setdefault('used', False)
        descendant_ref = font.get('DescendantFont')
        if descendant_ref and font['used']:
            # A used composite font implies its descendant font is used too.
            descendant = ref_map.get(descendant_ref)
            if descendant is not None:
                descendant['used'] = True
    return ref_map
|
||||||
|
|
||||||
|
|
||||||
def test_list_fonts(src):
|
def test_list_fonts(src):
|
||||||
podofo = get_podofo()
|
podofo = get_podofo()
|
||||||
p = podofo.PDFDoc()
|
p = podofo.PDFDoc()
|
||||||
@ -149,7 +161,7 @@ def test_list_fonts(src):
|
|||||||
raw = f.read()
|
raw = f.read()
|
||||||
p.load(raw)
|
p.load(raw)
|
||||||
import pprint
|
import pprint
|
||||||
pprint.pprint(p.list_fonts())
|
pprint.pprint(list_fonts(p))
|
||||||
|
|
||||||
|
|
||||||
def test_save_to(src, dest):
|
def test_save_to(src, dest):
|
||||||
|
@ -718,6 +718,9 @@ static PyMethodDef PDFDoc_methods[] = {
|
|||||||
{"list_fonts", (PyCFunction)list_fonts, METH_VARARGS,
|
{"list_fonts", (PyCFunction)list_fonts, METH_VARARGS,
|
||||||
"list_fonts() -> Get list of fonts in document"
|
"list_fonts() -> Get list of fonts in document"
|
||||||
},
|
},
|
||||||
|
{"used_fonts_in_page_range", (PyCFunction)used_fonts_in_page_range, METH_VARARGS,
|
||||||
|
"used_fonts_in_page_range() -> Get list of references to fonts used in the specified pages"
|
||||||
|
},
|
||||||
{"delete_page", (PyCFunction)PDFDoc_delete_page, METH_VARARGS,
|
{"delete_page", (PyCFunction)PDFDoc_delete_page, METH_VARARGS,
|
||||||
"delete_page(page_num) -> Delete the specified page from the pdf (0 is the first page)."
|
"delete_page(page_num) -> Delete the specified page from the pdf (0 is the first page)."
|
||||||
},
|
},
|
||||||
|
@ -11,9 +11,16 @@
|
|||||||
|
|
||||||
using namespace pdf;
|
using namespace pdf;
|
||||||
|
|
||||||
|
// Build a Python 2-tuple (object number, generation number) from a PDF
// reference. Returns whatever Py_BuildValue returns; callers in this file
// check the result for NULL.
static inline PyObject*
ref_as_tuple(const PdfReference &ref) {
    // Widen to unsigned long to match the "k" format units used below.
    const unsigned long object_number = ref.ObjectNumber();
    const unsigned long generation_number = ref.GenerationNumber();
    return Py_BuildValue("kk", object_number, generation_number);
}
|
||||||
|
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
used_fonts_in_page(const PdfPage *page, PyObject *ans) {
|
used_fonts_in_page(PdfPage *page, PyObject *ans) {
|
||||||
PdfContentsTokenizer tokenizer((PdfCanvas*)page);
|
PdfContentsTokenizer tokenizer(page);
|
||||||
bool in_text_block = false;
|
bool in_text_block = false;
|
||||||
const char* token = NULL;
|
const char* token = NULL;
|
||||||
EPdfContentsType contents_type;
|
EPdfContentsType contents_type;
|
||||||
@ -35,11 +42,9 @@ used_fonts_in_page(const PdfPage *page, PyObject *ans) {
|
|||||||
stack.pop();
|
stack.pop();
|
||||||
if (stack.size() > 0 && stack.top().IsName()) {
|
if (stack.size() > 0 && stack.top().IsName()) {
|
||||||
const PdfName &reference_name = stack.top().GetName();
|
const PdfName &reference_name = stack.top().GetName();
|
||||||
PdfObject* font = pPage->GetFromResources("Font", reference_name);
|
PdfObject* font = page->GetFromResources("Font", reference_name);
|
||||||
if (font) {
|
if (font) {
|
||||||
const PdfReference &ref = font->Reference();
|
pyunique_ptr r(ref_as_tuple(font->Reference()));
|
||||||
unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber();
|
|
||||||
pyunique_ptr r(Py_BuildValue("kk", num, generation));
|
|
||||||
if (!r) return false;
|
if (!r) return false;
|
||||||
if (PySet_Add(ans, r.get()) != 0) return false;
|
if (PySet_Add(ans, r.get()) != 0) return false;
|
||||||
}
|
}
|
||||||
@ -66,19 +71,33 @@ list_fonts(PDFDoc *self, PyObject *args) {
|
|||||||
unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber();
|
unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber();
|
||||||
const PdfObject *descriptor = (*it)->GetIndirectKey("FontDescriptor");
|
const PdfObject *descriptor = (*it)->GetIndirectKey("FontDescriptor");
|
||||||
long long stream_len = 0;
|
long long stream_len = 0;
|
||||||
|
pyunique_ptr descendant_font, stream_ref;
|
||||||
if (descriptor) {
|
if (descriptor) {
|
||||||
const PdfObject *ff = descriptor->GetIndirectKey("FontFile");
|
const PdfObject *ff = descriptor->GetIndirectKey("FontFile");
|
||||||
if (!ff) ff = descriptor->GetIndirectKey("FontFile2");
|
if (!ff) ff = descriptor->GetIndirectKey("FontFile2");
|
||||||
if (!ff) ff = descriptor->GetIndirectKey("FontFile3");
|
if (!ff) ff = descriptor->GetIndirectKey("FontFile3");
|
||||||
const PdfStream *stream = ff->GetStream();
|
if (ff) {
|
||||||
if (stream) stream_len = stream->GetLength();
|
stream_ref.reset(ref_as_tuple(ff->Reference()));
|
||||||
|
if (!stream_ref) return NULL;
|
||||||
|
const PdfStream *stream = ff->GetStream();
|
||||||
|
if (stream) stream_len = stream->GetLength();
|
||||||
|
}
|
||||||
|
} else if (dict.HasKey("DescendantFonts")) {
|
||||||
|
const PdfArray &df = dict.GetKey("DescendantFonts")->GetArray();
|
||||||
|
descendant_font.reset(ref_as_tuple(df[0].GetReference()));
|
||||||
|
if (!descendant_font) return NULL;
|
||||||
}
|
}
|
||||||
|
#define V(x) (x ? x.get() : Py_None)
|
||||||
pyunique_ptr d(Py_BuildValue(
|
pyunique_ptr d(Py_BuildValue(
|
||||||
"{sssss(kk)sL}",
|
"{ss ss s(kk) sL sO sO}",
|
||||||
"BaseFont", name.c_str(),
|
"BaseFont", name.c_str(),
|
||||||
"Subtype", subtype.c_str(),
|
"Subtype", subtype.c_str(),
|
||||||
"Reference", num, generation,
|
"Reference", num, generation,
|
||||||
"Length", stream_len));
|
"Length", stream_len,
|
||||||
|
"DescendantFont", V(descendant_font),
|
||||||
|
"StreamRef", V(stream_ref)
|
||||||
|
));
|
||||||
|
#undef V
|
||||||
if (!d) { return NULL; }
|
if (!d) { return NULL; }
|
||||||
if (PyList_Append(ans.get(), d.get()) != 0) return NULL;
|
if (PyList_Append(ans.get(), d.get()) != 0) return NULL;
|
||||||
}
|
}
|
||||||
@ -99,7 +118,7 @@ used_fonts_in_page_range(PDFDoc *self, PyObject *args) {
|
|||||||
if (!ans) return NULL;
|
if (!ans) return NULL;
|
||||||
for (int i = first - 1; i < last; i++) {
|
for (int i = first - 1; i < last; i++) {
|
||||||
try {
|
try {
|
||||||
const PdfPage *page = self->doc->GetPage(i);
|
PdfPage *page = self->doc->GetPage(i);
|
||||||
if (!used_fonts_in_page(page, ans.get())) return NULL;
|
if (!used_fonts_in_page(page, ans.get())) return NULL;
|
||||||
} catch (const PdfError &err) { continue; }
|
} catch (const PdfError &err) { continue; }
|
||||||
}
|
}
|
||||||
|
@ -60,5 +60,6 @@ dictionary_has_key_name(const PdfDictionary &d, T key, const char *name) {
|
|||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
PyObject* list_fonts(PDFDoc*, PyObject*);
|
PyObject* list_fonts(PDFDoc*, PyObject*);
|
||||||
|
PyObject* used_fonts_in_page_range(PDFDoc *self, PyObject *args);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user