def list_fonts(pdf_doc):
    """Return a mapping of font reference -> font description for ``pdf_doc``.

    Every description is one of the dicts returned by
    ``pdf_doc.list_fonts()``, extended in place with a boolean ``'used'``
    key. A font counts as used when its reference is reported by
    ``pdf_doc.used_fonts_in_page_range()``, or when it is the
    ``'DescendantFont'`` of a font that is itself used.

    References that do not correspond to any listed font (for example in a
    malformed PDF) are ignored instead of raising ``KeyError``.
    """
    fonts = pdf_doc.list_fonts()
    ref_map = {f['Reference']: f for f in fonts}

    # Give every font a 'used' flag up front, keeping any value already set.
    for font in fonts:
        font.setdefault('used', False)

    # Fonts referenced directly from page content streams.
    for ref in pdf_doc.used_fonts_in_page_range():
        font = ref_map.get(ref)
        if font is not None:
            font['used'] = True

    # A used composite font implies that its descendant font is used too.
    for font in fonts:
        descendant_ref = font.get('DescendantFont')
        if descendant_ref and font['used']:
            descendant = ref_map.get(descendant_ref)
            if descendant is not None:
                descendant['used'] = True
    return ref_map
}, diff --git a/src/calibre/utils/podofo/fonts.cpp b/src/calibre/utils/podofo/fonts.cpp index e07172e1c9..0217d993d0 100644 --- a/src/calibre/utils/podofo/fonts.cpp +++ b/src/calibre/utils/podofo/fonts.cpp @@ -11,9 +11,16 @@ using namespace pdf; +static inline PyObject* +ref_as_tuple(const PdfReference &ref) { + unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber(); + return Py_BuildValue("kk", num, generation); +} + + static bool -used_fonts_in_page(const PdfPage *page, PyObject *ans) { - PdfContentsTokenizer tokenizer((PdfCanvas*)page); +used_fonts_in_page(PdfPage *page, PyObject *ans) { + PdfContentsTokenizer tokenizer(page); bool in_text_block = false; const char* token = NULL; EPdfContentsType contents_type; @@ -35,11 +42,9 @@ used_fonts_in_page(const PdfPage *page, PyObject *ans) { stack.pop(); if (stack.size() > 0 && stack.top().IsName()) { const PdfName &reference_name = stack.top().GetName(); - PdfObject* font = pPage->GetFromResources("Font", reference_name); + PdfObject* font = page->GetFromResources("Font", reference_name); if (font) { - const PdfReference &ref = font->Reference(); - unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber(); - pyunique_ptr r(Py_BuildValue("kk", num, generation)); + pyunique_ptr r(ref_as_tuple(font->Reference())); if (!r) return false; if (PySet_Add(ans, r.get()) != 0) return false; } @@ -66,19 +71,33 @@ list_fonts(PDFDoc *self, PyObject *args) { unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber(); const PdfObject *descriptor = (*it)->GetIndirectKey("FontDescriptor"); long long stream_len = 0; + pyunique_ptr descendant_font, stream_ref; if (descriptor) { const PdfObject *ff = descriptor->GetIndirectKey("FontFile"); if (!ff) ff = descriptor->GetIndirectKey("FontFile2"); if (!ff) ff = descriptor->GetIndirectKey("FontFile3"); - const PdfStream *stream = ff->GetStream(); - if (stream) stream_len = stream->GetLength(); + if (ff) { + 
stream_ref.reset(ref_as_tuple(ff->Reference())); + if (!stream_ref) return NULL; + const PdfStream *stream = ff->GetStream(); + if (stream) stream_len = stream->GetLength(); + } + } else if (dict.HasKey("DescendantFonts")) { + const PdfArray &df = dict.GetKey("DescendantFonts")->GetArray(); + descendant_font.reset(ref_as_tuple(df[0].GetReference())); + if (!descendant_font) return NULL; } +#define V(x) (x ? x.get() : Py_None) pyunique_ptr d(Py_BuildValue( - "{sssss(kk)sL}", + "{ss ss s(kk) sL sO sO}", "BaseFont", name.c_str(), "Subtype", subtype.c_str(), "Reference", num, generation, - "Length", stream_len)); + "Length", stream_len, + "DescendantFont", V(descendant_font), + "StreamRef", V(stream_ref) + )); +#undef V if (!d) { return NULL; } if (PyList_Append(ans.get(), d.get()) != 0) return NULL; } @@ -99,7 +118,7 @@ used_fonts_in_page_range(PDFDoc *self, PyObject *args) { if (!ans) return NULL; for (int i = first - 1; i < last; i++) { try { - const PdfPage *page = self->doc->GetPage(i); + PdfPage *page = self->doc->GetPage(i); if (!used_fonts_in_page(page, ans.get())) return NULL; } catch (const PdfError &err) { continue; } } diff --git a/src/calibre/utils/podofo/global.h b/src/calibre/utils/podofo/global.h index c441f0dbcf..3a36fea3ca 100644 --- a/src/calibre/utils/podofo/global.h +++ b/src/calibre/utils/podofo/global.h @@ -60,5 +60,6 @@ dictionary_has_key_name(const PdfDictionary &d, T key, const char *name) { extern "C" { PyObject* list_fonts(PDFDoc*, PyObject*); +PyObject* used_fonts_in_page_range(PDFDoc *self, PyObject *args); } }