From bf152707b305359f69638f2c572cbba981077cff Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 17 Mar 2015 09:36:46 +0530
Subject: [PATCH] Add API to count images in a PDF file

---
 src/calibre/utils/podofo/__init__.py |  9 +++++++++
 src/calibre/utils/podofo/doc.cpp     | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/src/calibre/utils/podofo/__init__.py b/src/calibre/utils/podofo/__init__.py
index ee7a02a4be..4aa708ae66 100644
--- a/src/calibre/utils/podofo/__init__.py
+++ b/src/calibre/utils/podofo/__init__.py
@@ -119,6 +119,15 @@ def get_xmp_metadata(path):
     p.load(raw)
     return p.get_xmp_metadata()
 
+def get_image_count(path):
+    ''' Return the image count reported by PDFDoc.image_count() for the PDF file at path. '''
+    podofo = get_podofo()
+    p = podofo.PDFDoc()
+    with open(path, 'rb') as f:
+        raw = f.read()
+    p.load(raw)
+    return p.image_count()
+
 def test_outline(src):
     podofo = get_podofo()
     p = podofo.PDFDoc()
diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp
index edecf44ba8..b33647336c 100644
--- a/src/calibre/utils/podofo/doc.cpp
+++ b/src/calibre/utils/podofo/doc.cpp
@@ -141,6 +141,38 @@ PDFDoc_page_count(PDFDoc *self, PyObject *args) {
     return Py_BuildValue("i", count);
 } // }}}
 
+// image_count() {{{
+// Count the image XObjects in the document. An object is counted when its
+// dictionary has /Subtype /Image and its /Type, if present, is /XObject.
+// Returns the count as a Python int, or NULL with a Python exception set
+// when PoDoFo raises a PdfError.
+static PyObject *
+PDFDoc_image_count(PDFDoc *self, PyObject *args) {
+    int count = 0;
+    const PdfObject* obj_type = NULL;
+    const PdfObject* obj_sub_type = NULL;
+    try {
+        TCIVecObjects it = self->doc->GetObjects().begin();
+        while( it != self->doc->GetObjects().end() ) {
+            if( (*it)->IsDictionary() ) {
+                obj_type = (*it)->GetDictionary().GetKey( PdfName::KeyType );
+                obj_sub_type = (*it)->GetDictionary().GetKey( PdfName::KeySubtype );
+                // Requiring the Image subtype keeps other XObjects (e.g. forms)
+                // out of the count; /Type is only checked when it is present.
+                if( ( obj_sub_type && obj_sub_type->IsName() && ( obj_sub_type->GetName().GetName() == "Image" ) ) &&
+                        ( !obj_type || ( obj_type->IsName() && ( obj_type->GetName().GetName() == "XObject" ) ) ) ) count++;
+                // NOTE(review): presumably releases the data PoDoFo loaded for
+                // this object now that it has been inspected -- confirm.
+                self->doc->FreeObjectMemory( *it );
+            }
+            it++;
+        }
+    } catch(const PdfError & err) {
+        podofo_set_exception(err);
+        return NULL;
+    }
+    return Py_BuildValue("i", count);
+} // }}}
+
 // delete_page {{{
 static PyObject *
 PDFDoc_delete_page(PDFDoc *self, PyObject *args) {
@@ -539,6 +571,9 @@ static PyMethodDef PDFDoc_methods[] = {
     {"page_count", (PyCFunction)PDFDoc_page_count, METH_VARARGS,
      "page_count() -> Number of pages in the PDF."
     },
+    {"image_count", (PyCFunction)PDFDoc_image_count, METH_VARARGS,
+     "image_count() -> Number of images in the PDF."
+    },
     {"delete_page", (PyCFunction)PDFDoc_delete_page, METH_VARARGS,
      "delete_page(page_num) -> Delete the specified page from the pdf (0 is the first page)."
     },