From c4e3fda6823c93ca1536168901334ae85ea60087 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 14 Jul 2019 13:55:30 +0530 Subject: [PATCH] Code to list fonts in a PDF file --- setup/extensions.json | 2 +- src/calibre/utils/podofo/__init__.py | 13 ++----- src/calibre/utils/podofo/doc.cpp | 12 ++---- src/calibre/utils/podofo/fonts.cpp | 55 ++++++++++++++++++++++++++++ src/calibre/utils/podofo/global.h | 20 ++++++++++ 5 files changed, 82 insertions(+), 20 deletions(-) create mode 100644 src/calibre/utils/podofo/fonts.cpp diff --git a/setup/extensions.json b/setup/extensions.json index 146963f752..56bfadf50b 100644 --- a/setup/extensions.json +++ b/setup/extensions.json @@ -116,7 +116,7 @@ }, { "name": "podofo", - "sources": "calibre/utils/podofo/utils.cpp calibre/utils/podofo/output.cpp calibre/utils/podofo/doc.cpp calibre/utils/podofo/outline.cpp calibre/utils/podofo/podofo.cpp", + "sources": "calibre/utils/podofo/utils.cpp calibre/utils/podofo/output.cpp calibre/utils/podofo/doc.cpp calibre/utils/podofo/outline.cpp calibre/utils/podofo/fonts.cpp calibre/utils/podofo/podofo.cpp", "headers": "calibre/utils/podofo/global.h", "libraries": "podofo", "lib_dirs": "!podofo_lib", diff --git a/src/calibre/utils/podofo/__init__.py b/src/calibre/utils/podofo/__init__.py index 33d53180c9..24a2259956 100644 --- a/src/calibre/utils/podofo/__init__.py +++ b/src/calibre/utils/podofo/__init__.py @@ -142,21 +142,14 @@ def get_image_count(path): return p.image_count() -def test_outline(src): +def test_list_fonts(src): podofo = get_podofo() p = podofo.PDFDoc() with open(src, 'rb') as f: raw = f.read() p.load(raw) - total = p.page_count() - root = p.create_outline('Table of Contents') - for i in range(0, total): - root.create('Page %d'%i, i, True) - raw = p.write() - out = '/tmp/outlined.pdf' - with open(out, 'wb') as f: - f.write(raw) - print('Outlined PDF:', out) + import pprint + pprint.pprint(p.list_fonts()) def test_save_to(src, dest): diff --git 
a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp index 37939a3f52..cbeb4f18ba 100644 --- a/src/calibre/utils/podofo/doc.cpp +++ b/src/calibre/utils/podofo/doc.cpp @@ -429,15 +429,6 @@ PDFDoc_extract_anchors(PDFDoc *self, PyObject *args) { // alter_links() {{{ -template -static inline bool -dictionary_has_key_name(PdfDictionary &d, T key, const char *name) { - const PdfObject *val = d.GetKey(key); - if (val && val->IsName() && val->GetName().GetName() == name) return true; - return false; -} - - static PyObject * PDFDoc_alter_links(PDFDoc *self, PyObject *args) { int count = 0; @@ -724,6 +715,9 @@ static PyMethodDef PDFDoc_methods[] = { {"alter_links", (PyCFunction)PDFDoc_alter_links, METH_VARARGS, "alter_links() -> Change links in the document." }, + {"list_fonts", (PyCFunction)list_fonts, METH_VARARGS, + "list_fonts() -> Get list of fonts in document" + }, {"delete_page", (PyCFunction)PDFDoc_delete_page, METH_VARARGS, "delete_page(page_num) -> Delete the specified page from the pdf (0 is the first page)." }, diff --git a/src/calibre/utils/podofo/fonts.cpp b/src/calibre/utils/podofo/fonts.cpp new file mode 100644 index 0000000000..51912dd173 --- /dev/null +++ b/src/calibre/utils/podofo/fonts.cpp @@ -0,0 +1,55 @@ +/* + * fonts.cpp + * Copyright (C) 2019 Kovid Goyal + * + * Distributed under terms of the GPL3 license. 
+ */
+
+#include "global.h"
+#include <string>  // NOTE(review): the header name on this line was lost; <string> covers the std::string use below - confirm against the tree
+
+using namespace pdf;
+using namespace std;
+
+extern "C" {
+// Returns a new list with one {BaseFont, Subtype, Reference, Length} dict per font dictionary, or NULL with a Python error set; args is unused.
+PyObject*
+list_fonts(PDFDoc *self, PyObject *args) {
+    pyunique_ptr ans(PyList_New(0));  // handed to the caller only via release() on success
+    if (!ans) return NULL;
+    try {
+        const PdfVecObjects &objects = self->doc->GetObjects();
+        for (TCIVecObjects it = objects.begin(); it != objects.end(); it++) {
+            if ((*it)->IsDictionary()) {
+                const PdfDictionary &dict = (*it)->GetDictionary();
+                if (dictionary_has_key_name(dict, PdfName::KeyType, "Font") && dict.HasKey("BaseFont") && dict.HasKey(PdfName::KeySubtype)) {  // both keys are dereferenced below, so require both
+                    const string &name = dict.GetKey("BaseFont")->GetName().GetName();
+                    const string &subtype = dict.GetKey(PdfName::KeySubtype)->GetName().GetName();
+                    const PdfReference &ref = (*it)->Reference();
+                    unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber();
+                    const PdfObject *descriptor = (*it)->GetIndirectKey("FontDescriptor");
+                    long long stream_len = 0;  // stays 0 when no FontFile* stream is found
+                    if (descriptor) {
+                        const PdfObject *ff = descriptor->GetIndirectKey("FontFile");
+                        if (!ff) ff = descriptor->GetIndirectKey("FontFile2");
+                        if (!ff) ff = descriptor->GetIndirectKey("FontFile3");
+                        const PdfStream *stream = ff ? ff->GetStream() : NULL;  // ff is still NULL if none of the three keys resolved
+                        if (stream) stream_len = stream->GetLength();
+                    }
+                    pyunique_ptr d(Py_BuildValue(
+                            "{sssss(kk)sL}",  // k = unsigned long, L = long long
+                            "BaseFont", name.c_str(),
+                            "Subtype", subtype.c_str(),
+                            "Reference", num, generation,
+                            "Length", stream_len));
+                    if (!d || PyList_Append(ans.get(), d.get()) != 0) return NULL;  // Python error already set by the failing call
+                }
+            }
+        }
+    } catch (const PdfError &err) {
+        podofo_set_exception(err);
+        return NULL;
+    }
+    return ans.release();
+}
+}
diff --git a/src/calibre/utils/podofo/global.h b/src/calibre/utils/podofo/global.h
index 6d97159c18..c441f0dbcf 100644
--- a/src/calibre/utils/podofo/global.h
+++ b/src/calibre/utils/podofo/global.h
@@ -41,4 +41,24 @@ void podofo_set_exception(const PdfError &err);
 PyObject * podofo_convert_pdfstring(const PdfString &s);
 const PdfString podofo_convert_pystring(PyObject *py);
 PyObject* write_doc(PdfMemDocument *doc,
PyObject *f);
+
+struct PyObjectDeleter {  // deleter functor so std::unique_ptr can hold a PyObject reference
+    void operator()(PyObject *obj) {
+        Py_XDECREF(obj);
+    }
+};
+// unique_ptr that uses Py_XDECREF as the destructor function.
+typedef std::unique_ptr<PyObject, PyObjectDeleter> pyunique_ptr;
+
+// True iff d has `key`, its value is a PDF name, and that name equals `name`. T is any type PdfDictionary::GetKey() accepts.
+template <typename T>
+static inline bool
+dictionary_has_key_name(const PdfDictionary &d, T key, const char *name) {
+    const PdfObject *val = d.GetKey(key);
+    return val && val->IsName() && val->GetName().GetName() == name;
+}
+
+extern "C" {
+PyObject* list_fonts(PDFDoc*, PyObject*);
+}
 }