Code to get used fonts in a page

This commit is contained in:
Kovid Goyal 2019-07-14 15:14:41 +05:30
parent c4e3fda682
commit 10e5d3c486
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -7,9 +7,47 @@
#include "global.h"
#include <iostream>
#include <stack>
using namespace pdf;
using namespace std;
static bool
used_fonts_in_page(const PdfPage *page, PyObject *ans) {
PdfContentsTokenizer tokenizer((PdfCanvas*)page);
bool in_text_block = false;
const char* token = NULL;
EPdfContentsType contents_type;
PdfVariant var;
std::stack<PdfVariant> stack;
while (tokenizer.ReadNext(contents_type, token, var)) {
if (contents_type == ePdfContentsType_Variant) stack.push(var);
if (contents_type != ePdfContentsType_Keyword) continue;
if (strcmp(token, "BT") == 0) {
in_text_block = true;
continue;
} else if (strcmp(token, "ET") == 0) {
in_text_block = false;
continue;
}
if (!in_text_block) continue;
if (strcmp(token, "Tf") == 0) {
stack.pop();
if (stack.size() > 0 && stack.top().IsName()) {
const PdfName &reference_name = stack.top().GetName();
PdfObject* font = pPage->GetFromResources("Font", reference_name);
if (font) {
const PdfReference &ref = font->Reference();
unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber();
pyunique_ptr r(Py_BuildValue("kk", num, generation));
if (!r) return false;
if (PySet_Add(ans, r.get()) != 0) return false;
}
}
}
}
return true;
}
extern "C" {
PyObject*
@ -22,8 +60,8 @@ list_fonts(PDFDoc *self, PyObject *args) {
if ((*it)->IsDictionary()) {
const PdfDictionary &dict = (*it)->GetDictionary();
if (dictionary_has_key_name(dict, PdfName::KeyType, "Font") && dict.HasKey("BaseFont")) {
const string &name = dict.GetKey("BaseFont")->GetName().GetName();
const string &subtype = dict.GetKey(PdfName::KeySubtype)->GetName().GetName();
const std::string &name = dict.GetKey("BaseFont")->GetName().GetName();
const std::string &subtype = dict.GetKey(PdfName::KeySubtype)->GetName().GetName();
const PdfReference &ref = (*it)->Reference();
unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber();
const PdfObject *descriptor = (*it)->GetIndirectKey("FontDescriptor");
@ -52,4 +90,20 @@ list_fonts(PDFDoc *self, PyObject *args) {
}
return ans.release();
}
PyObject*
used_fonts_in_page_range(PDFDoc *self, PyObject *args) {
int first = 1, last = self->doc->GetPageCount();
if (!PyArg_ParseTuple(args, "|ii", &first, &last)) return NULL;
pyunique_ptr ans(PySet_New(NULL));
if (!ans) return NULL;
for (int i = first - 1; i < last; i++) {
try {
const PdfPage *page = self->doc->GetPage(i);
if (!used_fonts_in_page(page, ans.get())) return NULL;
} catch (const PdfError &err) { continue; }
}
return ans.release();
}
}