mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Code to get used fonts in a page
This commit is contained in:
parent
c4e3fda682
commit
10e5d3c486
@ -7,9 +7,47 @@
|
||||
|
||||
#include "global.h"
|
||||
#include <iostream>
|
||||
#include <stack>
|
||||
|
||||
using namespace pdf;
|
||||
using namespace std;
|
||||
|
||||
static bool
|
||||
used_fonts_in_page(const PdfPage *page, PyObject *ans) {
|
||||
PdfContentsTokenizer tokenizer((PdfCanvas*)page);
|
||||
bool in_text_block = false;
|
||||
const char* token = NULL;
|
||||
EPdfContentsType contents_type;
|
||||
PdfVariant var;
|
||||
std::stack<PdfVariant> stack;
|
||||
|
||||
while (tokenizer.ReadNext(contents_type, token, var)) {
|
||||
if (contents_type == ePdfContentsType_Variant) stack.push(var);
|
||||
if (contents_type != ePdfContentsType_Keyword) continue;
|
||||
if (strcmp(token, "BT") == 0) {
|
||||
in_text_block = true;
|
||||
continue;
|
||||
} else if (strcmp(token, "ET") == 0) {
|
||||
in_text_block = false;
|
||||
continue;
|
||||
}
|
||||
if (!in_text_block) continue;
|
||||
if (strcmp(token, "Tf") == 0) {
|
||||
stack.pop();
|
||||
if (stack.size() > 0 && stack.top().IsName()) {
|
||||
const PdfName &reference_name = stack.top().GetName();
|
||||
PdfObject* font = pPage->GetFromResources("Font", reference_name);
|
||||
if (font) {
|
||||
const PdfReference &ref = font->Reference();
|
||||
unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber();
|
||||
pyunique_ptr r(Py_BuildValue("kk", num, generation));
|
||||
if (!r) return false;
|
||||
if (PySet_Add(ans, r.get()) != 0) return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
PyObject*
|
||||
@ -22,8 +60,8 @@ list_fonts(PDFDoc *self, PyObject *args) {
|
||||
if ((*it)->IsDictionary()) {
|
||||
const PdfDictionary &dict = (*it)->GetDictionary();
|
||||
if (dictionary_has_key_name(dict, PdfName::KeyType, "Font") && dict.HasKey("BaseFont")) {
|
||||
const string &name = dict.GetKey("BaseFont")->GetName().GetName();
|
||||
const string &subtype = dict.GetKey(PdfName::KeySubtype)->GetName().GetName();
|
||||
const std::string &name = dict.GetKey("BaseFont")->GetName().GetName();
|
||||
const std::string &subtype = dict.GetKey(PdfName::KeySubtype)->GetName().GetName();
|
||||
const PdfReference &ref = (*it)->Reference();
|
||||
unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber();
|
||||
const PdfObject *descriptor = (*it)->GetIndirectKey("FontDescriptor");
|
||||
@ -52,4 +90,20 @@ list_fonts(PDFDoc *self, PyObject *args) {
|
||||
}
|
||||
return ans.release();
|
||||
}
|
||||
|
||||
PyObject*
|
||||
used_fonts_in_page_range(PDFDoc *self, PyObject *args) {
|
||||
int first = 1, last = self->doc->GetPageCount();
|
||||
if (!PyArg_ParseTuple(args, "|ii", &first, &last)) return NULL;
|
||||
pyunique_ptr ans(PySet_New(NULL));
|
||||
if (!ans) return NULL;
|
||||
for (int i = first - 1; i < last; i++) {
|
||||
try {
|
||||
const PdfPage *page = self->doc->GetPage(i);
|
||||
if (!used_fonts_in_page(page, ans.get())) return NULL;
|
||||
} catch (const PdfError &err) { continue; }
|
||||
}
|
||||
return ans.release();
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user