mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Code to get used fonts in a page
This commit is contained in:
parent
c4e3fda682
commit
10e5d3c486
@ -7,9 +7,47 @@
|
|||||||
|
|
||||||
#include "global.h"
|
#include "global.h"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <stack>
|
||||||
|
|
||||||
using namespace pdf;
|
using namespace pdf;
|
||||||
using namespace std;
|
|
||||||
|
static bool
|
||||||
|
used_fonts_in_page(const PdfPage *page, PyObject *ans) {
|
||||||
|
PdfContentsTokenizer tokenizer((PdfCanvas*)page);
|
||||||
|
bool in_text_block = false;
|
||||||
|
const char* token = NULL;
|
||||||
|
EPdfContentsType contents_type;
|
||||||
|
PdfVariant var;
|
||||||
|
std::stack<PdfVariant> stack;
|
||||||
|
|
||||||
|
while (tokenizer.ReadNext(contents_type, token, var)) {
|
||||||
|
if (contents_type == ePdfContentsType_Variant) stack.push(var);
|
||||||
|
if (contents_type != ePdfContentsType_Keyword) continue;
|
||||||
|
if (strcmp(token, "BT") == 0) {
|
||||||
|
in_text_block = true;
|
||||||
|
continue;
|
||||||
|
} else if (strcmp(token, "ET") == 0) {
|
||||||
|
in_text_block = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!in_text_block) continue;
|
||||||
|
if (strcmp(token, "Tf") == 0) {
|
||||||
|
stack.pop();
|
||||||
|
if (stack.size() > 0 && stack.top().IsName()) {
|
||||||
|
const PdfName &reference_name = stack.top().GetName();
|
||||||
|
PdfObject* font = pPage->GetFromResources("Font", reference_name);
|
||||||
|
if (font) {
|
||||||
|
const PdfReference &ref = font->Reference();
|
||||||
|
unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber();
|
||||||
|
pyunique_ptr r(Py_BuildValue("kk", num, generation));
|
||||||
|
if (!r) return false;
|
||||||
|
if (PySet_Add(ans, r.get()) != 0) return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
PyObject*
|
PyObject*
|
||||||
@ -22,8 +60,8 @@ list_fonts(PDFDoc *self, PyObject *args) {
|
|||||||
if ((*it)->IsDictionary()) {
|
if ((*it)->IsDictionary()) {
|
||||||
const PdfDictionary &dict = (*it)->GetDictionary();
|
const PdfDictionary &dict = (*it)->GetDictionary();
|
||||||
if (dictionary_has_key_name(dict, PdfName::KeyType, "Font") && dict.HasKey("BaseFont")) {
|
if (dictionary_has_key_name(dict, PdfName::KeyType, "Font") && dict.HasKey("BaseFont")) {
|
||||||
const string &name = dict.GetKey("BaseFont")->GetName().GetName();
|
const std::string &name = dict.GetKey("BaseFont")->GetName().GetName();
|
||||||
const string &subtype = dict.GetKey(PdfName::KeySubtype)->GetName().GetName();
|
const std::string &subtype = dict.GetKey(PdfName::KeySubtype)->GetName().GetName();
|
||||||
const PdfReference &ref = (*it)->Reference();
|
const PdfReference &ref = (*it)->Reference();
|
||||||
unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber();
|
unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber();
|
||||||
const PdfObject *descriptor = (*it)->GetIndirectKey("FontDescriptor");
|
const PdfObject *descriptor = (*it)->GetIndirectKey("FontDescriptor");
|
||||||
@ -52,4 +90,20 @@ list_fonts(PDFDoc *self, PyObject *args) {
|
|||||||
}
|
}
|
||||||
return ans.release();
|
return ans.release();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Python-callable: return the set of fonts used on a range of pages.
 *
 * Accepts two optional integer arguments, `first` and `last`, which are
 * 1-based and inclusive; they default to 1 and the document's page count.
 * Values outside the document are clamped to the valid page range, so an
 * empty or fully out-of-range request yields an empty set.
 *
 * Returns a new Python set of (object number, generation number) tuples, or
 * NULL if argument parsing, set creation or the per-page scan failed. Pages
 * for which PoDoFo raises a PdfError are skipped.
 */
PyObject*
used_fonts_in_page_range(PDFDoc *self, PyObject *args) {
    const int page_count = self->doc->GetPageCount();
    int first = 1, last = page_count;
    if (!PyArg_ParseTuple(args, "|ii", &first, &last)) return NULL;
    // Clamp caller-supplied bounds so GetPage() is never asked for a
    // negative or past-the-end index.
    if (first < 1) first = 1;
    if (last > page_count) last = page_count;
    pyunique_ptr ans(PySet_New(NULL));
    if (!ans) return NULL;
    for (int i = first - 1; i < last; i++) {
        try {
            const PdfPage *page = self->doc->GetPage(i);
            // A missing page must not be dereferenced by the helper.
            if (page == NULL) continue;
            if (!used_fonts_in_page(page, ans.get())) return NULL;
        } catch (const PdfError &) { continue; }
    }
    return ans.release();
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user