Start work on porting to new PoDoFo API

This commit is contained in:
Kovid Goyal 2023-05-11 13:50:59 +05:30
parent 10afcea57f
commit 76fbbef9d0
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
13 changed files with 454 additions and 463 deletions

View File

@ -231,8 +231,12 @@ class Environment(NamedTuple):
def lib_dirs_to_ldflags(self, dirs) -> List[str]: def lib_dirs_to_ldflags(self, dirs) -> List[str]:
return [self.libdir_prefix+x for x in dirs if x] return [self.libdir_prefix+x for x in dirs if x]
def libraries_to_ldflags(self, dirs): def libraries_to_ldflags(self, libs):
return [self.lib_prefix+x+self.lib_suffix for x in dirs] def map_name(x):
if '/' in x:
return x
return self.lib_prefix+x+self.lib_suffix
return list(map(map_name, libs))

View File

@ -209,6 +209,7 @@ else:
podofo_lib = os.environ.get('PODOFO_LIB_DIR', podofo_lib) podofo_lib = os.environ.get('PODOFO_LIB_DIR', podofo_lib)
podofo_inc = os.environ.get('PODOFO_INC_DIR', podofo_inc) podofo_inc = os.environ.get('PODOFO_INC_DIR', podofo_inc)
podofo = os.environ.get('PODOFO_LIB_NAME', 'podofo')
podofo_error = None if os.path.exists(os.path.join(podofo_inc, 'podofo.h')) else \ podofo_error = None if os.path.exists(os.path.join(podofo_inc, 'podofo.h')) else \
('PoDoFo not found on your system. Various PDF related', ('PoDoFo not found on your system. Various PDF related',
' functionality will not work. Use the PODOFO_INC_DIR and', ' functionality will not work. Use the PODOFO_INC_DIR and',

View File

@ -120,11 +120,11 @@
"name": "podofo", "name": "podofo",
"sources": "calibre/utils/podofo/utils.cpp calibre/utils/podofo/output.cpp calibre/utils/podofo/doc.cpp calibre/utils/podofo/outline.cpp calibre/utils/podofo/fonts.cpp calibre/utils/podofo/impose.cpp calibre/utils/podofo/images.cpp calibre/utils/podofo/outlines.cpp calibre/utils/podofo/podofo.cpp", "sources": "calibre/utils/podofo/utils.cpp calibre/utils/podofo/output.cpp calibre/utils/podofo/doc.cpp calibre/utils/podofo/outline.cpp calibre/utils/podofo/fonts.cpp calibre/utils/podofo/impose.cpp calibre/utils/podofo/images.cpp calibre/utils/podofo/outlines.cpp calibre/utils/podofo/podofo.cpp",
"headers": "calibre/utils/podofo/global.h", "headers": "calibre/utils/podofo/global.h",
"libraries": "podofo", "libraries": "!podofo",
"lib_dirs": "!podofo_lib_dirs", "lib_dirs": "!podofo_lib_dirs",
"inc_dirs": "!podofo_inc_dirs", "inc_dirs": "!podofo_inc_dirs",
"error": "!podofo_error", "error": "!podofo_error",
"needs_c++": "11" "needs_c++": "17"
}, },
{ {
"name": "html_as_json", "name": "html_as_json",

View File

@ -7,6 +7,8 @@
#include "global.h" #include "global.h"
#include <iostream> #include <iostream>
#include <algorithm>
#include <string_view>
using namespace pdf; using namespace pdf;
@ -41,11 +43,7 @@ PDFDoc_load(PDFDoc *self, PyObject *args) {
if (!PyArg_ParseTuple(args, "y#", &buffer, &size)) return NULL; if (!PyArg_ParseTuple(args, "y#", &buffer, &size)) return NULL;
try { try {
#if PODOFO_VERSION <= 0x000905 self->doc->LoadFromBuffer(bufferview(buffer, size));
self->doc->Load(buffer, (long)size);
#else
self->doc->LoadFromBuffer(buffer, (long)size);
#endif
} catch(const PdfError & err) { } catch(const PdfError & err) {
podofo_set_exception(err); podofo_set_exception(err);
return NULL; return NULL;
@ -84,7 +82,7 @@ PDFDoc_save(PDFDoc *self, PyObject *args) {
if (PyArg_ParseTuple(args, "s", &buffer)) { if (PyArg_ParseTuple(args, "s", &buffer)) {
try { try {
self->doc->Write(buffer); self->doc->Save(buffer);
} catch(const PdfError & err) { } catch(const PdfError & err) {
podofo_set_exception(err); podofo_set_exception(err);
return NULL; return NULL;
@ -94,16 +92,43 @@ PDFDoc_save(PDFDoc *self, PyObject *args) {
Py_RETURN_NONE; Py_RETURN_NONE;
} }
class BytesOutputDevice : public OutputStreamDevice {
private:
pyunique_ptr bytes;
size_t written;
public:
BytesOutputDevice() : bytes(PyBytes_FromStringAndSize(NULL, 1 * 1024 *1024)) { SetAccess(DeviceAccess::Write); }
size_t GetLength() const { return written; }
size_t GetPosition() const { return written; }
size_t capacity() const { return bytes ? PyBytes_GET_SIZE(bytes.get()) : 0; }
bool Eof() const { return false; }
void writeBuffer(const char* src, size_t src_sz) {
if (written + src_sz > capacity()) {
PyObject* old = bytes.release();
if (_PyBytes_Resize(&old, std::max(written + src_sz, 2 * capacity())) != 0) {
return;
}
bytes.reset(old);
}
if (bytes) {
memcpy(PyBytes_AS_STRING(bytes.get()), src, src_sz);
written += src_sz;
}
}
void Flush() { }
PyObject* Release() { return bytes.release(); }
};
static PyObject * static PyObject *
PDFDoc_write(PDFDoc *self, PyObject *args) { PDFDoc_write(PDFDoc *self, PyObject *args) {
PyObject *ans; PyObject *ans;
BytesOutputDevice d;
try { try {
PdfRefCountedBuffer buffer(1*1024*1024); self->doc->Save(d);
PdfOutputDevice out(&buffer); return d.Release();
self->doc->Write(&out);
ans = PyBytes_FromStringAndSize(buffer.GetBuffer(), out.Tell());
if (ans == NULL) PyErr_NoMemory();
} catch(const PdfError &err) { } catch(const PdfError &err) {
podofo_set_exception(err); podofo_set_exception(err);
return NULL; return NULL;
@ -124,11 +149,25 @@ PDFDoc_save_to_fileobj(PDFDoc *self, PyObject *args) {
static PyObject * static PyObject *
PDFDoc_uncompress_pdf(PDFDoc *self, PyObject *args) { PDFDoc_uncompress_pdf(PDFDoc *self, PyObject *args) {
for (auto &it : self->doc->GetObjects()) { try {
if(it->HasStream()) { auto& objects = self->doc->GetObjects();
PdfMemStream* stream = dynamic_cast<PdfMemStream*>(it->GetStream()); for (auto obj : objects) {
stream->Uncompress(); auto stream = obj->GetStream();
if (stream == nullptr) continue;
try {
try {
stream->Unwrap();
} catch (PdfError& e) {
if (e.GetCode() != PdfErrorCode::Flate) throw e;
}
}
catch (PdfError& e) {
if (e.GetCode() != PdfErrorCode::UnsupportedFilter) throw e;
}
} }
} catch(const PdfError & err) {
podofo_set_exception(err);
return NULL;
} }
Py_RETURN_NONE; Py_RETURN_NONE;
} }
@ -140,7 +179,8 @@ PDFDoc_uncompress_pdf(PDFDoc *self, PyObject *args) {
static PyObject * static PyObject *
PDFDoc_extract_first_page(PDFDoc *self, PyObject *args) { PDFDoc_extract_first_page(PDFDoc *self, PyObject *args) {
try { try {
while (self->doc->GetPageCount() > 1) self->doc->GetPagesTree()->DeletePage(1); auto pages = &self->doc->GetPages();
while (pages->GetCount() > 1) pages->RemovePageAt(1);
} catch(const PdfError & err) { } catch(const PdfError & err) {
podofo_set_exception(err); podofo_set_exception(err);
return NULL; return NULL;
@ -154,7 +194,7 @@ static PyObject *
PDFDoc_page_count(PDFDoc *self, PyObject *args) { PDFDoc_page_count(PDFDoc *self, PyObject *args) {
int count; int count;
try { try {
count = self->doc->GetPageCount(); count = self->doc->GetPages().GetCount();
} catch(const PdfError & err) { } catch(const PdfError & err) {
podofo_set_exception(err); podofo_set_exception(err);
return NULL; return NULL;
@ -173,8 +213,8 @@ PDFDoc_image_count(PDFDoc *self, PyObject *args) {
if( it->IsDictionary() ) { if( it->IsDictionary() ) {
obj_type = it->GetDictionary().GetKey( PdfName::KeyType ); obj_type = it->GetDictionary().GetKey( PdfName::KeyType );
obj_sub_type = it->GetDictionary().GetKey( PdfName::KeySubtype ); obj_sub_type = it->GetDictionary().GetKey( PdfName::KeySubtype );
if( ( obj_type && obj_type->IsName() && ( obj_type->GetName().GetName() == "XObject" ) ) || if( ( obj_type && obj_type->IsName() && ( obj_type->GetName().GetString() == "XObject" ) ) ||
( obj_sub_type && obj_sub_type->IsName() && ( obj_sub_type->GetName().GetName() == "Image" ) ) ) count++; ( obj_sub_type && obj_sub_type->IsName() && ( obj_sub_type->GetName().GetString() == "Image" ) ) ) count++;
} }
} }
} catch(const PdfError & err) { } catch(const PdfError & err) {
@ -190,7 +230,9 @@ PDFDoc_delete_pages(PDFDoc *self, PyObject *args) {
int page = 0, count = 1; int page = 0, count = 1;
if (PyArg_ParseTuple(args, "i|i", &page, &count)) { if (PyArg_ParseTuple(args, "i|i", &page, &count)) {
try { try {
self->doc->DeletePages(page - 1, count); while (count > 0) {
self->doc->GetPages().RemovePageAt(page - 1);
}
} catch(const PdfError & err) { } catch(const PdfError & err) {
podofo_set_exception(err); podofo_set_exception(err);
return NULL; return NULL;
@ -207,10 +249,9 @@ PDFDoc_get_page_box(PDFDoc *self, PyObject *args) {
const char *which; const char *which;
if (PyArg_ParseTuple(args, "si", &which, &pagenum)) { if (PyArg_ParseTuple(args, "si", &which, &pagenum)) {
try { try {
PdfPagesTree* tree = self->doc->GetPagesTree(); auto page = get_page(self->doc, pagenum-1);
PdfPage* page = tree->GetPage(pagenum - 1); if (!page) { PyErr_Format(PyExc_ValueError, "page number %d not found in PDF file", pagenum); return NULL; }
if (!page) { PyErr_Format(PyExc_ValueError, "page number %d not found in PDF file", pagenum); return NULL; } Rect rect;
PdfRect rect;
if (strcmp(which, "MediaBox") == 0) { if (strcmp(which, "MediaBox") == 0) {
rect = page->GetMediaBox(); rect = page->GetMediaBox();
} else if (strcmp(which, "CropBox") == 0) { } else if (strcmp(which, "CropBox") == 0) {
@ -225,7 +266,7 @@ PDFDoc_get_page_box(PDFDoc *self, PyObject *args) {
PyErr_Format(PyExc_KeyError, "%s is not a known box", which); PyErr_Format(PyExc_KeyError, "%s is not a known box", which);
return NULL; return NULL;
} }
return Py_BuildValue("dddd", rect.GetLeft(), rect.GetBottom(), rect.GetWidth(), rect.GetHeight()); return Py_BuildValue("dddd", rect.GetLeft(), rect.GetBottom(), rect.Width, rect.Height);
} catch(const PdfError & err) { } catch(const PdfError & err) {
podofo_set_exception(err); podofo_set_exception(err);
return NULL; return NULL;
@ -243,13 +284,12 @@ PDFDoc_set_page_box(PDFDoc *self, PyObject *args) {
const char *which; const char *which;
if (PyArg_ParseTuple(args, "sidddd", &which, &pagenum, &left, &bottom, &width, &height)) { if (PyArg_ParseTuple(args, "sidddd", &which, &pagenum, &left, &bottom, &width, &height)) {
try { try {
PdfPagesTree* tree = self->doc->GetPagesTree(); PdfPage* page = get_page(self->doc, pagenum-1);
PdfPage* page = tree->GetPage(pagenum - 1); if (!page) { PyErr_Format(PyExc_ValueError, "page number %d not found in PDF file", pagenum); return NULL; }
if (!page) { PyErr_Format(PyExc_ValueError, "page number %d not found in PDF file", pagenum); return NULL; } Rect rect(left, bottom, width, height);
PdfRect rect(left, bottom, width, height); PdfArray box;
PdfObject box; rect.ToArray(box);
rect.ToVariant(box); page->GetObject().GetDictionary().AddKey(PdfName(which), box);
page->GetObject()->GetDictionary().AddKey(PdfName(which), box);
Py_RETURN_NONE; Py_RETURN_NONE;
} catch(const PdfError & err) { } catch(const PdfError & err) {
podofo_set_exception(err); podofo_set_exception(err);
@ -266,9 +306,7 @@ PDFDoc_copy_page(PDFDoc *self, PyObject *args) {
int from = 0, to = 0; int from = 0, to = 0;
if (!PyArg_ParseTuple(args, "ii", &from, &to)) return NULL; if (!PyArg_ParseTuple(args, "ii", &from, &to)) return NULL;
try { try {
PdfPagesTree* tree = self->doc->GetPagesTree(); self->doc->GetPages().InsertDocumentPageAt(to - 1, *self->doc, from - 1);
PdfPage* page = tree->GetPage(from - 1);
tree->InsertPage(to - 1, page);
} catch(const PdfError & err) { } catch(const PdfError & err) {
podofo_set_exception(err); podofo_set_exception(err);
return NULL; return NULL;
@ -287,14 +325,14 @@ PDFDoc_append(PDFDoc *self, PyObject *args) {
typ = PyObject_IsInstance(doc, (PyObject*)&PDFDocType); typ = PyObject_IsInstance(doc, (PyObject*)&PDFDocType);
if (typ == -1) return NULL; if (typ == -1) return NULL;
if (typ == 0) { PyErr_SetString(PyExc_TypeError, "You must pass a PDFDoc instance to this method"); return NULL; } if (typ == 0) { PyErr_SetString(PyExc_TypeError, "You must pass a PDFDoc instance to this method"); return NULL; }
PDFDoc *pdfdoc = (PDFDoc*)doc;
try { try {
self->doc->Append(*((PDFDoc*)doc)->doc, true); self->doc->GetPages().AppendDocumentPages(*pdfdoc->doc);
} catch (const PdfError & err) { } catch (const PdfError & err) {
podofo_set_exception(err); podofo_set_exception(err);
return NULL; return NULL;
} }
Py_RETURN_NONE; Py_RETURN_NONE;
} // }}} } // }}}
@ -307,7 +345,7 @@ PDFDoc_insert_existing_page(PDFDoc *self, PyObject *args) {
if (!PyArg_ParseTuple(args, "O!|ii", &PDFDocType, &src_doc, &src_page, &at)) return NULL; if (!PyArg_ParseTuple(args, "O!|ii", &PDFDocType, &src_doc, &src_page, &at)) return NULL;
try { try {
self->doc->InsertExistingPageAt(*src_doc->doc, src_page, at); self->doc->GetPages().InsertDocumentPageAt(at, *src_doc->doc, src_page);
} catch (const PdfError & err) { } catch (const PdfError & err) {
podofo_set_exception(err); podofo_set_exception(err);
return NULL; return NULL;
@ -323,12 +361,11 @@ PDFDoc_set_box(PDFDoc *self, PyObject *args) {
double left, bottom, width, height; double left, bottom, width, height;
char *box; char *box;
if (!PyArg_ParseTuple(args, "isdddd", &num, &box, &left, &bottom, &width, &height)) return NULL; if (!PyArg_ParseTuple(args, "isdddd", &num, &box, &left, &bottom, &width, &height)) return NULL;
try { try {
PdfRect r(left, bottom, width, height); Rect r(left, bottom, width, height);
PdfObject o; PdfArray o;
r.ToVariant(o); r.ToArray(o);
self->doc->GetPage(num)->GetObject()->GetDictionary().AddKey(PdfName(box), o); self->doc->GetPages().GetPageAt(num).GetObject().GetDictionary().AddKey(PdfName(box), o);
} catch(const PdfError & err) { } catch(const PdfError & err) {
podofo_set_exception(err); podofo_set_exception(err);
return NULL; return NULL;
@ -336,41 +373,21 @@ PDFDoc_set_box(PDFDoc *self, PyObject *args) {
PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to set the box"); PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to set the box");
return NULL; return NULL;
} }
Py_RETURN_NONE; Py_RETURN_NONE;
} // }}} } // }}}
// get_xmp_metadata() {{{ // get_xmp_metadata() {{{
static PyObject * static PyObject *
PDFDoc_get_xmp_metadata(PDFDoc *self, PyObject *args) { PDFDoc_get_xmp_metadata(PDFDoc *self, PyObject *args) {
PoDoFo::PdfObject *metadata = NULL;
PoDoFo::PdfStream *str = NULL;
PoDoFo::pdf_long len = 0;
char *buf = NULL;
PyObject *ans = NULL;
try { try {
if ((metadata = self->doc->GetMetadata()) != NULL) { auto s = self->doc->GetCatalog().GetMetadataStreamValue();
if ((str = metadata->GetStream()) != NULL) { return PyBytes_FromStringAndSize(s.data(), s.size());
str->GetFilteredCopy(&buf, &len);
if (buf != NULL) {
Py_ssize_t psz = len;
ans = Py_BuildValue("y#", buf, psz);
free(buf); buf = NULL;
if (ans == NULL) goto error;
}
}
}
} catch(const PdfError & err) { } catch(const PdfError & err) {
podofo_set_exception(err); goto error; podofo_set_exception(err); return NULL;
} catch (...) { } catch (...) {
PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to read the XML metadata"); goto error; PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to read the XML metadata"); return NULL;
} }
if (ans != NULL) return ans;
Py_RETURN_NONE; Py_RETURN_NONE;
error:
return NULL;
} // }}} } // }}}
// set_xmp_metadata() {{{ // set_xmp_metadata() {{{
@ -378,85 +395,58 @@ static PyObject *
PDFDoc_set_xmp_metadata(PDFDoc *self, PyObject *args) { PDFDoc_set_xmp_metadata(PDFDoc *self, PyObject *args) {
const char *raw = NULL; const char *raw = NULL;
Py_ssize_t len = 0; Py_ssize_t len = 0;
PoDoFo::PdfObject *metadata = NULL, *catalog = NULL;
PoDoFo::PdfStream *str = NULL;
TVecFilters compressed(1);
compressed[0] = ePdfFilter_FlateDecode;
if (!PyArg_ParseTuple(args, "y#", &raw, &len)) return NULL; if (!PyArg_ParseTuple(args, "y#", &raw, &len)) return NULL;
try { try {
if ((metadata = self->doc->GetMetadata()) != NULL) { self->doc->GetCatalog().SetMetadataStreamValue(std::string_view(raw, len));
if ((str = metadata->GetStream()) == NULL) { PyErr_NoMemory(); goto error; }
str->Set(raw, len, compressed);
} else {
if ((catalog = self->doc->GetCatalog()) == NULL) { PyErr_SetString(PyExc_ValueError, "Cannot set XML metadata as this document has no catalog"); goto error; }
if ((metadata = self->doc->GetObjects().CreateObject("Metadata")) == NULL) { PyErr_NoMemory(); goto error; }
if ((str = metadata->GetStream()) == NULL) { PyErr_NoMemory(); goto error; }
metadata->GetDictionary().AddKey(PoDoFo::PdfName("Subtype"), PoDoFo::PdfName("XML"));
str->Set(raw, len, compressed);
catalog->GetDictionary().AddKey(PoDoFo::PdfName("Metadata"), metadata->Reference());
}
} catch(const PdfError & err) { } catch(const PdfError & err) {
podofo_set_exception(err); goto error; podofo_set_exception(err); return NULL;
} catch (...) { } catch (...) {
PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to set the XML metadata"); PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to set the XML metadata"); return NULL;
goto error;
} }
Py_RETURN_NONE; Py_RETURN_NONE;
error:
return NULL;
} // }}} } // }}}
// extract_anchors() {{{ // extract_anchors() {{{
static PyObject * static PyObject *
PDFDoc_extract_anchors(PDFDoc *self, PyObject *args) { PDFDoc_extract_anchors(PDFDoc *self, PyObject *args) {
const PdfObject* catalog = NULL;
PyObject *ans = PyDict_New(); PyObject *ans = PyDict_New();
if (ans == NULL) return NULL; if (ans == NULL) return NULL;
try { try {
if ((catalog = self->doc->GetCatalog()) != NULL) { const PdfObject *dests_ref = self->doc->GetCatalog().GetDictionary().GetKey("Dests");
const PdfObject *dests_ref = catalog->GetDictionary().GetKey("Dests"); auto& pages = self->doc->GetPages();
PdfPagesTree *tree = self->doc->GetPagesTree(); if (dests_ref && dests_ref->IsReference()) {
if (dests_ref && dests_ref->IsReference()) { const PdfObject *dests_obj = self->doc->GetObjects().GetObject(dests_ref->GetReference());
const PdfObject *dests_obj = self->doc->GetObjects().GetObject(dests_ref->GetReference()); if (dests_obj && dests_obj->IsDictionary()) {
if (dests_obj && dests_obj->IsDictionary()) { const PdfDictionary &dests = dests_obj->GetDictionary();
const PdfDictionary &dests = dests_obj->GetDictionary(); for (auto itres: dests) {
const TKeyMap &keys = dests.GetKeys(); if (itres.second.IsArray()) {
for (TCIKeyMap itres = keys.begin(); itres != keys.end(); ++itres) { const PdfArray &dest = itres.second.GetArray();
if (itres->second->IsArray()) { // see section 8.2 of PDF spec for different types of destination arrays
const PdfArray &dest = itres->second->GetArray(); // but chromium apparently generates only [page /XYZ left top zoom] type arrays
// see section 8.2 of PDF spec for different types of destination arrays if (dest.GetSize() > 4 && dest[1].IsName() && dest[1].GetName().GetString() == "XYZ") {
// but chromium apparently generates only [page /XYZ left top zoom] type arrays const PdfPage *page = get_page(pages, dest[0].GetReference());
if (dest.GetSize() > 4 && dest[1].IsName() && dest[1].GetName().GetName() == "XYZ") { if (page) {
const PdfPage *page = tree->GetPage(dest[0].GetReference()); unsigned int pagenum = page->GetPageNumber();
if (page) { double left = dest[2].GetReal(), top = dest[3].GetReal();
unsigned int pagenum = page->GetPageNumber(); long long zoom = dest[4].GetNumber();
double left = dest[2].GetReal(), top = dest[3].GetReal(); const std::string &anchor = itres.first.GetString();
long long zoom = dest[4].GetNumber(); PyObject *key = PyUnicode_DecodeUTF8(anchor.c_str(), anchor.length(), "replace");
const std::string &anchor = itres->first.GetName(); PyObject *tuple = Py_BuildValue("IddL", pagenum, left, top, zoom);
PyObject *key = PyUnicode_DecodeUTF8(anchor.c_str(), anchor.length(), "replace"); if (!tuple || !key) { break; }
PyObject *tuple = Py_BuildValue("IddL", pagenum, left, top, zoom); int ret = PyDict_SetItem(ans, key, tuple);
if (!tuple || !key) { break; } Py_DECREF(key); Py_DECREF(tuple);
int ret = PyDict_SetItem(ans, key, tuple); if (ret != 0) break;
Py_DECREF(key); Py_DECREF(tuple); }
if (ret != 0) break; }
} }
} }
} }
} }
}
}
}
} catch(const PdfError & err) { } catch(const PdfError & err) {
podofo_set_exception(err); podofo_set_exception(err);
Py_CLEAR(ans);
return NULL;
} catch (...) { } catch (...) {
PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to set the box"); PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to set the box");
Py_CLEAR(ans);
return NULL;
} }
if (PyErr_Occurred()) { Py_CLEAR(ans); return NULL; } if (PyErr_Occurred()) { Py_CLEAR(ans); return NULL; }
return ans; return ans;
@ -472,28 +462,22 @@ alter_link(PDFDoc *self, PdfDictionary &link, PyObject *alter_callback, bool mar
} }
PdfDictionary &A = link.GetKey("A")->GetDictionary(); PdfDictionary &A = link.GetKey("A")->GetDictionary();
PdfObject *uo = A.GetKey("URI"); PdfObject *uo = A.GetKey("URI");
const std::string &uri = uo->GetString().GetStringUtf8(); const std::string &uri = uo->GetString().GetString();
pyunique_ptr ret(PyObject_CallObject(alter_callback, Py_BuildValue("(N)", PyUnicode_DecodeUTF8(uri.c_str(), uri.length(), "replace")))); pyunique_ptr ret(PyObject_CallObject(alter_callback, Py_BuildValue("(N)", PyUnicode_DecodeUTF8(uri.c_str(), uri.length(), "replace"))));
if (!ret) { return; } if (!ret) { return; }
if (PyTuple_Check(ret.get()) && PyTuple_GET_SIZE(ret.get()) == 4) { if (PyTuple_Check(ret.get()) && PyTuple_GET_SIZE(ret.get()) == 4) {
int pagenum; double left, top, zoom; int pagenum; double left, top, zoom;
if (PyArg_ParseTuple(ret.get(), "iddd", &pagenum, &left, &top, &zoom)) { if (PyArg_ParseTuple(ret.get(), "iddd", &pagenum, &left, &top, &zoom)) {
PdfPage *page = NULL; const PdfPage *page = get_page(self->doc, pagenum - 1);
try { if (page == NULL) {
page = self->doc->GetPage(pagenum - 1); PyErr_Format(PyExc_ValueError, "No page number %d in the PDF file of %d pages", pagenum, self->doc->GetPages().GetCount());
} catch(const PdfError &err) { return;
(void)err;
PyErr_Format(PyExc_ValueError, "No page number %d in the PDF file of %d pages", pagenum, self->doc->GetPageCount());
return ;
} }
if (page) {
PdfDestination dest(page, left, top, zoom);
link.RemoveKey("A"); link.RemoveKey("A");
PdfDestination dest(*page, left, top, zoom);
dest.AddToDictionary(link); dest.AddToDictionary(link);
}
} }
} }
} }
static PyObject * static PyObject *
@ -504,8 +488,8 @@ PDFDoc_alter_links(PDFDoc *self, PyObject *args) {
bool mark_links = PyObject_IsTrue(py_mark_links); bool mark_links = PyObject_IsTrue(py_mark_links);
try { try {
PdfArray border, link_color; PdfArray border, link_color;
border.push_back((PoDoFo::pdf_int64)16); border.push_back((PoDoFo::pdf_int64)16); border.push_back((PoDoFo::pdf_int64)1); border.Add(int64_t(16)); border.Add(int64_t(16)); border.Add(int64_t(1));
link_color.push_back(1.); link_color.push_back(0.); link_color.push_back(0.); link_color.Add(1.); link_color.Add(0.); link_color.Add(0.);
std::vector<PdfReference> links; std::vector<PdfReference> links;
for (auto &it : self->doc->GetObjects()) { for (auto &it : self->doc->GetObjects()) {
if(it->IsDictionary()) { if(it->IsDictionary()) {
@ -516,7 +500,7 @@ PDFDoc_alter_links(PDFDoc *self, PyObject *args) {
if (dictionary_has_key_name(A, PdfName::KeyType, "Action") && dictionary_has_key_name(A, "S", "URI")) { if (dictionary_has_key_name(A, PdfName::KeyType, "Action") && dictionary_has_key_name(A, "S", "URI")) {
PdfObject *uo = A.GetKey("URI"); PdfObject *uo = A.GetKey("URI");
if (uo && uo->IsString()) { if (uo && uo->IsString()) {
links.push_back(it->Reference()); links.push_back(it->GetReference());
} }
} }
} }
@ -547,153 +531,137 @@ PDFDoc_alter_links(PDFDoc *self, PyObject *args) {
static PyObject * static PyObject *
PDFDoc_pages_getter(PDFDoc *self, void *closure) { PDFDoc_pages_getter(PDFDoc *self, void *closure) {
int pages = self->doc->GetPageCount(); unsigned long pages = self->doc->GetPages().GetCount();
PyObject *ans = PyLong_FromLong(static_cast<long>(pages)); PyObject *ans = PyLong_FromUnsignedLong(pages);
if (ans != NULL) Py_INCREF(ans); if (ans != NULL) Py_INCREF(ans);
return ans; return ans;
} }
static PyObject * static PyObject *
PDFDoc_version_getter(PDFDoc *self, void *closure) { PDFDoc_version_getter(PDFDoc *self, void *closure) {
int version; PdfVersion version;
try { try {
version = self->doc->GetPdfVersion(); version = self->doc->GetMetadata().GetPdfVersion();
} catch(const PdfError & err) { } catch(const PdfError & err) {
podofo_set_exception(err); podofo_set_exception(err);
return NULL; return NULL;
} }
switch(version) { switch(version) {
case ePdfVersion_1_0: case PdfVersion::V1_0:
return Py_BuildValue("s", "1.0"); return PyUnicode_FromString("1.0");
case ePdfVersion_1_1: case PdfVersion::V1_1:
return Py_BuildValue("s", "1.1"); return PyUnicode_FromString("1.1");
case ePdfVersion_1_2: case PdfVersion::V1_2:
return Py_BuildValue("s", "1.2"); return PyUnicode_FromString("1.2");
case ePdfVersion_1_3: case PdfVersion::V1_3:
return Py_BuildValue("s", "1.3"); return PyUnicode_FromString("1.3");
case ePdfVersion_1_4: case PdfVersion::V1_4:
return Py_BuildValue("s", "1.4"); return PyUnicode_FromString("1.4");
case ePdfVersion_1_5: case PdfVersion::V1_5:
return Py_BuildValue("s", "1.5"); return PyUnicode_FromString("1.5");
case ePdfVersion_1_6: case PdfVersion::V1_6:
return Py_BuildValue("s", "1.6"); return PyUnicode_FromString("1.6");
case ePdfVersion_1_7: case PdfVersion::V1_7:
return Py_BuildValue("s", "1.7"); return PyUnicode_FromString("1.7");
default: case PdfVersion::V2_0:
return Py_BuildValue(""); return PyUnicode_FromString("2.0");
case PdfVersion::Unknown:
return PyUnicode_FromString("");
} }
return Py_BuildValue(""); return PyUnicode_FromString("");
} }
static inline PyObject*
static PyObject * string_metadata_getter(const nullable<PdfString>& t) {
PDFDoc_getter(PDFDoc *self, int field) if (t.has_value()) return podofo_convert_pdfstring(t.value());
{ return PyUnicode_FromString("");
PdfString s;
PdfInfo *info = self->doc->GetInfo();
if (info == NULL) {
PyErr_SetString(PyExc_Exception, "You must first load a PDF Document");
return NULL;
}
switch (field) {
case 0:
s = info->GetTitle(); break;
case 1:
s = info->GetAuthor(); break;
case 2:
s = info->GetSubject(); break;
case 3:
s = info->GetKeywords(); break;
case 4:
s = info->GetCreator(); break;
case 5:
s = info->GetProducer(); break;
default:
PyErr_SetString(PyExc_Exception, "Bad field");
return NULL;
}
return podofo_convert_pdfstring(s);
}
static int
PDFDoc_setter(PDFDoc *self, PyObject *val, int field) {
if (val == NULL || !PyUnicode_Check(val)) {
PyErr_SetString(PyExc_ValueError, "Must use unicode objects to set metadata");
return -1;
}
PdfInfo *info = self->doc->GetInfo();
if (!info) { PyErr_SetString(Error, "You must first load a PDF Document"); return -1; }
const PdfString s = podofo_convert_pystring(val);
switch (field) {
case 0:
info->SetTitle(s); break;
case 1:
info->SetAuthor(s); break;
case 2:
info->SetSubject(s); break;
case 3:
info->SetKeywords(s); break;
case 4:
info->SetCreator(s); break;
case 5:
info->SetProducer(s); break;
default:
PyErr_SetString(Error, "Bad field");
return -1;
}
return 0;
} }
static PyObject * static PyObject *
PDFDoc_title_getter(PDFDoc *self, void *closure) { PDFDoc_title_getter(PDFDoc *self, void *closure) {
return PDFDoc_getter(self, 0); return string_metadata_getter(self->doc->GetMetadata().GetTitle());
} }
static PyObject * static PyObject *
PDFDoc_author_getter(PDFDoc *self, void *closure) { PDFDoc_author_getter(PDFDoc *self, void *closure) {
return PDFDoc_getter(self, 1); return string_metadata_getter(self->doc->GetMetadata().GetAuthor());
} }
static PyObject * static PyObject *
PDFDoc_subject_getter(PDFDoc *self, void *closure) { PDFDoc_subject_getter(PDFDoc *self, void *closure) {
return PDFDoc_getter(self, 2); return string_metadata_getter(self->doc->GetMetadata().GetSubject());
} }
static PyObject * static PyObject *
PDFDoc_keywords_getter(PDFDoc *self, void *closure) { PDFDoc_keywords_getter(PDFDoc *self, void *closure) {
return PDFDoc_getter(self, 3); auto kw = self->doc->GetMetadata().GetKeywords();
pyunique_ptr ans(PyTuple_New(kw.size()));
if (!ans) return NULL;
for (size_t i = 0; i < kw.size(); i++) {
pyunique_ptr t(PyUnicode_FromString(kw[i].c_str()));
if (!t) return NULL;
PyTuple_SET_ITEM(ans.get(), i, t.release());
}
return ans.release();
} }
static PyObject * static PyObject *
PDFDoc_creator_getter(PDFDoc *self, void *closure) { PDFDoc_creator_getter(PDFDoc *self, void *closure) {
return PDFDoc_getter(self, 4); return string_metadata_getter(self->doc->GetMetadata().GetCreator());
} }
static PyObject * static PyObject *
PDFDoc_producer_getter(PDFDoc *self, void *closure) { PDFDoc_producer_getter(PDFDoc *self, void *closure) {
return PDFDoc_getter(self, 5); return string_metadata_getter(self->doc->GetMetadata().GetProducer());
} }
static int static int
PDFDoc_title_setter(PDFDoc *self, PyObject *val, void *closure) { PDFDoc_title_setter(PDFDoc *self, PyObject *val, void *closure) {
return PDFDoc_setter(self, val, 0); if (!PyUnicode_Check(val)) { PyErr_SetString(PyExc_TypeError, "Must use unicode to set metadata"); return -1; }
self->doc->GetMetadata().SetTitle(podofo_convert_pystring(val));
return 0;
} }
static int static int
PDFDoc_author_setter(PDFDoc *self, PyObject *val, void *closure) { PDFDoc_author_setter(PDFDoc *self, PyObject *val, void *closure) {
return PDFDoc_setter(self, val, 1); if (!PyUnicode_Check(val)) { PyErr_SetString(PyExc_TypeError, "Must use unicode to set metadata"); return -1; }
self->doc->GetMetadata().SetAuthor(podofo_convert_pystring(val));
return 0;
} }
static int static int
PDFDoc_subject_setter(PDFDoc *self, PyObject *val, void *closure) { PDFDoc_subject_setter(PDFDoc *self, PyObject *val, void *closure) {
return PDFDoc_setter(self, val, 2); if (!PyUnicode_Check(val)) { PyErr_SetString(PyExc_TypeError, "Must use unicode to set metadata"); return -1; }
self->doc->GetMetadata().SetSubject(podofo_convert_pystring(val));
return 0;
} }
static int static int
PDFDoc_keywords_setter(PDFDoc *self, PyObject *val, void *closure) { PDFDoc_keywords_setter(PDFDoc *self, PyObject *val, void *closure) {
return PDFDoc_setter(self, val, 3); pyunique_ptr f(PySequence_Fast(val, "Need a sequence to set keywords"));
if (!f) return -1;
std::vector<std::string> keywords(PySequence_Fast_GET_SIZE(f.get()));
for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(f.get()); i++) {
PyObject *x = PySequence_Fast_GET_ITEM(f.get(), i);
if (!PyUnicode_Check(x)) { PyErr_SetString(PyExc_TypeError, "keywords sequence must contain only unicode objects"); return -1; }
keywords.emplace_back(podofo_convert_pystring(x));
}
self->doc->GetMetadata().SetKeywords(keywords);
return 0;
} }
static int static int
PDFDoc_creator_setter(PDFDoc *self, PyObject *val, void *closure) { PDFDoc_creator_setter(PDFDoc *self, PyObject *val, void *closure) {
return PDFDoc_setter(self, val, 4); if (!PyUnicode_Check(val)) { PyErr_SetString(PyExc_TypeError, "Must use unicode to set metadata"); return -1; }
self->doc->GetMetadata().SetCreator(podofo_convert_pystring(val));
return 0;
} }
static int static int
PDFDoc_producer_setter(PDFDoc *self, PyObject *val, void *closure) { PDFDoc_producer_setter(PDFDoc *self, PyObject *val, void *closure) {
return PDFDoc_setter(self, val, 5); if (!PyUnicode_Check(val)) { PyErr_SetString(PyExc_TypeError, "Must use unicode to set metadata"); return -1; }
self->doc->GetMetadata().SetProducer(podofo_convert_pystring(val));
return 0;
} }
static PyGetSetDef PDFDoc_getsetters[] = { static PyGetSetDef PDFDoc_getsetters[] = {

View File

@ -7,6 +7,7 @@
#include "global.h" #include "global.h"
#include <iostream> #include <iostream>
#include <memory>
#include <stack> #include <stack>
using namespace pdf; using namespace pdf;
@ -18,47 +19,61 @@ ref_as_tuple(const PdfReference &ref) {
} }
// Look up the font program entry of a font descriptor (mutable overload).
// Tries the keys FontFile, FontFile2 and FontFile3 in that order and returns
// the first one found; returns NULL when the descriptor is not a dictionary or
// has none of the three keys.
static inline PdfObject*
get_font_file(PdfObject *descriptor) {
    PdfDictionary *descriptor_dict;
    if (!descriptor->TryGetDictionary(descriptor_dict)) return NULL;
    if (PdfObject *found = descriptor_dict->FindKey("FontFile")) return found;
    if (PdfObject *found = descriptor_dict->FindKey("FontFile2")) return found;
    return descriptor_dict->FindKey("FontFile3");
}
static inline void static inline const PdfObject*
remove_font(PdfVecObjects &objects, PdfObject *font) { get_font_file(const PdfObject *descriptor) {
PdfObject *descriptor = font->GetIndirectKey("FontDescriptor"); const PdfDictionary *dict;
if (descriptor) { const PdfObject *ff = NULL;
const PdfObject *ff = get_font_file(descriptor); if (descriptor->TryGetDictionary(dict)) {
if (ff) delete objects.RemoveObject(ff->Reference()); ff = dict->FindKey("FontFile");
delete objects.RemoveObject(descriptor->Reference()); if (!ff) ff = dict->FindKey("FontFile2");
if (!ff) ff = dict->FindKey("FontFile3");
} }
delete objects.RemoveObject(font->Reference()); return ff;
} }
static inline uint64_t
ref_as_integer(pdf_objnum num, pdf_gennum gen) {
return static_cast<uint64_t>(num) | (static_cast<uint64_t>(gen) << 32);
}
static inline uint64_t static inline void
ref_as_integer(const PdfReference &ref) { return ref_as_integer(ref.ObjectNumber(), ref.GenerationNumber()); } remove_font(PdfIndirectObjectList &objects, PdfObject *font) {
PdfDictionary *dict;
if (font->TryGetDictionary(dict)) {
PdfObject *descriptor = dict->FindKey("FontDescriptor");
if (descriptor) {
const PdfObject *ff = get_font_file(descriptor);
if (ff) objects.RemoveObject(ff->GetReference()).reset();
objects.RemoveObject(descriptor->GetReference()).reset();
}
}
objects.RemoveObject(font->GetReference()).reset();
}
static void static void
used_fonts_in_canvas(PdfCanvas *canvas, unordered_reference_set &ans) { used_fonts_in_canvas(const PdfCanvas &canvas, unordered_reference_set &ans) {
PdfContentsTokenizer tokenizer(canvas); PdfPostScriptTokenizer tokenizer;
PdfCanvasInputDevice input(canvas);
bool in_text_block = false; bool in_text_block = false;
const char* token = NULL; PdfPostScriptTokenType contents_type;
EPdfContentsType contents_type;
PdfVariant var; PdfVariant var;
std::stack<PdfVariant> stack; std::stack<PdfVariant> stack;
const PdfDictionary &resources = canvas->GetResources()->GetDictionary(); const PdfDictionary &resources = canvas.GetResources()->GetDictionary();
if (!resources.HasKey("Font")) return; if (!resources.HasKey("Font")) return;
const PdfDictionary &fonts_dict = resources.GetKey("Font")->GetDictionary(); const PdfDictionary &fonts_dict = resources.GetKey("Font")->GetDictionary();
std::string_view keyword;
while (tokenizer.ReadNext(contents_type, token, var)) { while (tokenizer.TryReadNext(input, contents_type, keyword, var)) {
if (contents_type == ePdfContentsType_Variant) stack.push(var); if (contents_type == PdfPostScriptTokenType::Variant) stack.push(var);
if (contents_type != ePdfContentsType_Keyword) continue; if (contents_type != PdfPostScriptTokenType::Keyword) continue;
const char *token = keyword.data();
if (strcmp(token, "BT") == 0) { if (strcmp(token, "BT") == 0) {
in_text_block = true; in_text_block = true;
continue; continue;
@ -88,10 +103,10 @@ convert_w_array(const PdfArray &w) {
pyunique_ptr item; pyunique_ptr item;
if ((*it).IsArray()) { if ((*it).IsArray()) {
item.reset(convert_w_array((*it).GetArray())); item.reset(convert_w_array((*it).GetArray()));
} else if ((*it).IsRealStrict()) {
item.reset(PyFloat_FromDouble((*it).GetReal()));
} else if ((*it).IsNumber()) { } else if ((*it).IsNumber()) {
item.reset(PyLong_FromLongLong((long long)(*it).GetNumber())); item.reset(PyLong_FromLongLong((long long)(*it).GetNumber()));
} else if ((*it).IsReal()) {
item.reset(PyFloat_FromDouble((*it).GetReal()));
} else PyErr_SetString(PyExc_ValueError, "Unknown datatype in w array"); } else PyErr_SetString(PyExc_ValueError, "Unknown datatype in w array");
if (!item) return NULL; if (!item) return NULL;
if (PyList_Append(ans.get(), item.get()) != 0) return NULL; if (PyList_Append(ans.get(), item.get()) != 0) return NULL;
@ -105,16 +120,16 @@ list_fonts(PDFDoc *self, PyObject *args) {
if (!PyArg_ParseTuple(args, "|i", &get_font_data)) return NULL; if (!PyArg_ParseTuple(args, "|i", &get_font_data)) return NULL;
pyunique_ptr ans(PyList_New(0)); pyunique_ptr ans(PyList_New(0));
if (!ans) return NULL; if (!ans) return NULL;
const PdfVecObjects &objects = self->doc->GetObjects(); const PdfIndirectObjectList &objects = self->doc->GetObjects();
for (auto &it : objects) { for (auto &it : objects) {
if (it->IsDictionary()) { if (it->IsDictionary()) {
const PdfDictionary &dict = it->GetDictionary(); const PdfDictionary &dict = it->GetDictionary();
if (dictionary_has_key_name(dict, PdfName::KeyType, "Font") && dict.HasKey("BaseFont")) { if (dictionary_has_key_name(dict, PdfName::KeyType, "Font") && dict.HasKey("BaseFont")) {
const std::string &name = dict.GetKey("BaseFont")->GetName().GetName(); const std::string &name = dict.GetKey("BaseFont")->GetName().GetString();
const std::string &subtype = dict.GetKey(PdfName::KeySubtype)->GetName().GetName(); const std::string &subtype = dict.GetKey(PdfName::KeySubtype)->GetName().GetString();
const PdfReference &ref = it->Reference(); const PdfReference &ref = it->GetReference();
unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber(); unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber();
const PdfObject *descriptor = it->GetIndirectKey("FontDescriptor"); const PdfObject *descriptor = dict.FindKey("FontDescriptor");
pyunique_ptr descendant_font, stream_ref, encoding, w, w2; pyunique_ptr descendant_font, stream_ref, encoding, w, w2;
PyBytesOutputStream stream_data, to_unicode, cid_gid_map; PyBytesOutputStream stream_data, to_unicode, cid_gid_map;
if (dict.HasKey("W")) { if (dict.HasKey("W")) {
@ -126,21 +141,21 @@ list_fonts(PDFDoc *self, PyObject *args) {
if (!w2) return NULL; if (!w2) return NULL;
} }
if (dict.HasKey("Encoding") && dict.GetKey("Encoding")->IsName()) { if (dict.HasKey("Encoding") && dict.GetKey("Encoding")->IsName()) {
encoding.reset(PyUnicode_FromString(dict.GetKey("Encoding")->GetName().GetName().c_str())); encoding.reset(PyUnicode_FromString(dict.GetKey("Encoding")->GetName().GetString().c_str()));
if (!encoding) return NULL; if (!encoding) return NULL;
} }
if (dict.HasKey("CIDToGIDMap") && (!dict.GetKey("CIDToGIDMap")->IsName() || strcmp(dict.GetKey("CIDToGIDMap")->GetName().GetName().c_str(), "Identity") != 0)) { if (dict.HasKey("CIDToGIDMap") && (!dict.GetKey("CIDToGIDMap")->IsName() || strcmp(dict.GetKey("CIDToGIDMap")->GetName().GetString().c_str(), "Identity") != 0)) {
const PdfStream *stream = dict.GetKey("CIDToGIDMap")->GetStream(); const PdfObjectStream *stream = dict.GetKey("CIDToGIDMap")->GetStream();
if (stream) stream->GetFilteredCopy(&cid_gid_map); if (stream) stream->CopyToSafe(cid_gid_map);
} }
if (descriptor) { if (descriptor) {
const PdfObject *ff = get_font_file(descriptor); const PdfObject *ff = get_font_file(descriptor);
if (ff) { if (ff) {
stream_ref.reset(ref_as_tuple(ff->Reference())); stream_ref.reset(ref_as_tuple(ff->GetReference()));
if (!stream_ref) return NULL; if (!stream_ref) return NULL;
const PdfStream *stream = ff->GetStream(); const PdfObjectStream *stream = ff->GetStream();
if (stream && get_font_data) { if (stream && get_font_data) {
stream->GetFilteredCopy(&stream_data); stream->CopyToSafe(stream_data);
} }
} }
} else if (dict.HasKey("DescendantFonts")) { } else if (dict.HasKey("DescendantFonts")) {
@ -151,8 +166,8 @@ list_fonts(PDFDoc *self, PyObject *args) {
const PdfReference &uref = dict.GetKey("ToUnicode")->GetReference(); const PdfReference &uref = dict.GetKey("ToUnicode")->GetReference();
PdfObject *t = objects.GetObject(uref); PdfObject *t = objects.GetObject(uref);
if (t) { if (t) {
PdfStream *stream = t->GetStream(); PdfObjectStream *stream = t->GetStream();
if (stream) stream->GetFilteredCopy(&to_unicode); if (stream) stream->CopyToSafe(to_unicode);
} }
} }
} }
@ -186,18 +201,18 @@ remove_unused_fonts(PDFDoc *self, PyObject *args) {
unsigned long count = 0; unsigned long count = 0;
unordered_reference_set used_fonts; unordered_reference_set used_fonts;
// Look in Pages // Look in Pages
for (int i = 0; i < self->doc->GetPageCount(); i++) { PdfPageCollection *pages = &self->doc->GetPages();
PdfPage *page = self->doc->GetPage(i); for (unsigned i = 0; i < pages->GetCount(); i++) {
if (page) used_fonts_in_canvas(page, used_fonts); used_fonts_in_canvas(self->doc->GetPages().GetPageAt(i), used_fonts);
} }
// Look in XObjects // Look in XObjects
PdfVecObjects &objects = self->doc->GetObjects(); PdfIndirectObjectList &objects = self->doc->GetObjects();
for (auto &k : objects) { for (PdfObject *k : objects) {
if (k->IsDictionary()) { if (k->IsDictionary()) {
const PdfDictionary &dict = k->GetDictionary(); const PdfDictionary &dict = k->GetDictionary();
if (dictionary_has_key_name(dict, PdfName::KeyType, "XObject") && dictionary_has_key_name(dict, PdfName::KeySubtype, "Form")) { if (dictionary_has_key_name(dict, PdfName::KeyType, "XObject") && dictionary_has_key_name(dict, PdfName::KeySubtype, "Form")) {
PdfXObject xo(k); std::unique_ptr<PdfXObjectForm> xo;
used_fonts_in_canvas(&xo, used_fonts); if (PdfXObject::TryCreateFromObject<PdfXObjectForm>(*k, xo)) used_fonts_in_canvas(*xo, used_fonts);
} }
} }
} }
@ -208,14 +223,14 @@ remove_unused_fonts(PDFDoc *self, PyObject *args) {
if (k->IsDictionary()) { if (k->IsDictionary()) {
const PdfDictionary &dict = k->GetDictionary(); const PdfDictionary &dict = k->GetDictionary();
if (dictionary_has_key_name(dict, PdfName::KeyType, "Font")) { if (dictionary_has_key_name(dict, PdfName::KeyType, "Font")) {
const std::string &font_type = dict.GetKey(PdfName::KeySubtype)->GetName().GetName(); const std::string &font_type = dict.GetKey(PdfName::KeySubtype)->GetName().GetString();
if (font_type == "Type0") { if (font_type == "Type0") {
all_fonts.insert(k->Reference()); all_fonts.insert(k->GetReference());
} else if (font_type == "Type3") { } else if (font_type == "Type3") {
all_fonts.insert(k->Reference()); all_fonts.insert(k->GetReference());
type3_fonts.insert(k->Reference()); type3_fonts.insert(k->GetReference());
for (auto &x : dict.GetKey("CharProcs")->GetDictionary().GetKeys()) { for (auto &x : dict.GetKey("CharProcs")->GetDictionary()) {
const PdfReference &ref = x.second->GetReference(); const PdfReference &ref = x.second.GetReference();
if (charprocs_usage.find(ref) == charprocs_usage.end()) charprocs_usage[ref] = 1; if (charprocs_usage.find(ref) == charprocs_usage.end()) charprocs_usage[ref] = 1;
else charprocs_usage[ref] += 1; else charprocs_usage[ref] += 1;
} }
@ -229,16 +244,18 @@ remove_unused_fonts(PDFDoc *self, PyObject *args) {
PdfObject *font = objects.GetObject(ref); PdfObject *font = objects.GetObject(ref);
if (font) { if (font) {
count++; count++;
PdfDictionary *dict;
if (font->TryGetDictionary(dict)) {
if (type3_fonts.find(ref) != type3_fonts.end()) { if (type3_fonts.find(ref) != type3_fonts.end()) {
for (auto &x : font->GetIndirectKey("CharProcs")->GetDictionary().GetKeys()) { for (auto &x : dict->FindKey("CharProcs")->GetDictionary()) {
charprocs_usage[x.second->GetReference()] -= 1; charprocs_usage[x.second.GetReference()] -= 1;
} }
} else { } else {
for (auto &x : font->GetIndirectKey("DescendantFonts")->GetArray()) { for (auto &x : dict->FindKey("DescendantFonts")->GetArray()) {
PdfObject *dfont = objects.GetObject(x.GetReference()); PdfObject *dfont = objects.GetObject(x.GetReference());
if (dfont) remove_font(objects, dfont); if (dfont) remove_font(objects, dfont);
} }
} }}
remove_font(objects, font); remove_font(objects, font);
} }
} }
@ -246,7 +263,7 @@ remove_unused_fonts(PDFDoc *self, PyObject *args) {
for (auto &x : charprocs_usage) { for (auto &x : charprocs_usage) {
if (x.second == 0u) { if (x.second == 0u) {
delete objects.RemoveObject(x.first); objects.RemoveObject(x.first).reset();
} }
} }
@ -258,14 +275,16 @@ replace_font_data(PDFDoc *self, PyObject *args) {
const char *data; Py_ssize_t sz; const char *data; Py_ssize_t sz;
unsigned long num, gen; unsigned long num, gen;
if (!PyArg_ParseTuple(args, "y#kk", &data, &sz, &num, &gen)) return NULL; if (!PyArg_ParseTuple(args, "y#kk", &data, &sz, &num, &gen)) return NULL;
const PdfVecObjects &objects = self->doc->GetObjects(); const PdfIndirectObjectList &objects = self->doc->GetObjects();
PdfObject *font = objects.GetObject(PdfReference(num, static_cast<pdf_gennum>(gen))); PdfObject *font = objects.GetObject(PdfReference(num, static_cast<uint16_t>(gen)));
if (!font) { PyErr_SetString(PyExc_KeyError, "No font with the specified reference found"); return NULL; } if (!font) { PyErr_SetString(PyExc_KeyError, "No font with the specified reference found"); return NULL; }
const PdfObject *descriptor = font->GetIndirectKey("FontDescriptor"); PdfDictionary *dict;
if (!font->TryGetDictionary(dict)) { PyErr_SetString(PyExc_ValueError, "Font does not have a descriptor"); return NULL; }
PdfObject *descriptor = dict->FindKey("FontDescriptor");
if (!descriptor) { PyErr_SetString(PyExc_ValueError, "Font does not have a descriptor"); return NULL; } if (!descriptor) { PyErr_SetString(PyExc_ValueError, "Font does not have a descriptor"); return NULL; }
PdfObject *ff = get_font_file(descriptor); PdfObject *ff = get_font_file(descriptor);
PdfStream *stream = ff->GetStream(); PdfObjectStream *stream = ff->GetStream();
stream->Set(data, sz); stream->SetData(bufferview(data, sz));
Py_RETURN_NONE; Py_RETURN_NONE;
} }
@ -274,60 +293,61 @@ merge_fonts(PDFDoc *self, PyObject *args) {
const char *data; Py_ssize_t sz; const char *data; Py_ssize_t sz;
PyObject *references; PyObject *references;
if (!PyArg_ParseTuple(args, "y#O!", &data, &sz, &PyTuple_Type, &references)) return NULL; if (!PyArg_ParseTuple(args, "y#O!", &data, &sz, &PyTuple_Type, &references)) return NULL;
PdfVecObjects &objects = self->doc->GetObjects(); PdfIndirectObjectList &objects = self->doc->GetObjects();
PdfObject *font_file = NULL; PdfObject *font_file = NULL;
PdfDictionary *dict;
for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(references); i++) { for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(references); i++) {
unsigned long num, gen; unsigned long num, gen;
if (!PyArg_ParseTuple(PyTuple_GET_ITEM(references, i), "kk", &num, &gen)) return NULL; if (!PyArg_ParseTuple(PyTuple_GET_ITEM(references, i), "kk", &num, &gen)) return NULL;
PdfObject *font = objects.GetObject(PdfReference(num, static_cast<pdf_gennum>(gen))); PdfObject *font = objects.GetObject(PdfReference(num, static_cast<uint16_t>(gen)));
if (!font) { PyErr_SetString(PyExc_KeyError, "No font with the specified reference found"); return NULL; } if (!font) { PyErr_SetString(PyExc_KeyError, "No font with the specified reference found"); return NULL; }
PdfObject *dobj = font->GetIndirectKey("FontDescriptor");
PdfObject *dobj = NULL;
if (font->TryGetDictionary(dict)) { dobj = dict->FindKey("FontDescriptor"); }
if (!dobj) { PyErr_SetString(PyExc_ValueError, "Font does not have a descriptor"); return NULL; } if (!dobj) { PyErr_SetString(PyExc_ValueError, "Font does not have a descriptor"); return NULL; }
if (!dobj->IsDictionary()) { PyErr_SetString(PyExc_ValueError, "Font does not have a dictionary descriptor"); return NULL; } if (!dobj->IsDictionary()) { PyErr_SetString(PyExc_ValueError, "Font does not have a dictionary descriptor"); return NULL; }
PdfDictionary &descriptor = dobj->GetDictionary(); PdfDictionary &descriptor = dobj->GetDictionary();
const char *font_file_key = NULL; const char *font_file_key = NULL;
if (descriptor.HasKey("FontFile")) font_file_key = "FontFile"; PdfObject *ff = NULL;
else if (descriptor.HasKey("FontFile2")) font_file_key = "FontFile2"; if ((ff = descriptor.FindKey("FontFile"))) { font_file_key = "FontFile"; }
else if (descriptor.HasKey("FontFile3")) font_file_key = "FontFile3"; else if ((ff = descriptor.FindKey("FontFile2"))) { font_file_key = "FontFile2"; }
else { PyErr_SetString(PyExc_ValueError, "Font descriptor does not have file data"); return NULL; } else if ((ff = descriptor.FindKey("FontFile3"))) { font_file_key = "FontFile3"; }
PdfObject *ff = dobj->GetIndirectKey(font_file_key); else { PyErr_SetString(PyExc_ValueError, "Font descriptor does not have file data"); return NULL; }
if (i == 0) { if (i == 0) {
font_file = ff; font_file = ff;
PdfStream *stream = ff->GetStream(); PdfObjectStream *stream = ff->GetStream();
stream->Set(data, sz); stream->SetData(bufferview(data, sz));
} else { } else {
delete objects.RemoveObject(ff->Reference()); objects.RemoveObject(ff->GetReference()).reset();
descriptor.AddKey(font_file_key, font_file->Reference()); descriptor.AddKey(font_file_key, font_file->GetReference());
} }
} }
Py_RETURN_NONE; Py_RETURN_NONE;
} }
class CharProc { class CharProc {
char *buf; pdf_long sz; charbuff buf;
PdfReference ref; PdfReference ref;
CharProc( const CharProc & ) ; CharProc( const CharProc & ) ;
CharProc & operator=( const CharProc & ) ; CharProc & operator=( const CharProc & ) ;
public: public:
CharProc(const PdfReference &reference, const PdfObject *o) : buf(NULL), sz(0), ref(reference) { CharProc(const PdfReference &reference, const PdfObject *o) : buf(), ref(reference) {
const PdfStream *stream = o->GetStream(); const PdfObjectStream *stream = o->GetStream();
stream->GetFilteredCopy(&buf, &sz); buf = stream->GetCopySafe();
} }
CharProc(CharProc &&other) noexcept : CharProc(CharProc &&other) noexcept :
buf(other.buf), sz(other.sz), ref(other.ref) { buf(std::move(other.buf)), ref(other.ref) {
other.buf = NULL; other.buf = charbuff();
} }
CharProc& operator=(CharProc &&other) noexcept { CharProc& operator=(CharProc &&other) noexcept {
if (buf) podofo_free(buf); buf = std::move(other.buf); other.buf = charbuff(); ref = other.ref;
buf = other.buf; other.buf = NULL; sz = other.sz; ref = other.ref;
return *this; return *this;
} }
~CharProc() noexcept { if (buf) podofo_free(buf); buf = NULL; }
bool operator==(const CharProc &other) const noexcept { bool operator==(const CharProc &other) const noexcept {
return other.sz == sz && memcmp(buf, other.buf, sz) == 0; return buf.size() == other.buf.size() && memcmp(buf.data(), other.buf.data(), buf.size()) == 0;
} }
std::size_t hash() const noexcept { return sz; } std::size_t hash() const noexcept { return buf.size(); }
const PdfReference& reference() const noexcept { return ref; } const PdfReference& reference() const noexcept { return ref; }
}; };
@ -344,16 +364,16 @@ dedup_type3_fonts(PDFDoc *self, PyObject *args) {
unordered_reference_set all_type3_fonts; unordered_reference_set all_type3_fonts;
char_proc_reference_map cp_map; char_proc_reference_map cp_map;
PdfVecObjects &objects = self->doc->GetObjects(); PdfIndirectObjectList &objects = self->doc->GetObjects();
for (auto &k : objects) { for (auto &k : objects) {
if (!k->IsDictionary()) continue; if (!k->IsDictionary()) continue;
const PdfDictionary &dict = k->GetDictionary(); const PdfDictionary &dict = k->GetDictionary();
if (dictionary_has_key_name(dict, PdfName::KeyType, "Font")) { if (dictionary_has_key_name(dict, PdfName::KeyType, "Font")) {
const std::string &font_type = dict.GetKey(PdfName::KeySubtype)->GetName().GetName(); const std::string &font_type = dict.GetKey(PdfName::KeySubtype)->GetName().GetString();
if (font_type == "Type3") { if (font_type == "Type3") {
all_type3_fonts.insert(k->Reference()); all_type3_fonts.insert(k->GetReference());
for (auto &x : dict.GetKey("CharProcs")->GetDictionary().GetKeys()) { for (auto &x : dict.GetKey("CharProcs")->GetDictionary()) {
const PdfReference &ref = x.second->GetReference(); const PdfReference &ref = x.second.GetReference();
const PdfObject *cpobj = objects.GetObject(ref); const PdfObject *cpobj = objects.GetObject(ref);
if (!cpobj || !cpobj->HasStream()) continue; if (!cpobj || !cpobj->HasStream()) continue;
CharProc cp(ref, cpobj); CharProc cp(ref, cpobj);
@ -373,7 +393,7 @@ dedup_type3_fonts(PDFDoc *self, PyObject *args) {
for (auto &ref : x.second) { for (auto &ref : x.second) {
if (ref != canonical_ref) { if (ref != canonical_ref) {
ref_map[ref] = x.first.reference(); ref_map[ref] = x.first.reference();
delete objects.RemoveObject(ref); objects.RemoveObject(ref).reset();
count++; count++;
} }
} }
@ -382,11 +402,13 @@ dedup_type3_fonts(PDFDoc *self, PyObject *args) {
if (count > 0) { if (count > 0) {
for (auto &ref : all_type3_fonts) { for (auto &ref : all_type3_fonts) {
PdfObject *font = objects.GetObject(ref); PdfObject *font = objects.GetObject(ref);
PdfDictionary dict = font->GetIndirectKey("CharProcs")->GetDictionary(); PdfDictionary *d;
if (!font->TryGetDictionary(d)) continue;
PdfDictionary dict = d->FindKey("CharProcs")->GetDictionary();
PdfDictionary new_dict = PdfDictionary(dict); PdfDictionary new_dict = PdfDictionary(dict);
bool changed = false; bool changed = false;
for (auto &k : dict.GetKeys()) { for (auto &k : dict) {
auto it = ref_map.find(k.second->GetReference()); auto it = ref_map.find(k.second.GetReference());
if (it != ref_map.end()) { if (it != ref_map.end()) {
new_dict.AddKey(k.first, (*it).second); new_dict.AddKey(k.first, (*it).second);
changed = true; changed = true;

View File

@ -15,6 +15,7 @@
#include <unordered_set> #include <unordered_set>
#include <unordered_map> #include <unordered_map>
using namespace PoDoFo; using namespace PoDoFo;
using namespace std::literals;
namespace pdf { namespace pdf {
@ -52,7 +53,7 @@ struct PyObjectDeleter {
// unique_ptr that uses Py_XDECREF as the destructor function. // unique_ptr that uses Py_XDECREF as the destructor function.
typedef std::unique_ptr<PyObject, PyObjectDeleter> pyunique_ptr; typedef std::unique_ptr<PyObject, PyObjectDeleter> pyunique_ptr;
class PyBytesOutputStream : public PdfOutputStream { class PyBytesOutputStream : public OutputStream {
private: private:
pyunique_ptr bytes; pyunique_ptr bytes;
PyBytesOutputStream( const PyBytesOutputStream & ) ; PyBytesOutputStream( const PyBytesOutputStream & ) ;
@ -62,18 +63,18 @@ class PyBytesOutputStream : public PdfOutputStream {
void Close() {} void Close() {}
operator bool() const { return bool(bytes); } operator bool() const { return bool(bytes); }
PyObject* get() const { return bytes.get(); } PyObject* get() const { return bytes.get(); }
pdf_long Write(const char *buf, const pdf_long sz){ protected:
void writeBuffer(const char *buf, size_t sz){
if (!bytes) { if (!bytes) {
bytes.reset(PyBytes_FromStringAndSize(buf, sz)); bytes.reset(PyBytes_FromStringAndSize(buf, sz));
if (!bytes) throw PdfError(ePdfError_OutOfMemory, __FILE__, __LINE__, NULL); if (!bytes) throw PdfError(PdfErrorCode::OutOfMemory, __FILE__, __LINE__, NULL);
} else { } else {
size_t old_sz = PyBytes_GET_SIZE(bytes.get()); size_t old_sz = PyBytes_GET_SIZE(bytes.get());
PyObject *old = bytes.release(); PyObject *old = bytes.release();
if (_PyBytes_Resize(&old, old_sz + sz) != 0) throw PdfError(ePdfError_OutOfMemory, __FILE__, __LINE__, NULL); if (_PyBytes_Resize(&old, old_sz + sz) != 0) throw PdfError(PdfErrorCode::OutOfMemory, __FILE__, __LINE__, NULL);
memcpy(PyBytes_AS_STRING(old) + old_sz, buf, sz); memcpy(PyBytes_AS_STRING(old) + old_sz, buf, sz);
bytes.reset(old); bytes.reset(old);
} }
return sz;
} }
}; };
@ -82,10 +83,44 @@ template<typename T>
static inline bool static inline bool
dictionary_has_key_name(const PdfDictionary &d, T key, const char *name) { dictionary_has_key_name(const PdfDictionary &d, T key, const char *name) {
const PdfObject *val = d.GetKey(key); const PdfObject *val = d.GetKey(key);
if (val && val->IsName() && val->GetName().GetName() == name) return true; if (val && val->IsName() && val->GetName().GetString() == name) return true;
return false; return false;
} }
// Non-throwing page lookup by reference in a page collection: returns a
// pointer to the page, or nullptr when GetPage() throws PdfError.
static inline const PdfPage*
get_page(const PdfPageCollection &pages, const PdfReference &ref) {
    const PdfPage *page = nullptr;
    try {
        page = &pages.GetPage(ref);
    } catch (const PdfError &) {
        page = nullptr;
    }
    return page;
}
// Non-throwing page lookup by reference in a document: returns a pointer to
// the page, or nullptr when the lookup throws PdfError.
static inline const PdfPage*
get_page(const PdfDocument *doc, const PdfReference &ref) {
    const PdfPage *page = nullptr;
    try {
        page = &doc->GetPages().GetPage(ref);
    } catch (const PdfError &) {
        page = nullptr;
    }
    return page;
}
// Non-throwing page lookup by index in a const document: returns a pointer to
// the page at position `num`, or nullptr when GetPageAt() throws PdfError.
static inline const PdfPage*
get_page(const PdfDocument *doc, const unsigned num) {
    const PdfPage *page = nullptr;
    try {
        page = &doc->GetPages().GetPageAt(num);
    } catch (const PdfError &) {
        page = nullptr;
    }
    return page;
}
// Non-throwing page lookup by index in a mutable document: returns a pointer
// to the page at position `num`, or nullptr when GetPageAt() throws PdfError.
static inline PdfPage*
get_page(PdfDocument *doc, const unsigned num) {
    PdfPage *page = nullptr;
    try {
        page = &doc->GetPages().GetPageAt(num);
    } catch (const PdfError &) {
        page = nullptr;
    }
    return page;
}
class PdfReferenceHasher { class PdfReferenceHasher {
public: public:
size_t operator()(const PdfReference & obj) const { size_t operator()(const PdfReference & obj) const {

View File

@ -10,39 +10,40 @@
using namespace pdf; using namespace pdf;
class Image { class Image {
char *buf; pdf_long sz; charbuff buf;
pdf_int64 width, height; int64_t width, height;
PdfReference ref; PdfReference ref;
Image( const Image & ) ; Image( const Image & ) ;
Image & operator=( const Image & ) ; Image & operator=( const Image & ) ;
bool is_valid;
public: public:
Image(const PdfReference &reference, const PdfObject *o) : buf(NULL), sz(0), width(0), height(0), ref(reference) { Image(const PdfReference &reference, const PdfObject *o) : buf(), width(0), height(0), ref(reference) {
const PdfStream *stream = o->GetStream(); const PdfObjectStream *stream = o->GetStream();
try { try {
stream->GetFilteredCopy(&buf, &sz); buf = stream->GetCopySafe();
is_valid = true;
} catch(...) { } catch(...) {
buf = NULL; sz = -1; buf = charbuff();
is_valid = false;
} }
const PdfDictionary &dict = o->GetDictionary(); const PdfDictionary &dict = o->GetDictionary();
if (dict.HasKey("Width") && dict.GetKey("Width")->IsNumber()) width = dict.GetKey("Width")->GetNumber(); if (dict.HasKey("Width") && dict.GetKey("Width")->IsNumber()) width = dict.GetKey("Width")->GetNumber();
if (dict.HasKey("Height") && dict.GetKey("Height")->IsNumber()) height = dict.GetKey("Height")->GetNumber(); if (dict.HasKey("Height") && dict.GetKey("Height")->IsNumber()) height = dict.GetKey("Height")->GetNumber();
} }
Image(Image &&other) noexcept : Image(Image &&other) noexcept :
buf(other.buf), sz(other.sz), width(other.width), height(other.height), ref(other.ref) { buf(std::move(other.buf)), width(other.width), height(other.height), ref(other.ref) {
other.buf = NULL; other.buf = charbuff(); is_valid = other.is_valid;
} }
Image& operator=(Image &&other) noexcept { Image& operator=(Image &&other) noexcept {
if (buf) podofo_free(buf); buf = std::move(other.buf); other.buf = charbuff(); ref = other.ref;
buf = other.buf; other.buf = NULL; sz = other.sz; ref = other.ref; width = other.width; height = other.height; is_valid = other.is_valid;
width = other.width; height = other.height;
return *this; return *this;
} }
~Image() noexcept { if (buf) podofo_free(buf); buf = NULL; }
bool operator==(const Image &other) const noexcept { bool operator==(const Image &other) const noexcept {
return other.sz == sz && sz > -1 && other.width == width && other.height == height && memcmp(buf, other.buf, sz) == 0; return other.width == width && is_valid && other.is_valid && other.height == height && other.buf == buf;
} }
std::size_t hash() const noexcept { return sz; } std::size_t hash() const noexcept { return buf.size(); }
const PdfReference& reference() const noexcept { return ref; } const PdfReference& reference() const noexcept { return ref; }
}; };
@ -56,14 +57,14 @@ typedef std::unordered_map<Image, std::vector<PdfReference>, ImageHasher> image_
static PyObject* static PyObject*
dedup_images(PDFDoc *self, PyObject *args) { dedup_images(PDFDoc *self, PyObject *args) {
unsigned long count = 0; unsigned long count = 0;
PdfVecObjects &objects = self->doc->GetObjects(); PdfIndirectObjectList &objects = self->doc->GetObjects();
image_reference_map image_map; image_reference_map image_map;
for (auto &k : objects) { for (auto &k : objects) {
if (!k->IsDictionary()) continue; if (!k->IsDictionary()) continue;
const PdfDictionary &dict = k->GetDictionary(); const PdfDictionary &dict = k->GetDictionary();
if (dictionary_has_key_name(dict, PdfName::KeyType, "XObject") && dictionary_has_key_name(dict, PdfName::KeySubtype, "Image")) { if (dictionary_has_key_name(dict, PdfName::KeyType, "XObject") && dictionary_has_key_name(dict, PdfName::KeySubtype, "Image")) {
Image img(k->Reference(), k); Image img(k->GetReference(), k);
auto it = image_map.find(img); auto it = image_map.find(img);
if (it == image_map.end()) { if (it == image_map.end()) {
std::vector<PdfReference> vals; std::vector<PdfReference> vals;
@ -78,7 +79,7 @@ dedup_images(PDFDoc *self, PyObject *args) {
for (auto &ref : x.second) { for (auto &ref : x.second) {
if (ref != canonical_ref) { if (ref != canonical_ref) {
ref_map[ref] = x.first.reference(); ref_map[ref] = x.first.reference();
delete objects.RemoveObject(ref); objects.RemoveObject(ref).reset();
count++; count++;
} }
} }
@ -95,11 +96,11 @@ dedup_images(PDFDoc *self, PyObject *args) {
const PdfDictionary &xobject = resources.GetKey("XObject")->GetDictionary(); const PdfDictionary &xobject = resources.GetKey("XObject")->GetDictionary();
PdfDictionary new_xobject = PdfDictionary(xobject); PdfDictionary new_xobject = PdfDictionary(xobject);
bool changed = false; bool changed = false;
for (auto &x : xobject.GetKeys()) { for (const auto &x : xobject) {
if (x.second->IsReference()) { if (x.second.IsReference()) {
try { try {
const PdfReference &r = ref_map.at(x.second->GetReference()); const PdfReference &r = ref_map.at(x.second.GetReference());
new_xobject.AddKey(x.first.GetName(), r); new_xobject.AddKey(x.first, r);
changed = true; changed = true;
} catch (const std::out_of_range &err) { (void)err; continue; } } catch (const std::out_of_range &err) { (void)err; continue; }
} }

View File

@ -6,24 +6,19 @@
*/ */
#include "global.h" #include "global.h"
#include <string>
using namespace pdf; using namespace pdf;
static void static void
impose_page(PdfMemDocument *doc, unsigned long dest_page_num, unsigned long src_page_num) { impose_page(PdfMemDocument *doc, unsigned int dest_page_num, unsigned int src_page_num) {
PdfXObject *xobj = new PdfXObject(doc, src_page_num, "HeaderFooter"); auto xobj = doc->CreateXObjectForm(Rect(), "HeaderFooter");
PdfPage *dest = doc->GetPage(dest_page_num); xobj->FillFromPage(doc->GetPages().GetPageAt(src_page_num));
dest->AddResource(xobj->GetIdentifier(), xobj->GetObject()->Reference(), "XObject"); auto dest = &doc->GetPages().GetPageAt(dest_page_num);
PdfStream *stream = dest->GetContents()->GetStream(); static unsigned counter = 0;
char *buffer = NULL; pdf_long sz; dest->GetOrCreateResources().AddResource("XObject", "Imp"s + std::to_string(++counter), xobj->GetObject());
stream->GetFilteredCopy(&buffer, &sz); auto data = "q\n1 0 0 1 0 0 cm\n/"s + xobj->GetIdentifier().GetEscapedName() + " Do\nQ\n"s;
stream->BeginAppend(); dest->GetOrCreateContents().GetStreamForAppending().SetData(data);
stream->Append("q\n1 0 0 1 0 0 cm\n/");
stream->Append(xobj->GetIdentifier().GetName());
stream->Append(" Do\nQ\n");
stream->Append(buffer, sz);
stream->EndAppend();
podofo_free(buffer);
} }
static PyObject* static PyObject*
@ -33,7 +28,8 @@ impose(PDFDoc *self, PyObject *args) {
for (unsigned long i = 0; i < count; i++) { for (unsigned long i = 0; i < count; i++) {
impose_page(self->doc, dest_page_num - 1 + i, src_page_num - 1 + i); impose_page(self->doc, dest_page_num - 1 + i, src_page_num - 1 + i);
} }
self->doc->DeletePages(src_page_num - 1, count); auto& pages = self->doc->GetPages();
while (count-- && src_page_num <= pages.GetCount()) pages.RemovePageAt(src_page_num - 1);
Py_RETURN_NONE; Py_RETURN_NONE;
} }

View File

@ -6,6 +6,7 @@
*/ */
#include "global.h" #include "global.h"
#include <memory>
using namespace pdf; using namespace pdf;
@ -45,43 +46,36 @@ erase(PDFOutlineItem *self, PyObject *args) {
static PyObject * static PyObject *
create(PDFOutlineItem *self, PyObject *args) { create(PDFOutlineItem *self, PyObject *args) {
PyObject *as_child; PyObject *as_child;
PDFOutlineItem *ans; PDFOutlineItem *ans = NULL;
unsigned int num; unsigned int num;
double left = 0, top = 0, zoom = 0; double left = 0, top = 0, zoom = 0;
PdfPage *page;
PyObject *title_buf; PyObject *title_buf;
if (!PyArg_ParseTuple(args, "UIO|ddd", &title_buf, &num, &as_child, &left, &top, &zoom)) return NULL; if (!PyArg_ParseTuple(args, "UIO|ddd", &title_buf, &num, &as_child, &left, &top, &zoom)) return NULL;
ans = PyObject_New(PDFOutlineItem, &PDFOutlineItemType); ans = PyObject_New(PDFOutlineItem, &PDFOutlineItemType);
if (ans == NULL) goto error; if (ans == NULL) return NULL;
ans->doc = self->doc; ans->doc = self->doc;
pyunique_ptr decref_ans_on_exit((PyObject*)ans);
try { try {
PdfString title = podofo_convert_pystring(title_buf); PdfString title = podofo_convert_pystring(title_buf);
try { const PdfPage *page = get_page(self->doc, num - 1);
page = self->doc->GetPage(num - 1); if (!page) { PyErr_Format(PyExc_ValueError, "Invalid page number: %u", num); return NULL; }
} catch(const PdfError &err) { (void)err; page = NULL; } auto dest = std::make_shared<PdfDestination>(*page, left, top, zoom);
if (page == NULL) { PyErr_Format(PyExc_ValueError, "Invalid page number: %u", num); goto error; }
PdfDestination dest(page, left, top, zoom);
if (PyObject_IsTrue(as_child)) { if (PyObject_IsTrue(as_child)) {
ans->item = self->item->CreateChild(title, dest); ans->item = self->item->CreateChild(title, dest);
} else } else
ans->item = self->item->CreateNext(title, dest); ans->item = self->item->CreateNext(title, dest);
} catch (const PdfError &err) { } catch (const PdfError &err) {
podofo_set_exception(err); goto error; podofo_set_exception(err); return NULL;
} catch(const std::exception & err) { } catch(const std::exception & err) {
PyErr_Format(PyExc_ValueError, "An error occurred while trying to create the outline: %s", err.what()); PyErr_Format(PyExc_ValueError, "An error occurred while trying to create the outline: %s", err.what()); return NULL;
goto error;
} catch (...) { } catch (...) {
PyErr_SetString(PyExc_Exception, "An unknown error occurred while trying to create the outline item"); PyErr_SetString(PyExc_Exception, "An unknown error occurred while trying to create the outline item"); return NULL;
goto error;
} }
return (PyObject*) ans; return (PyObject*) decref_ans_on_exit.release();
error:
Py_XDECREF(ans);
return NULL;
} }
static PyMethodDef methods[] = { static PyMethodDef methods[] = {

View File

@ -15,43 +15,37 @@ create_outline(PDFDoc *self, PyObject *args) {
PyObject *title_buf; PyObject *title_buf;
unsigned int pagenum; unsigned int pagenum;
double left = 0, top = 0, zoom = 0; double left = 0, top = 0, zoom = 0;
PdfPage *page;
if (!PyArg_ParseTuple(args, "UI|ddd", &title_buf, &pagenum, &left, &top, &zoom)) return NULL; if (!PyArg_ParseTuple(args, "UI|ddd", &title_buf, &pagenum, &left, &top, &zoom)) return NULL;
ans = PyObject_New(PDFOutlineItem, &PDFOutlineItemType); ans = PyObject_New(PDFOutlineItem, &PDFOutlineItemType);
if (ans == NULL) goto error; if (ans == NULL) return NULL;
pyunique_ptr decref_ans_on_exit((PyObject*)ans);
try { try {
PdfString title = podofo_convert_pystring(title_buf); PdfString title = podofo_convert_pystring(title_buf);
PdfOutlines *outlines = self->doc->GetOutlines(); PdfOutlines *outlines = self->doc->GetOutlines();
if (outlines == NULL) {PyErr_NoMemory(); goto error;} if (outlines == NULL) {PyErr_NoMemory(); return NULL;}
ans->item = outlines->CreateRoot(title); ans->item = outlines->CreateRoot(title);
if (ans->item == NULL) {PyErr_NoMemory(); goto error;} if (ans->item == NULL) {PyErr_NoMemory(); return NULL;}
ans->doc = self->doc; ans->doc = self->doc;
try { auto page = get_page(self->doc, pagenum -1);
page = self->doc->GetPage(pagenum - 1); if (!page) {
} catch (const PdfError &err) { PyErr_Format(PyExc_ValueError, "Invalid page number: %u", pagenum - 1); return NULL;
(void)err;
PyErr_Format(PyExc_ValueError, "Invalid page number: %u", pagenum - 1); goto error;
} }
PdfDestination dest(page, left, top, zoom); auto dest = std::make_shared<PdfDestination>(*page, left, top, zoom);
ans->item->SetDestination(dest); ans->item->SetDestination(dest);
} catch(const PdfError & err) { } catch(const PdfError & err) {
podofo_set_exception(err); goto error; podofo_set_exception(err); return NULL;
} catch(const std::exception & err) { } catch(const std::exception & err) {
PyErr_Format(PyExc_ValueError, "An error occurred while trying to create the outline: %s", err.what()); PyErr_Format(PyExc_ValueError, "An error occurred while trying to create the outline: %s", err.what());
goto error; return NULL;
} catch (...) { } catch (...) {
PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to create the outline"); PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to create the outline");
goto error; return NULL;
} }
return (PyObject*)ans; return decref_ans_on_exit.release();
error:
Py_XDECREF(ans);
return NULL;
} }
static PyObject* static PyObject*
@ -71,9 +65,9 @@ convert_outline(PDFDoc *self, PyObject *parent, PdfOutlineItem *item) {
pyunique_ptr node(create_outline_node()); pyunique_ptr node(create_outline_node());
if (!node) return; if (!node) return;
if (PyDict_SetItemString(node.get(), "title", title.get()) != 0) return; if (PyDict_SetItemString(node.get(), "title", title.get()) != 0) return;
PdfDestination* dest = item->GetDestination(self->doc); auto dest = item->GetDestination();
if (dest) { if (dest) {
PdfPage *page = dest->GetPage(self->doc); PdfPage *page = dest->GetPage();
long pnum = page ? page->GetPageNumber() : -1; long pnum = page ? page->GetPageNumber() : -1;
pyunique_ptr d(Py_BuildValue("{sl sd sd sd}", "page", pnum, "top", dest->GetTop(), "left", dest->GetLeft(), "zoom", dest->GetZoom())); pyunique_ptr d(Py_BuildValue("{sl sd sd sd}", "page", pnum, "top", dest->GetTop(), "left", dest->GetLeft(), "zoom", dest->GetZoom()));
if (!d) return; if (!d) return;
@ -95,7 +89,7 @@ convert_outline(PDFDoc *self, PyObject *parent, PdfOutlineItem *item) {
static PyObject * static PyObject *
get_outline(PDFDoc *self, PyObject *args) { get_outline(PDFDoc *self, PyObject *args) {
PdfOutlines *root = self->doc->GetOutlines(PoDoFo::ePdfDontCreateObject); PdfOutlines *root = self->doc->GetOutlines();
if (!root || !root->First()) Py_RETURN_NONE; if (!root || !root->First()) Py_RETURN_NONE;
PyObject *ans = create_outline_node(); PyObject *ans = create_outline_node();
if (!ans) return NULL; if (!ans) return NULL;

View File

@ -10,11 +10,12 @@
using namespace PoDoFo; using namespace PoDoFo;
#define NUKE(x) { Py_XDECREF(x); x = NULL; } #define NUKE(x) { Py_XDECREF(x); x = NULL; }
#define PODOFO_RAISE_ERROR(code) throw ::PoDoFo::PdfError(code, __FILE__, __LINE__)
class pyerr : public std::exception { class pyerr : public std::exception {
}; };
class OutputDevice : public PdfOutputDevice { class MyOutputDevice : public OutputStreamDevice {
private: private:
PyObject *tell_func; PyObject *tell_func;
@ -26,12 +27,13 @@ class OutputDevice : public PdfOutputDevice {
void update_written() { void update_written() {
size_t pos; size_t pos;
pos = Tell(); pos = GetPosition();
if (pos > written) written = pos; if (pos > written) written = pos;
} }
public: public:
OutputDevice(PyObject *file) : tell_func(0), seek_func(0), read_func(0), write_func(0), flush_func(0), written(0) { MyOutputDevice(PyObject *file) : tell_func(0), seek_func(0), read_func(0), write_func(0), flush_func(0), written(0) {
SetAccess(DeviceAccess::Write);
#define GA(f, a) { if((f = PyObject_GetAttrString(file, a)) == NULL) throw pyerr(); } #define GA(f, a) { if((f = PyObject_GetAttrString(file, a)) == NULL) throw pyerr(); }
GA(tell_func, "tell"); GA(tell_func, "tell");
GA(seek_func, "seek"); GA(seek_func, "seek");
@ -39,7 +41,7 @@ class OutputDevice : public PdfOutputDevice {
GA(write_func, "write"); GA(write_func, "write");
GA(flush_func, "flush"); GA(flush_func, "flush");
} }
~OutputDevice() { ~MyOutputDevice() {
NUKE(tell_func); NUKE(seek_func); NUKE(read_func); NUKE(write_func); NUKE(flush_func); NUKE(tell_func); NUKE(seek_func); NUKE(read_func); NUKE(write_func); NUKE(flush_func);
} }
@ -47,7 +49,7 @@ class OutputDevice : public PdfOutputDevice {
long PrintVLen(const char* pszFormat, va_list args) { long PrintVLen(const char* pszFormat, va_list args) {
if( !pszFormat ) { PODOFO_RAISE_ERROR( ePdfError_InvalidHandle ); } if( !pszFormat ) { PODOFO_RAISE_ERROR(PdfErrorCode::InvalidHandle); }
#ifdef _MSC_VER #ifdef _MSC_VER
return _vscprintf(pszFormat, args) + 1; return _vscprintf(pszFormat, args) + 1;
@ -60,7 +62,7 @@ class OutputDevice : public PdfOutputDevice {
char *buf; char *buf;
int res; int res;
if( !pszFormat ) { PODOFO_RAISE_ERROR( ePdfError_InvalidHandle ); } if( !pszFormat ) { PODOFO_RAISE_ERROR(PdfErrorCode::InvalidHandle); }
buf = new (std::nothrow) char[lBytes+1]; buf = new (std::nothrow) char[lBytes+1];
if (buf == NULL) { PyErr_NoMemory(); throw pyerr(); } if (buf == NULL) { PyErr_NoMemory(); throw pyerr(); }
@ -129,7 +131,7 @@ class OutputDevice : public PdfOutputDevice {
Py_DECREF(ret); Py_DECREF(ret);
} }
size_t Tell() const { size_t GetPosition() const {
PyObject *ret; PyObject *ret;
unsigned long ans; unsigned long ans;
@ -151,7 +153,9 @@ class OutputDevice : public PdfOutputDevice {
return static_cast<size_t>(ans); return static_cast<size_t>(ans);
} }
void Write(const char* pBuffer, size_t lLen) { bool Eof() const { return false; }
void writeBuffer(const char* pBuffer, size_t lLen) {
PyObject *ret, *temp = NULL; PyObject *ret, *temp = NULL;
temp = PyBytes_FromStringAndSize(pBuffer, static_cast<Py_ssize_t>(lLen)); temp = PyBytes_FromStringAndSize(pBuffer, static_cast<Py_ssize_t>(lLen));
@ -177,10 +181,10 @@ class OutputDevice : public PdfOutputDevice {
PyObject* pdf::write_doc(PdfMemDocument *doc, PyObject *f) { PyObject* pdf::write_doc(PdfMemDocument *doc, PyObject *f) {
OutputDevice d(f); MyOutputDevice d(f);
try { try {
doc->Write(&d); doc->Save(d);
} catch(const PdfError & err) { } catch(const PdfError & err) {
podofo_set_exception(err); return NULL; podofo_set_exception(err); return NULL;
} catch (...) { } catch (...) {

View File

@ -10,30 +10,6 @@ using namespace PoDoFo;
PyObject *pdf::Error = NULL; PyObject *pdf::Error = NULL;
class PyLogMessage : public PdfError::LogMessageCallback {
public:
~PyLogMessage() {}
void LogMessage(ELogSeverity severity, const char* prefix, const char* msg, va_list & args ) {
if (severity > eLogSeverity_Warning) return;
if (prefix)
fprintf(stderr, "%s", prefix);
vfprintf(stderr, msg, args);
}
void LogMessage(ELogSeverity severity, const wchar_t* prefix, const wchar_t* msg, va_list & args ) {
if (severity > eLogSeverity_Warning) return;
if (prefix)
fwprintf(stderr, prefix);
vfwprintf(stderr, msg, args);
}
};
PyLogMessage log_message;
static char podofo_doc[] = "Wrapper for the PoDoFo PDF library"; static char podofo_doc[] = "Wrapper for the PoDoFo PDF library";
static int static int
@ -45,9 +21,6 @@ exec_module(PyObject *m) {
if (pdf::Error == NULL) return -1; if (pdf::Error == NULL) return -1;
PyModule_AddObject(m, "Error", pdf::Error); PyModule_AddObject(m, "Error", pdf::Error);
PdfError::SetLogMessageCallback((PdfError::LogMessageCallback*)&log_message);
PdfError::EnableDebug(false);
Py_INCREF(&pdf::PDFDocType); Py_INCREF(&pdf::PDFDocType);
PyModule_AddObject(m, "PDFDoc", (PyObject *)&pdf::PDFDocType); PyModule_AddObject(m, "PDFDoc", (PyObject *)&pdf::PDFDocType);
return 0; return 0;

View File

@ -6,29 +6,28 @@
*/ */
#include "global.h" #include "global.h"
#include <sstream>
using namespace pdf; using namespace pdf;
void void
pdf::podofo_set_exception(const PdfError &err) { pdf::podofo_set_exception(const PdfError &err) {
const char *msg = PdfError::ErrorMessage(err.GetError()); const char *msg = err.what();
if (msg == NULL) msg = err.what();
std::stringstream stream; std::stringstream stream;
stream << msg << "\n"; stream << msg << "\n";
const TDequeErrorInfo &s = err.GetCallstack(); const PdErrorInfoStack &s = err.GetCallStack();
for (TDequeErrorInfo::const_iterator it = s.begin(); it != s.end(); it++) { for (auto info : s) {
const PdfErrorInfo &info = (*it); stream << "File: " << info.GetFilePath() << " Line: " << info.GetLine() << " " << info.GetInformation() << "\n";
stream << "File: " << info.GetFilename() << " Line: " << info.GetLine() << " " << info.GetInformation() << "\n";
} }
PyErr_SetString(Error, stream.str().c_str()); PyErr_SetString(Error, stream.str().c_str());
} }
PyObject * PyObject *
pdf::podofo_convert_pdfstring(const PdfString &s) { pdf::podofo_convert_pdfstring(const PdfString &s) {
return PyUnicode_FromString(s.GetStringUtf8().c_str()); return PyUnicode_FromString(s.GetString().c_str());
} }
const PdfString const PdfString
pdf::podofo_convert_pystring(PyObject *val) { pdf::podofo_convert_pystring(PyObject *val) {
return PdfString(reinterpret_cast<const pdf_utf8*>(PyUnicode_AsUTF8(val))); return PdfString(reinterpret_cast<const char*>(PyUnicode_AsUTF8(val)));
} }