Add support for reading/writing XMP metadata packets to the PoDoFo bindings

This commit is contained in:
Kovid Goyal 2014-02-15 20:30:28 +05:30
parent 67a415e6e9
commit b143f637e3
2 changed files with 83 additions and 3 deletions

View File

@ -89,7 +89,7 @@ def delete_all_but(path, pages):
raw = f.read()
p.load(raw)
total = p.page_count()
pages = { total + x if x < 0 else x for x in pages }
pages = {total + x if x < 0 else x for x in pages}
for page in xrange(total-1, -1, -1):
if page not in pages:
p.delete_page(page)
@ -97,6 +97,14 @@ def delete_all_but(path, pages):
with open(path, 'wb') as f:
f.save_to_fileobj(path)
def get_xmp_metadata(path):
    '''
    Read the file at ``path`` in binary mode, load its bytes into a new
    PoDoFo ``PDFDoc`` and return whatever ``PDFDoc.get_xmp_metadata()``
    returns for that document.
    '''
    doc = get_podofo().PDFDoc()
    with open(path, 'rb') as src:
        data = src.read()
    doc.load(data)
    return doc.get_xmp_metadata()
def test_outline(src):
podofo = get_podofo()
p = podofo.PDFDoc()
@ -123,7 +131,7 @@ def test_save_to(src, dest):
p.save_to_fileobj(out)
print ('Wrote PDF of size:', out.tell())
# Script entry point: exercises the helpers above using trailing
# command-line arguments.
# NOTE(review): this listing appears to come from a diff view with the +/-
# markers stripped, so one of the two calls below may be the pre-change line
# rather than live code -- confirm against the real file.
if __name__ == '__main__':
import sys
# Uses the last two arguments as (src, dest).
test_save_to(sys.argv[-2], sys.argv[-1])
# Uses the last argument as the PDF path; the returned metadata is discarded.
get_xmp_metadata(sys.argv[-1])

View File

@ -241,6 +241,72 @@ error:
} // }}}
// get_xmp_metadata() {{{
/*
 * Python method get_xmp_metadata(): return the contents of the document's
 * metadata stream as a Python string built with the "s#" format, or None
 * when the document has no metadata object, the object has no stream, or
 * the filtered copy comes back as a NULL buffer.
 *
 * Returns NULL with a Python exception set when PoDoFo throws or when the
 * result object cannot be built. `args` is not inspected.
 */
static PyObject *
PDFDoc_get_xmp_metadata(PDFDoc *self, PyObject *args) {
    PyObject *result = NULL;
    char *data = NULL;
    PoDoFo::pdf_long size = 0;

    try {
        PoDoFo::PdfObject *meta = self->doc->GetMetadata();
        PoDoFo::PdfStream *stream = (meta == NULL) ? NULL : meta->GetStream();
        if (stream != NULL) {
            // GetFilteredCopy hands back a buffer that this function releases
            // with free() once it has been copied into the Python object.
            stream->GetFilteredCopy(&data, &size);
            if (data != NULL) {
                result = Py_BuildValue("s#", data, size);
                free(data);
                data = NULL;
                if (result == NULL) return NULL;
            }
        }
    } catch (const PdfError &err) {
        podofo_set_exception(err);
        return NULL;
    } catch (...) {
        PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to read the XML metadata");
        return NULL;
    }

    if (result == NULL) Py_RETURN_NONE;
    return result;
} // }}}
// set_xmp_metadata() {{{
/*
 * Python method set_xmp_metadata(raw): store the bytes of `raw` as the
 * document's metadata stream.
 *
 * If the document already has a metadata object, its stream contents are
 * replaced. Otherwise a new object is created via CreateObject("Metadata"),
 * given /Subtype /XML, filled with the data, and referenced from the
 * catalog under the /Metadata key. In both cases the stream is set with an
 * empty filter list.
 *
 * Returns None on success. Returns NULL with a Python exception set when
 * argument parsing fails, the document has no catalog (ValueError), a
 * required object or stream is NULL (MemoryError), or PoDoFo throws.
 */
static PyObject *
PDFDoc_set_xmp_metadata(PDFDoc *self, PyObject *args) {
    const char *raw = NULL;
    // The "#" length slot must not be a `long`: CPython stores either an int
    // or a Py_ssize_t there, and on LLP64 platforms a 32-bit long is too
    // small for a Py_ssize_t store.
    // NOTE(review): assumes PY_SSIZE_T_CLEAN is defined before Python.h is
    // included (not visible in this chunk) -- confirm.
    Py_ssize_t len = 0;
    PoDoFo::PdfObject *metadata = NULL, *catalog = NULL;
    PoDoFo::PdfStream *str = NULL;

    if (!PyArg_ParseTuple(args, "s#", &raw, &len)) return NULL;

    try {
        if ((metadata = self->doc->GetMetadata()) != NULL) {
            // Existing metadata object: overwrite its stream in place.
            if ((str = metadata->GetStream()) == NULL) { PyErr_NoMemory(); goto error; }
            str->Set(raw, len, PoDoFo::TVecFilters());
        } else {
            // No metadata object yet: create one and hook it into the catalog.
            if ((catalog = self->doc->GetCatalog()) == NULL) { PyErr_SetString(PyExc_ValueError, "Cannot set XML metadata as this document has no catalog"); goto error; }
            if ((metadata = self->doc->GetObjects().CreateObject("Metadata")) == NULL) { PyErr_NoMemory(); goto error; }
            if ((str = metadata->GetStream()) == NULL) { PyErr_NoMemory(); goto error; }
            metadata->GetDictionary().AddKey(PoDoFo::PdfName("Subtype"), PoDoFo::PdfName("XML"));
            str->Set(raw, len, PoDoFo::TVecFilters());
            // The catalog entry is added last, after the stream has been filled.
            catalog->GetDictionary().AddKey(PoDoFo::PdfName("Metadata"), metadata->Reference());
        }
    } catch(const PdfError & err) {
        podofo_set_exception(err); goto error;
    } catch (...) {
        PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to set the XML metadata");
        goto error;
    }

    Py_RETURN_NONE;
error:
    return NULL;
} // }}}
// Properties {{{
static PyObject *
@ -483,6 +549,12 @@ static PyMethodDef PDFDoc_methods[] = {
{"create_outline", (PyCFunction)PDFDoc_create_outline, METH_VARARGS,
"create_outline(title, pagenum) -> Create an outline, return the first outline item."
},
{"get_xmp_metadata", (PyCFunction)PDFDoc_get_xmp_metadata, METH_VARARGS,
"get_xmp_metadata(raw) -> Get the XMP metadata as raw bytes"
},
{"set_xmp_metadata", (PyCFunction)PDFDoc_set_xmp_metadata, METH_VARARGS,
"set_xmp_metadata(raw) -> Set the XMP metadata to the raw bytes (which must be a valid XML packet)"
},
{NULL} /* Sentinel */
};