diff --git a/src/calibre/utils/podofo/__init__.py b/src/calibre/utils/podofo/__init__.py
index 13c12a9bb3..bbc1a29f73 100644
--- a/src/calibre/utils/podofo/__init__.py
+++ b/src/calibre/utils/podofo/__init__.py
@@ -89,7 +89,7 @@ def delete_all_but(path, pages):
         raw = f.read()
     p.load(raw)
     total = p.page_count()
-    pages = { total + x if x < 0 else x for x in pages }
+    pages = {total + x if x < 0 else x for x in pages}
     for page in xrange(total-1, -1, -1):
         if page not in pages:
             p.delete_page(page)
@@ -97,6 +97,15 @@ def delete_all_but(path, pages):
     with open(path, 'wb') as f:
         f.save_to_fileobj(path)
 
+def get_xmp_metadata(path):
+    ''' Return the XMP metadata packet of the PDF file at path as raw bytes, or None if it has none '''
+    podofo = get_podofo()
+    p = podofo.PDFDoc()
+    with open(path, 'rb') as f:
+        raw = f.read()
+    p.load(raw)
+    return p.get_xmp_metadata()
+
 def test_outline(src):
     podofo = get_podofo()
     p = podofo.PDFDoc()
@@ -123,7 +132,7 @@ def test_save_to(src, dest):
         p.save_to_fileobj(out)
     print ('Wrote PDF of size:', out.tell())
 
+
 if __name__ == '__main__':
     import sys
-    test_save_to(sys.argv[-2], sys.argv[-1])
-
+    print (get_xmp_metadata(sys.argv[-1]))
diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp
index 8a2b4428dc..a3df1ce53e 100644
--- a/src/calibre/utils/podofo/doc.cpp
+++ b/src/calibre/utils/podofo/doc.cpp
@@ -241,6 +241,83 @@ error:
 
 } // }}}
 
+// get_xmp_metadata() {{{
+// Return the document's XMP metadata packet as a bytes object, or None when
+// the document has no metadata object, no stream on it, or an empty buffer.
+static PyObject *
+PDFDoc_get_xmp_metadata(PDFDoc *self, PyObject *args) {
+    PoDoFo::PdfObject *metadata = NULL;
+    PoDoFo::PdfStream *str = NULL;
+    PoDoFo::pdf_long len = 0;
+    char *buf = NULL;
+    PyObject *ans = NULL;
+
+    try {
+        if ((metadata = self->doc->GetMetadata()) != NULL) {
+            if ((str = metadata->GetStream()) != NULL) {
+                str->GetFilteredCopy(&buf, &len);
+                if (buf != NULL) {
+                    // Build the result with an explicit Py_ssize_t length: passing a
+                    // pdf_long through the varargs of Py_BuildValue("s#") is only
+                    // correct when the two types happen to have the same width.
+                    ans = PyBytes_FromStringAndSize(buf, (Py_ssize_t)len);
+                    free(buf); buf = NULL;
+                    if (ans == NULL) goto error;
+                }
+            }
+        }
+    } catch(const PdfError & err) {
+        podofo_set_exception(err); goto error;
+    } catch (...) {
+        PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to read the XML metadata"); goto error;
+    }
+
+    if (ans != NULL) return ans;
+    Py_RETURN_NONE;
+error:
+    return NULL;
+} // }}}
+
+// set_xmp_metadata() {{{
+// Replace the document's XMP metadata stream with the supplied bytes, creating
+// the metadata object and linking it from the catalog if none exists yet.
+static PyObject *
+PDFDoc_set_xmp_metadata(PDFDoc *self, PyObject *args) {
+    Py_buffer raw;
+    PoDoFo::PdfObject *metadata = NULL, *catalog = NULL;
+    PoDoFo::PdfStream *str = NULL;
+
+    // "s*" fills a Py_buffer whose length is always a Py_ssize_t, unlike "s#"
+    // whose length argument is an int or a Py_ssize_t depending on
+    // PY_SSIZE_T_CLEAN (and never a long).
+    if (!PyArg_ParseTuple(args, "s*", &raw)) return NULL;
+    try {
+        if ((metadata = self->doc->GetMetadata()) != NULL) {
+            if ((str = metadata->GetStream()) == NULL) { PyErr_NoMemory(); goto error; }
+            str->Set((const char*)raw.buf, (PoDoFo::pdf_long)raw.len, PoDoFo::TVecFilters());
+        } else {
+            if ((catalog = self->doc->GetCatalog()) == NULL) { PyErr_SetString(PyExc_ValueError, "Cannot set XML metadata as this document has no catalog"); goto error; }
+            if ((metadata = self->doc->GetObjects().CreateObject("Metadata")) == NULL) { PyErr_NoMemory(); goto error; }
+            if ((str = metadata->GetStream()) == NULL) { PyErr_NoMemory(); goto error; }
+            metadata->GetDictionary().AddKey(PoDoFo::PdfName("Subtype"), PoDoFo::PdfName("XML"));
+            str->Set((const char*)raw.buf, (PoDoFo::pdf_long)raw.len, PoDoFo::TVecFilters());
+            catalog->GetDictionary().AddKey(PoDoFo::PdfName("Metadata"), metadata->Reference());
+        }
+    } catch(const PdfError & err) {
+        podofo_set_exception(err); goto error;
+    } catch (...) {
+        PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to set the XML metadata");
+        goto error;
+    }
+
+    PyBuffer_Release(&raw);
+    Py_RETURN_NONE;
+error:
+    PyBuffer_Release(&raw);
+    return NULL;
+
+} // }}}
+
 // Properties {{{
 
 static PyObject *
@@ -483,6 +560,12 @@ static PyMethodDef PDFDoc_methods[] = {
     {"create_outline", (PyCFunction)PDFDoc_create_outline, METH_VARARGS,
      "create_outline(title, pagenum) -> Create an outline, return the first outline item."
     },
+    {"get_xmp_metadata", (PyCFunction)PDFDoc_get_xmp_metadata, METH_VARARGS,
+     "get_xmp_metadata() -> Get the XMP metadata as raw bytes, or None if the document has no XMP metadata"
+    },
+    {"set_xmp_metadata", (PyCFunction)PDFDoc_set_xmp_metadata, METH_VARARGS,
+     "set_xmp_metadata(raw) -> Set the XMP metadata to the raw bytes (which must be a valid XML packet)"
+    },
 
     {NULL} /* Sentinel */
 };