From 1136f261865a0279b6e9dce798ad0d25d51d5f92 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Aug 2012 10:13:34 +0530 Subject: [PATCH] Refactor the PoDoFo module --- session.vim | 1 + setup/extensions.py | 9 +- src/calibre/utils/podofo/doc.cpp | 396 ++++++++++++++++++++++++ src/calibre/utils/podofo/global.h | 39 +++ src/calibre/utils/podofo/podofo.cpp | 448 +--------------------------- src/calibre/utils/podofo/utils.cpp | 46 +++ 6 files changed, 501 insertions(+), 438 deletions(-) create mode 100644 src/calibre/utils/podofo/doc.cpp create mode 100644 src/calibre/utils/podofo/global.h create mode 100644 src/calibre/utils/podofo/utils.cpp diff --git a/session.vim b/session.vim index ae2c55bf06..1a94d6bf07 100644 --- a/session.vim +++ b/session.vim @@ -3,6 +3,7 @@ let $PYFLAKES_BUILTINS = "_,dynamic_property,__,P,I,lopen,icu_lower,icu_upper,ic " Include directories for C++ modules let g:syntastic_cpp_include_dirs = [ + \'/usr/include/python2.7', \'/usr/include/podofo', \'/usr/include/qt4/QtCore', \'/usr/include/qt4/QtGui', diff --git a/setup/extensions.py b/setup/extensions.py index 9b852d10c5..a7b01bcd19 100644 --- a/setup/extensions.py +++ b/setup/extensions.py @@ -137,7 +137,14 @@ extensions = [ ['calibre/ebooks/compression/palmdoc.c']), Extension('podofo', - ['calibre/utils/podofo/podofo.cpp'], + [ + 'calibre/utils/podofo/utils.cpp', + 'calibre/utils/podofo/doc.cpp', + 'calibre/utils/podofo/podofo.cpp', + ], + headers=[ + 'calibre/utils/podofo/global.h', + ], libraries=['podofo'], lib_dirs=[podofo_lib], inc_dirs=[podofo_inc, os.path.dirname(podofo_inc)], diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp new file mode 100644 index 0000000000..d591d6fc65 --- /dev/null +++ b/src/calibre/utils/podofo/doc.cpp @@ -0,0 +1,396 @@ +/* + * doc.cpp + * Copyright (C) 2012 Kovid Goyal + * + * Distributed under terms of the GPL3 license. + */ + +#include "global.h" + +using namespace pdf; + +static void +PDFDoc_dealloc(PDFDoc* self) +{ + if (self->doc != NULL) delete self->doc; + self->ob_type->tp_free((PyObject*)self); +} + +static PyObject * +PDFDoc_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PDFDoc *self; + + self = (PDFDoc *)type->tp_alloc(type, 0); + if (self != NULL) { + self->doc = new PdfMemDocument(); + if (self->doc == NULL) { Py_DECREF(self); return NULL; } + } + + return (PyObject *)self; +} + +static PyObject * +PDFDoc_load(PDFDoc *self, PyObject *args, PyObject *kwargs) { + char *buffer; Py_ssize_t size; + + if (PyArg_ParseTuple(args, "s#", &buffer, &size)) { + try { + self->doc->Load(buffer, size); + } catch(const PdfError & err) { + podofo_set_exception(err); + return NULL; + } +} else return NULL; + + + Py_RETURN_NONE; +} + +static PyObject * +PDFDoc_open(PDFDoc *self, PyObject *args, PyObject *kwargs) { + char *fname; + + if (PyArg_ParseTuple(args, "s", &fname)) { + try { + self->doc->Load(fname); + } catch(const PdfError & err) { + podofo_set_exception(err); + return NULL; + } +} else return NULL; + + + Py_RETURN_NONE; +} + +static PyObject * +PDFDoc_save(PDFDoc *self, PyObject *args, PyObject *kwargs) { + char *buffer; + + if (PyArg_ParseTuple(args, "s", &buffer)) { + try { + self->doc->Write(buffer); + } catch(const PdfError & err) { + podofo_set_exception(err); + return NULL; + } + } else return NULL; + + + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject * +PDFDoc_pages_getter(PDFDoc *self, void *closure) { + int pages = self->doc->GetPageCount(); + PyObject *ans = PyInt_FromLong(static_cast(pages)); + if (ans != NULL) Py_INCREF(ans); + return ans; +} + +static PyObject * +PDFDoc_version_getter(PDFDoc *self, void *closure) { + int version; + try { + version = self->doc->GetPdfVersion(); + } catch(const PdfError & err) { + podofo_set_exception(err); + return NULL; + } + switch(version) { + case ePdfVersion_1_0: + return Py_BuildValue("s", "1.0"); + case ePdfVersion_1_1: + return Py_BuildValue("s", "1.1"); + case ePdfVersion_1_2: + return Py_BuildValue("s", "1.2"); + case ePdfVersion_1_3: + return Py_BuildValue("s", "1.3"); + case ePdfVersion_1_4: + return Py_BuildValue("s", "1.4"); + case ePdfVersion_1_5: + return Py_BuildValue("s", "1.5"); + case ePdfVersion_1_6: + return Py_BuildValue("s", "1.6"); + case ePdfVersion_1_7: + return Py_BuildValue("s", "1.7"); + default: + return Py_BuildValue(""); + } + return Py_BuildValue(""); +} + + + +static PyObject * +PDFDoc_extract_first_page(PDFDoc *self, PyObject *args, PyObject *kwargs) { + try { + while (self->doc->GetPageCount() > 1) self->doc->GetPagesTree()->DeletePage(1); + } catch(const PdfError & err) { + podofo_set_exception(err); + return NULL; + } + Py_RETURN_NONE; +} + +static PyObject * +PDFDoc_page_count(PDFDoc *self, PyObject *args, PyObject *kwargs) { + int count; + try { + count = self->doc->GetPageCount(); + } catch(const PdfError & err) { + podofo_set_exception(err); + return NULL; + } + return Py_BuildValue("i", count); +} + +static PyObject * +PDFDoc_delete_page(PDFDoc *self, PyObject *args, PyObject *kwargs) { + int num = 0; + if (PyArg_ParseTuple(args, "i", &num)) { + try { + self->doc->DeletePages(num, 1); + } catch(const PdfError & err) { + podofo_set_exception(err); + return NULL; + } + } else return NULL; + + Py_RETURN_NONE; +} + + +static PyObject * +PDFDoc_getter(PDFDoc *self, int field) +{ + PyObject *ans; + PdfString s; + PdfInfo *info = self->doc->GetInfo(); + if (info == NULL) { + PyErr_SetString(PyExc_Exception, "You must first load a PDF Document"); + return NULL; + } + switch (field) { + case 0: + s = info->GetTitle(); break; + case 1: + s = info->GetAuthor(); break; + case 2: + s = info->GetSubject(); break; + case 3: + s = info->GetKeywords(); break; + case 4: + s = info->GetCreator(); break; + case 5: + s = info->GetProducer(); break; + default: + PyErr_SetString(PyExc_Exception, "Bad field"); + return NULL; + } + + ans = podofo_convert_pdfstring(s); + if (ans == NULL) {PyErr_NoMemory(); return NULL;} + PyObject *uans = PyUnicode_FromEncodedObject(ans, "utf-8", "replace"); + Py_DECREF(ans); + if (uans == NULL) {return NULL;} + Py_INCREF(uans); + return uans; +} + +static int +PDFDoc_setter(PDFDoc *self, PyObject *val, int field) { + if (val == NULL || !PyUnicode_Check(val)) { + PyErr_SetString(PyExc_ValueError, "Must use unicode objects to set metadata"); + return -1; + } + PdfInfo *info = new PdfInfo(*self->doc->GetInfo()); + if (info == NULL) { + PyErr_SetString(PyExc_Exception, "You must first load a PDF Document"); + return -1; + } + PdfString *s = NULL; + + if (self->doc->GetEncrypted()) s = podofo_convert_pystring_single_byte(val); + else s = podofo_convert_pystring(val); + if (s == NULL) return -1; + + + switch (field) { + case 0: + info->SetTitle(*s); break; + case 1: + info->SetAuthor(*s); break; + case 2: + info->SetSubject(*s); break; + case 3: + info->SetKeywords(*s); break; + case 4: + info->SetCreator(*s); break; + case 5: + info->SetProducer(*s); break; + default: + PyErr_SetString(PyExc_Exception, "Bad field"); + return -1; + } + + return 0; +} + +static PyObject * +PDFDoc_title_getter(PDFDoc *self, void *closure) { + return PDFDoc_getter(self, 0); +} +static PyObject * +PDFDoc_author_getter(PDFDoc *self, void *closure) { + return PDFDoc_getter(self, 1); +} +static PyObject * +PDFDoc_subject_getter(PDFDoc *self, void *closure) { + return PDFDoc_getter(self, 2); +} +static PyObject * +PDFDoc_keywords_getter(PDFDoc *self, void *closure) { + return PDFDoc_getter(self, 3); +} +static PyObject * +PDFDoc_creator_getter(PDFDoc *self, void *closure) { + return PDFDoc_getter(self, 4); +} +static PyObject * +PDFDoc_producer_getter(PDFDoc *self, void *closure) { + return PDFDoc_getter(self, 5); +} +static int +PDFDoc_title_setter(PDFDoc *self, PyObject *val, void *closure) { + return PDFDoc_setter(self, val, 0); +} +static int +PDFDoc_author_setter(PDFDoc *self, PyObject *val, void *closure) { + return PDFDoc_setter(self, val, 1); +} +static int +PDFDoc_subject_setter(PDFDoc *self, PyObject *val, void *closure) { + return PDFDoc_setter(self, val, 2); +} +static int +PDFDoc_keywords_setter(PDFDoc *self, PyObject *val, void *closure) { + return PDFDoc_setter(self, val, 3); +} +static int +PDFDoc_creator_setter(PDFDoc *self, PyObject *val, void *closure) { + return PDFDoc_setter(self, val, 4); +} +static int +PDFDoc_producer_setter(PDFDoc *self, PyObject *val, void *closure) { + return PDFDoc_setter(self, val, 5); +} + + +static PyMethodDef PDFDoc_methods[] = { + {"load", (PyCFunction)PDFDoc_load, METH_VARARGS, + "Load a PDF document from a byte buffer (string)" + }, + {"open", (PyCFunction)PDFDoc_open, METH_VARARGS, + "Load a PDF document from a file path (string)" + }, + {"save", (PyCFunction)PDFDoc_save, METH_VARARGS, + "Save the PDF document to a path on disk" + }, + {"extract_first_page", (PyCFunction)PDFDoc_extract_first_page, METH_VARARGS, + "extract_first_page() -> Remove all but the first page." + }, + {"page_count", (PyCFunction)PDFDoc_page_count, METH_VARARGS, + "page_count() -> Number of pages in the PDF." + }, + {"delete_page", (PyCFunction)PDFDoc_delete_page, METH_VARARGS, + "delete_page(page_num) -> Delete the specified page from the pdf (0 is the first page)." + }, + + + {NULL} /* Sentinel */ +}; + +static PyGetSetDef PDFDoc_getsetters[] = { + {(char *)"title", + (getter)PDFDoc_title_getter, (setter)PDFDoc_title_setter, + (char *)"Document title", + NULL}, + {(char *)"author", + (getter)PDFDoc_author_getter, (setter)PDFDoc_author_setter, + (char *)"Document author", + NULL}, + {(char *)"subject", + (getter)PDFDoc_subject_getter, (setter)PDFDoc_subject_setter, + (char *)"Document subject", + NULL}, + {(char *)"keywords", + (getter)PDFDoc_keywords_getter, (setter)PDFDoc_keywords_setter, + (char *)"Document keywords", + NULL}, + {(char *)"creator", + (getter)PDFDoc_creator_getter, (setter)PDFDoc_creator_setter, + (char *)"Document creator", + NULL}, + {(char *)"producer", + (getter)PDFDoc_producer_getter, (setter)PDFDoc_producer_setter, + (char *)"Document producer", + NULL}, + {(char *)"pages", + (getter)PDFDoc_pages_getter, NULL, + (char *)"Number of pages in document (read only)", + NULL}, + {(char *)"version", + (getter)PDFDoc_version_getter, NULL, + (char *)"The PDF version (read only)", + NULL}, + + {NULL} /* Sentinel */ +}; + +PyTypeObject pdf::PDFDocType = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "podofo.PDFDoc", /*tp_name*/ + sizeof(PDFDoc), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)PDFDoc_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT, /*tp_flags*/ + "PDF Documents", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + PDFDoc_methods, /* tp_methods */ + 0, /* tp_members */ + PDFDoc_getsetters, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + PDFDoc_new, /* tp_new */ + +}; + + diff --git a/src/calibre/utils/podofo/global.h b/src/calibre/utils/podofo/global.h new file mode 100644 index 0000000000..c7a5696ad6 --- /dev/null +++ b/src/calibre/utils/podofo/global.h @@ -0,0 +1,39 @@ +/* + * global.h + * Copyright (C) 2012 Kovid Goyal + * + * Distributed under terms of the GPL3 license. + */ +#pragma once + +#define UNICODE +#define PY_SSIZE_T_CLEAN +#include + +#define USING_SHARED_PODOFO +#include +using namespace PoDoFo; + +namespace pdf { + +// Module exception types +extern PyObject *Error; + +typedef struct { + PyObject_HEAD + /* Type-specific fields go here. */ + PdfMemDocument *doc; + +} PDFDoc; + +extern PyTypeObject PDFDocType; +extern PyObject *Error; + +// Utilities +extern void podofo_set_exception(const PdfError &err); +extern PyObject * podofo_convert_pdfstring(const PdfString &s); +extern PdfString * podofo_convert_pystring(PyObject *py); +extern PdfString * podofo_convert_pystring_single_byte(PyObject *py); + +} + diff --git a/src/calibre/utils/podofo/podofo.cpp b/src/calibre/utils/podofo/podofo.cpp index 3764fce829..b79ca8cfe3 100644 --- a/src/calibre/utils/podofo/podofo.cpp +++ b/src/calibre/utils/podofo/podofo.cpp @@ -6,458 +6,32 @@ #include using namespace PoDoFo; -typedef struct { - PyObject_HEAD - /* Type-specific fields go here. */ - PdfMemDocument *doc; +#include "global.h" -} podofo_PDFDoc; - -extern "C" { -static void -podofo_PDFDoc_dealloc(podofo_PDFDoc* self) -{ - if (self->doc != NULL) delete self->doc; - self->ob_type->tp_free((PyObject*)self); -} - -static PyObject * -podofo_PDFDoc_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - podofo_PDFDoc *self; - - self = (podofo_PDFDoc *)type->tp_alloc(type, 0); - if (self != NULL) { - self->doc = new PdfMemDocument(); - if (self->doc == NULL) { Py_DECREF(self); return NULL; } - } - - return (PyObject *)self; -} - -static void podofo_set_exception(const PdfError &err) { - const char *msg = PdfError::ErrorMessage(err.GetError()); - if (msg == NULL) msg = err.what(); - PyErr_SetString(PyExc_ValueError, msg); -} - -static PyObject * -podofo_PDFDoc_load(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) { - char *buffer; Py_ssize_t size; - - if (PyArg_ParseTuple(args, "s#", &buffer, &size)) { - try { - self->doc->Load(buffer, size); - } catch(const PdfError & err) { - podofo_set_exception(err); - return NULL; - } -} else return NULL; - - - Py_RETURN_NONE; -} - -static PyObject * -podofo_PDFDoc_open(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) { - char *fname; - - if (PyArg_ParseTuple(args, "s", &fname)) { - try { - self->doc->Load(fname); - } catch(const PdfError & err) { - podofo_set_exception(err); - return NULL; - } -} else return NULL; - - - Py_RETURN_NONE; -} - -static PyObject * -podofo_PDFDoc_save(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) { - char *buffer; - - if (PyArg_ParseTuple(args, "s", &buffer)) { - try { - self->doc->Write(buffer); - } catch(const PdfError & err) { - podofo_set_exception(err); - return NULL; - } - } else return NULL; - - - Py_INCREF(Py_None); - return Py_None; -} - -static PyObject * -podofo_PDFDoc_pages_getter(podofo_PDFDoc *self, void *closure) { - int pages = self->doc->GetPageCount(); - PyObject *ans = PyInt_FromLong(static_cast(pages)); - if (ans != NULL) Py_INCREF(ans); - return ans; -} - -static PyObject * -podofo_PDFDoc_version_getter(podofo_PDFDoc *self, void *closure) { - int version; - try { - version = self->doc->GetPdfVersion(); - } catch(const PdfError & err) { - podofo_set_exception(err); - return NULL; - } - switch(version) { - case ePdfVersion_1_0: - return Py_BuildValue("s", "1.0"); - case ePdfVersion_1_1: - return Py_BuildValue("s", "1.1"); - case ePdfVersion_1_2: - return Py_BuildValue("s", "1.2"); - case ePdfVersion_1_3: - return Py_BuildValue("s", "1.3"); - case ePdfVersion_1_4: - return Py_BuildValue("s", "1.4"); - case ePdfVersion_1_5: - return Py_BuildValue("s", "1.5"); - case ePdfVersion_1_6: - return Py_BuildValue("s", "1.6"); - case ePdfVersion_1_7: - return Py_BuildValue("s", "1.7"); - default: - return Py_BuildValue(""); - } - return Py_BuildValue(""); -} - - - -static PyObject * -podofo_PDFDoc_extract_first_page(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) { - try { - while (self->doc->GetPageCount() > 1) self->doc->GetPagesTree()->DeletePage(1); - } catch(const PdfError & err) { - podofo_set_exception(err); - return NULL; - } - Py_RETURN_NONE; -} - -static PyObject * -podofo_PDFDoc_page_count(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) { - int count; - try { - count = self->doc->GetPageCount(); - } catch(const PdfError & err) { - podofo_set_exception(err); - return NULL; - } - return Py_BuildValue("i", count); -} - -static PyObject * -podofo_PDFDoc_delete_page(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) { - int num = 0; - if (PyArg_ParseTuple(args, "i", &num)) { - try { - self->doc->DeletePages(num, 1); - } catch(const PdfError & err) { - podofo_set_exception(err); - return NULL; - } - } else return NULL; - - Py_RETURN_NONE; -} - -static PyObject * -podofo_convert_pdfstring(const PdfString &s) { - std::string raw = s.GetStringUtf8(); - return PyString_FromStringAndSize(raw.c_str(), raw.length()); -} - -static PdfString * -podofo_convert_pystring(PyObject *py) { - Py_UNICODE* u = PyUnicode_AS_UNICODE(py); - PyObject *u8 = PyUnicode_EncodeUTF8(u, PyUnicode_GET_SIZE(py), "replace"); - if (u8 == NULL) { PyErr_NoMemory(); return NULL; } - pdf_utf8 *s8 = reinterpret_cast(PyString_AS_STRING(u8)); - PdfString *ans = new PdfString(s8); - Py_DECREF(u8); - if (ans == NULL) PyErr_NoMemory(); - return ans; -} - -static PdfString * -podofo_convert_pystring_single_byte(PyObject *py) { - Py_UNICODE* u = PyUnicode_AS_UNICODE(py); - PyObject *s = PyUnicode_Encode(u, PyUnicode_GET_SIZE(py), "cp1252", "replace"); - if (s == NULL) { PyErr_NoMemory(); return NULL; } - PdfString *ans = new PdfString(PyString_AS_STRING(s)); - Py_DECREF(s); - if (ans == NULL) PyErr_NoMemory(); - return ans; -} - - - -static PyObject * -podofo_PDFDoc_getter(podofo_PDFDoc *self, int field) -{ - PyObject *ans; - PdfString s; - PdfInfo *info = self->doc->GetInfo(); - if (info == NULL) { - PyErr_SetString(PyExc_Exception, "You must first load a PDF Document"); - return NULL; - } - switch (field) { - case 0: - s = info->GetTitle(); break; - case 1: - s = info->GetAuthor(); break; - case 2: - s = info->GetSubject(); break; - case 3: - s = info->GetKeywords(); break; - case 4: - s = info->GetCreator(); break; - case 5: - s = info->GetProducer(); break; - default: - PyErr_SetString(PyExc_Exception, "Bad field"); - return NULL; - } - - ans = podofo_convert_pdfstring(s); - if (ans == NULL) {PyErr_NoMemory(); return NULL;} - PyObject *uans = PyUnicode_FromEncodedObject(ans, "utf-8", "replace"); - Py_DECREF(ans); - if (uans == NULL) {return NULL;} - Py_INCREF(uans); - return uans; -} - -static int -podofo_PDFDoc_setter(podofo_PDFDoc *self, PyObject *val, int field) { - if (val == NULL || !PyUnicode_Check(val)) { - PyErr_SetString(PyExc_ValueError, "Must use unicode objects to set metadata"); - return -1; - } - PdfInfo *info = new PdfInfo(*self->doc->GetInfo()); - if (info == NULL) { - PyErr_SetString(PyExc_Exception, "You must first load a PDF Document"); - return -1; - } - PdfString *s = NULL; - - if (self->doc->GetEncrypted()) s = podofo_convert_pystring_single_byte(val); - else s = podofo_convert_pystring(val); - if (s == NULL) return -1; - - - switch (field) { - case 0: - info->SetTitle(*s); break; - case 1: - info->SetAuthor(*s); break; - case 2: - info->SetSubject(*s); break; - case 3: - info->SetKeywords(*s); break; - case 4: - info->SetCreator(*s); break; - case 5: - info->SetProducer(*s); break; - default: - PyErr_SetString(PyExc_Exception, "Bad field"); - return -1; - } - - return 0; -} - -static PyObject * -podofo_PDFDoc_title_getter(podofo_PDFDoc *self, void *closure) { - return podofo_PDFDoc_getter(self, 0); -} -static PyObject * -podofo_PDFDoc_author_getter(podofo_PDFDoc *self, void *closure) { - return podofo_PDFDoc_getter(self, 1); -} -static PyObject * -podofo_PDFDoc_subject_getter(podofo_PDFDoc *self, void *closure) { - return podofo_PDFDoc_getter(self, 2); -} -static PyObject * -podofo_PDFDoc_keywords_getter(podofo_PDFDoc *self, void *closure) { - return podofo_PDFDoc_getter(self, 3); -} -static PyObject * -podofo_PDFDoc_creator_getter(podofo_PDFDoc *self, void *closure) { - return podofo_PDFDoc_getter(self, 4); -} -static PyObject * -podofo_PDFDoc_producer_getter(podofo_PDFDoc *self, void *closure) { - return podofo_PDFDoc_getter(self, 5); -} -static int -podofo_PDFDoc_title_setter(podofo_PDFDoc *self, PyObject *val, void *closure) { - return podofo_PDFDoc_setter(self, val, 0); -} -static int -podofo_PDFDoc_author_setter(podofo_PDFDoc *self, PyObject *val, void *closure) { - return podofo_PDFDoc_setter(self, val, 1); -} -static int -podofo_PDFDoc_subject_setter(podofo_PDFDoc *self, PyObject *val, void *closure) { - return podofo_PDFDoc_setter(self, val, 2); -} -static int -podofo_PDFDoc_keywords_setter(podofo_PDFDoc *self, PyObject *val, void *closure) { - return podofo_PDFDoc_setter(self, val, 3); -} -static int -podofo_PDFDoc_creator_setter(podofo_PDFDoc *self, PyObject *val, void *closure) { - return podofo_PDFDoc_setter(self, val, 4); -} -static int -podofo_PDFDoc_producer_setter(podofo_PDFDoc *self, PyObject *val, void *closure) { - return podofo_PDFDoc_setter(self, val, 5); -} - - - - - -} /* extern "C" */ - -static PyMethodDef podofo_PDFDoc_methods[] = { - {"load", (PyCFunction)podofo_PDFDoc_load, METH_VARARGS, - "Load a PDF document from a byte buffer (string)" - }, - {"open", (PyCFunction)podofo_PDFDoc_open, METH_VARARGS, - "Load a PDF document from a file path (string)" - }, - {"save", (PyCFunction)podofo_PDFDoc_save, METH_VARARGS, - "Save the PDF document to a path on disk" - }, - {"extract_first_page", (PyCFunction)podofo_PDFDoc_extract_first_page, METH_VARARGS, - "extract_first_page() -> Remove all but the first page." - }, - {"page_count", (PyCFunction)podofo_PDFDoc_page_count, METH_VARARGS, - "page_count() -> Number of pages in the PDF." - }, - {"delete_page", (PyCFunction)podofo_PDFDoc_delete_page, METH_VARARGS, - "delete_page(page_num) -> Delete the specified page from the pdf (0 is the first page)." - }, - - - {NULL} /* Sentinel */ -}; - -static PyGetSetDef podofo_PDFDoc_getsetters[] = { - {(char *)"title", - (getter)podofo_PDFDoc_title_getter, (setter)podofo_PDFDoc_title_setter, - (char *)"Document title", - NULL}, - {(char *)"author", - (getter)podofo_PDFDoc_author_getter, (setter)podofo_PDFDoc_author_setter, - (char *)"Document author", - NULL}, - {(char *)"subject", - (getter)podofo_PDFDoc_subject_getter, (setter)podofo_PDFDoc_subject_setter, - (char *)"Document subject", - NULL}, - {(char *)"keywords", - (getter)podofo_PDFDoc_keywords_getter, (setter)podofo_PDFDoc_keywords_setter, - (char *)"Document keywords", - NULL}, - {(char *)"creator", - (getter)podofo_PDFDoc_creator_getter, (setter)podofo_PDFDoc_creator_setter, - (char *)"Document creator", - NULL}, - {(char *)"producer", - (getter)podofo_PDFDoc_producer_getter, (setter)podofo_PDFDoc_producer_setter, - (char *)"Document producer", - NULL}, - {(char *)"pages", - (getter)podofo_PDFDoc_pages_getter, NULL, - (char *)"Number of pages in document (read only)", - NULL}, - {(char *)"version", - (getter)podofo_PDFDoc_version_getter, NULL, - (char *)"The PDF version (read only)", - NULL}, - - {NULL} /* Sentinel */ -}; - -static PyTypeObject podofo_PDFDocType = { - PyObject_HEAD_INIT(NULL) - 0, /*ob_size*/ - "podofo.PDFDoc", /*tp_name*/ - sizeof(podofo_PDFDoc), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor)podofo_PDFDoc_dealloc, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT, /*tp_flags*/ - "PDF Documents", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - podofo_PDFDoc_methods, /* tp_methods */ - 0, /* tp_members */ - podofo_PDFDoc_getsetters, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - podofo_PDFDoc_new, /* tp_new */ - -}; +PyObject *pdf::Error = NULL; static PyMethodDef podofo_methods[] = { {NULL} /* Sentinel */ }; -extern "C" { - PyMODINIT_FUNC initpodofo(void) { PyObject* m; - if (PyType_Ready(&podofo_PDFDocType) < 0) + if (PyType_Ready(&pdf::PDFDocType) < 0) return; + pdf::Error = PyErr_NewException((char*)"podofo.Error", NULL, NULL); + if (pdf::Error == NULL) return; + m = Py_InitModule3("podofo", podofo_methods, "Wrapper for the PoDoFo PDF library"); - Py_INCREF(&podofo_PDFDocType); - PyModule_AddObject(m, "PDFDoc", (PyObject *)&podofo_PDFDocType); -} + Py_INCREF(&pdf::PDFDocType); + PyModule_AddObject(m, "PDFDoc", (PyObject *)&pdf::PDFDocType); + + PyModule_AddObject(m, "Error", pdf::Error); } + diff --git a/src/calibre/utils/podofo/utils.cpp b/src/calibre/utils/podofo/utils.cpp new file mode 100644 index 0000000000..473eeac195 --- /dev/null +++ b/src/calibre/utils/podofo/utils.cpp @@ -0,0 +1,46 @@ +/* + * utils.cpp + * Copyright (C) 2012 Kovid Goyal + * + * Distributed under terms of the GPL3 license. + */ + +#include "global.h" + +using namespace pdf; + +void pdf::podofo_set_exception(const PdfError &err) { + const char *msg = PdfError::ErrorMessage(err.GetError()); + if (msg == NULL) msg = err.what(); + PyErr_SetString(Error, msg); +} + +PyObject * +pdf::podofo_convert_pdfstring(const PdfString &s) { + std::string raw = s.GetStringUtf8(); + return PyString_FromStringAndSize(raw.c_str(), raw.length()); +} + +PdfString * +pdf::podofo_convert_pystring(PyObject *py) { + Py_UNICODE* u = PyUnicode_AS_UNICODE(py); + PyObject *u8 = PyUnicode_EncodeUTF8(u, PyUnicode_GET_SIZE(py), "replace"); + if (u8 == NULL) { PyErr_NoMemory(); return NULL; } + pdf_utf8 *s8 = reinterpret_cast(PyString_AS_STRING(u8)); + PdfString *ans = new PdfString(s8); + Py_DECREF(u8); + if (ans == NULL) PyErr_NoMemory(); + return ans; +} + +PdfString * +pdf::podofo_convert_pystring_single_byte(PyObject *py) { + Py_UNICODE* u = PyUnicode_AS_UNICODE(py); + PyObject *s = PyUnicode_Encode(u, PyUnicode_GET_SIZE(py), "cp1252", "replace"); + if (s == NULL) { PyErr_NoMemory(); return NULL; } + PdfString *ans = new PdfString(PyString_AS_STRING(s)); + Py_DECREF(s); + if (ans == NULL) PyErr_NoMemory(); + return ans; +} +