diff --git a/setup.py b/setup.py index d7d0d6cb1c..35d6dc6810 100644 --- a/setup.py +++ b/setup.py @@ -65,10 +65,10 @@ if __name__ == '__main__': '/Users/kovid/podofo/include/podofo' podofo_lib = '/usr/lib' if islinux else r'C:\podofo' if iswindows else \ '/Users/kovid/podofo/lib' - if os.path.exists(os.path.join(podofo_inc, 'PdfString.h')): + if os.path.exists(os.path.join(podofo_inc, 'podofo.h')): eca = ['/EHsc'] if iswindows else [] - optional.append(PyQtExtension('calibre.plugins.podofo', [], - ['src/calibre/utils/podofo/podofo.sip'], + optional.append(Extension('calibre.plugins.podofo', + sources=['src/calibre/utils/podofo/podofo.cpp'], libraries=['podofo'], extra_compile_args=eca, library_dirs=[os.environ.get('PODOFO_LIB_DIR', podofo_lib)], include_dirs=\ diff --git a/src/calibre/ebooks/metadata/pdf.py b/src/calibre/ebooks/metadata/pdf.py index 20ba98ff54..89491e2ace 100644 --- a/src/calibre/ebooks/metadata/pdf.py +++ b/src/calibre/ebooks/metadata/pdf.py @@ -9,20 +9,20 @@ from threading import Thread from calibre.ebooks.metadata import MetaInformation, authors_to_string, get_parser from calibre.utils.pdftk import set_metadata as pdftk_set_metadata from calibre.utils.podofo import get_metadata as podofo_get_metadata, \ - set_metadata as podofo_set_metadata + set_metadata as podofo_set_metadata, Unavailable def get_metadata(stream): try: return podofo_get_metadata(stream) - except: + except Unavailable: return get_metadata_pypdf(stream) def set_metadata(stream, mi): stream.seek(0) try: return podofo_set_metadata(stream, mi) - except: + except Unavailable: pass try: return pdftk_set_metadata(stream, mi) diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index 717effd455..d77c89f8cb 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -105,7 +105,7 @@ Device Integration What devices does |app| support? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -At the moment |app| has full support for the SONY PRS 500/505/700, Cybook Gen 3, Amazon Kindle 1/2, Netronix EB600, Ectaco Jetbook and the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk. +At the moment |app| has full support for the SONY PRS 500/505/700, Cybook Gen 3, Amazon Kindle 1/2, Netronix EB600, Ectaco Jetbook, BeBook/BeBook Mini and the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk. I used |app| to transfer some books to my reader, and now the SONY software hangs every time I connect the reader? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/calibre/utils/podofo/__init__.py b/src/calibre/utils/podofo/__init__.py index 6c2846cee2..3db6699dfe 100644 --- a/src/calibre/utils/podofo/__init__.py +++ b/src/calibre/utils/podofo/__init__.py @@ -21,17 +21,16 @@ def get_metadata(stream): raise Unavailable(podofo_err) raw = stream.read() stream.seek(0) - p = podofo.PdfMemDocument() - p.Load(raw, len(raw)) - info = p.GetInfo() - title = info.GetTitle().decode('utf-8').strip() + p = podofo.PDFDoc() + p.load(raw) + title = p.title if not title: title = getattr(stream, 'name', _('Unknown')) title = os.path.splitext(os.path.basename(title))[0] - author = info.GetAuthor().decode('utf-8').strip() + author = p.author authors = string_to_authors(author) if author else [_('Unknown')] mi = MetaInformation(title, authors) - creator = info.GetCreator().decode('utf-8').strip() + creator = p.creator if creator: mi.book_producer = creator return mi @@ -47,31 +46,28 @@ def set_metadata(stream, mi): if not podofo: raise Unavailable(podofo_err) raw = stream.read() - p = podofo.PdfMemDocument() - p.Load(raw, len(raw)) - info = p.GetInfo() + p = podofo.PDFDoc() + p.load(raw) title = prep(mi.title) touched = False if title: - info.SetTitle(title) + p.title = title touched = True author = prep(authors_to_string(mi.authors)) if author: - print repr(author) - info.SetAuthor(author) + p.author = author touched = True bkp = prep(mi.book_producer) if bkp: - info.SetCreator(bkp) + p.creator = bkp touched = True if touched: - p.SetInfo(info) from calibre.ptempfile import TemporaryFile with TemporaryFile('_pdf_set_metadata.pdf') as f: - p.Write(f) + p.save(f) raw = open(f, 'rb').read() stream.seek(0) stream.truncate() diff --git a/src/calibre/utils/podofo/podofo.cpp b/src/calibre/utils/podofo/podofo.cpp new file mode 100644 index 0000000000..11d9b94868 --- /dev/null +++ b/src/calibre/utils/podofo/podofo.cpp @@ -0,0 +1,330 @@ +#define UNICODE +#define PY_SSIZE_T_CLEAN +#include + +#define USING_SHARED_PODOFO +#include +using namespace PoDoFo; + +#include + +class podofo_pdfmem_wrapper : public PdfMemDocument { + public: + inline void set_info(PdfInfo *i) { this->SetInfo(i); } +}; + +typedef struct { + PyObject_HEAD + /* Type-specific fields go here. */ + podofo_pdfmem_wrapper *doc; + +} podofo_PDFDoc; + +extern "C" { +static void +podofo_PDFDoc_dealloc(podofo_PDFDoc* self) +{ + if (self->doc != NULL) delete self->doc; + self->ob_type->tp_free((PyObject*)self); +} + +static PyObject * +podofo_PDFDoc_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + podofo_PDFDoc *self; + + self = (podofo_PDFDoc *)type->tp_alloc(type, 0); + if (self != NULL) { + self->doc = new podofo_pdfmem_wrapper(); + if (self->doc == NULL) { Py_DECREF(self); return NULL; } + } + + return (PyObject *)self; +} + +static PyObject * +podofo_PDFDoc_load(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) { + char *buffer; Py_ssize_t size; + + if (PyArg_ParseTuple(args, "s#", &buffer, &size)) { + try { + self->doc->Load(buffer, size); + } catch(const PdfError & err) { + PyErr_SetString(PyExc_ValueError, PdfError::ErrorMessage(err.GetError())); + return NULL; + } + } else return NULL; + + + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject * +podofo_PDFDoc_save(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) { + char *buffer; + + if (PyArg_ParseTuple(args, "s", &buffer)) { + try { + self->doc->Write(buffer); + } catch(const PdfError & err) { + PyErr_SetString(PyExc_ValueError, PdfError::ErrorMessage(err.GetError())); + return NULL; + } + } else return NULL; + + + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject * +podofo_convert_pdfstring(const PdfString &s) { + std::string raw = s.GetStringUtf8(); + return PyString_FromStringAndSize(raw.c_str(), raw.length()); +} + +static PdfString * +podofo_convert_pystring(PyObject *py) { + Py_UNICODE* u = PyUnicode_AS_UNICODE(py); + PyObject *u8 = PyUnicode_EncodeUTF8(u, PyUnicode_GET_SIZE(py), "replace"); + if (u8 == NULL) { PyErr_NoMemory(); return NULL; } + pdf_utf8 *s8 = (pdf_utf8 *)PyString_AS_STRING(u8); + PdfString *ans = new PdfString(s8); + Py_DECREF(u8); + if (ans == NULL) PyErr_NoMemory(); + return ans; +} + +static PyObject * +podofo_PDFDoc_getter(podofo_PDFDoc *self, int field) +{ + PyObject *ans; + PdfString s; + PdfInfo *info = self->doc->GetInfo(); + if (info == NULL) { + PyErr_SetString(PyExc_Exception, "You must first load a PDF Document"); + return NULL; + } + switch (field) { + case 0: + s = info->GetTitle(); break; + case 1: + s = info->GetAuthor(); break; + case 2: + s = info->GetSubject(); break; + case 3: + s = info->GetKeywords(); break; + case 4: + s = info->GetCreator(); break; + case 5: + s = info->GetProducer(); break; + default: + PyErr_SetString(PyExc_Exception, "Bad field"); + return NULL; + } + + ans = podofo_convert_pdfstring(s); + if (ans == NULL) {PyErr_NoMemory(); return NULL;} + PyObject *uans = PyUnicode_FromEncodedObject(ans, "utf-8", "replace"); + Py_DECREF(ans); + if (uans == NULL) {return NULL;} + Py_INCREF(uans); + return uans; +} + +static int +podofo_PDFDoc_setter(podofo_PDFDoc *self, PyObject *val, int field) { + if (val == NULL || !PyUnicode_Check(val)) { + PyErr_SetString(PyExc_ValueError, "Must use unicode objects to set metadata"); + return -1; + } + PdfInfo *info = new PdfInfo(*self->doc->GetInfo()); + if (info == NULL) { + PyErr_SetString(PyExc_Exception, "You must first load a PDF Document"); + return -1; + } + + PdfString *s = podofo_convert_pystring(val); + if (s == NULL) return -1; + switch (field) { + case 0: + info->SetTitle(*s); break; + case 1: + info->SetAuthor(*s); break; + case 2: + info->SetSubject(*s); break; + case 3: + info->SetKeywords(*s); break; + case 4: + info->SetCreator(*s); break; + case 5: + info->SetProducer(*s); break; + default: + PyErr_SetString(PyExc_Exception, "Bad field"); + return -1; + } + + self->doc->set_info(info); + return 0; +} + +static PyObject * +podofo_PDFDoc_title_getter(podofo_PDFDoc *self, void *closure) { + return podofo_PDFDoc_getter(self, 0); +} +static PyObject * +podofo_PDFDoc_author_getter(podofo_PDFDoc *self, void *closure) { + return podofo_PDFDoc_getter(self, 1); +} +static PyObject * +podofo_PDFDoc_subject_getter(podofo_PDFDoc *self, void *closure) { + return podofo_PDFDoc_getter(self, 2); +} +static PyObject * +podofo_PDFDoc_keywords_getter(podofo_PDFDoc *self, void *closure) { + return podofo_PDFDoc_getter(self, 3); +} +static PyObject * +podofo_PDFDoc_creator_getter(podofo_PDFDoc *self, void *closure) { + return podofo_PDFDoc_getter(self, 4); +} +static PyObject * +podofo_PDFDoc_producer_getter(podofo_PDFDoc *self, void *closure) { + return podofo_PDFDoc_getter(self, 5); +} +static int +podofo_PDFDoc_title_setter(podofo_PDFDoc *self, PyObject *val, void *closure) { + return podofo_PDFDoc_setter(self, val, 0); +} +static int +podofo_PDFDoc_author_setter(podofo_PDFDoc *self, PyObject *val, void *closure) { + return podofo_PDFDoc_setter(self, val, 1); +} +static int +podofo_PDFDoc_subject_setter(podofo_PDFDoc *self, PyObject *val, void *closure) { + return podofo_PDFDoc_setter(self, val, 2); +} +static int +podofo_PDFDoc_keywords_setter(podofo_PDFDoc *self, PyObject *val, void *closure) { + return podofo_PDFDoc_setter(self, val, 3); +} +static int +podofo_PDFDoc_creator_setter(podofo_PDFDoc *self, PyObject *val, void *closure) { + return podofo_PDFDoc_setter(self, val, 4); +} +static int +podofo_PDFDoc_producer_setter(podofo_PDFDoc *self, PyObject *val, void *closure) { + return podofo_PDFDoc_setter(self, val, 5); +} + + + + + +} /* extern "C" */ + +static PyMethodDef podofo_PDFDoc_methods[] = { + {"load", (PyCFunction)podofo_PDFDoc_load, METH_VARARGS, + "Load a PDF document from a byte buffer (string)" + }, + {"save", (PyCFunction)podofo_PDFDoc_save, METH_VARARGS, + "Save the PDF document to a path on disk" + }, + {NULL} /* Sentinel */ +}; + +static PyGetSetDef podofo_PDFDoc_getseters[] = { + {"title", + (getter)podofo_PDFDoc_title_getter, (setter)podofo_PDFDoc_title_setter, + "Document title", + NULL}, + {"author", + (getter)podofo_PDFDoc_author_getter, (setter)podofo_PDFDoc_author_setter, + "Document author", + NULL}, + {"subject", + (getter)podofo_PDFDoc_subject_getter, (setter)podofo_PDFDoc_subject_setter, + "Document subject", + NULL}, + {"keywords", + (getter)podofo_PDFDoc_keywords_getter, (setter)podofo_PDFDoc_keywords_setter, + "Document keywords", + NULL}, + {"creator", + (getter)podofo_PDFDoc_creator_getter, (setter)podofo_PDFDoc_creator_setter, + "Document creator", + NULL}, + {"producer", + (getter)podofo_PDFDoc_producer_getter, (setter)podofo_PDFDoc_producer_setter, + "Document producer", + NULL}, + + {NULL} /* Sentinel */ +}; + +static PyTypeObject podofo_PDFDocType = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "podofo.PDFDoc", /*tp_name*/ + sizeof(podofo_PDFDoc), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)podofo_PDFDoc_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT, /*tp_flags*/ + "PDF Documents", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + podofo_PDFDoc_methods, /* tp_methods */ + 0, /* tp_members */ + podofo_PDFDoc_getseters, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + podofo_PDFDoc_new, /* tp_new */ + +}; + +static PyMethodDef podofo_methods[] = { + {NULL} /* Sentinel */ +}; + +extern "C" { + + +PyMODINIT_FUNC +initpodofo(void) +{ + PyObject* m; + + if (PyType_Ready(&podofo_PDFDocType) < 0) + return; + + m = Py_InitModule3("podofo", podofo_methods, + "Wrapper for the PoDoFo pDF library"); + + Py_INCREF(&podofo_PDFDocType); + PyModule_AddObject(m, "PDFDoc", (PyObject *)&podofo_PDFDocType); +} +} diff --git a/src/calibre/utils/podofo/podofo.sip b/src/calibre/utils/podofo/podofo.sip deleted file mode 100644 index 0964e3a7e6..0000000000 --- a/src/calibre/utils/podofo/podofo.sip +++ /dev/null @@ -1,128 +0,0 @@ -%Module podofo 0 - -%MappedType PdfString -{ -%TypeHeaderCode -#define USING_SHARED_PODOFO -#include -using namespace PoDoFo; -%End -%ConvertFromTypeCode - if (sipCpp -> IsValid()) { - std::string raw = sipCpp->GetStringUtf8(); - return PyString_FromStringAndSize(raw.c_str(), raw.length()); - } else return PyString_FromString(""); -%End -%ConvertToTypeCode - if (sipIsErr == NULL) { - if (sipIsErr == NULL) - return (PyUnicode_Check(sipPy) || PyString_Check(sipPy)); - } - if (sipPy == Py_None) { - *sipCppPtr = NULL; - return 0; - } - if (PyString_Check(sipPy)) { - *sipCppPtr = new PdfString((pdf_utf8 *)PyString_AS_STRING(sipPy)); - return sipGetState(sipTransferObj); - } - if (PyUnicode_Check(sipPy)) { - Py_UNICODE* u = PyUnicode_AS_UNICODE(sipPy); - PyObject *u8 = PyUnicode_EncodeUTF8(u, PyUnicode_GET_SIZE(sipPy), "replace"); - pdf_utf8 *s8 = (pdf_utf8 *)PyString_AS_STRING(u8); - *sipCppPtr = new PdfString(s8); - return sipGetState(sipTransferObj); - } - *sipCppPtr = (PdfString *)sipForceConvertTo_PdfString(sipPy,sipIsErr); - return 1; -%End -}; - -class PdfObject { -%TypeHeaderCode -#define USING_SHARED_PODOFO -#include -using namespace PoDoFo; -%End - public: - PdfObject(); - -}; - -class PdfInfo { -%TypeHeaderCode -#define USING_SHARED_PODOFO -#include -using namespace PoDoFo; -%End - public: - PdfInfo(PdfObject *); - - PdfString GetAuthor() const; - PdfString GetSubject() const; - PdfString GetTitle() const; - PdfString GetKeywords() const; - PdfString GetCreator() const; - PdfString GetProducer() const; - - void SetAuthor(PdfString &); - void SetSubject(PdfString &); - void SetTitle(PdfString &); - void SetKeywords(PdfString &); - void SetCreator(PdfString &); - void SetProducer(PdfString &); - -}; - -class PdfOutputDevice { -%TypeHeaderCode -#define USING_SHARED_PODOFO -#include -using namespace PoDoFo; -%End - public: - PdfOutputDevice(char *, long); - unsigned long GetLength(); - unsigned long Tell(); - void Flush(); -}; - - -class PdfMemDocument { -%TypeHeaderCode -#define USING_SHARED_PODOFO -#include -using namespace PoDoFo; -%End - public: - PdfMemDocument(); - - void Load(const char *filename); - void Load(const char *buffer, long size); - void Write(const char *filename); - PdfInfo *GetInfo() const; - - protected: - void SetInfo(PdfInfo * /TransferThis/); - - private: - PdfMemDocument(PdfMemDocument &); - -}; - - -%Exception PoDoFo::PdfError /PyName=PdfError/ -{ -%TypeHeaderCode -#define USING_SHARED_PODOFO -#include -%End -%RaiseCode - const char *detail = sipExceptionRef.what(); - - SIP_BLOCK_THREADS - PyErr_SetString(sipException_PoDoFo_PdfError, detail); - SIP_UNBLOCK_THREADS -%End -}; -