From 76fbbef9d08d4d335c5408584ed26bfc165b5fdc Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 11 May 2023 13:50:59 +0530 Subject: [PATCH] Start work on porting to new PoDoFo API --- setup/build.py | 8 +- setup/build_environment.py | 1 + setup/extensions.json | 4 +- src/calibre/utils/podofo/doc.cpp | 442 ++++++++++++-------------- src/calibre/utils/podofo/fonts.cpp | 216 +++++++------ src/calibre/utils/podofo/global.h | 47 ++- src/calibre/utils/podofo/images.cpp | 43 +-- src/calibre/utils/podofo/impose.cpp | 26 +- src/calibre/utils/podofo/outline.cpp | 28 +- src/calibre/utils/podofo/outlines.cpp | 36 +-- src/calibre/utils/podofo/output.cpp | 24 +- src/calibre/utils/podofo/podofo.cpp | 27 -- src/calibre/utils/podofo/utils.cpp | 15 +- 13 files changed, 454 insertions(+), 463 deletions(-) diff --git a/setup/build.py b/setup/build.py index b67803b90f..b6e7aed512 100644 --- a/setup/build.py +++ b/setup/build.py @@ -231,8 +231,12 @@ class Environment(NamedTuple): def lib_dirs_to_ldflags(self, dirs) -> List[str]: return [self.libdir_prefix+x for x in dirs if x] - def libraries_to_ldflags(self, dirs): - return [self.lib_prefix+x+self.lib_suffix for x in dirs] + def libraries_to_ldflags(self, libs): + def map_name(x): + if '/' in x: + return x + return self.lib_prefix+x+self.lib_suffix + return list(map(map_name, libs)) diff --git a/setup/build_environment.py b/setup/build_environment.py index dc0027bb72..e9d5ddfa85 100644 --- a/setup/build_environment.py +++ b/setup/build_environment.py @@ -209,6 +209,7 @@ else: podofo_lib = os.environ.get('PODOFO_LIB_DIR', podofo_lib) podofo_inc = os.environ.get('PODOFO_INC_DIR', podofo_inc) +podofo = os.environ.get('PODOFO_LIB_NAME', 'podofo') podofo_error = None if os.path.exists(os.path.join(podofo_inc, 'podofo.h')) else \ ('PoDoFo not found on your system. Various PDF related', ' functionality will not work. 
Use the PODOFO_INC_DIR and', diff --git a/setup/extensions.json b/setup/extensions.json index c7260f8d51..cf7d145b66 100644 --- a/setup/extensions.json +++ b/setup/extensions.json @@ -120,11 +120,11 @@ "name": "podofo", "sources": "calibre/utils/podofo/utils.cpp calibre/utils/podofo/output.cpp calibre/utils/podofo/doc.cpp calibre/utils/podofo/outline.cpp calibre/utils/podofo/fonts.cpp calibre/utils/podofo/impose.cpp calibre/utils/podofo/images.cpp calibre/utils/podofo/outlines.cpp calibre/utils/podofo/podofo.cpp", "headers": "calibre/utils/podofo/global.h", - "libraries": "podofo", + "libraries": "!podofo", "lib_dirs": "!podofo_lib_dirs", "inc_dirs": "!podofo_inc_dirs", "error": "!podofo_error", - "needs_c++": "11" + "needs_c++": "17" }, { "name": "html_as_json", diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp index 25f814a3c4..1b9eeac68b 100644 --- a/src/calibre/utils/podofo/doc.cpp +++ b/src/calibre/utils/podofo/doc.cpp @@ -7,6 +7,8 @@ #include "global.h" #include +#include +#include using namespace pdf; @@ -41,11 +43,7 @@ PDFDoc_load(PDFDoc *self, PyObject *args) { if (!PyArg_ParseTuple(args, "y#", &buffer, &size)) return NULL; try { -#if PODOFO_VERSION <= 0x000905 - self->doc->Load(buffer, (long)size); -#else - self->doc->LoadFromBuffer(buffer, (long)size); -#endif + self->doc->LoadFromBuffer(bufferview(buffer, size)); } catch(const PdfError & err) { podofo_set_exception(err); return NULL; @@ -84,7 +82,7 @@ PDFDoc_save(PDFDoc *self, PyObject *args) { if (PyArg_ParseTuple(args, "s", &buffer)) { try { - self->doc->Write(buffer); + self->doc->Save(buffer); } catch(const PdfError & err) { podofo_set_exception(err); return NULL; @@ -94,16 +92,43 @@ PDFDoc_save(PDFDoc *self, PyObject *args) { Py_RETURN_NONE; } +class BytesOutputDevice : public OutputStreamDevice { + private: + pyunique_ptr bytes; + size_t written; + public: + BytesOutputDevice() : bytes(PyBytes_FromStringAndSize(NULL, 1 * 1024 *1024)) { SetAccess(DeviceAccess::Write); 
written = 0; } + size_t GetLength() const { return written; } + size_t GetPosition() const { return written; } + size_t capacity() const { return bytes ? PyBytes_GET_SIZE(bytes.get()) : 0; } + bool Eof() const { return false; } + + void writeBuffer(const char* src, size_t src_sz) { + if (!bytes) return; /* allocation or an earlier resize failed; Python error is already set */ if (written + src_sz > capacity()) { + PyObject* old = bytes.release(); + if (_PyBytes_Resize(&old, std::max(written + src_sz, 2 * capacity())) != 0) { + return; + } + bytes.reset(old); + } + if (bytes) { + memcpy(PyBytes_AS_STRING(bytes.get()) + written, src, src_sz); /* append at the current write offset */ + written += src_sz; + } + } + + void Flush() { } + PyObject* Release() { PyObject *ans = bytes.release(); if (ans) _PyBytes_Resize(&ans, written); /* trim spare capacity so callers see only the bytes written */ return ans; } +}; + static PyObject * PDFDoc_write(PDFDoc *self, PyObject *args) { PyObject *ans; + BytesOutputDevice d; try { - PdfRefCountedBuffer buffer(1*1024*1024); - PdfOutputDevice out(&buffer); - self->doc->Write(&out); - ans = PyBytes_FromStringAndSize(buffer.GetBuffer(), out.Tell()); - if (ans == NULL) PyErr_NoMemory(); + self->doc->Save(d); + return d.Release(); } catch(const PdfError &err) { podofo_set_exception(err); return NULL; @@ -124,11 +149,25 @@ PDFDoc_save_to_fileobj(PDFDoc *self, PyObject *args) { static PyObject * PDFDoc_uncompress_pdf(PDFDoc *self, PyObject *args) { - for (auto &it : self->doc->GetObjects()) { - if(it->HasStream()) { - PdfMemStream* stream = dynamic_cast(it->GetStream()); - stream->Uncompress(); + try { + auto& objects = self->doc->GetObjects(); + for (auto obj : objects) { + auto stream = obj->GetStream(); + if (stream == nullptr) continue; + try { + try { + stream->Unwrap(); + } catch (PdfError& e) { + if (e.GetCode() != PdfErrorCode::Flate) throw e; + } + } + catch (PdfError& e) { + if (e.GetCode() != PdfErrorCode::UnsupportedFilter) throw e; + } } + } catch(const PdfError & err) { + podofo_set_exception(err); + return NULL; } Py_RETURN_NONE; } @@ -140,7 +179,8 @@ PDFDoc_uncompress_pdf(PDFDoc *self, PyObject *args) { static PyObject * PDFDoc_extract_first_page(PDFDoc *self, PyObject *args) { try { - while 
(self->doc->GetPageCount() > 1) self->doc->GetPagesTree()->DeletePage(1); + auto pages = &self->doc->GetPages(); + while (pages->GetCount() > 1) pages->RemovePageAt(1); } catch(const PdfError & err) { podofo_set_exception(err); return NULL; @@ -154,7 +194,7 @@ static PyObject * PDFDoc_page_count(PDFDoc *self, PyObject *args) { int count; try { - count = self->doc->GetPageCount(); + count = self->doc->GetPages().GetCount(); } catch(const PdfError & err) { podofo_set_exception(err); return NULL; @@ -173,8 +213,8 @@ PDFDoc_image_count(PDFDoc *self, PyObject *args) { if( it->IsDictionary() ) { obj_type = it->GetDictionary().GetKey( PdfName::KeyType ); obj_sub_type = it->GetDictionary().GetKey( PdfName::KeySubtype ); - if( ( obj_type && obj_type->IsName() && ( obj_type->GetName().GetName() == "XObject" ) ) || - ( obj_sub_type && obj_sub_type->IsName() && ( obj_sub_type->GetName().GetName() == "Image" ) ) ) count++; + if( ( obj_type && obj_type->IsName() && ( obj_type->GetName().GetString() == "XObject" ) ) || + ( obj_sub_type && obj_sub_type->IsName() && ( obj_sub_type->GetName().GetString() == "Image" ) ) ) count++; } } } catch(const PdfError & err) { @@ -190,7 +230,9 @@ PDFDoc_delete_pages(PDFDoc *self, PyObject *args) { int page = 0, count = 1; if (PyArg_ParseTuple(args, "i|i", &page, &count)) { try { - self->doc->DeletePages(page - 1, count); + while (count-- > 0) { /* each removal shifts later pages down, so the index stays page - 1 */ + self->doc->GetPages().RemovePageAt(page - 1); + } } catch(const PdfError & err) { podofo_set_exception(err); return NULL; @@ -207,10 +249,9 @@ PDFDoc_get_page_box(PDFDoc *self, PyObject *args) { const char *which; if (PyArg_ParseTuple(args, "si", &which, &pagenum)) { try { - PdfPagesTree* tree = self->doc->GetPagesTree(); - PdfPage* page = tree->GetPage(pagenum - 1); - if (!page) { PyErr_Format(PyExc_ValueError, "page number %d not found in PDF file", pagenum); return NULL; } - PdfRect rect; + auto page = get_page(self->doc, pagenum-1); + if (!page) { PyErr_Format(PyExc_ValueError, "page number %d not 
found in PDF file", pagenum); return NULL; } + Rect rect; if (strcmp(which, "MediaBox") == 0) { rect = page->GetMediaBox(); } else if (strcmp(which, "CropBox") == 0) { @@ -225,7 +266,7 @@ PDFDoc_get_page_box(PDFDoc *self, PyObject *args) { PyErr_Format(PyExc_KeyError, "%s is not a known box", which); return NULL; } - return Py_BuildValue("dddd", rect.GetLeft(), rect.GetBottom(), rect.GetWidth(), rect.GetHeight()); + return Py_BuildValue("dddd", rect.GetLeft(), rect.GetBottom(), rect.Width, rect.Height); } catch(const PdfError & err) { podofo_set_exception(err); return NULL; @@ -243,13 +284,12 @@ PDFDoc_set_page_box(PDFDoc *self, PyObject *args) { const char *which; if (PyArg_ParseTuple(args, "sidddd", &which, &pagenum, &left, &bottom, &width, &height)) { try { - PdfPagesTree* tree = self->doc->GetPagesTree(); - PdfPage* page = tree->GetPage(pagenum - 1); - if (!page) { PyErr_Format(PyExc_ValueError, "page number %d not found in PDF file", pagenum); return NULL; } - PdfRect rect(left, bottom, width, height); - PdfObject box; - rect.ToVariant(box); - page->GetObject()->GetDictionary().AddKey(PdfName(which), box); + PdfPage* page = get_page(self->doc, pagenum-1); + if (!page) { PyErr_Format(PyExc_ValueError, "page number %d not found in PDF file", pagenum); return NULL; } + Rect rect(left, bottom, width, height); + PdfArray box; + rect.ToArray(box); + page->GetObject().GetDictionary().AddKey(PdfName(which), box); Py_RETURN_NONE; } catch(const PdfError & err) { podofo_set_exception(err); @@ -266,9 +306,7 @@ PDFDoc_copy_page(PDFDoc *self, PyObject *args) { int from = 0, to = 0; if (!PyArg_ParseTuple(args, "ii", &from, &to)) return NULL; try { - PdfPagesTree* tree = self->doc->GetPagesTree(); - PdfPage* page = tree->GetPage(from - 1); - tree->InsertPage(to - 1, page); + self->doc->GetPages().InsertDocumentPageAt(to - 1, *self->doc, from - 1); } catch(const PdfError & err) { podofo_set_exception(err); return NULL; @@ -287,14 +325,14 @@ PDFDoc_append(PDFDoc *self, PyObject 
*args) { typ = PyObject_IsInstance(doc, (PyObject*)&PDFDocType); if (typ == -1) return NULL; if (typ == 0) { PyErr_SetString(PyExc_TypeError, "You must pass a PDFDoc instance to this method"); return NULL; } + PDFDoc *pdfdoc = (PDFDoc*)doc; try { - self->doc->Append(*((PDFDoc*)doc)->doc, true); + self->doc->GetPages().AppendDocumentPages(*pdfdoc->doc); } catch (const PdfError & err) { podofo_set_exception(err); return NULL; } - Py_RETURN_NONE; } // }}} @@ -307,7 +345,7 @@ PDFDoc_insert_existing_page(PDFDoc *self, PyObject *args) { if (!PyArg_ParseTuple(args, "O!|ii", &PDFDocType, &src_doc, &src_page, &at)) return NULL; try { - self->doc->InsertExistingPageAt(*src_doc->doc, src_page, at); + self->doc->GetPages().InsertDocumentPageAt(at, *src_doc->doc, src_page); } catch (const PdfError & err) { podofo_set_exception(err); return NULL; @@ -323,12 +361,11 @@ PDFDoc_set_box(PDFDoc *self, PyObject *args) { double left, bottom, width, height; char *box; if (!PyArg_ParseTuple(args, "isdddd", &num, &box, &left, &bottom, &width, &height)) return NULL; - try { - PdfRect r(left, bottom, width, height); - PdfObject o; - r.ToVariant(o); - self->doc->GetPage(num)->GetObject()->GetDictionary().AddKey(PdfName(box), o); + Rect r(left, bottom, width, height); + PdfArray o; + r.ToArray(o); + self->doc->GetPages().GetPageAt(num).GetObject().GetDictionary().AddKey(PdfName(box), o); } catch(const PdfError & err) { podofo_set_exception(err); return NULL; @@ -336,41 +373,21 @@ PDFDoc_set_box(PDFDoc *self, PyObject *args) { PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to set the box"); return NULL; } - Py_RETURN_NONE; } // }}} // get_xmp_metadata() {{{ static PyObject * PDFDoc_get_xmp_metadata(PDFDoc *self, PyObject *args) { - PoDoFo::PdfObject *metadata = NULL; - PoDoFo::PdfStream *str = NULL; - PoDoFo::pdf_long len = 0; - char *buf = NULL; - PyObject *ans = NULL; - try { - if ((metadata = self->doc->GetMetadata()) != NULL) { - if ((str = metadata->GetStream()) 
!= NULL) { - str->GetFilteredCopy(&buf, &len); - if (buf != NULL) { - Py_ssize_t psz = len; - ans = Py_BuildValue("y#", buf, psz); - free(buf); buf = NULL; - if (ans == NULL) goto error; - } - } - } + auto s = self->doc->GetCatalog().GetMetadataStreamValue(); + return PyBytes_FromStringAndSize(s.data(), s.size()); } catch(const PdfError & err) { - podofo_set_exception(err); goto error; + podofo_set_exception(err); return NULL; } catch (...) { - PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to read the XML metadata"); goto error; + PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to read the XML metadata"); return NULL; } - - if (ans != NULL) return ans; Py_RETURN_NONE; -error: - return NULL; } // }}} // set_xmp_metadata() {{{ @@ -378,85 +395,58 @@ static PyObject * PDFDoc_set_xmp_metadata(PDFDoc *self, PyObject *args) { const char *raw = NULL; Py_ssize_t len = 0; - PoDoFo::PdfObject *metadata = NULL, *catalog = NULL; - PoDoFo::PdfStream *str = NULL; - TVecFilters compressed(1); - compressed[0] = ePdfFilter_FlateDecode; - if (!PyArg_ParseTuple(args, "y#", &raw, &len)) return NULL; try { - if ((metadata = self->doc->GetMetadata()) != NULL) { - if ((str = metadata->GetStream()) == NULL) { PyErr_NoMemory(); goto error; } - str->Set(raw, len, compressed); - } else { - if ((catalog = self->doc->GetCatalog()) == NULL) { PyErr_SetString(PyExc_ValueError, "Cannot set XML metadata as this document has no catalog"); goto error; } - if ((metadata = self->doc->GetObjects().CreateObject("Metadata")) == NULL) { PyErr_NoMemory(); goto error; } - if ((str = metadata->GetStream()) == NULL) { PyErr_NoMemory(); goto error; } - metadata->GetDictionary().AddKey(PoDoFo::PdfName("Subtype"), PoDoFo::PdfName("XML")); - str->Set(raw, len, compressed); - catalog->GetDictionary().AddKey(PoDoFo::PdfName("Metadata"), metadata->Reference()); - } + self->doc->GetCatalog().SetMetadataStreamValue(std::string_view(raw, len)); } catch(const PdfError 
& err) { - podofo_set_exception(err); goto error; + podofo_set_exception(err); return NULL; } catch (...) { - PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to set the XML metadata"); - goto error; + PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to set the XML metadata"); return NULL; } Py_RETURN_NONE; -error: - return NULL; - } // }}} // extract_anchors() {{{ static PyObject * PDFDoc_extract_anchors(PDFDoc *self, PyObject *args) { - const PdfObject* catalog = NULL; PyObject *ans = PyDict_New(); if (ans == NULL) return NULL; try { - if ((catalog = self->doc->GetCatalog()) != NULL) { - const PdfObject *dests_ref = catalog->GetDictionary().GetKey("Dests"); - PdfPagesTree *tree = self->doc->GetPagesTree(); - if (dests_ref && dests_ref->IsReference()) { - const PdfObject *dests_obj = self->doc->GetObjects().GetObject(dests_ref->GetReference()); - if (dests_obj && dests_obj->IsDictionary()) { - const PdfDictionary &dests = dests_obj->GetDictionary(); - const TKeyMap &keys = dests.GetKeys(); - for (TCIKeyMap itres = keys.begin(); itres != keys.end(); ++itres) { - if (itres->second->IsArray()) { - const PdfArray &dest = itres->second->GetArray(); - // see section 8.2 of PDF spec for different types of destination arrays - // but chromium apparently generates only [page /XYZ left top zoom] type arrays - if (dest.GetSize() > 4 && dest[1].IsName() && dest[1].GetName().GetName() == "XYZ") { - const PdfPage *page = tree->GetPage(dest[0].GetReference()); - if (page) { - unsigned int pagenum = page->GetPageNumber(); - double left = dest[2].GetReal(), top = dest[3].GetReal(); - long long zoom = dest[4].GetNumber(); - const std::string &anchor = itres->first.GetName(); - PyObject *key = PyUnicode_DecodeUTF8(anchor.c_str(), anchor.length(), "replace"); - PyObject *tuple = Py_BuildValue("IddL", pagenum, left, top, zoom); - if (!tuple || !key) { break; } - int ret = PyDict_SetItem(ans, key, tuple); - Py_DECREF(key); 
Py_DECREF(tuple); - if (ret != 0) break; - } - } - } - } - } - } - } + const PdfObject *dests_ref = self->doc->GetCatalog().GetDictionary().GetKey("Dests"); + auto& pages = self->doc->GetPages(); + if (dests_ref && dests_ref->IsReference()) { + const PdfObject *dests_obj = self->doc->GetObjects().GetObject(dests_ref->GetReference()); + if (dests_obj && dests_obj->IsDictionary()) { + const PdfDictionary &dests = dests_obj->GetDictionary(); + for (auto itres: dests) { + if (itres.second.IsArray()) { + const PdfArray &dest = itres.second.GetArray(); + // see section 8.2 of PDF spec for different types of destination arrays + // but chromium apparently generates only [page /XYZ left top zoom] type arrays + if (dest.GetSize() > 4 && dest[1].IsName() && dest[1].GetName().GetString() == "XYZ") { + const PdfPage *page = get_page(pages, dest[0].GetReference()); + if (page) { + unsigned int pagenum = page->GetPageNumber(); + double left = dest[2].GetReal(), top = dest[3].GetReal(); + long long zoom = dest[4].GetNumber(); + const std::string &anchor = itres.first.GetString(); + PyObject *key = PyUnicode_DecodeUTF8(anchor.c_str(), anchor.length(), "replace"); + PyObject *tuple = Py_BuildValue("IddL", pagenum, left, top, zoom); + if (!tuple || !key) { break; } + int ret = PyDict_SetItem(ans, key, tuple); + Py_DECREF(key); Py_DECREF(tuple); + if (ret != 0) break; + } + } + } + } + } + } } catch(const PdfError & err) { podofo_set_exception(err); - Py_CLEAR(ans); - return NULL; } catch (...) 
{ PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to set the box"); - Py_CLEAR(ans); - return NULL; } if (PyErr_Occurred()) { Py_CLEAR(ans); return NULL; } return ans; @@ -472,28 +462,22 @@ alter_link(PDFDoc *self, PdfDictionary &link, PyObject *alter_callback, bool mar } PdfDictionary &A = link.GetKey("A")->GetDictionary(); PdfObject *uo = A.GetKey("URI"); - const std::string &uri = uo->GetString().GetStringUtf8(); + const std::string &uri = uo->GetString().GetString(); pyunique_ptr ret(PyObject_CallObject(alter_callback, Py_BuildValue("(N)", PyUnicode_DecodeUTF8(uri.c_str(), uri.length(), "replace")))); if (!ret) { return; } if (PyTuple_Check(ret.get()) && PyTuple_GET_SIZE(ret.get()) == 4) { int pagenum; double left, top, zoom; if (PyArg_ParseTuple(ret.get(), "iddd", &pagenum, &left, &top, &zoom)) { - PdfPage *page = NULL; - try { - page = self->doc->GetPage(pagenum - 1); - } catch(const PdfError &err) { - (void)err; - PyErr_Format(PyExc_ValueError, "No page number %d in the PDF file of %d pages", pagenum, self->doc->GetPageCount()); - return ; + const PdfPage *page = get_page(self->doc, pagenum - 1); + if (page == NULL) { + PyErr_Format(PyExc_ValueError, "No page number %d in the PDF file of %d pages", pagenum, self->doc->GetPages().GetCount()); + return; } - if (page) { - PdfDestination dest(page, left, top, zoom); link.RemoveKey("A"); + PdfDestination dest(*page, left, top, zoom); dest.AddToDictionary(link); - } } } - } static PyObject * @@ -504,8 +488,8 @@ PDFDoc_alter_links(PDFDoc *self, PyObject *args) { bool mark_links = PyObject_IsTrue(py_mark_links); try { PdfArray border, link_color; - border.push_back((PoDoFo::pdf_int64)16); border.push_back((PoDoFo::pdf_int64)16); border.push_back((PoDoFo::pdf_int64)1); - link_color.push_back(1.); link_color.push_back(0.); link_color.push_back(0.); + border.Add(int64_t(16)); border.Add(int64_t(16)); border.Add(int64_t(1)); + link_color.Add(1.); link_color.Add(0.); link_color.Add(0.); 
std::vector links; for (auto &it : self->doc->GetObjects()) { if(it->IsDictionary()) { @@ -516,7 +500,7 @@ PDFDoc_alter_links(PDFDoc *self, PyObject *args) { if (dictionary_has_key_name(A, PdfName::KeyType, "Action") && dictionary_has_key_name(A, "S", "URI")) { PdfObject *uo = A.GetKey("URI"); if (uo && uo->IsString()) { - links.push_back(it->Reference()); + links.push_back(it->GetReference()); } } } @@ -547,153 +531,137 @@ PDFDoc_alter_links(PDFDoc *self, PyObject *args) { static PyObject * PDFDoc_pages_getter(PDFDoc *self, void *closure) { - int pages = self->doc->GetPageCount(); - PyObject *ans = PyLong_FromLong(static_cast(pages)); + unsigned long pages = self->doc->GetPages().GetCount(); + PyObject *ans = PyLong_FromUnsignedLong(pages); if (ans != NULL) Py_INCREF(ans); return ans; } static PyObject * PDFDoc_version_getter(PDFDoc *self, void *closure) { - int version; + PdfVersion version; try { - version = self->doc->GetPdfVersion(); + version = self->doc->GetMetadata().GetPdfVersion(); } catch(const PdfError & err) { podofo_set_exception(err); return NULL; } switch(version) { - case ePdfVersion_1_0: - return Py_BuildValue("s", "1.0"); - case ePdfVersion_1_1: - return Py_BuildValue("s", "1.1"); - case ePdfVersion_1_2: - return Py_BuildValue("s", "1.2"); - case ePdfVersion_1_3: - return Py_BuildValue("s", "1.3"); - case ePdfVersion_1_4: - return Py_BuildValue("s", "1.4"); - case ePdfVersion_1_5: - return Py_BuildValue("s", "1.5"); - case ePdfVersion_1_6: - return Py_BuildValue("s", "1.6"); - case ePdfVersion_1_7: - return Py_BuildValue("s", "1.7"); - default: - return Py_BuildValue(""); + case PdfVersion::V1_0: + return PyUnicode_FromString("1.0"); + case PdfVersion::V1_1: + return PyUnicode_FromString("1.1"); + case PdfVersion::V1_2: + return PyUnicode_FromString("1.2"); + case PdfVersion::V1_3: + return PyUnicode_FromString("1.3"); + case PdfVersion::V1_4: + return PyUnicode_FromString("1.4"); + case PdfVersion::V1_5: + return PyUnicode_FromString("1.5"); + 
case PdfVersion::V1_6: + return PyUnicode_FromString("1.6"); + case PdfVersion::V1_7: + return PyUnicode_FromString("1.7"); + case PdfVersion::V2_0: + return PyUnicode_FromString("2.0"); + case PdfVersion::Unknown: + return PyUnicode_FromString(""); } - return Py_BuildValue(""); + return PyUnicode_FromString(""); } - -static PyObject * -PDFDoc_getter(PDFDoc *self, int field) -{ - PdfString s; - PdfInfo *info = self->doc->GetInfo(); - if (info == NULL) { - PyErr_SetString(PyExc_Exception, "You must first load a PDF Document"); - return NULL; - } - switch (field) { - case 0: - s = info->GetTitle(); break; - case 1: - s = info->GetAuthor(); break; - case 2: - s = info->GetSubject(); break; - case 3: - s = info->GetKeywords(); break; - case 4: - s = info->GetCreator(); break; - case 5: - s = info->GetProducer(); break; - default: - PyErr_SetString(PyExc_Exception, "Bad field"); - return NULL; - } - - return podofo_convert_pdfstring(s); -} - -static int -PDFDoc_setter(PDFDoc *self, PyObject *val, int field) { - if (val == NULL || !PyUnicode_Check(val)) { - PyErr_SetString(PyExc_ValueError, "Must use unicode objects to set metadata"); - return -1; - } - PdfInfo *info = self->doc->GetInfo(); - if (!info) { PyErr_SetString(Error, "You must first load a PDF Document"); return -1; } - const PdfString s = podofo_convert_pystring(val); - - switch (field) { - case 0: - info->SetTitle(s); break; - case 1: - info->SetAuthor(s); break; - case 2: - info->SetSubject(s); break; - case 3: - info->SetKeywords(s); break; - case 4: - info->SetCreator(s); break; - case 5: - info->SetProducer(s); break; - default: - PyErr_SetString(Error, "Bad field"); - return -1; - } - - return 0; +static inline PyObject* +string_metadata_getter(const nullable& t) { + if (t.has_value()) return podofo_convert_pdfstring(t.value()); + return PyUnicode_FromString(""); } static PyObject * PDFDoc_title_getter(PDFDoc *self, void *closure) { - return PDFDoc_getter(self, 0); + return 
string_metadata_getter(self->doc->GetMetadata().GetTitle()); } + static PyObject * PDFDoc_author_getter(PDFDoc *self, void *closure) { - return PDFDoc_getter(self, 1); + return string_metadata_getter(self->doc->GetMetadata().GetAuthor()); } + static PyObject * PDFDoc_subject_getter(PDFDoc *self, void *closure) { - return PDFDoc_getter(self, 2); + return string_metadata_getter(self->doc->GetMetadata().GetSubject()); } + static PyObject * PDFDoc_keywords_getter(PDFDoc *self, void *closure) { - return PDFDoc_getter(self, 3); + auto kw = self->doc->GetMetadata().GetKeywords(); + pyunique_ptr ans(PyTuple_New(kw.size())); + if (!ans) return NULL; + for (size_t i = 0; i < kw.size(); i++) { + pyunique_ptr t(PyUnicode_FromString(kw[i].c_str())); + if (!t) return NULL; + PyTuple_SET_ITEM(ans.get(), i, t.release()); + } + return ans.release(); } + static PyObject * PDFDoc_creator_getter(PDFDoc *self, void *closure) { - return PDFDoc_getter(self, 4); + return string_metadata_getter(self->doc->GetMetadata().GetCreator()); } + static PyObject * PDFDoc_producer_getter(PDFDoc *self, void *closure) { - return PDFDoc_getter(self, 5); + return string_metadata_getter(self->doc->GetMetadata().GetProducer()); } + static int PDFDoc_title_setter(PDFDoc *self, PyObject *val, void *closure) { - return PDFDoc_setter(self, val, 0); + /* val is NULL when the attribute is deleted */ if (val == NULL || !PyUnicode_Check(val)) { PyErr_SetString(PyExc_TypeError, "Must use unicode to set metadata"); return -1; } + self->doc->GetMetadata().SetTitle(podofo_convert_pystring(val)); + return 0; } + static int PDFDoc_author_setter(PDFDoc *self, PyObject *val, void *closure) { - return PDFDoc_setter(self, val, 1); + if (val == NULL || !PyUnicode_Check(val)) { PyErr_SetString(PyExc_TypeError, "Must use unicode to set metadata"); return -1; } + self->doc->GetMetadata().SetAuthor(podofo_convert_pystring(val)); + return 0; } + static int PDFDoc_subject_setter(PDFDoc *self, PyObject *val, void *closure) { - return PDFDoc_setter(self, val, 2); + if (val == NULL || !PyUnicode_Check(val)) { 
PyErr_SetString(PyExc_TypeError, "Must use unicode to set metadata"); return -1; } + self->doc->GetMetadata().SetSubject(podofo_convert_pystring(val)); + return 0; } + static int PDFDoc_keywords_setter(PDFDoc *self, PyObject *val, void *closure) { - return PDFDoc_setter(self, val, 3); + pyunique_ptr f(PySequence_Fast(val, "Need a sequence to set keywords")); + if (!f) return -1; + std::vector keywords; keywords.reserve(PySequence_Fast_GET_SIZE(f.get())); /* reserve only: a sized constructor would leave N empty keywords ahead of the real ones */ + for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(f.get()); i++) { + PyObject *x = PySequence_Fast_GET_ITEM(f.get(), i); + if (!PyUnicode_Check(x)) { PyErr_SetString(PyExc_TypeError, "keywords sequence must contain only unicode objects"); return -1; } + keywords.emplace_back(podofo_convert_pystring(x)); + } + self->doc->GetMetadata().SetKeywords(keywords); + return 0; } + static int PDFDoc_creator_setter(PDFDoc *self, PyObject *val, void *closure) { - return PDFDoc_setter(self, val, 4); + /* val is NULL when the attribute is deleted */ if (val == NULL || !PyUnicode_Check(val)) { PyErr_SetString(PyExc_TypeError, "Must use unicode to set metadata"); return -1; } + self->doc->GetMetadata().SetCreator(podofo_convert_pystring(val)); + return 0; } + static int PDFDoc_producer_setter(PDFDoc *self, PyObject *val, void *closure) { - return PDFDoc_setter(self, val, 5); + if (val == NULL || !PyUnicode_Check(val)) { PyErr_SetString(PyExc_TypeError, "Must use unicode to set metadata"); return -1; } + self->doc->GetMetadata().SetProducer(podofo_convert_pystring(val)); + return 0; } static PyGetSetDef PDFDoc_getsetters[] = { diff --git a/src/calibre/utils/podofo/fonts.cpp b/src/calibre/utils/podofo/fonts.cpp index 6c18dca914..ba673f4d92 100644 --- a/src/calibre/utils/podofo/fonts.cpp +++ b/src/calibre/utils/podofo/fonts.cpp @@ -7,6 +7,7 @@ #include "global.h" #include +#include #include using namespace pdf; @@ -18,47 +19,61 @@ ref_as_tuple(const PdfReference &ref) { } static inline PdfObject* -get_font_file(const PdfObject *descriptor) { - PdfObject *ff = descriptor->GetIndirectKey("FontFile"); - if (!ff) ff = 
descriptor->GetIndirectKey("FontFile2"); - if (!ff) ff = descriptor->GetIndirectKey("FontFile3"); +get_font_file(PdfObject *descriptor) { + PdfDictionary *dict; + PdfObject *ff = NULL; + if (descriptor->TryGetDictionary(dict)) { + ff = dict->FindKey("FontFile"); + if (!ff) ff = dict->FindKey("FontFile2"); + if (!ff) ff = dict->FindKey("FontFile3"); + } return ff; } -static inline void -remove_font(PdfVecObjects &objects, PdfObject *font) { - PdfObject *descriptor = font->GetIndirectKey("FontDescriptor"); - if (descriptor) { - const PdfObject *ff = get_font_file(descriptor); - if (ff) delete objects.RemoveObject(ff->Reference()); - delete objects.RemoveObject(descriptor->Reference()); +static inline const PdfObject* +get_font_file(const PdfObject *descriptor) { + const PdfDictionary *dict; + const PdfObject *ff = NULL; + if (descriptor->TryGetDictionary(dict)) { + ff = dict->FindKey("FontFile"); + if (!ff) ff = dict->FindKey("FontFile2"); + if (!ff) ff = dict->FindKey("FontFile3"); } - delete objects.RemoveObject(font->Reference()); + return ff; } -static inline uint64_t -ref_as_integer(pdf_objnum num, pdf_gennum gen) { - return static_cast(num) | (static_cast(gen) << 32); -} -static inline uint64_t -ref_as_integer(const PdfReference &ref) { return ref_as_integer(ref.ObjectNumber(), ref.GenerationNumber()); } +static inline void +remove_font(PdfIndirectObjectList &objects, PdfObject *font) { + PdfDictionary *dict; + if (font->TryGetDictionary(dict)) { + PdfObject *descriptor = dict->FindKey("FontDescriptor"); + if (descriptor) { + const PdfObject *ff = get_font_file(descriptor); + if (ff) objects.RemoveObject(ff->GetReference()).reset(); + objects.RemoveObject(descriptor->GetReference()).reset(); + } + } + objects.RemoveObject(font->GetReference()).reset(); +} static void -used_fonts_in_canvas(PdfCanvas *canvas, unordered_reference_set &ans) { - PdfContentsTokenizer tokenizer(canvas); +used_fonts_in_canvas(const PdfCanvas &canvas, unordered_reference_set &ans) { + 
PdfPostScriptTokenizer tokenizer; + PdfCanvasInputDevice input(canvas); bool in_text_block = false; - const char* token = NULL; - EPdfContentsType contents_type; + PdfPostScriptTokenType contents_type; PdfVariant var; std::stack stack; - const PdfDictionary &resources = canvas->GetResources()->GetDictionary(); + const PdfDictionary &resources = canvas.GetResources()->GetDictionary(); if (!resources.HasKey("Font")) return; const PdfDictionary &fonts_dict = resources.GetKey("Font")->GetDictionary(); + std::string_view keyword; - while (tokenizer.ReadNext(contents_type, token, var)) { - if (contents_type == ePdfContentsType_Variant) stack.push(var); - if (contents_type != ePdfContentsType_Keyword) continue; + while (tokenizer.TryReadNext(input, contents_type, keyword, var)) { + if (contents_type == PdfPostScriptTokenType::Variant) stack.push(var); + if (contents_type != PdfPostScriptTokenType::Keyword) continue; + const char *token = keyword.data(); if (strcmp(token, "BT") == 0) { in_text_block = true; continue; @@ -88,10 +103,10 @@ convert_w_array(const PdfArray &w) { pyunique_ptr item; if ((*it).IsArray()) { item.reset(convert_w_array((*it).GetArray())); + } else if ((*it).IsRealStrict()) { + item.reset(PyFloat_FromDouble((*it).GetReal())); } else if ((*it).IsNumber()) { item.reset(PyLong_FromLongLong((long long)(*it).GetNumber())); - } else if ((*it).IsReal()) { - item.reset(PyFloat_FromDouble((*it).GetReal())); } else PyErr_SetString(PyExc_ValueError, "Unknown datatype in w array"); if (!item) return NULL; if (PyList_Append(ans.get(), item.get()) != 0) return NULL; @@ -105,16 +120,16 @@ list_fonts(PDFDoc *self, PyObject *args) { if (!PyArg_ParseTuple(args, "|i", &get_font_data)) return NULL; pyunique_ptr ans(PyList_New(0)); if (!ans) return NULL; - const PdfVecObjects &objects = self->doc->GetObjects(); + const PdfIndirectObjectList &objects = self->doc->GetObjects(); for (auto &it : objects) { if (it->IsDictionary()) { const PdfDictionary &dict = 
it->GetDictionary(); if (dictionary_has_key_name(dict, PdfName::KeyType, "Font") && dict.HasKey("BaseFont")) { - const std::string &name = dict.GetKey("BaseFont")->GetName().GetName(); - const std::string &subtype = dict.GetKey(PdfName::KeySubtype)->GetName().GetName(); - const PdfReference &ref = it->Reference(); + const std::string &name = dict.GetKey("BaseFont")->GetName().GetString(); + const std::string &subtype = dict.GetKey(PdfName::KeySubtype)->GetName().GetString(); + const PdfReference &ref = it->GetReference(); unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber(); - const PdfObject *descriptor = it->GetIndirectKey("FontDescriptor"); + const PdfObject *descriptor = dict.FindKey("FontDescriptor"); pyunique_ptr descendant_font, stream_ref, encoding, w, w2; PyBytesOutputStream stream_data, to_unicode, cid_gid_map; if (dict.HasKey("W")) { @@ -126,21 +141,21 @@ list_fonts(PDFDoc *self, PyObject *args) { if (!w2) return NULL; } if (dict.HasKey("Encoding") && dict.GetKey("Encoding")->IsName()) { - encoding.reset(PyUnicode_FromString(dict.GetKey("Encoding")->GetName().GetName().c_str())); + encoding.reset(PyUnicode_FromString(dict.GetKey("Encoding")->GetName().GetString().c_str())); if (!encoding) return NULL; } - if (dict.HasKey("CIDToGIDMap") && (!dict.GetKey("CIDToGIDMap")->IsName() || strcmp(dict.GetKey("CIDToGIDMap")->GetName().GetName().c_str(), "Identity") != 0)) { - const PdfStream *stream = dict.GetKey("CIDToGIDMap")->GetStream(); - if (stream) stream->GetFilteredCopy(&cid_gid_map); + if (dict.HasKey("CIDToGIDMap") && (!dict.GetKey("CIDToGIDMap")->IsName() || strcmp(dict.GetKey("CIDToGIDMap")->GetName().GetString().c_str(), "Identity") != 0)) { + const PdfObjectStream *stream = dict.GetKey("CIDToGIDMap")->GetStream(); + if (stream) stream->CopyToSafe(cid_gid_map); } if (descriptor) { const PdfObject *ff = get_font_file(descriptor); if (ff) { - stream_ref.reset(ref_as_tuple(ff->Reference())); + 
stream_ref.reset(ref_as_tuple(ff->GetReference())); if (!stream_ref) return NULL; - const PdfStream *stream = ff->GetStream(); + const PdfObjectStream *stream = ff->GetStream(); if (stream && get_font_data) { - stream->GetFilteredCopy(&stream_data); + stream->CopyToSafe(stream_data); } } } else if (dict.HasKey("DescendantFonts")) { @@ -151,8 +166,8 @@ list_fonts(PDFDoc *self, PyObject *args) { const PdfReference &uref = dict.GetKey("ToUnicode")->GetReference(); PdfObject *t = objects.GetObject(uref); if (t) { - PdfStream *stream = t->GetStream(); - if (stream) stream->GetFilteredCopy(&to_unicode); + PdfObjectStream *stream = t->GetStream(); + if (stream) stream->CopyToSafe(to_unicode); } } } @@ -186,18 +201,18 @@ remove_unused_fonts(PDFDoc *self, PyObject *args) { unsigned long count = 0; unordered_reference_set used_fonts; // Look in Pages - for (int i = 0; i < self->doc->GetPageCount(); i++) { - PdfPage *page = self->doc->GetPage(i); - if (page) used_fonts_in_canvas(page, used_fonts); + PdfPageCollection *pages = &self->doc->GetPages(); + for (unsigned i = 0; i < pages->GetCount(); i++) { + used_fonts_in_canvas(self->doc->GetPages().GetPageAt(i), used_fonts); } // Look in XObjects - PdfVecObjects &objects = self->doc->GetObjects(); - for (auto &k : objects) { + PdfIndirectObjectList &objects = self->doc->GetObjects(); + for (PdfObject *k : objects) { if (k->IsDictionary()) { const PdfDictionary &dict = k->GetDictionary(); if (dictionary_has_key_name(dict, PdfName::KeyType, "XObject") && dictionary_has_key_name(dict, PdfName::KeySubtype, "Form")) { - PdfXObject xo(k); - used_fonts_in_canvas(&xo, used_fonts); + std::unique_ptr xo; + if (PdfXObject::TryCreateFromObject(*k, xo)) used_fonts_in_canvas(*xo, used_fonts); } } } @@ -208,14 +223,14 @@ remove_unused_fonts(PDFDoc *self, PyObject *args) { if (k->IsDictionary()) { const PdfDictionary &dict = k->GetDictionary(); if (dictionary_has_key_name(dict, PdfName::KeyType, "Font")) { - const std::string &font_type = 
dict.GetKey(PdfName::KeySubtype)->GetName().GetName(); + const std::string &font_type = dict.GetKey(PdfName::KeySubtype)->GetName().GetString(); if (font_type == "Type0") { - all_fonts.insert(k->Reference()); + all_fonts.insert(k->GetReference()); } else if (font_type == "Type3") { - all_fonts.insert(k->Reference()); - type3_fonts.insert(k->Reference()); - for (auto &x : dict.GetKey("CharProcs")->GetDictionary().GetKeys()) { - const PdfReference &ref = x.second->GetReference(); + all_fonts.insert(k->GetReference()); + type3_fonts.insert(k->GetReference()); + for (auto &x : dict.GetKey("CharProcs")->GetDictionary()) { + const PdfReference &ref = x.second.GetReference(); if (charprocs_usage.find(ref) == charprocs_usage.end()) charprocs_usage[ref] = 1; else charprocs_usage[ref] += 1; } @@ -229,16 +244,18 @@ remove_unused_fonts(PDFDoc *self, PyObject *args) { PdfObject *font = objects.GetObject(ref); if (font) { count++; + PdfDictionary *dict; + if (font->TryGetDictionary(dict)) { if (type3_fonts.find(ref) != type3_fonts.end()) { - for (auto &x : font->GetIndirectKey("CharProcs")->GetDictionary().GetKeys()) { - charprocs_usage[x.second->GetReference()] -= 1; + for (auto &x : dict->FindKey("CharProcs")->GetDictionary()) { + charprocs_usage[x.second.GetReference()] -= 1; } } else { - for (auto &x : font->GetIndirectKey("DescendantFonts")->GetArray()) { + for (auto &x : dict->FindKey("DescendantFonts")->GetArray()) { PdfObject *dfont = objects.GetObject(x.GetReference()); if (dfont) remove_font(objects, dfont); } - } + }} remove_font(objects, font); } } @@ -246,7 +263,7 @@ remove_unused_fonts(PDFDoc *self, PyObject *args) { for (auto &x : charprocs_usage) { if (x.second == 0u) { - delete objects.RemoveObject(x.first); + objects.RemoveObject(x.first).reset(); } } @@ -258,14 +275,16 @@ replace_font_data(PDFDoc *self, PyObject *args) { const char *data; Py_ssize_t sz; unsigned long num, gen; if (!PyArg_ParseTuple(args, "y#kk", &data, &sz, &num, &gen)) return NULL; - const 
PdfVecObjects &objects = self->doc->GetObjects(); - PdfObject *font = objects.GetObject(PdfReference(num, static_cast(gen))); + const PdfIndirectObjectList &objects = self->doc->GetObjects(); + PdfObject *font = objects.GetObject(PdfReference(num, static_cast(gen))); if (!font) { PyErr_SetString(PyExc_KeyError, "No font with the specified reference found"); return NULL; } - const PdfObject *descriptor = font->GetIndirectKey("FontDescriptor"); + PdfDictionary *dict; + if (!font->TryGetDictionary(dict)) { PyErr_SetString(PyExc_ValueError, "Font does not have a descriptor"); return NULL; } + PdfObject *descriptor = dict->FindKey("FontDescriptor"); if (!descriptor) { PyErr_SetString(PyExc_ValueError, "Font does not have a descriptor"); return NULL; } PdfObject *ff = get_font_file(descriptor); - PdfStream *stream = ff->GetStream(); - stream->Set(data, sz); + PdfObjectStream *stream = ff->GetStream(); + stream->SetData(bufferview(data, sz)); Py_RETURN_NONE; } @@ -274,60 +293,61 @@ merge_fonts(PDFDoc *self, PyObject *args) { const char *data; Py_ssize_t sz; PyObject *references; if (!PyArg_ParseTuple(args, "y#O!", &data, &sz, &PyTuple_Type, &references)) return NULL; - PdfVecObjects &objects = self->doc->GetObjects(); + PdfIndirectObjectList &objects = self->doc->GetObjects(); PdfObject *font_file = NULL; + PdfDictionary *dict; for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(references); i++) { unsigned long num, gen; if (!PyArg_ParseTuple(PyTuple_GET_ITEM(references, i), "kk", &num, &gen)) return NULL; - PdfObject *font = objects.GetObject(PdfReference(num, static_cast(gen))); + PdfObject *font = objects.GetObject(PdfReference(num, static_cast(gen))); if (!font) { PyErr_SetString(PyExc_KeyError, "No font with the specified reference found"); return NULL; } - PdfObject *dobj = font->GetIndirectKey("FontDescriptor"); + + PdfObject *dobj = NULL; + if (font->TryGetDictionary(dict)) { dobj = dict->FindKey("FontDescriptor"); } if (!dobj) { PyErr_SetString(PyExc_ValueError, "Font 
does not have a descriptor"); return NULL; } if (!dobj->IsDictionary()) { PyErr_SetString(PyExc_ValueError, "Font does not have a dictionary descriptor"); return NULL; } PdfDictionary &descriptor = dobj->GetDictionary(); const char *font_file_key = NULL; - if (descriptor.HasKey("FontFile")) font_file_key = "FontFile"; - else if (descriptor.HasKey("FontFile2")) font_file_key = "FontFile2"; - else if (descriptor.HasKey("FontFile3")) font_file_key = "FontFile3"; - else { PyErr_SetString(PyExc_ValueError, "Font descriptor does not have file data"); return NULL; } - PdfObject *ff = dobj->GetIndirectKey(font_file_key); + PdfObject *ff = NULL; + if ((ff = descriptor.FindKey("FontFile"))) { font_file_key = "FontFile"; } + else if ((ff = descriptor.FindKey("FontFile2"))) { font_file_key = "FontFile2"; } + else if ((ff = descriptor.FindKey("FontFile3"))) { font_file_key = "FontFile3"; } + else { PyErr_SetString(PyExc_ValueError, "Font descriptor does not have file data"); return NULL; } if (i == 0) { font_file = ff; - PdfStream *stream = ff->GetStream(); - stream->Set(data, sz); + PdfObjectStream *stream = ff->GetStream(); + stream->SetData(bufferview(data, sz)); } else { - delete objects.RemoveObject(ff->Reference()); - descriptor.AddKey(font_file_key, font_file->Reference()); + objects.RemoveObject(ff->GetReference()).reset(); + descriptor.AddKey(font_file_key, font_file->GetReference()); } } Py_RETURN_NONE; } class CharProc { - char *buf; pdf_long sz; + charbuff buf; PdfReference ref; CharProc( const CharProc & ) ; CharProc & operator=( const CharProc & ) ; public: - CharProc(const PdfReference &reference, const PdfObject *o) : buf(NULL), sz(0), ref(reference) { - const PdfStream *stream = o->GetStream(); - stream->GetFilteredCopy(&buf, &sz); + CharProc(const PdfReference &reference, const PdfObject *o) : buf(), ref(reference) { + const PdfObjectStream *stream = o->GetStream(); + buf = stream->GetCopySafe(); } CharProc(CharProc &&other) noexcept : - buf(other.buf), 
sz(other.sz), ref(other.ref) { - other.buf = NULL; + buf(std::move(other.buf)), ref(other.ref) { + other.buf = charbuff(); } CharProc& operator=(CharProc &&other) noexcept { - if (buf) podofo_free(buf); - buf = other.buf; other.buf = NULL; sz = other.sz; ref = other.ref; + buf = std::move(other.buf); other.buf = charbuff(); ref = other.ref; return *this; } - ~CharProc() noexcept { if (buf) podofo_free(buf); buf = NULL; } bool operator==(const CharProc &other) const noexcept { - return other.sz == sz && memcmp(buf, other.buf, sz) == 0; + return buf.size() == other.buf.size() && memcmp(buf.data(), other.buf.data(), buf.size()) == 0; } - std::size_t hash() const noexcept { return sz; } + std::size_t hash() const noexcept { return buf.size(); } const PdfReference& reference() const noexcept { return ref; } }; @@ -344,16 +364,16 @@ dedup_type3_fonts(PDFDoc *self, PyObject *args) { unordered_reference_set all_type3_fonts; char_proc_reference_map cp_map; - PdfVecObjects &objects = self->doc->GetObjects(); + PdfIndirectObjectList &objects = self->doc->GetObjects(); for (auto &k : objects) { if (!k->IsDictionary()) continue; const PdfDictionary &dict = k->GetDictionary(); if (dictionary_has_key_name(dict, PdfName::KeyType, "Font")) { - const std::string &font_type = dict.GetKey(PdfName::KeySubtype)->GetName().GetName(); + const std::string &font_type = dict.GetKey(PdfName::KeySubtype)->GetName().GetString(); if (font_type == "Type3") { - all_type3_fonts.insert(k->Reference()); - for (auto &x : dict.GetKey("CharProcs")->GetDictionary().GetKeys()) { - const PdfReference &ref = x.second->GetReference(); + all_type3_fonts.insert(k->GetReference()); + for (auto &x : dict.GetKey("CharProcs")->GetDictionary()) { + const PdfReference &ref = x.second.GetReference(); const PdfObject *cpobj = objects.GetObject(ref); if (!cpobj || !cpobj->HasStream()) continue; CharProc cp(ref, cpobj); @@ -373,7 +393,7 @@ dedup_type3_fonts(PDFDoc *self, PyObject *args) { for (auto &ref : x.second) { if 
(ref != canonical_ref) { ref_map[ref] = x.first.reference(); - delete objects.RemoveObject(ref); + objects.RemoveObject(ref).reset(); count++; } } @@ -382,11 +402,13 @@ dedup_type3_fonts(PDFDoc *self, PyObject *args) { if (count > 0) { for (auto &ref : all_type3_fonts) { PdfObject *font = objects.GetObject(ref); - PdfDictionary dict = font->GetIndirectKey("CharProcs")->GetDictionary(); + PdfDictionary *d; + if (!font->TryGetDictionary(d)) continue; + PdfDictionary dict = d->FindKey("CharProcs")->GetDictionary(); PdfDictionary new_dict = PdfDictionary(dict); bool changed = false; - for (auto &k : dict.GetKeys()) { - auto it = ref_map.find(k.second->GetReference()); + for (auto &k : dict) { + auto it = ref_map.find(k.second.GetReference()); if (it != ref_map.end()) { new_dict.AddKey(k.first, (*it).second); changed = true; diff --git a/src/calibre/utils/podofo/global.h b/src/calibre/utils/podofo/global.h index b723ef8504..77905af8a2 100644 --- a/src/calibre/utils/podofo/global.h +++ b/src/calibre/utils/podofo/global.h @@ -15,6 +15,7 @@ #include #include using namespace PoDoFo; +using namespace std::literals; namespace pdf { @@ -52,7 +53,7 @@ struct PyObjectDeleter { // unique_ptr that uses Py_XDECREF as the destructor function. 
typedef std::unique_ptr pyunique_ptr; -class PyBytesOutputStream : public PdfOutputStream { +class PyBytesOutputStream : public OutputStream { private: pyunique_ptr bytes; PyBytesOutputStream( const PyBytesOutputStream & ) ; @@ -62,18 +63,18 @@ class PyBytesOutputStream : public PdfOutputStream { void Close() {} operator bool() const { return bool(bytes); } PyObject* get() const { return bytes.get(); } - pdf_long Write(const char *buf, const pdf_long sz){ + protected: + void writeBuffer(const char *buf, size_t sz){ if (!bytes) { bytes.reset(PyBytes_FromStringAndSize(buf, sz)); - if (!bytes) throw PdfError(ePdfError_OutOfMemory, __FILE__, __LINE__, NULL); + if (!bytes) throw PdfError(PdfErrorCode::OutOfMemory, __FILE__, __LINE__, NULL); } else { size_t old_sz = PyBytes_GET_SIZE(bytes.get()); PyObject *old = bytes.release(); - if (_PyBytes_Resize(&old, old_sz + sz) != 0) throw PdfError(ePdfError_OutOfMemory, __FILE__, __LINE__, NULL); + if (_PyBytes_Resize(&old, old_sz + sz) != 0) throw PdfError(PdfErrorCode::OutOfMemory, __FILE__, __LINE__, NULL); memcpy(PyBytes_AS_STRING(old) + old_sz, buf, sz); bytes.reset(old); } - return sz; } }; @@ -82,10 +83,44 @@ template static inline bool dictionary_has_key_name(const PdfDictionary &d, T key, const char *name) { const PdfObject *val = d.GetKey(key); - if (val && val->IsName() && val->GetName().GetName() == name) return true; + if (val && val->IsName() && val->GetName().GetString() == name) return true; return false; } +static inline const PdfPage* +get_page(const PdfPageCollection &pages, const PdfReference &ref) { + try { + return &pages.GetPage(ref); + } catch(PdfError &) { } + return nullptr; +} + +static inline const PdfPage* +get_page(const PdfDocument *doc, const PdfReference &ref) { + try { + return &doc->GetPages().GetPage(ref); + } catch(PdfError &) { } + return nullptr; +} + +static inline const PdfPage* +get_page(const PdfDocument *doc, const unsigned num) { + try { + return &doc->GetPages().GetPageAt(num); + } 
catch(PdfError &) { } + return nullptr; +} + +static inline PdfPage* +get_page(PdfDocument *doc, const unsigned num) { + try { + return &doc->GetPages().GetPageAt(num); + } catch(PdfError &) { } + return nullptr; +} + + + class PdfReferenceHasher { public: size_t operator()(const PdfReference & obj) const { diff --git a/src/calibre/utils/podofo/images.cpp b/src/calibre/utils/podofo/images.cpp index 6868b2c3a7..f5d948a33d 100644 --- a/src/calibre/utils/podofo/images.cpp +++ b/src/calibre/utils/podofo/images.cpp @@ -10,39 +10,40 @@ using namespace pdf; class Image { - char *buf; pdf_long sz; - pdf_int64 width, height; + charbuff buf; + int64_t width, height; PdfReference ref; Image( const Image & ) ; Image & operator=( const Image & ) ; + bool is_valid; public: - Image(const PdfReference &reference, const PdfObject *o) : buf(NULL), sz(0), width(0), height(0), ref(reference) { - const PdfStream *stream = o->GetStream(); + Image(const PdfReference &reference, const PdfObject *o) : buf(), width(0), height(0), ref(reference) { + const PdfObjectStream *stream = o->GetStream(); try { - stream->GetFilteredCopy(&buf, &sz); + buf = stream->GetCopySafe(); + is_valid = true; } catch(...) 
{ - buf = NULL; sz = -1; + buf = charbuff(); + is_valid = false; } const PdfDictionary &dict = o->GetDictionary(); if (dict.HasKey("Width") && dict.GetKey("Width")->IsNumber()) width = dict.GetKey("Width")->GetNumber(); if (dict.HasKey("Height") && dict.GetKey("Height")->IsNumber()) height = dict.GetKey("Height")->GetNumber(); } Image(Image &&other) noexcept : - buf(other.buf), sz(other.sz), width(other.width), height(other.height), ref(other.ref) { - other.buf = NULL; + buf(std::move(other.buf)), width(other.width), height(other.height), ref(other.ref) { + other.buf = charbuff(); is_valid = other.is_valid; } Image& operator=(Image &&other) noexcept { - if (buf) podofo_free(buf); - buf = other.buf; other.buf = NULL; sz = other.sz; ref = other.ref; - width = other.width; height = other.height; + buf = std::move(other.buf); other.buf = charbuff(); ref = other.ref; + width = other.width; height = other.height; is_valid = other.is_valid; return *this; } - ~Image() noexcept { if (buf) podofo_free(buf); buf = NULL; } bool operator==(const Image &other) const noexcept { - return other.sz == sz && sz > -1 && other.width == width && other.height == height && memcmp(buf, other.buf, sz) == 0; + return other.width == width && is_valid && other.is_valid && other.height == height && other.buf == buf; } - std::size_t hash() const noexcept { return sz; } + std::size_t hash() const noexcept { return buf.size(); } const PdfReference& reference() const noexcept { return ref; } }; @@ -56,14 +57,14 @@ typedef std::unordered_map, ImageHasher> image_ static PyObject* dedup_images(PDFDoc *self, PyObject *args) { unsigned long count = 0; - PdfVecObjects &objects = self->doc->GetObjects(); + PdfIndirectObjectList &objects = self->doc->GetObjects(); image_reference_map image_map; for (auto &k : objects) { if (!k->IsDictionary()) continue; const PdfDictionary &dict = k->GetDictionary(); if (dictionary_has_key_name(dict, PdfName::KeyType, "XObject") && dictionary_has_key_name(dict, 
PdfName::KeySubtype, "Image")) { - Image img(k->Reference(), k); + Image img(k->GetReference(), k); auto it = image_map.find(img); if (it == image_map.end()) { std::vector vals; @@ -78,7 +79,7 @@ dedup_images(PDFDoc *self, PyObject *args) { for (auto &ref : x.second) { if (ref != canonical_ref) { ref_map[ref] = x.first.reference(); - delete objects.RemoveObject(ref); + objects.RemoveObject(ref).reset(); count++; } } @@ -95,11 +96,11 @@ dedup_images(PDFDoc *self, PyObject *args) { const PdfDictionary &xobject = resources.GetKey("XObject")->GetDictionary(); PdfDictionary new_xobject = PdfDictionary(xobject); bool changed = false; - for (auto &x : xobject.GetKeys()) { - if (x.second->IsReference()) { + for (const auto &x : xobject) { + if (x.second.IsReference()) { try { - const PdfReference &r = ref_map.at(x.second->GetReference()); - new_xobject.AddKey(x.first.GetName(), r); + const PdfReference &r = ref_map.at(x.second.GetReference()); + new_xobject.AddKey(x.first, r); changed = true; } catch (const std::out_of_range &err) { (void)err; continue; } } diff --git a/src/calibre/utils/podofo/impose.cpp b/src/calibre/utils/podofo/impose.cpp index 2eea6d525c..67050375d4 100644 --- a/src/calibre/utils/podofo/impose.cpp +++ b/src/calibre/utils/podofo/impose.cpp @@ -6,24 +6,19 @@ */ #include "global.h" +#include using namespace pdf; static void -impose_page(PdfMemDocument *doc, unsigned long dest_page_num, unsigned long src_page_num) { - PdfXObject *xobj = new PdfXObject(doc, src_page_num, "HeaderFooter"); - PdfPage *dest = doc->GetPage(dest_page_num); - dest->AddResource(xobj->GetIdentifier(), xobj->GetObject()->Reference(), "XObject"); - PdfStream *stream = dest->GetContents()->GetStream(); - char *buffer = NULL; pdf_long sz; - stream->GetFilteredCopy(&buffer, &sz); - stream->BeginAppend(); - stream->Append("q\n1 0 0 1 0 0 cm\n/"); - stream->Append(xobj->GetIdentifier().GetName()); - stream->Append(" Do\nQ\n"); - stream->Append(buffer, sz); - stream->EndAppend(); - 
podofo_free(buffer); +impose_page(PdfMemDocument *doc, unsigned int dest_page_num, unsigned int src_page_num) { + auto xobj = doc->CreateXObjectForm(Rect(), "HeaderFooter"); + xobj->FillFromPage(doc->GetPages().GetPageAt(src_page_num)); + auto dest = &doc->GetPages().GetPageAt(dest_page_num); + static unsigned counter = 0; + dest->GetOrCreateResources().AddResource("XObject", "Imp"s + std::to_string(++counter), xobj->GetObject()); + auto data = "q\n1 0 0 1 0 0 cm\n/"s + xobj->GetIdentifier().GetEscapedName() + " Do\nQ\n"s; + dest->GetOrCreateContents().GetStreamForAppending().SetData(data); } static PyObject* @@ -33,7 +28,8 @@ impose(PDFDoc *self, PyObject *args) { for (unsigned long i = 0; i < count; i++) { impose_page(self->doc, dest_page_num - 1 + i, src_page_num - 1 + i); } - self->doc->DeletePages(src_page_num - 1, count); + auto& pages = self->doc->GetPages(); + while (count-- && src_page_num <= pages.GetCount()) pages.RemovePageAt(src_page_num - 1); Py_RETURN_NONE; } diff --git a/src/calibre/utils/podofo/outline.cpp b/src/calibre/utils/podofo/outline.cpp index 42c6c7b810..c3de7efced 100644 --- a/src/calibre/utils/podofo/outline.cpp +++ b/src/calibre/utils/podofo/outline.cpp @@ -6,6 +6,7 @@ */ #include "global.h" +#include using namespace pdf; @@ -45,43 +46,36 @@ erase(PDFOutlineItem *self, PyObject *args) { static PyObject * create(PDFOutlineItem *self, PyObject *args) { PyObject *as_child; - PDFOutlineItem *ans; + PDFOutlineItem *ans = NULL; unsigned int num; double left = 0, top = 0, zoom = 0; - PdfPage *page; PyObject *title_buf; if (!PyArg_ParseTuple(args, "UIO|ddd", &title_buf, &num, &as_child, &left, &top, &zoom)) return NULL; ans = PyObject_New(PDFOutlineItem, &PDFOutlineItemType); - if (ans == NULL) goto error; + if (ans == NULL) return NULL; ans->doc = self->doc; + pyunique_ptr decref_ans_on_exit((PyObject*)ans); try { PdfString title = podofo_convert_pystring(title_buf); - try { - page = self->doc->GetPage(num - 1); - } catch(const PdfError &err) { 
(void)err; page = NULL; } - if (page == NULL) { PyErr_Format(PyExc_ValueError, "Invalid page number: %u", num); goto error; } - PdfDestination dest(page, left, top, zoom); + const PdfPage *page = get_page(self->doc, num - 1); + if (!page) { PyErr_Format(PyExc_ValueError, "Invalid page number: %u", num); return NULL; } + auto dest = std::make_shared(*page, left, top, zoom); if (PyObject_IsTrue(as_child)) { ans->item = self->item->CreateChild(title, dest); } else ans->item = self->item->CreateNext(title, dest); } catch (const PdfError &err) { - podofo_set_exception(err); goto error; + podofo_set_exception(err); return NULL; } catch(const std::exception & err) { - PyErr_Format(PyExc_ValueError, "An error occurred while trying to create the outline: %s", err.what()); - goto error; + PyErr_Format(PyExc_ValueError, "An error occurred while trying to create the outline: %s", err.what()); return NULL; } catch (...) { - PyErr_SetString(PyExc_Exception, "An unknown error occurred while trying to create the outline item"); - goto error; + PyErr_SetString(PyExc_Exception, "An unknown error occurred while trying to create the outline item"); return NULL; } - return (PyObject*) ans; -error: - Py_XDECREF(ans); - return NULL; + return (PyObject*) decref_ans_on_exit.release(); } static PyMethodDef methods[] = { diff --git a/src/calibre/utils/podofo/outlines.cpp b/src/calibre/utils/podofo/outlines.cpp index 674cf5d432..c8ef0999e0 100644 --- a/src/calibre/utils/podofo/outlines.cpp +++ b/src/calibre/utils/podofo/outlines.cpp @@ -15,43 +15,37 @@ create_outline(PDFDoc *self, PyObject *args) { PyObject *title_buf; unsigned int pagenum; double left = 0, top = 0, zoom = 0; - PdfPage *page; if (!PyArg_ParseTuple(args, "UI|ddd", &title_buf, &pagenum, &left, &top, &zoom)) return NULL; ans = PyObject_New(PDFOutlineItem, &PDFOutlineItemType); - if (ans == NULL) goto error; + if (ans == NULL) return NULL; + pyunique_ptr decref_ans_on_exit((PyObject*)ans); try { PdfString title = 
podofo_convert_pystring(title_buf); PdfOutlines *outlines = self->doc->GetOutlines(); - if (outlines == NULL) {PyErr_NoMemory(); goto error;} + if (outlines == NULL) {PyErr_NoMemory(); return NULL;} ans->item = outlines->CreateRoot(title); - if (ans->item == NULL) {PyErr_NoMemory(); goto error;} + if (ans->item == NULL) {PyErr_NoMemory(); return NULL;} ans->doc = self->doc; - try { - page = self->doc->GetPage(pagenum - 1); - } catch (const PdfError &err) { - (void)err; - PyErr_Format(PyExc_ValueError, "Invalid page number: %u", pagenum - 1); goto error; + auto page = get_page(self->doc, pagenum -1); + if (!page) { + PyErr_Format(PyExc_ValueError, "Invalid page number: %u", pagenum - 1); return NULL; } - PdfDestination dest(page, left, top, zoom); + auto dest = std::make_shared(*page, left, top, zoom); ans->item->SetDestination(dest); } catch(const PdfError & err) { - podofo_set_exception(err); goto error; + podofo_set_exception(err); return NULL; } catch(const std::exception & err) { PyErr_Format(PyExc_ValueError, "An error occurred while trying to create the outline: %s", err.what()); - goto error; + return NULL; } catch (...) { PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to create the outline"); - goto error; + return NULL; } - return (PyObject*)ans; -error: - Py_XDECREF(ans); - return NULL; - + return decref_ans_on_exit.release(); } static PyObject* @@ -71,9 +65,9 @@ convert_outline(PDFDoc *self, PyObject *parent, PdfOutlineItem *item) { pyunique_ptr node(create_outline_node()); if (!node) return; if (PyDict_SetItemString(node.get(), "title", title.get()) != 0) return; - PdfDestination* dest = item->GetDestination(self->doc); + auto dest = item->GetDestination(); if (dest) { - PdfPage *page = dest->GetPage(self->doc); + PdfPage *page = dest->GetPage(); long pnum = page ? 
page->GetPageNumber() : -1; pyunique_ptr d(Py_BuildValue("{sl sd sd sd}", "page", pnum, "top", dest->GetTop(), "left", dest->GetLeft(), "zoom", dest->GetZoom())); if (!d) return; @@ -95,7 +89,7 @@ convert_outline(PDFDoc *self, PyObject *parent, PdfOutlineItem *item) { static PyObject * get_outline(PDFDoc *self, PyObject *args) { - PdfOutlines *root = self->doc->GetOutlines(PoDoFo::ePdfDontCreateObject); + PdfOutlines *root = self->doc->GetOutlines(); if (!root || !root->First()) Py_RETURN_NONE; PyObject *ans = create_outline_node(); if (!ans) return NULL; diff --git a/src/calibre/utils/podofo/output.cpp b/src/calibre/utils/podofo/output.cpp index b464833b84..492bee16ef 100644 --- a/src/calibre/utils/podofo/output.cpp +++ b/src/calibre/utils/podofo/output.cpp @@ -10,11 +10,12 @@ using namespace PoDoFo; #define NUKE(x) { Py_XDECREF(x); x = NULL; } +#define PODOFO_RAISE_ERROR(code) throw ::PoDoFo::PdfError(code, __FILE__, __LINE__) class pyerr : public std::exception { }; -class OutputDevice : public PdfOutputDevice { +class MyOutputDevice : public OutputStreamDevice { private: PyObject *tell_func; @@ -26,12 +27,13 @@ class OutputDevice : public PdfOutputDevice { void update_written() { size_t pos; - pos = Tell(); + pos = GetPosition(); if (pos > written) written = pos; } public: - OutputDevice(PyObject *file) : tell_func(0), seek_func(0), read_func(0), write_func(0), flush_func(0), written(0) { + MyOutputDevice(PyObject *file) : tell_func(0), seek_func(0), read_func(0), write_func(0), flush_func(0), written(0) { + SetAccess(DeviceAccess::Write); #define GA(f, a) { if((f = PyObject_GetAttrString(file, a)) == NULL) throw pyerr(); } GA(tell_func, "tell"); GA(seek_func, "seek"); @@ -39,7 +41,7 @@ class OutputDevice : public PdfOutputDevice { GA(write_func, "write"); GA(flush_func, "flush"); } - ~OutputDevice() { + ~MyOutputDevice() { NUKE(tell_func); NUKE(seek_func); NUKE(read_func); NUKE(write_func); NUKE(flush_func); } @@ -47,7 +49,7 @@ class OutputDevice : public 
PdfOutputDevice { long PrintVLen(const char* pszFormat, va_list args) { - if( !pszFormat ) { PODOFO_RAISE_ERROR( ePdfError_InvalidHandle ); } + if( !pszFormat ) { PODOFO_RAISE_ERROR(PdfErrorCode::InvalidHandle); } #ifdef _MSC_VER return _vscprintf(pszFormat, args) + 1; @@ -60,7 +62,7 @@ class OutputDevice : public PdfOutputDevice { char *buf; int res; - if( !pszFormat ) { PODOFO_RAISE_ERROR( ePdfError_InvalidHandle ); } + if( !pszFormat ) { PODOFO_RAISE_ERROR(PdfErrorCode::InvalidHandle); } buf = new (std::nothrow) char[lBytes+1]; if (buf == NULL) { PyErr_NoMemory(); throw pyerr(); } @@ -129,7 +131,7 @@ class OutputDevice : public PdfOutputDevice { Py_DECREF(ret); } - size_t Tell() const { + size_t GetPosition() const { PyObject *ret; unsigned long ans; @@ -151,7 +153,9 @@ class OutputDevice : public PdfOutputDevice { return static_cast(ans); } - void Write(const char* pBuffer, size_t lLen) { + bool Eof() const { return false; } + + void writeBuffer(const char* pBuffer, size_t lLen) { PyObject *ret, *temp = NULL; temp = PyBytes_FromStringAndSize(pBuffer, static_cast(lLen)); @@ -177,10 +181,10 @@ class OutputDevice : public PdfOutputDevice { PyObject* pdf::write_doc(PdfMemDocument *doc, PyObject *f) { - OutputDevice d(f); + MyOutputDevice d(f); try { - doc->Write(&d); + doc->Save(d); } catch(const PdfError & err) { podofo_set_exception(err); return NULL; } catch (...) 
{ diff --git a/src/calibre/utils/podofo/podofo.cpp b/src/calibre/utils/podofo/podofo.cpp index 7769f9e1a6..4be6c410b9 100644 --- a/src/calibre/utils/podofo/podofo.cpp +++ b/src/calibre/utils/podofo/podofo.cpp @@ -10,30 +10,6 @@ using namespace PoDoFo; PyObject *pdf::Error = NULL; -class PyLogMessage : public PdfError::LogMessageCallback { - - public: - ~PyLogMessage() {} - - void LogMessage(ELogSeverity severity, const char* prefix, const char* msg, va_list & args ) { - if (severity > eLogSeverity_Warning) return; - if (prefix) - fprintf(stderr, "%s", prefix); - - vfprintf(stderr, msg, args); - } - - void LogMessage(ELogSeverity severity, const wchar_t* prefix, const wchar_t* msg, va_list & args ) { - if (severity > eLogSeverity_Warning) return; - if (prefix) - fwprintf(stderr, prefix); - - vfwprintf(stderr, msg, args); - } -}; - -PyLogMessage log_message; - static char podofo_doc[] = "Wrapper for the PoDoFo PDF library"; static int @@ -45,9 +21,6 @@ exec_module(PyObject *m) { if (pdf::Error == NULL) return -1; PyModule_AddObject(m, "Error", pdf::Error); - PdfError::SetLogMessageCallback((PdfError::LogMessageCallback*)&log_message); - PdfError::EnableDebug(false); - Py_INCREF(&pdf::PDFDocType); PyModule_AddObject(m, "PDFDoc", (PyObject *)&pdf::PDFDocType); return 0; diff --git a/src/calibre/utils/podofo/utils.cpp b/src/calibre/utils/podofo/utils.cpp index 1f706b6386..8db22fc29f 100644 --- a/src/calibre/utils/podofo/utils.cpp +++ b/src/calibre/utils/podofo/utils.cpp @@ -6,29 +6,28 @@ */ #include "global.h" +#include using namespace pdf; void pdf::podofo_set_exception(const PdfError &err) { - const char *msg = PdfError::ErrorMessage(err.GetError()); - if (msg == NULL) msg = err.what(); + const char *msg = err.what(); std::stringstream stream; stream << msg << "\n"; - const TDequeErrorInfo &s = err.GetCallstack(); - for (TDequeErrorInfo::const_iterator it = s.begin(); it != s.end(); it++) { - const PdfErrorInfo &info = (*it); - stream << "File: " << info.GetFilename() 
<< " Line: " << info.GetLine() << " " << info.GetInformation() << "\n"; + const PdErrorInfoStack &s = err.GetCallStack(); + for (auto info : s) { + stream << "File: " << info.GetFilePath() << " Line: " << info.GetLine() << " " << info.GetInformation() << "\n"; } PyErr_SetString(Error, stream.str().c_str()); } PyObject * pdf::podofo_convert_pdfstring(const PdfString &s) { - return PyUnicode_FromString(s.GetStringUtf8().c_str()); + return PyUnicode_FromString(s.GetString().c_str()); } const PdfString pdf::podofo_convert_pystring(PyObject *val) { - return PdfString(reinterpret_cast(PyUnicode_AsUTF8(val))); + return PdfString(reinterpret_cast(PyUnicode_AsUTF8(val))); }