diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp index 2177cd9766..6f1220c9b9 100644 --- a/src/calibre/utils/podofo/doc.cpp +++ b/src/calibre/utils/podofo/doc.cpp @@ -10,6 +10,7 @@ #include #include #include +#include using namespace pdf; @@ -330,23 +331,109 @@ PDFDoc_copy_page(PDFDoc *self, PyObject *args) { } // }}} // append() {{{ + +static void +fix_references(PdfObject &parent, const std::unordered_map &ref_map) { + switch(parent.GetDataType()) { + case PdfDataType::Dictionary: + for (auto& pair : parent.GetDictionary()) { + fix_references(pair.second, ref_map); + } + break; + case PdfDataType::Array: + for (auto& child : parent.GetArray()) fix_references(child, ref_map); + break; + case PdfDataType::Reference: + if (auto search = ref_map.find(parent.GetReference()); search != ref_map.end()) { + parent.SetReference(search->second->GetIndirectReference()); + } + break; + default: + break; + } +} + static PyObject * PDFDoc_append(PDFDoc *self, PyObject *args) { - PyObject *doc; - int typ; - - if (!PyArg_ParseTuple(args, "O", &doc)) return NULL; - - typ = PyObject_IsInstance(doc, (PyObject*)&PDFDocType); - if (typ == -1) return NULL; - if (typ == 0) { PyErr_SetString(PyExc_TypeError, "You must pass a PDFDoc instance to this method"); return NULL; } - PDFDoc *pdfdoc = (PDFDoc*)doc; + static const PdfName inheritableAttributes[] = { + PdfName("Resources"), + PdfName("MediaBox"), + PdfName("CropBox"), + PdfName("Rotate"), + PdfName::KeyNull + }; + PdfMemDocument *dest = self->doc; try { - self->doc->GetPages().AppendDocumentPages(*pdfdoc->doc); + for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(args); i++) { + PyObject *doc = PyTuple_GET_ITEM(args, i); + int typ = PyObject_IsInstance(doc, (PyObject*)&PDFDocType); + if (typ == -1) return NULL; + if (typ == 0) { PyErr_SetString(PyExc_TypeError, "You must pass a PDFDoc instance to this method"); return NULL; } + const PdfMemDocument *src = ((PDFDoc*)doc)->doc; + std::unordered_map ref_map; + std::unordered_map page_parent_map; + const unsigned initial_page_count = dest->GetPages().GetCount(); + // append pages first + for (unsigned i = 0; i < src->GetPages().GetCount(); i++) { + const auto& src_page = src->GetPages().GetPageAt(i); + auto& dest_page = dest->GetPages().CreatePage(src_page.GetRect()); + page_parent_map[dest_page.GetObject().GetIndirectReference()] = dest_page.GetDictionary().GetKeyAs("Parent"); + dest_page.GetObject() = src_page.GetObject(); + dest_page.GetDictionary().RemoveKey("Resource"); + dest_page.GetDictionary().RemoveKey("Parent"); + ref_map[src_page.GetObject().GetIndirectReference()] = &dest_page.GetObject(); + } + // append all remaining objects + for (const auto& obj : src->GetObjects()) { + if (obj->IsIndirect() && ref_map.find(obj->GetIndirectReference()) == ref_map.end()) { + auto copied_obj = &dest->GetObjects().CreateObject(*obj); + ref_map[obj->GetIndirectReference()] = copied_obj; + } + } + // fix references in appended objects + for (auto& elem : ref_map) fix_references(*elem.second, ref_map); + // fixup all pages + for (unsigned i = 0; i < src->GetPages().GetCount(); i++) { + auto& src_page = src->GetPages().GetPageAt(i); + auto& dest_page = dest->GetPages().GetPageAt(initial_page_count + i); + // Reset the parent to the correct value from the stored mapping + dest_page.GetDictionary().AddKey("Parent", page_parent_map[dest_page.GetObject().GetIndirectReference()]); + // Set the page contents + if (auto key = src_page.GetDictionary().GetKeyAs(PdfName::KeyContents); key.IsIndirect()) { + if (auto search = ref_map.find(key); search != ref_map.end()) { + dest_page.GetOrCreateContents().Reset(search->second); + } + } + // ensure the contents is not NULL to prevent segfaults in other code that assumes it + dest_page.GetOrCreateContents(); + + // Set the page resources + if (src_page.GetResources() != nullptr) { + const auto &src_resources = src_page.GetResources()->GetDictionary(); + dest_page.GetOrCreateResources().GetDictionary() = src_resources; + fix_references(dest_page.GetResources()->GetObject(), ref_map); + } else dest_page.GetOrCreateResources(); + + // Copy inherited properties + auto inherited = inheritableAttributes; + while (!inherited->IsNull()) { + auto attribute = src_page.GetDictionary().FindKeyParent(*inherited); + if (attribute != nullptr) { + PdfObject attributeCopy(*attribute); + fix_references(attributeCopy, ref_map); + dest_page.GetDictionary().AddKey(*inherited, attributeCopy); + } + inherited++; + } + } + } } catch (const PdfError & err) { podofo_set_exception(err); return NULL; + } catch (std::exception & err) { + PyErr_Format(PyExc_ValueError, "An error occurred while trying to append pages: %s", err.what()); + return NULL; } Py_RETURN_NONE; } // }}}