From 226718cead1632eb135d0abc21b50e2823559192 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 12 May 2023 12:47:20 +0530 Subject: [PATCH] Fix PdfObject::GetReference semantics changed It now raises an exception if the object is not a reference. Instead we have to call GetIndirectReference on it. ... --- src/calibre/utils/podofo/doc.cpp | 20 +++++++------- src/calibre/utils/podofo/fonts.cpp | 41 ++++++++++++++--------------- src/calibre/utils/podofo/global.h | 9 +++++++ src/calibre/utils/podofo/images.cpp | 4 +-- 4 files changed, 41 insertions(+), 33 deletions(-) diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp index 9129f6c816..c38dbaa668 100644 --- a/src/calibre/utils/podofo/doc.cpp +++ b/src/calibre/utils/podofo/doc.cpp @@ -427,7 +427,7 @@ PDFDoc_extract_anchors(PDFDoc *self, PyObject *args) { const PdfObject *dests_ref = self->doc->GetCatalog().GetDictionary().GetKey("Dests"); auto& pages = self->doc->GetPages(); if (dests_ref && dests_ref->IsReference()) { - const PdfObject *dests_obj = self->doc->GetObjects().GetObject(dests_ref->GetReference()); + const PdfObject *dests_obj = self->doc->GetObjects().GetObject(object_as_reference(dests_ref)); if (dests_obj && dests_obj->IsDictionary()) { const PdfDictionary &dests = dests_obj->GetDictionary(); for (auto itres: dests) { @@ -436,7 +436,7 @@ PDFDoc_extract_anchors(PDFDoc *self, PyObject *args) { // see section 8.2 of PDF spec for different types of destination arrays // but chromium apparently generates only [page /XYZ left top zoom] type arrays if (dest.GetSize() > 4 && dest[1].IsName() && dest[1].GetName().GetString() == "XYZ") { - const PdfPage *page = get_page(pages, dest[0].GetReference()); + const PdfPage *page = get_page(pages, object_as_reference(dest[0])); if (page) { unsigned int pagenum = page->GetPageNumber(); double left = dest[2].GetReal(), top = dest[3].GetReal(); @@ -503,15 +503,15 @@ PDFDoc_alter_links(PDFDoc *self, PyObject *args) { link_color.Add(1.); link_color.Add(0.); link_color.Add(0.); std::vector links; for (auto &it : self->doc->GetObjects()) { - if(it->IsDictionary()) { - PdfDictionary &link = it->GetDictionary(); - if (dictionary_has_key_name(link, PdfName::KeyType, "Annot") && dictionary_has_key_name(link, PdfName::KeySubtype, "Link")) { - if (link.HasKey("A") && link.GetKey("A")->IsDictionary()) { - PdfDictionary &A = link.GetKey("A")->GetDictionary(); - if (dictionary_has_key_name(A, PdfName::KeyType, "Action") && dictionary_has_key_name(A, "S", "URI")) { - PdfObject *uo = A.GetKey("URI"); + PdfDictionary *link; + if(it->TryGetDictionary(link)) { + if (dictionary_has_key_name(*link, PdfName::KeyType, "Annot") && dictionary_has_key_name(*link, PdfName::KeySubtype, "Link")) { + PdfObject *akey; PdfDictionary *A; + if ((akey = link->GetKey("A")) && akey->TryGetDictionary(A)) { + if (dictionary_has_key_name(*A, PdfName::KeyType, "Action") && dictionary_has_key_name(*A, "S", "URI")) { + PdfObject *uo = A->GetKey("URI"); if (uo && uo->IsString()) { - links.push_back(it->GetReference()); + links.push_back(object_as_reference(it)); } } } diff --git a/src/calibre/utils/podofo/fonts.cpp b/src/calibre/utils/podofo/fonts.cpp index ba673f4d92..14ccca6b35 100644 --- a/src/calibre/utils/podofo/fonts.cpp +++ b/src/calibre/utils/podofo/fonts.cpp @@ -50,11 +50,11 @@ remove_font(PdfIndirectObjectList &objects, PdfObject *font) { PdfObject *descriptor = dict->FindKey("FontDescriptor"); if (descriptor) { const PdfObject *ff = get_font_file(descriptor); - if (ff) objects.RemoveObject(ff->GetReference()).reset(); - objects.RemoveObject(descriptor->GetReference()).reset(); + if (ff) objects.RemoveObject(object_as_reference(ff)).reset(); + objects.RemoveObject(object_as_reference(descriptor)).reset(); } } - objects.RemoveObject(font->GetReference()).reset(); + objects.RemoveObject(object_as_reference(font)).reset(); } static void @@ -86,9 +86,8 @@ used_fonts_in_canvas(const PdfCanvas &canvas, unordered_reference_set &ans) { stack.pop(); if (stack.size() > 0 && stack.top().IsName()) { const PdfName &reference_name = stack.top().GetName(); - if (fonts_dict.HasKey(reference_name)) { - ans.insert(fonts_dict.GetKey(reference_name)->GetReference()); - } + const PdfObject *f = fonts_dict.GetKey(reference_name); + if (f) ans.insert(object_as_reference(f)); } } } @@ -127,7 +126,7 @@ list_fonts(PDFDoc *self, PyObject *args) { if (dictionary_has_key_name(dict, PdfName::KeyType, "Font") && dict.HasKey("BaseFont")) { const std::string &name = dict.GetKey("BaseFont")->GetName().GetString(); const std::string &subtype = dict.GetKey(PdfName::KeySubtype)->GetName().GetString(); - const PdfReference &ref = it->GetReference(); + const PdfReference &ref = object_as_reference(it); unsigned long num = ref.ObjectNumber(), generation = ref.GenerationNumber(); const PdfObject *descriptor = dict.FindKey("FontDescriptor"); pyunique_ptr descendant_font, stream_ref, encoding, w, w2; @@ -151,7 +150,7 @@ list_fonts(PDFDoc *self, PyObject *args) { if (descriptor) { const PdfObject *ff = get_font_file(descriptor); if (ff) { - stream_ref.reset(ref_as_tuple(ff->GetReference())); + stream_ref.reset(ref_as_tuple(object_as_reference(ff))); if (!stream_ref) return NULL; const PdfObjectStream *stream = ff->GetStream(); if (stream && get_font_data) { @@ -160,10 +159,10 @@ list_fonts(PDFDoc *self, PyObject *args) { } } else if (dict.HasKey("DescendantFonts")) { const PdfArray &df = dict.GetKey("DescendantFonts")->GetArray(); - descendant_font.reset(ref_as_tuple(df[0].GetReference())); + descendant_font.reset(ref_as_tuple(object_as_reference(df[0]))); if (!descendant_font) return NULL; if (get_font_data && dict.HasKey("ToUnicode")) { - const PdfReference &uref = dict.GetKey("ToUnicode")->GetReference(); + const PdfReference &uref = object_as_reference(dict.GetKey("ToUnicode")); PdfObject *t = objects.GetObject(uref); if (t) { PdfObjectStream *stream = t->GetStream(); @@ -225,12 +224,12 @@ remove_unused_fonts(PDFDoc *self, PyObject *args) { if (dictionary_has_key_name(dict, PdfName::KeyType, "Font")) { const std::string &font_type = dict.GetKey(PdfName::KeySubtype)->GetName().GetString(); if (font_type == "Type0") { - all_fonts.insert(k->GetReference()); + all_fonts.insert(object_as_reference(k)); } else if (font_type == "Type3") { - all_fonts.insert(k->GetReference()); - type3_fonts.insert(k->GetReference()); + all_fonts.insert(object_as_reference(k)); + type3_fonts.insert(object_as_reference(k)); for (auto &x : dict.GetKey("CharProcs")->GetDictionary()) { - const PdfReference &ref = x.second.GetReference(); + const PdfReference &ref = object_as_reference(x.second); if (charprocs_usage.find(ref) == charprocs_usage.end()) charprocs_usage[ref] = 1; else charprocs_usage[ref] += 1; } @@ -248,11 +247,11 @@ remove_unused_fonts(PDFDoc *self, PyObject *args) { if (font->TryGetDictionary(dict)) { if (type3_fonts.find(ref) != type3_fonts.end()) { for (auto &x : dict->FindKey("CharProcs")->GetDictionary()) { - charprocs_usage[x.second.GetReference()] -= 1; + charprocs_usage[object_as_reference(x.second)] -= 1; } } else { for (auto &x : dict->FindKey("DescendantFonts")->GetArray()) { - PdfObject *dfont = objects.GetObject(x.GetReference()); + PdfObject *dfont = objects.GetObject(object_as_reference(x)); if (dfont) remove_font(objects, dfont); } }} @@ -318,8 +317,8 @@ merge_fonts(PDFDoc *self, PyObject *args) { PdfObjectStream *stream = ff->GetStream(); stream->SetData(bufferview(data, sz)); } else { - objects.RemoveObject(ff->GetReference()).reset(); - descriptor.AddKey(font_file_key, font_file->GetReference()); + objects.RemoveObject(object_as_reference(ff)).reset(); + descriptor.AddKey(font_file_key, object_as_reference(font_file)); } } Py_RETURN_NONE; @@ -371,9 +370,9 @@ dedup_type3_fonts(PDFDoc *self, PyObject *args) { if (dictionary_has_key_name(dict, PdfName::KeyType, "Font")) { const std::string &font_type = dict.GetKey(PdfName::KeySubtype)->GetName().GetString(); if (font_type == "Type3") { - all_type3_fonts.insert(k->GetReference()); + all_type3_fonts.insert(object_as_reference(k)); for (auto &x : dict.GetKey("CharProcs")->GetDictionary()) { - const PdfReference &ref = x.second.GetReference(); + const PdfReference &ref = object_as_reference(x.second); const PdfObject *cpobj = objects.GetObject(ref); if (!cpobj || !cpobj->HasStream()) continue; CharProc cp(ref, cpobj); @@ -408,7 +407,7 @@ dedup_type3_fonts(PDFDoc *self, PyObject *args) { PdfDictionary new_dict = PdfDictionary(dict); bool changed = false; for (auto &k : dict) { - auto it = ref_map.find(k.second.GetReference()); + auto it = ref_map.find(object_as_reference(k.second)); if (it != ref_map.end()) { new_dict.AddKey(k.first, (*it).second); changed = true; diff --git a/src/calibre/utils/podofo/global.h b/src/calibre/utils/podofo/global.h index 77905af8a2..17cf64a8f9 100644 --- a/src/calibre/utils/podofo/global.h +++ b/src/calibre/utils/podofo/global.h @@ -119,6 +119,15 @@ get_page(PdfDocument *doc, const unsigned num) { return nullptr; } +static inline PdfReference +object_as_reference(const PdfObject &o) { + return o.IsReference() ? o.GetReference() : o.GetIndirectReference(); +} + +static inline PdfReference +object_as_reference(const PdfObject *o) { + return o->IsReference() ? o->GetReference() : o->GetIndirectReference(); +} class PdfReferenceHasher { diff --git a/src/calibre/utils/podofo/images.cpp b/src/calibre/utils/podofo/images.cpp index f5d948a33d..dc9cf32bd9 100644 --- a/src/calibre/utils/podofo/images.cpp +++ b/src/calibre/utils/podofo/images.cpp @@ -64,7 +64,7 @@ dedup_images(PDFDoc *self, PyObject *args) { if (!k->IsDictionary()) continue; const PdfDictionary &dict = k->GetDictionary(); if (dictionary_has_key_name(dict, PdfName::KeyType, "XObject") && dictionary_has_key_name(dict, PdfName::KeySubtype, "Image")) { - Image img(k->GetReference(), k); + Image img(object_as_reference(k), k); auto it = image_map.find(img); if (it == image_map.end()) { std::vector vals; @@ -99,7 +99,7 @@ dedup_images(PDFDoc *self, PyObject *args) { for (const auto &x : xobject) { if (x.second.IsReference()) { try { - const PdfReference &r = ref_map.at(x.second.GetReference()); + const PdfReference &r = ref_map.at(object_as_reference(x.second)); new_xobject.AddKey(x.first, r); changed = true; } catch (const std::out_of_range &err) { (void)err; continue; }