From b2c8f6f8ee1c27712bfc8d84cdc1939aab8183e5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 3 Mar 2022 15:33:33 +0530 Subject: [PATCH] PDF Output: Fix conversion failing if there are ToC entries pointing to removed content. Fixes #1960554 [Private bug](https://bugs.launchpad.net/calibre/+bug/1960554) --- src/calibre/ebooks/pdf/html_writer.py | 8 +++++++- src/calibre/utils/podofo/doc.cpp | 7 +++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/pdf/html_writer.py b/src/calibre/ebooks/pdf/html_writer.py index ecf8b1304b..986f0153db 100644 --- a/src/calibre/ebooks/pdf/html_writer.py +++ b/src/calibre/ebooks/pdf/html_writer.py @@ -569,6 +569,7 @@ def get_anchor_locations(name, pdf_doc, first_page_num, toc_uuid, log): def fix_links(pdf_doc, anchor_locations, name_anchor_map, mark_links, log): + pc = pdf_doc.page_count() def replace_link(url): purl = urlparse(url) @@ -583,7 +584,12 @@ def fix_links(pdf_doc, anchor_locations, name_anchor_map, mark_links, log): loc = anchor_locations.get(name_anchor_map.get(purl.fragment)) if loc is None: log.warn(f'Anchor location for link to {purl.fragment} not found') - return None if loc is None else loc.as_tuple + if loc is None: + return None + if loc.pagenum > pc: + log.warn(f'Anchor location for link to {purl.fragment} is past the end of the document, moving it to last page') + loc.pagenum = pc + return loc.as_tuple pdf_doc.alter_links(replace_link, mark_links) # }}} diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp index 7cedee1a95..25f814a3c4 100644 --- a/src/calibre/utils/podofo/doc.cpp +++ b/src/calibre/utils/podofo/doc.cpp @@ -483,7 +483,7 @@ alter_link(PDFDoc *self, PdfDictionary &link, PyObject *alter_callback, bool mar page = self->doc->GetPage(pagenum - 1); } catch(const PdfError &err) { (void)err; - PyErr_Format(PyExc_ValueError, "No page number %d in the PDF file", pagenum); + PyErr_Format(PyExc_ValueError, "No page number %d in the PDF file of %d pages", pagenum, self->doc->GetPageCount()); return ; } if (page) { @@ -525,7 +525,10 @@ PDFDoc_alter_links(PDFDoc *self, PyObject *args) { } for (auto const & ref: links) { PdfObject *lo = self->doc->GetObjects().GetObject(ref); - if (lo) alter_link(self, lo->GetDictionary(), alter_callback, mark_links, border, link_color); + if (lo) { + alter_link(self, lo->GetDictionary(), alter_callback, mark_links, border, link_color); + if (PyErr_Occurred()) return NULL; + } } } catch(const PdfError & err) { podofo_set_exception(err);