diff --git a/src/calibre/ebooks/oeb/polish/split.py b/src/calibre/ebooks/oeb/polish/split.py index 0102e774ca..e24440a370 100644 --- a/src/calibre/ebooks/oeb/polish/split.py +++ b/src/calibre/ebooks/oeb/polish/split.py @@ -372,6 +372,7 @@ def merge_html(container, names, master, insert_page_breaks=False): master_body = p(master).findall('h:body', namespaces=XPNSMAP)[-1] master_base = os.path.dirname(master) anchor_map = {n:{} for n in names if n != master} + first_anchor_map = {} for name in names: if name == master: @@ -419,6 +420,7 @@ def merge_html(container, names, master, insert_page_breaks=False): if 'id' not in first_child.attrib: first_child.set('id', unique_anchor(seen_anchors, 'top')) seen_anchors.add(first_child.get('id')) + first_anchor_map[name] = first_child.get('id') if insert_page_breaks: first_child.set('style', first_child.get('style', '') + '; page-break-before: always') @@ -444,6 +446,8 @@ def merge_html(container, names, master, insert_page_breaks=False): repl = MergeLinkReplacer(fname, anchor_map, master, container) container.replace_links(fname, repl) + return first_anchor_map + def merge_css(container, names, master): p = container.parsed diff --git a/src/calibre/ebooks/pdf/html_writer.py b/src/calibre/ebooks/pdf/html_writer.py index d580aed861..261efaa3a5 100644 --- a/src/calibre/ebooks/pdf/html_writer.py +++ b/src/calibre/ebooks/pdf/html_writer.py @@ -247,13 +247,14 @@ def add_cover(pdf_doc, cover_data, page_layout, opts): # Margin groups {{{ -def create_margin_groups(container): +def create_margin_groups(container, name_anchor_map): def merge_group(group): if len(group) > 1: group_margins = group[0][1] names = [name for (name, margins) in group] - merge_html(container, names, names[0], insert_page_breaks=True) + first_anchor_map = merge_html(container, names, names[0], insert_page_breaks=True) + name_anchor_map.update(first_anchor_map) group = [(names[0], group_margins)] return group @@ -371,8 +372,11 @@ class AnchorLocation(object): 
self.pagenum, self.left, self.top, self.zoom = pagenum, left, top, zoom def __repr__(self): - return 'AnchorLocation(pagenum={}, left={}, top={}, zoom={})'.format( - self.pagenum, self.left, self.top, self.zoom) + return 'AnchorLocation(pagenum={}, left={}, top={}, zoom={})'.format(*self.as_tuple) + + @property + def as_tuple(self): + return self.pagenum, self.left, self.top, self.zoom def get_anchor_locations(pdf_doc, first_page_num, toc_uuid): @@ -403,7 +407,7 @@ def fix_links(pdf_doc, anchor_locations, name_anchor_map, mark_links, log): loc = anchor_locations.get(name_anchor_map.get(purl.fragment)) if loc is None: log.warn('Anchor location for link to {} not found'.format(purl.fragment)) - return loc + return None if loc is None else loc.as_tuple pdf_doc.alter_links(replace_link, mark_links) # }}} @@ -520,8 +524,8 @@ def add_pagenum_toc(root, toc, opts, page_number_display_map): def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, cover_data=None, report_progress=lambda x, y: None): container = Container(opf_path, log) report_progress(0.05, _('Parsed all content for markup transformation')) - margin_groups = create_margin_groups(container) name_anchor_map = make_anchors_unique(container) + margin_groups = create_margin_groups(container, name_anchor_map) toc = get_toc(container, verify_destinations=False) has_toc = toc and len(toc) links_page_uuid = add_all_links(container, margin_groups) diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp index 1e99abfb62..76e344289f 100644 --- a/src/calibre/utils/podofo/doc.cpp +++ b/src/calibre/utils/podofo/doc.cpp @@ -453,17 +453,16 @@ PDFDoc_alter_links(PDFDoc *self, PyObject *args) { PdfObject *uo = A.GetKey("URI"); if (uo && uo->IsString()) { const std::string &uri = uo->GetString().GetStringUtf8(); - PyObject *ret = PyObject_CallObject(alter_callback, Py_BuildValue("(N)", PyUnicode_DecodeUTF8(uri.c_str(), uri.length(), "replace"))); + pyunique_ptr 
ret(PyObject_CallObject(alter_callback, Py_BuildValue("(N)", PyUnicode_DecodeUTF8(uri.c_str(), uri.length(), "replace")))); if (!ret) { return NULL; } - if (PyTuple_Check(ret) && PyTuple_GET_SIZE(ret) == 4) { + if (PyTuple_Check(ret.get()) && PyTuple_GET_SIZE(ret.get()) == 4) { int pagenum; double left, top, zoom; - if (PyArg_ParseTuple(ret, "iddd", &pagenum, &left, &top, &zoom)) { + if (PyArg_ParseTuple(ret.get(), "iddd", &pagenum, &left, &top, &zoom)) { PdfPage *page = NULL; try { page = self->doc->GetPage(pagenum - 1); } catch(const PdfError &err) { PyErr_Format(PyExc_ValueError, "No page number %d in the PDF file", pagenum); - Py_DECREF(ret); return NULL; } if (page) { @@ -473,7 +472,6 @@ PDFDoc_alter_links(PDFDoc *self, PyObject *args) { } } } - Py_DECREF(ret); } } }