diff --git a/src/calibre/ebooks/pdf/html_writer.py b/src/calibre/ebooks/pdf/html_writer.py
index e9d4393c3a..c9a57f745b 100644
--- a/src/calibre/ebooks/pdf/html_writer.py
+++ b/src/calibre/ebooks/pdf/html_writer.py
@@ -272,7 +272,7 @@ def make_anchors_unique(container):
spine_names = set()
def replacer(url):
- if replacer.file_type != 'text':
+ if replacer.file_type not in ('text', 'ncx'):
return url
if not url:
return url
@@ -282,7 +282,10 @@ def make_anchors_unique(container):
href, frag = base, url[1:]
else:
href, frag = url.partition('#')[::2]
- name = container.href_to_name(href, base)
+ if base is None:
+ name = href
+ else:
+ name = container.href_to_name(href, base)
if not name:
return url
if not frag and name in spine_names:
@@ -298,6 +301,7 @@ def make_anchors_unique(container):
return '#' + new_frag
return href + '#' + new_frag
+ name_anchor_map = {}
for spine_name, is_linear in container.spine_names:
spine_names.add(spine_name)
root = container.parsed(spine_name)
@@ -307,11 +311,17 @@ def make_anchors_unique(container):
if key not in mapping:
new_id = mapping[key] = 'a{}'.format(count)
elem.set('id', new_id)
+ body = root[-1]
+ if not body.get('id'):
+ count += 1
+ body.set('id', 'a{}'.format(count))
+ name_anchor_map[spine_name] = body.get('id')
for name in container.mime_map:
base = name
replacer.replaced = False
container.replace_links(name, replacer)
+ return name_anchor_map
AnchorLocation = namedtuple('AnchorLocation', 'pagenum left top zoom')
@@ -330,7 +340,7 @@ def get_anchor_locations(pdf_doc, first_page_num, toc_uuid):
return ans
-def fix_links(pdf_doc, anchor_locations, name_page_numbers, mark_links, log):
+def fix_links(pdf_doc, anchor_locations, name_anchor_map, mark_links, log):
def replace_link(url):
purl = urlparse(url)
@@ -342,39 +352,63 @@ def fix_links(pdf_doc, anchor_locations, name_page_numbers, mark_links, log):
if loc is None:
log.warn('Anchor location for link to {} not found'.format(purl.fragment))
else:
- pnum = name_page_numbers.get(purl.fragment)
- if pnum is None:
+ loc = anchor_locations.get(name_anchor_map.get(purl.fragment))
+ if loc is None:
log.warn('Anchor location for link to {} not found'.format(purl.fragment))
- else:
- loc = AnchorLocation(pnum, 0, 0, 0)
return loc
pdf_doc.alter_links(replace_link, mark_links)
+class PDFOutlineRoot(object):  # Lets add_toc() call create() on the document root with the same arguments it passes to outline items
+
+ def __init__(self, pdf_doc):  # pdf_doc: object whose create_outline() is called for the first top-level entry
+ self.pdf_doc = pdf_doc
+ self.root_item = None  # most recently created top-level item; None until create() has been called once
+
+ def create(self, title, pagenum, as_child, left, top, zoom):  # as_child is accepted but never read in this method
+ if self.root_item is None:
+ self.root_item = self.pdf_doc.create_outline(title, pagenum, left, top, zoom)  # first entry: start the outline on the document
+ else:
+ self.root_item = self.root_item.create(title, pagenum, False, left, top, zoom)  # later entries: created from the previous item with False in the as_child slot
+ return self.root_item  # the new item, which add_toc() uses as the parent when it recurses
+
+
+def add_toc(pdf_parent, toc_parent, anchor_locations, name_anchor_map):  # Recursively create one outline item under pdf_parent for every entry iterated from toc_parent
+ for child in toc_parent:
+ title, frag = child.title, child.frag
+ try:
+ if '.' in frag:  # a frag containing '.' is resolved through name_anchor_map first (presumably a spine file name; confirm against get_toc)
+ loc = anchor_locations[name_anchor_map[frag]]  # two-step lookup: frag -> anchor id -> location
+ else:
+ loc = anchor_locations[frag]  # otherwise frag itself is the anchor_locations key
+ except Exception:  # best-effort: any failure in the lookup above, not only a missing key, uses the fallback below
+ loc = AnchorLocation(1, 0, 0, 0)  # fallback destination: pagenum=1, left=0, top=0, zoom=0
+ pdf_child = pdf_parent.create(title, loc.pagenum, True, loc.left, loc.top, loc.zoom)  # True is passed in the as_child position
+ if len(child):  # only descend when the toc entry is non-empty
+ add_toc(pdf_child, child, anchor_locations, name_anchor_map)  # the new item becomes the parent for the nested entries
+
+
def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, cover_data=None):
container = Container(opf_path, log)
- make_anchors_unique(container)
margin_groups = create_margin_groups(container)
+ name_anchor_map = make_anchors_unique(container)
+ toc = get_toc(container, verify_destinations=False)
links_page_uuid = add_all_links(container, margin_groups)
- toc = get_toc(container)
- (toc)
container.commit()
manager = RenderManager(opts)
page_layout = get_page_layout(opts)
pdf_doc = None
anchor_locations = {}
- name_page_numbers = {}
- num_pages = 0
jobs = []
for group in margin_groups:
name, margins = group[0]
jobs.append(job_for_name(container, name, margins, page_layout))
results = manager.convert_html_files(jobs, settle_time=1)
+ num_pages = 0
for group in margin_groups:
name, margins = group[0]
- name_page_numbers[name] = num_pages + 1
data = results[name]
if not isinstance(data, bytes):
raise SystemExit(data)
@@ -387,7 +421,9 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
else:
pdf_doc.append(doc)
- fix_links(pdf_doc, anchor_locations, name_page_numbers, opts.pdf_mark_links, log)
+ fix_links(pdf_doc, anchor_locations, name_anchor_map, opts.pdf_mark_links, log)
+ if toc and len(toc):
+ add_toc(PDFOutlineRoot(pdf_doc), toc, anchor_locations, name_anchor_map)
if cover_data:
add_cover(pdf_doc, cover_data, page_layout, opts)
diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp
index 1a51385340..37939a3f52 100644
--- a/src/calibre/utils/podofo/doc.cpp
+++ b/src/calibre/utils/podofo/doc.cpp
@@ -264,23 +264,30 @@ PDFDoc_set_box(PDFDoc *self, PyObject *args) {
// create_outline() {{{
static PyObject *
PDFDoc_create_outline(PDFDoc *self, PyObject *args) {
- PyObject *p;
PDFOutlineItem *ans;
- int pagenum;
+ char *title_buf;
+ unsigned int pagenum;
+ double left = 0, top = 0, zoom = 0;
+ PdfPage *page;
- if (!PyArg_ParseTuple(args, "Ui", &p, &pagenum)) return NULL;
+ if (!PyArg_ParseTuple(args, "esI|ddd", "UTF-8", &title_buf, &pagenum, &left, &top, &zoom)) return NULL;
ans = PyObject_New(PDFOutlineItem, &PDFOutlineItemType);
if (ans == NULL) goto error;
try {
- const PdfString title = podofo_convert_pystring(p);
+ PdfString title(reinterpret_cast(title_buf));
PdfOutlines *outlines = self->doc->GetOutlines();
if (outlines == NULL) {PyErr_NoMemory(); goto error;}
ans->item = outlines->CreateRoot(title);
if (ans->item == NULL) {PyErr_NoMemory(); goto error;}
ans->doc = self->doc;
- PdfDestination dest(self->doc->GetPage(pagenum));
+ try {
+ page = self->doc->GetPage(pagenum - 1);
+ } catch (const PdfError &err) {
+ PyErr_Format(PyExc_ValueError, "Invalid page number: %u", pagenum - 1); goto error;
+ }
+ PdfDestination dest(page, left, top, zoom);
ans->item->SetDestination(dest);
} catch(const PdfError & err) {
podofo_set_exception(err); goto error;
diff --git a/src/calibre/utils/podofo/outline.cpp b/src/calibre/utils/podofo/outline.cpp
index d7a4d8a79b..3a337492f2 100644
--- a/src/calibre/utils/podofo/outline.cpp
+++ b/src/calibre/utils/podofo/outline.cpp
@@ -44,23 +44,27 @@ erase(PDFOutlineItem *self, PyObject *args) {
static PyObject *
create(PDFOutlineItem *self, PyObject *args) {
- PyObject *ptitle, *as_child = NULL;
+ PyObject *as_child;
PDFOutlineItem *ans;
- int num;
+ unsigned int num;
+ double left = 0, top = 0, zoom = 0;
PdfPage *page;
+ char *title_buf;
- if (!PyArg_ParseTuple(args, "Ui|O", &ptitle, &num, &as_child)) return NULL;
+ if (!PyArg_ParseTuple(args, "esIO|ddd", "UTF-8", &title_buf, &num, &as_child, &left, &top, &zoom)) return NULL;
ans = PyObject_New(PDFOutlineItem, &PDFOutlineItemType);
if (ans == NULL) goto error;
ans->doc = self->doc;
try {
- const PdfString title = podofo_convert_pystring(ptitle);
- page = self->doc->GetPage(num);
- if (page == NULL) { PyErr_Format(PyExc_ValueError, "Invalid page number: %d", num); goto error; }
- PdfDestination dest(page);
- if (as_child != NULL && PyObject_IsTrue(as_child)) {
+ PdfString title(reinterpret_cast(title_buf));
+ try {
+ page = self->doc->GetPage(num - 1);
+ } catch(const PdfError &err) { page = NULL; }
+ if (page == NULL) { PyErr_Format(PyExc_ValueError, "Invalid page number: %u", num); goto error; }
+ PdfDestination dest(page, left, top, zoom);
+ if (PyObject_IsTrue(as_child)) {
ans->item = self->item->CreateChild(title, dest);
} else
ans->item = self->item->CreateNext(title, dest);