PDF Outline generation now works

This commit is contained in:
Kovid Goyal 2019-07-11 21:33:44 +05:30
parent 39dae008c2
commit 32e83987d6
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 74 additions and 27 deletions

View File

@ -272,7 +272,7 @@ def make_anchors_unique(container):
spine_names = set()
def replacer(url):
if replacer.file_type != 'text':
if replacer.file_type not in ('text', 'ncx'):
return url
if not url:
return url
@ -282,7 +282,10 @@ def make_anchors_unique(container):
href, frag = base, url[1:]
else:
href, frag = url.partition('#')[::2]
name = container.href_to_name(href, base)
if base is None:
name = href
else:
name = container.href_to_name(href, base)
if not name:
return url
if not frag and name in spine_names:
@ -298,6 +301,7 @@ def make_anchors_unique(container):
return '#' + new_frag
return href + '#' + new_frag
name_anchor_map = {}
for spine_name, is_linear in container.spine_names:
spine_names.add(spine_name)
root = container.parsed(spine_name)
@ -307,11 +311,17 @@ def make_anchors_unique(container):
if key not in mapping:
new_id = mapping[key] = 'a{}'.format(count)
elem.set('id', new_id)
body = root[-1]
if not body.get('id'):
count += 1
body.set('id', 'a{}'.format(count))
name_anchor_map[spine_name] = body.get('id')
for name in container.mime_map:
base = name
replacer.replaced = False
container.replace_links(name, replacer)
return name_anchor_map
AnchorLocation = namedtuple('AnchorLocation', 'pagenum left top zoom')
@ -330,7 +340,7 @@ def get_anchor_locations(pdf_doc, first_page_num, toc_uuid):
return ans
def fix_links(pdf_doc, anchor_locations, name_page_numbers, mark_links, log):
def fix_links(pdf_doc, anchor_locations, name_anchor_map, mark_links, log):
def replace_link(url):
purl = urlparse(url)
@ -342,39 +352,63 @@ def fix_links(pdf_doc, anchor_locations, name_page_numbers, mark_links, log):
if loc is None:
log.warn('Anchor location for link to {} not found'.format(purl.fragment))
else:
pnum = name_page_numbers.get(purl.fragment)
if pnum is None:
loc = anchor_locations.get(name_anchor_map.get(purl.fragment))
if loc is None:
log.warn('Anchor location for link to {} not found'.format(purl.fragment))
else:
loc = AnchorLocation(pnum, 0, 0, 0)
return loc
pdf_doc.alter_links(replace_link, mark_links)
class PDFOutlineRoot(object):
    """Stand-in parent for the top level of a PDF outline.

    Exposes the same ``create()`` call that outline items provide, so code
    that adds entries to a parent can treat the document root like any
    other parent node.
    """

    def __init__(self, pdf_doc):
        # root_item tracks the most recently created top-level outline item;
        # it stays None until the first entry has been created.
        self.root_item = None
        self.pdf_doc = pdf_doc

    def create(self, title, pagenum, as_child, left, top, zoom):
        """Create the next top-level outline entry and return it.

        The first call asks the document itself to create the outline.
        Every later call is chained off the item created by the previous
        call. ``as_child`` is accepted for signature compatibility but is
        ignored: ``False`` is always forwarded, so entries are never nested
        under one another here.
        """
        previous = self.root_item
        if previous is not None:
            self.root_item = previous.create(title, pagenum, False, left, top, zoom)
            return self.root_item
        self.root_item = self.pdf_doc.create_outline(title, pagenum, left, top, zoom)
        return self.root_item
def add_toc(pdf_parent, toc_parent, anchor_locations, name_anchor_map):
    """Recursively copy the entries of ``toc_parent`` into the PDF outline.

    For each child of ``toc_parent`` an outline item is created under
    ``pdf_parent``; children of that entry are then added beneath the new
    item.

    ``anchor_locations`` maps an anchor to an object with ``pagenum``,
    ``left``, ``top`` and ``zoom`` attributes. ``name_anchor_map`` is
    consulted when an entry's ``frag`` contains a ``'.'`` (presumably
    because it is then a file name rather than an anchor -- confirm against
    the code that fills in ``frag``).
    """
    for entry in toc_parent:
        title, target = entry.title, entry.frag
        try:
            anchor = name_anchor_map[target] if '.' in target else target
            where = anchor_locations[anchor]
        except Exception:
            # Best effort: an entry whose destination cannot be resolved
            # still appears in the outline, pointing at the first page.
            where = AnchorLocation(1, 0, 0, 0)
        outline_item = pdf_parent.create(
            title, where.pagenum, True, where.left, where.top, where.zoom)
        if len(entry):
            add_toc(outline_item, entry, anchor_locations, name_anchor_map)
def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, cover_data=None):
container = Container(opf_path, log)
make_anchors_unique(container)
margin_groups = create_margin_groups(container)
name_anchor_map = make_anchors_unique(container)
toc = get_toc(container, verify_destinations=False)
links_page_uuid = add_all_links(container, margin_groups)
toc = get_toc(container)
(toc)
container.commit()
manager = RenderManager(opts)
page_layout = get_page_layout(opts)
pdf_doc = None
anchor_locations = {}
name_page_numbers = {}
num_pages = 0
jobs = []
for group in margin_groups:
name, margins = group[0]
jobs.append(job_for_name(container, name, margins, page_layout))
results = manager.convert_html_files(jobs, settle_time=1)
num_pages = 0
for group in margin_groups:
name, margins = group[0]
name_page_numbers[name] = num_pages + 1
data = results[name]
if not isinstance(data, bytes):
raise SystemExit(data)
@ -387,7 +421,9 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
else:
pdf_doc.append(doc)
fix_links(pdf_doc, anchor_locations, name_page_numbers, opts.pdf_mark_links, log)
fix_links(pdf_doc, anchor_locations, name_anchor_map, opts.pdf_mark_links, log)
if toc and len(toc):
add_toc(PDFOutlineRoot(pdf_doc), toc, anchor_locations, name_anchor_map)
if cover_data:
add_cover(pdf_doc, cover_data, page_layout, opts)

View File

@ -264,23 +264,30 @@ PDFDoc_set_box(PDFDoc *self, PyObject *args) {
// create_outline() {{{
static PyObject *
PDFDoc_create_outline(PDFDoc *self, PyObject *args) {
PyObject *p;
PDFOutlineItem *ans;
int pagenum;
char *title_buf;
unsigned int pagenum;
double left = 0, top = 0, zoom = 0;
PdfPage *page;
if (!PyArg_ParseTuple(args, "Ui", &p, &pagenum)) return NULL;
if (!PyArg_ParseTuple(args, "esI|ddd", "UTF-8", &title_buf, &pagenum, &left, &top, &zoom)) return NULL;
ans = PyObject_New(PDFOutlineItem, &PDFOutlineItemType);
if (ans == NULL) goto error;
try {
const PdfString title = podofo_convert_pystring(p);
PdfString title(reinterpret_cast<pdf_utf8 *>(title_buf));
PdfOutlines *outlines = self->doc->GetOutlines();
if (outlines == NULL) {PyErr_NoMemory(); goto error;}
ans->item = outlines->CreateRoot(title);
if (ans->item == NULL) {PyErr_NoMemory(); goto error;}
ans->doc = self->doc;
PdfDestination dest(self->doc->GetPage(pagenum));
try {
page = self->doc->GetPage(pagenum - 1);
} catch (const PdfError &err) {
PyErr_Format(PyExc_ValueError, "Invalid page number: %u", pagenum - 1); goto error;
}
PdfDestination dest(page, left, top, zoom);
ans->item->SetDestination(dest);
} catch(const PdfError & err) {
podofo_set_exception(err); goto error;

View File

@ -44,23 +44,27 @@ erase(PDFOutlineItem *self, PyObject *args) {
static PyObject *
create(PDFOutlineItem *self, PyObject *args) {
PyObject *ptitle, *as_child = NULL;
PyObject *as_child;
PDFOutlineItem *ans;
int num;
unsigned int num;
double left = 0, top = 0, zoom = 0;
PdfPage *page;
char *title_buf;
if (!PyArg_ParseTuple(args, "Ui|O", &ptitle, &num, &as_child)) return NULL;
if (!PyArg_ParseTuple(args, "esIO|ddd", "UTF-8", &title_buf, &num, &as_child, &left, &top, &zoom)) return NULL;
ans = PyObject_New(PDFOutlineItem, &PDFOutlineItemType);
if (ans == NULL) goto error;
ans->doc = self->doc;
try {
const PdfString title = podofo_convert_pystring(ptitle);
page = self->doc->GetPage(num);
if (page == NULL) { PyErr_Format(PyExc_ValueError, "Invalid page number: %d", num); goto error; }
PdfDestination dest(page);
if (as_child != NULL && PyObject_IsTrue(as_child)) {
PdfString title(reinterpret_cast<pdf_utf8 *>(title_buf));
try {
page = self->doc->GetPage(num - 1);
} catch(const PdfError &err) { page = NULL; }
if (page == NULL) { PyErr_Format(PyExc_ValueError, "Invalid page number: %u", num); goto error; }
PdfDestination dest(page, left, top, zoom);
if (PyObject_IsTrue(as_child)) {
ans->item = self->item->CreateChild(title, dest);
} else
ans->item = self->item->CreateNext(title, dest);