mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
PDF Outline generation now works
This commit is contained in:
parent
39dae008c2
commit
32e83987d6
@ -272,7 +272,7 @@ def make_anchors_unique(container):
|
||||
spine_names = set()
|
||||
|
||||
def replacer(url):
|
||||
if replacer.file_type != 'text':
|
||||
if replacer.file_type not in ('text', 'ncx'):
|
||||
return url
|
||||
if not url:
|
||||
return url
|
||||
@ -282,7 +282,10 @@ def make_anchors_unique(container):
|
||||
href, frag = base, url[1:]
|
||||
else:
|
||||
href, frag = url.partition('#')[::2]
|
||||
name = container.href_to_name(href, base)
|
||||
if base is None:
|
||||
name = href
|
||||
else:
|
||||
name = container.href_to_name(href, base)
|
||||
if not name:
|
||||
return url
|
||||
if not frag and name in spine_names:
|
||||
@ -298,6 +301,7 @@ def make_anchors_unique(container):
|
||||
return '#' + new_frag
|
||||
return href + '#' + new_frag
|
||||
|
||||
name_anchor_map = {}
|
||||
for spine_name, is_linear in container.spine_names:
|
||||
spine_names.add(spine_name)
|
||||
root = container.parsed(spine_name)
|
||||
@ -307,11 +311,17 @@ def make_anchors_unique(container):
|
||||
if key not in mapping:
|
||||
new_id = mapping[key] = 'a{}'.format(count)
|
||||
elem.set('id', new_id)
|
||||
body = root[-1]
|
||||
if not body.get('id'):
|
||||
count += 1
|
||||
body.set('id', 'a{}'.format(count))
|
||||
name_anchor_map[spine_name] = body.get('id')
|
||||
|
||||
for name in container.mime_map:
|
||||
base = name
|
||||
replacer.replaced = False
|
||||
container.replace_links(name, replacer)
|
||||
return name_anchor_map
|
||||
|
||||
|
||||
AnchorLocation = namedtuple('AnchorLocation', 'pagenum left top zoom')
|
||||
@ -330,7 +340,7 @@ def get_anchor_locations(pdf_doc, first_page_num, toc_uuid):
|
||||
return ans
|
||||
|
||||
|
||||
def fix_links(pdf_doc, anchor_locations, name_page_numbers, mark_links, log):
|
||||
def fix_links(pdf_doc, anchor_locations, name_anchor_map, mark_links, log):
|
||||
|
||||
def replace_link(url):
|
||||
purl = urlparse(url)
|
||||
@ -342,39 +352,63 @@ def fix_links(pdf_doc, anchor_locations, name_page_numbers, mark_links, log):
|
||||
if loc is None:
|
||||
log.warn('Anchor location for link to {} not found'.format(purl.fragment))
|
||||
else:
|
||||
pnum = name_page_numbers.get(purl.fragment)
|
||||
if pnum is None:
|
||||
loc = anchor_locations.get(name_anchor_map.get(purl.fragment))
|
||||
if loc is None:
|
||||
log.warn('Anchor location for link to {} not found'.format(purl.fragment))
|
||||
else:
|
||||
loc = AnchorLocation(pnum, 0, 0, 0)
|
||||
return loc
|
||||
|
||||
pdf_doc.alter_links(replace_link, mark_links)
|
||||
|
||||
|
||||
class PDFOutlineRoot(object):
    """Stand-in parent for the top level of a PDF outline.

    Exposes the same ``create()`` call signature that is used on the outline
    items it returns, so code that builds an outline tree can treat the
    document root like any other parent item.
    """

    def __init__(self, pdf_doc):
        # Document object on which the very first outline entry is created.
        self.pdf_doc = pdf_doc
        # Most recently created top-level entry; None until create() is called.
        self.root_item = None

    def create(self, title, pagenum, as_child, left, top, zoom):
        """Create the next top-level outline entry and return it.

        The first call goes through ``pdf_doc.create_outline()``. Every later
        call asks the previously created entry to create a new item, always
        passing ``False`` for its ``as_child`` argument. The ``as_child``
        value given to this method is accepted for signature compatibility
        but is not used.
        """
        previous = self.root_item
        if previous is not None:
            item = previous.create(title, pagenum, False, left, top, zoom)
        else:
            item = self.pdf_doc.create_outline(title, pagenum, left, top, zoom)
        self.root_item = item
        return item
|
||||
|
||||
|
||||
def add_toc(pdf_parent, toc_parent, anchor_locations, name_anchor_map):
    """Recursively copy the entries under ``toc_parent`` into the PDF outline.

    For every child of ``toc_parent`` an outline item is created via
    ``pdf_parent.create()``, and the child's own children are then added
    beneath that new item.

    ``anchor_locations`` maps anchor ids to objects with ``pagenum``,
    ``left``, ``top`` and ``zoom`` attributes. ``name_anchor_map`` is used to
    translate a ``frag`` that contains a dot into an anchor id first
    (presumably such a frag is a file name rather than an anchor; verify
    against the caller).
    """
    for entry in toc_parent:
        label = entry.title
        target = entry.frag
        try:
            anchor = name_anchor_map[target] if '.' in target else target
            where = anchor_locations[anchor]
        except Exception:
            # Best effort: any failure to resolve the destination points the
            # entry at page 1 instead of aborting the whole outline.
            where = AnchorLocation(1, 0, 0, 0)
        outline_item = pdf_parent.create(
            label, where.pagenum, True, where.left, where.top, where.zoom)
        if len(entry):
            add_toc(outline_item, entry, anchor_locations, name_anchor_map)
|
||||
|
||||
|
||||
def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, cover_data=None):
|
||||
container = Container(opf_path, log)
|
||||
make_anchors_unique(container)
|
||||
margin_groups = create_margin_groups(container)
|
||||
name_anchor_map = make_anchors_unique(container)
|
||||
toc = get_toc(container, verify_destinations=False)
|
||||
links_page_uuid = add_all_links(container, margin_groups)
|
||||
toc = get_toc(container)
|
||||
(toc)
|
||||
container.commit()
|
||||
|
||||
manager = RenderManager(opts)
|
||||
page_layout = get_page_layout(opts)
|
||||
pdf_doc = None
|
||||
anchor_locations = {}
|
||||
name_page_numbers = {}
|
||||
num_pages = 0
|
||||
jobs = []
|
||||
for group in margin_groups:
|
||||
name, margins = group[0]
|
||||
jobs.append(job_for_name(container, name, margins, page_layout))
|
||||
results = manager.convert_html_files(jobs, settle_time=1)
|
||||
num_pages = 0
|
||||
for group in margin_groups:
|
||||
name, margins = group[0]
|
||||
name_page_numbers[name] = num_pages + 1
|
||||
data = results[name]
|
||||
if not isinstance(data, bytes):
|
||||
raise SystemExit(data)
|
||||
@ -387,7 +421,9 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
|
||||
else:
|
||||
pdf_doc.append(doc)
|
||||
|
||||
fix_links(pdf_doc, anchor_locations, name_page_numbers, opts.pdf_mark_links, log)
|
||||
fix_links(pdf_doc, anchor_locations, name_anchor_map, opts.pdf_mark_links, log)
|
||||
if toc and len(toc):
|
||||
add_toc(PDFOutlineRoot(pdf_doc), toc, anchor_locations, name_anchor_map)
|
||||
|
||||
if cover_data:
|
||||
add_cover(pdf_doc, cover_data, page_layout, opts)
|
||||
|
@ -264,23 +264,30 @@ PDFDoc_set_box(PDFDoc *self, PyObject *args) {
|
||||
// create_outline() {{{
|
||||
static PyObject *
|
||||
PDFDoc_create_outline(PDFDoc *self, PyObject *args) {
|
||||
PyObject *p;
|
||||
PDFOutlineItem *ans;
|
||||
int pagenum;
|
||||
char *title_buf;
|
||||
unsigned int pagenum;
|
||||
double left = 0, top = 0, zoom = 0;
|
||||
PdfPage *page;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "Ui", &p, &pagenum)) return NULL;
|
||||
if (!PyArg_ParseTuple(args, "esI|ddd", "UTF-8", &title_buf, &pagenum, &left, &top, &zoom)) return NULL;
|
||||
|
||||
ans = PyObject_New(PDFOutlineItem, &PDFOutlineItemType);
|
||||
if (ans == NULL) goto error;
|
||||
|
||||
try {
|
||||
const PdfString title = podofo_convert_pystring(p);
|
||||
PdfString title(reinterpret_cast<pdf_utf8 *>(title_buf));
|
||||
PdfOutlines *outlines = self->doc->GetOutlines();
|
||||
if (outlines == NULL) {PyErr_NoMemory(); goto error;}
|
||||
ans->item = outlines->CreateRoot(title);
|
||||
if (ans->item == NULL) {PyErr_NoMemory(); goto error;}
|
||||
ans->doc = self->doc;
|
||||
PdfDestination dest(self->doc->GetPage(pagenum));
|
||||
try {
|
||||
page = self->doc->GetPage(pagenum - 1);
|
||||
} catch (const PdfError &err) {
|
||||
PyErr_Format(PyExc_ValueError, "Invalid page number: %u", pagenum - 1); goto error;
|
||||
}
|
||||
PdfDestination dest(page, left, top, zoom);
|
||||
ans->item->SetDestination(dest);
|
||||
} catch(const PdfError & err) {
|
||||
podofo_set_exception(err); goto error;
|
||||
|
@ -44,23 +44,27 @@ erase(PDFOutlineItem *self, PyObject *args) {
|
||||
|
||||
static PyObject *
|
||||
create(PDFOutlineItem *self, PyObject *args) {
|
||||
PyObject *ptitle, *as_child = NULL;
|
||||
PyObject *as_child;
|
||||
PDFOutlineItem *ans;
|
||||
int num;
|
||||
unsigned int num;
|
||||
double left = 0, top = 0, zoom = 0;
|
||||
PdfPage *page;
|
||||
char *title_buf;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "Ui|O", &ptitle, &num, &as_child)) return NULL;
|
||||
if (!PyArg_ParseTuple(args, "esIO|ddd", "UTF-8", &title_buf, &num, &as_child, &left, &top, &zoom)) return NULL;
|
||||
|
||||
ans = PyObject_New(PDFOutlineItem, &PDFOutlineItemType);
|
||||
if (ans == NULL) goto error;
|
||||
ans->doc = self->doc;
|
||||
|
||||
try {
|
||||
const PdfString title = podofo_convert_pystring(ptitle);
|
||||
page = self->doc->GetPage(num);
|
||||
if (page == NULL) { PyErr_Format(PyExc_ValueError, "Invalid page number: %d", num); goto error; }
|
||||
PdfDestination dest(page);
|
||||
if (as_child != NULL && PyObject_IsTrue(as_child)) {
|
||||
PdfString title(reinterpret_cast<pdf_utf8 *>(title_buf));
|
||||
try {
|
||||
page = self->doc->GetPage(num - 1);
|
||||
} catch(const PdfError &err) { page = NULL; }
|
||||
if (page == NULL) { PyErr_Format(PyExc_ValueError, "Invalid page number: %u", num); goto error; }
|
||||
PdfDestination dest(page, left, top, zoom);
|
||||
if (PyObject_IsTrue(as_child)) {
|
||||
ans->item = self->item->CreateChild(title, dest);
|
||||
} else
|
||||
ans->item = self->item->CreateNext(title, dest);
|
||||
|
Loading…
x
Reference in New Issue
Block a user