mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
PDF Outline generation now works
This commit is contained in:
parent
39dae008c2
commit
32e83987d6
@ -272,7 +272,7 @@ def make_anchors_unique(container):
|
|||||||
spine_names = set()
|
spine_names = set()
|
||||||
|
|
||||||
def replacer(url):
|
def replacer(url):
|
||||||
if replacer.file_type != 'text':
|
if replacer.file_type not in ('text', 'ncx'):
|
||||||
return url
|
return url
|
||||||
if not url:
|
if not url:
|
||||||
return url
|
return url
|
||||||
@ -282,6 +282,9 @@ def make_anchors_unique(container):
|
|||||||
href, frag = base, url[1:]
|
href, frag = base, url[1:]
|
||||||
else:
|
else:
|
||||||
href, frag = url.partition('#')[::2]
|
href, frag = url.partition('#')[::2]
|
||||||
|
if base is None:
|
||||||
|
name = href
|
||||||
|
else:
|
||||||
name = container.href_to_name(href, base)
|
name = container.href_to_name(href, base)
|
||||||
if not name:
|
if not name:
|
||||||
return url
|
return url
|
||||||
@ -298,6 +301,7 @@ def make_anchors_unique(container):
|
|||||||
return '#' + new_frag
|
return '#' + new_frag
|
||||||
return href + '#' + new_frag
|
return href + '#' + new_frag
|
||||||
|
|
||||||
|
name_anchor_map = {}
|
||||||
for spine_name, is_linear in container.spine_names:
|
for spine_name, is_linear in container.spine_names:
|
||||||
spine_names.add(spine_name)
|
spine_names.add(spine_name)
|
||||||
root = container.parsed(spine_name)
|
root = container.parsed(spine_name)
|
||||||
@ -307,11 +311,17 @@ def make_anchors_unique(container):
|
|||||||
if key not in mapping:
|
if key not in mapping:
|
||||||
new_id = mapping[key] = 'a{}'.format(count)
|
new_id = mapping[key] = 'a{}'.format(count)
|
||||||
elem.set('id', new_id)
|
elem.set('id', new_id)
|
||||||
|
body = root[-1]
|
||||||
|
if not body.get('id'):
|
||||||
|
count += 1
|
||||||
|
body.set('id', 'a{}'.format(count))
|
||||||
|
name_anchor_map[spine_name] = body.get('id')
|
||||||
|
|
||||||
for name in container.mime_map:
|
for name in container.mime_map:
|
||||||
base = name
|
base = name
|
||||||
replacer.replaced = False
|
replacer.replaced = False
|
||||||
container.replace_links(name, replacer)
|
container.replace_links(name, replacer)
|
||||||
|
return name_anchor_map
|
||||||
|
|
||||||
|
|
||||||
AnchorLocation = namedtuple('AnchorLocation', 'pagenum left top zoom')
|
AnchorLocation = namedtuple('AnchorLocation', 'pagenum left top zoom')
|
||||||
@ -330,7 +340,7 @@ def get_anchor_locations(pdf_doc, first_page_num, toc_uuid):
|
|||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
|
||||||
def fix_links(pdf_doc, anchor_locations, name_page_numbers, mark_links, log):
|
def fix_links(pdf_doc, anchor_locations, name_anchor_map, mark_links, log):
|
||||||
|
|
||||||
def replace_link(url):
|
def replace_link(url):
|
||||||
purl = urlparse(url)
|
purl = urlparse(url)
|
||||||
@ -342,39 +352,63 @@ def fix_links(pdf_doc, anchor_locations, name_page_numbers, mark_links, log):
|
|||||||
if loc is None:
|
if loc is None:
|
||||||
log.warn('Anchor location for link to {} not found'.format(purl.fragment))
|
log.warn('Anchor location for link to {} not found'.format(purl.fragment))
|
||||||
else:
|
else:
|
||||||
pnum = name_page_numbers.get(purl.fragment)
|
loc = anchor_locations.get(name_anchor_map.get(purl.fragment))
|
||||||
if pnum is None:
|
if loc is None:
|
||||||
log.warn('Anchor location for link to {} not found'.format(purl.fragment))
|
log.warn('Anchor location for link to {} not found'.format(purl.fragment))
|
||||||
else:
|
|
||||||
loc = AnchorLocation(pnum, 0, 0, 0)
|
|
||||||
return loc
|
return loc
|
||||||
|
|
||||||
pdf_doc.alter_links(replace_link, mark_links)
|
pdf_doc.alter_links(replace_link, mark_links)
|
||||||
|
|
||||||
|
|
||||||
|
class PDFOutlineRoot(object):
    """Stand-in parent for the top level of a PDF outline.

    It exposes the same ``create()`` call that outline items are driven
    through, so code that walks a table of contents can treat the document
    root and nested outline items uniformly.
    """

    def __init__(self, pdf_doc):
        # Document object on which the first outline entry is created.
        self.pdf_doc = pdf_doc
        # Most recently created top-level item; None until the first call
        # to create().
        self.root_item = None

    def create(self, title, pagenum, as_child, left, top, zoom):
        """Add a top-level outline entry and return the newly created item.

        The first call creates the outline via ``pdf_doc.create_outline()``.
        Every later call asks the previously created top-level item to
        create the new entry, always passing ``False`` for its ``as_child``
        argument.

        ``as_child`` is accepted only so the signature lines up with the
        ``create()`` of outline items; its value is ignored here.
        """
        if self.root_item is None:
            self.root_item = self.pdf_doc.create_outline(title, pagenum, left, top, zoom)
        else:
            # Chain off the last top-level item so entries keep their order.
            self.root_item = self.root_item.create(title, pagenum, False, left, top, zoom)
        return self.root_item
|
||||||
|
|
||||||
|
|
||||||
|
def add_toc(pdf_parent, toc_parent, anchor_locations, name_anchor_map):
    """Recursively copy a table-of-contents tree into a PDF outline.

    :param pdf_parent: object whose ``create(title, pagenum, as_child, left,
        top, zoom)`` adds an outline entry and returns the new entry.
    :param toc_parent: iterable of ToC nodes; each node has ``title`` and
        ``frag`` attributes, supports ``len()`` and iterates over its own
        children.
    :param anchor_locations: mapping of anchor id to a location object with
        ``pagenum``, ``left``, ``top`` and ``zoom`` attributes.
    :param name_anchor_map: mapping used to turn a ``frag`` that contains a
        ``.`` into an anchor id before it is looked up in
        ``anchor_locations``.
    """
    for child in toc_parent:
        title, frag = child.title, child.frag
        try:
            if '.' in frag:
                # NOTE(review): a dotted frag is presumably a file name
                # rather than an anchor id -- confirm against the ToC
                # producer. It is resolved through name_anchor_map first.
                loc = anchor_locations[name_anchor_map[frag]]
            else:
                loc = anchor_locations[frag]
        except (KeyError, TypeError):
            # Unknown destination (KeyError) or a frag that cannot be
            # searched, e.g. None (TypeError): fall back to the first page
            # so the entry is still added. Other errors are not swallowed.
            loc = AnchorLocation(1, 0, 0, 0)
        pdf_child = pdf_parent.create(title, loc.pagenum, True, loc.left, loc.top, loc.zoom)
        if len(child):
            add_toc(pdf_child, child, anchor_locations, name_anchor_map)
|
||||||
|
|
||||||
|
|
||||||
def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, cover_data=None):
|
def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, cover_data=None):
|
||||||
container = Container(opf_path, log)
|
container = Container(opf_path, log)
|
||||||
make_anchors_unique(container)
|
|
||||||
margin_groups = create_margin_groups(container)
|
margin_groups = create_margin_groups(container)
|
||||||
|
name_anchor_map = make_anchors_unique(container)
|
||||||
|
toc = get_toc(container, verify_destinations=False)
|
||||||
links_page_uuid = add_all_links(container, margin_groups)
|
links_page_uuid = add_all_links(container, margin_groups)
|
||||||
toc = get_toc(container)
|
|
||||||
(toc)
|
|
||||||
container.commit()
|
container.commit()
|
||||||
|
|
||||||
manager = RenderManager(opts)
|
manager = RenderManager(opts)
|
||||||
page_layout = get_page_layout(opts)
|
page_layout = get_page_layout(opts)
|
||||||
pdf_doc = None
|
pdf_doc = None
|
||||||
anchor_locations = {}
|
anchor_locations = {}
|
||||||
name_page_numbers = {}
|
|
||||||
num_pages = 0
|
|
||||||
jobs = []
|
jobs = []
|
||||||
for group in margin_groups:
|
for group in margin_groups:
|
||||||
name, margins = group[0]
|
name, margins = group[0]
|
||||||
jobs.append(job_for_name(container, name, margins, page_layout))
|
jobs.append(job_for_name(container, name, margins, page_layout))
|
||||||
results = manager.convert_html_files(jobs, settle_time=1)
|
results = manager.convert_html_files(jobs, settle_time=1)
|
||||||
|
num_pages = 0
|
||||||
for group in margin_groups:
|
for group in margin_groups:
|
||||||
name, margins = group[0]
|
name, margins = group[0]
|
||||||
name_page_numbers[name] = num_pages + 1
|
|
||||||
data = results[name]
|
data = results[name]
|
||||||
if not isinstance(data, bytes):
|
if not isinstance(data, bytes):
|
||||||
raise SystemExit(data)
|
raise SystemExit(data)
|
||||||
@ -387,7 +421,9 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
|
|||||||
else:
|
else:
|
||||||
pdf_doc.append(doc)
|
pdf_doc.append(doc)
|
||||||
|
|
||||||
fix_links(pdf_doc, anchor_locations, name_page_numbers, opts.pdf_mark_links, log)
|
fix_links(pdf_doc, anchor_locations, name_anchor_map, opts.pdf_mark_links, log)
|
||||||
|
if toc and len(toc):
|
||||||
|
add_toc(PDFOutlineRoot(pdf_doc), toc, anchor_locations, name_anchor_map)
|
||||||
|
|
||||||
if cover_data:
|
if cover_data:
|
||||||
add_cover(pdf_doc, cover_data, page_layout, opts)
|
add_cover(pdf_doc, cover_data, page_layout, opts)
|
||||||
|
@ -264,23 +264,30 @@ PDFDoc_set_box(PDFDoc *self, PyObject *args) {
|
|||||||
// create_outline() {{{
|
// create_outline() {{{
|
||||||
static PyObject *
|
static PyObject *
|
||||||
PDFDoc_create_outline(PDFDoc *self, PyObject *args) {
|
PDFDoc_create_outline(PDFDoc *self, PyObject *args) {
|
||||||
PyObject *p;
|
|
||||||
PDFOutlineItem *ans;
|
PDFOutlineItem *ans;
|
||||||
int pagenum;
|
char *title_buf;
|
||||||
|
unsigned int pagenum;
|
||||||
|
double left = 0, top = 0, zoom = 0;
|
||||||
|
PdfPage *page;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "Ui", &p, &pagenum)) return NULL;
|
if (!PyArg_ParseTuple(args, "esI|ddd", "UTF-8", &title_buf, &pagenum, &left, &top, &zoom)) return NULL;
|
||||||
|
|
||||||
ans = PyObject_New(PDFOutlineItem, &PDFOutlineItemType);
|
ans = PyObject_New(PDFOutlineItem, &PDFOutlineItemType);
|
||||||
if (ans == NULL) goto error;
|
if (ans == NULL) goto error;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const PdfString title = podofo_convert_pystring(p);
|
PdfString title(reinterpret_cast<pdf_utf8 *>(title_buf));
|
||||||
PdfOutlines *outlines = self->doc->GetOutlines();
|
PdfOutlines *outlines = self->doc->GetOutlines();
|
||||||
if (outlines == NULL) {PyErr_NoMemory(); goto error;}
|
if (outlines == NULL) {PyErr_NoMemory(); goto error;}
|
||||||
ans->item = outlines->CreateRoot(title);
|
ans->item = outlines->CreateRoot(title);
|
||||||
if (ans->item == NULL) {PyErr_NoMemory(); goto error;}
|
if (ans->item == NULL) {PyErr_NoMemory(); goto error;}
|
||||||
ans->doc = self->doc;
|
ans->doc = self->doc;
|
||||||
PdfDestination dest(self->doc->GetPage(pagenum));
|
try {
|
||||||
|
page = self->doc->GetPage(pagenum - 1);
|
||||||
|
} catch (const PdfError &err) {
|
||||||
|
PyErr_Format(PyExc_ValueError, "Invalid page number: %u", pagenum - 1); goto error;
|
||||||
|
}
|
||||||
|
PdfDestination dest(page, left, top, zoom);
|
||||||
ans->item->SetDestination(dest);
|
ans->item->SetDestination(dest);
|
||||||
} catch(const PdfError & err) {
|
} catch(const PdfError & err) {
|
||||||
podofo_set_exception(err); goto error;
|
podofo_set_exception(err); goto error;
|
||||||
|
@ -44,23 +44,27 @@ erase(PDFOutlineItem *self, PyObject *args) {
|
|||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
create(PDFOutlineItem *self, PyObject *args) {
|
create(PDFOutlineItem *self, PyObject *args) {
|
||||||
PyObject *ptitle, *as_child = NULL;
|
PyObject *as_child;
|
||||||
PDFOutlineItem *ans;
|
PDFOutlineItem *ans;
|
||||||
int num;
|
unsigned int num;
|
||||||
|
double left = 0, top = 0, zoom = 0;
|
||||||
PdfPage *page;
|
PdfPage *page;
|
||||||
|
char *title_buf;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "Ui|O", &ptitle, &num, &as_child)) return NULL;
|
if (!PyArg_ParseTuple(args, "esIO|ddd", "UTF-8", &title_buf, &num, &as_child, &left, &top, &zoom)) return NULL;
|
||||||
|
|
||||||
ans = PyObject_New(PDFOutlineItem, &PDFOutlineItemType);
|
ans = PyObject_New(PDFOutlineItem, &PDFOutlineItemType);
|
||||||
if (ans == NULL) goto error;
|
if (ans == NULL) goto error;
|
||||||
ans->doc = self->doc;
|
ans->doc = self->doc;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const PdfString title = podofo_convert_pystring(ptitle);
|
PdfString title(reinterpret_cast<pdf_utf8 *>(title_buf));
|
||||||
page = self->doc->GetPage(num);
|
try {
|
||||||
if (page == NULL) { PyErr_Format(PyExc_ValueError, "Invalid page number: %d", num); goto error; }
|
page = self->doc->GetPage(num - 1);
|
||||||
PdfDestination dest(page);
|
} catch(const PdfError &err) { page = NULL; }
|
||||||
if (as_child != NULL && PyObject_IsTrue(as_child)) {
|
if (page == NULL) { PyErr_Format(PyExc_ValueError, "Invalid page number: %u", num); goto error; }
|
||||||
|
PdfDestination dest(page, left, top, zoom);
|
||||||
|
if (PyObject_IsTrue(as_child)) {
|
||||||
ans->item = self->item->CreateChild(title, dest);
|
ans->item = self->item->CreateChild(title, dest);
|
||||||
} else
|
} else
|
||||||
ans->item = self->item->CreateNext(title, dest);
|
ans->item = self->item->CreateNext(title, dest);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user