Add a function to get the outline from a PDF file

This commit is contained in:
Kovid Goyal 2019-10-03 18:12:45 +05:30
parent 1c22993e00
commit beb9727757
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 70 additions and 3 deletions

View File

@ -5,6 +5,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
import os import os
import shutil import shutil
import sys
from calibre.constants import plugins, preferred_encoding from calibre.constants import plugins, preferred_encoding
from calibre.ebooks.metadata import authors_to_string from calibre.ebooks.metadata import authors_to_string
@ -115,6 +116,17 @@ def get_xmp_metadata(path):
return p.get_xmp_metadata() return p.get_xmp_metadata()
def get_outline(path=None):
    '''
    Read the outline (bookmarks) of a PDF file.

    :param path: Path to the PDF file. When None, the last command line
        argument (``sys.argv[-1]``) is used instead.
    :return: The ``'children'`` list of the root outline node returned by
        ``PDFDoc.get_outline()``, or an empty list when the document has
        no outline.
    '''
    if path is None:
        path = sys.argv[-1]
    podofo = get_podofo()
    p = podofo.PDFDoc()
    with open(path, 'rb') as f:
        raw = f.read()
    p.load(raw)
    outline = p.get_outline()
    # The native get_outline() returns None for a document without an
    # outline; subscripting that would raise TypeError.
    if outline is None:
        return []
    return outline['children']
def get_image_count(path): def get_image_count(path):
podofo = get_podofo() podofo = get_podofo()
p = podofo.PDFDoc() p = podofo.PDFDoc()
@ -214,5 +226,4 @@ def test_podofo():
if __name__ == '__main__': if __name__ == '__main__':
import sys
get_xmp_metadata(sys.argv[-1]) get_xmp_metadata(sys.argv[-1])

View File

@ -752,6 +752,9 @@ static PyMethodDef PDFDoc_methods[] = {
{"create_outline", (PyCFunction)py_create_outline, METH_VARARGS, {"create_outline", (PyCFunction)py_create_outline, METH_VARARGS,
"create_outline(title, pagenum) -> Create an outline, return the first outline item." "create_outline(title, pagenum) -> Create an outline, return the first outline item."
}, },
{"get_outline", (PyCFunction)py_get_outline, METH_NOARGS,
"get_outline() -> Get the outline if any in the PDF file."
},
{"get_xmp_metadata", (PyCFunction)PDFDoc_get_xmp_metadata, METH_VARARGS, {"get_xmp_metadata", (PyCFunction)PDFDoc_get_xmp_metadata, METH_VARARGS,
"get_xmp_metadata(raw) -> Get the XMP metadata as raw bytes" "get_xmp_metadata(raw) -> Get the XMP metadata as raw bytes"
}, },

View File

@ -103,6 +103,7 @@ PyObject* py_dedup_type3_fonts(PDFDoc *self, PyObject *args);
PyObject* py_impose(PDFDoc *self, PyObject *args); PyObject* py_impose(PDFDoc *self, PyObject *args);
PyObject* py_dedup_images(PDFDoc *self, PyObject *args); PyObject* py_dedup_images(PDFDoc *self, PyObject *args);
PyObject* py_create_outline(PDFDoc *self, PyObject *args); PyObject* py_create_outline(PDFDoc *self, PyObject *args);
PyObject* py_get_outline(PDFDoc *self, PyObject *args);
} }
} }

View File

@ -9,7 +9,6 @@
using namespace pdf; using namespace pdf;
// create_outline() {{{
static PyObject * static PyObject *
create_outline(PDFDoc *self, PyObject *args) { create_outline(PDFDoc *self, PyObject *args) {
PDFOutlineItem *ans; PDFOutlineItem *ans;
@ -53,6 +52,59 @@ error:
Py_XDECREF(ans); Py_XDECREF(ans);
return NULL; return NULL;
} // }}} }
// Build an empty outline node: a new dict whose "children" key maps to a new,
// empty list. Returns the dict (ownership handed to the caller through
// release()), or NULL if creating either object or storing the list fails.
static PyObject*
create_outline_node() {
    pyunique_ptr kids(PyList_New(0));
    if (!kids) return NULL;
    pyunique_ptr node(PyDict_New());
    if (!node) return NULL;
    // PyDict_SetItemString() takes its own reference to the list, so `kids`
    // may still give up its reference when it goes out of scope.
    const int rc = PyDict_SetItemString(node.get(), "children", kids.get());
    if (rc != 0) return NULL;
    return node.release();
}
// Convert the outline item `item`, together with all of its following
// siblings, into outline nodes appended to parent["children"], descending
// into the children of every item.
//
// Each node is a dict with "title", "children" and, when the item has a
// destination, "dest" = {"page", "top", "left", "zoom"}; "page" is -1 when the
// destination does not resolve to a page.
//
// Nothing is returned. On failure the function returns early and a Python
// exception is expected to be set; callers detect that with PyErr_Occurred().
static void
convert_outline(PDFDoc *self, PyObject *parent, PdfOutlineItem *item) {
    // Borrowed reference, looked up once since all siblings share one parent.
    PyObject *children = PyDict_GetItemString(parent, "children");
    if (!children) {
        // PyDict_GetItemString() reports a missing key without setting an
        // exception, so raise one explicitly for the caller to see.
        PyErr_SetString(PyExc_KeyError, "children");
        return;
    }
    // Siblings are walked with a loop rather than by recursion, so the call
    // stack depth is bounded by the nesting depth of the outline and not by
    // the number of entries on a single level.
    for (; item; item = item->Next()) {
        pyunique_ptr title(podofo_convert_pdfstring(item->GetTitle()));
        if (!title) return;
        pyunique_ptr node(create_outline_node());
        if (!node) return;
        if (PyDict_SetItemString(node.get(), "title", title.get()) != 0) return;
        PdfDestination* dest = item->GetDestination(self->doc);
        if (dest) {
            PdfPage *page = dest->GetPage(self->doc);
            long pnum = page ? page->GetPageNumber() : -1;
            pyunique_ptr d(Py_BuildValue("{sl sd sd sd}", "page", pnum, "top", dest->GetTop(), "left", dest->GetLeft(), "zoom", dest->GetZoom()));
            if (!d) return;
            if (PyDict_SetItemString(node.get(), "dest", d.get()) != 0) return;
        }
        // The node is attached before its descendants are converted, keeping
        // document order in the resulting list.
        if (PyList_Append(children, node.get()) != 0) return;
        if (item->First()) {
            convert_outline(self, node.get(), item->First());
            if (PyErr_Occurred()) return;
        }
    }
}
// Return the outline of the document as a tree of dicts, or None when the
// document has no outline object or the outline has no first item.
//
// The returned root is a dict with a single "children" list; the nodes below
// it are produced by convert_outline(). Returns NULL when building the tree
// left a Python exception set. `args` is not used.
static PyObject *
get_outline(PDFDoc *self, PyObject *args) {
    PdfOutlines *root = self->doc->GetOutlines(PoDoFo::ePdfDontCreateObject);
    if (!root || !root->First()) Py_RETURN_NONE;
    // Held in a pyunique_ptr so the root is released on every exit path,
    // including a C++ exception escaping from convert_outline().
    pyunique_ptr ans(create_outline_node());
    if (!ans) return NULL;
    convert_outline(self, ans.get(), root->First());
    if (PyErr_Occurred()) return NULL;
    return ans.release();
}
PYWRAP(create_outline) PYWRAP(create_outline)
PYWRAP(get_outline)