mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add a function to get the outline from a PDF file
This commit is contained in:
parent
1c22993e00
commit
beb9727757
@ -5,6 +5,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
|
||||
from calibre.constants import plugins, preferred_encoding
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
@ -115,6 +116,17 @@ def get_xmp_metadata(path):
|
||||
return p.get_xmp_metadata()
|
||||
|
||||
|
||||
def get_outline(path=None):
    """Return the top-level outline (bookmark) entries of a PDF file.

    :param path: Path to the PDF file. When ``None``, the last command
        line argument (``sys.argv[-1]``) is used instead.
    :return: A list of dicts, one per top-level outline item. Each dict has
        a ``'title'`` key, a ``'children'`` list of nested items of the same
        shape and, when the item has a destination, a ``'dest'`` dict with
        the keys ``'page'``, ``'top'``, ``'left'`` and ``'zoom'``. The list
        is empty when the PDF has no outline.
    """
    if path is None:
        path = sys.argv[-1]
    podofo = get_podofo()
    p = podofo.PDFDoc()
    with open(path, 'rb') as f:
        raw = f.read()
    p.load(raw)
    outline = p.get_outline()
    # The native get_outline() returns None for a PDF without an outline;
    # subscripting that directly would raise TypeError.
    if outline is None:
        return []
    return outline['children']
|
||||
|
||||
|
||||
def get_image_count(path):
|
||||
podofo = get_podofo()
|
||||
p = podofo.PDFDoc()
|
||||
@ -214,5 +226,4 @@ def test_podofo():
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
get_xmp_metadata(sys.argv[-1])
|
||||
|
@ -752,6 +752,9 @@ static PyMethodDef PDFDoc_methods[] = {
|
||||
{"create_outline", (PyCFunction)py_create_outline, METH_VARARGS,
|
||||
"create_outline(title, pagenum) -> Create an outline, return the first outline item."
|
||||
},
|
||||
{"get_outline", (PyCFunction)py_get_outline, METH_NOARGS,
|
||||
"get_outline() -> Get the outline if any in the PDF file."
|
||||
},
|
||||
{"get_xmp_metadata", (PyCFunction)PDFDoc_get_xmp_metadata, METH_VARARGS,
|
||||
"get_xmp_metadata(raw) -> Get the XMP metadata as raw bytes"
|
||||
},
|
||||
|
@ -103,6 +103,7 @@ PyObject* py_dedup_type3_fonts(PDFDoc *self, PyObject *args);
|
||||
PyObject* py_impose(PDFDoc *self, PyObject *args);
|
||||
PyObject* py_dedup_images(PDFDoc *self, PyObject *args);
|
||||
PyObject* py_create_outline(PDFDoc *self, PyObject *args);
|
||||
PyObject* py_get_outline(PDFDoc *self, PyObject *args);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -9,7 +9,6 @@
|
||||
using namespace pdf;
|
||||
|
||||
|
||||
// create_outline() {{{
|
||||
static PyObject *
|
||||
create_outline(PDFDoc *self, PyObject *args) {
|
||||
PDFOutlineItem *ans;
|
||||
@ -53,6 +52,59 @@ error:
|
||||
Py_XDECREF(ans);
|
||||
return NULL;
|
||||
|
||||
} // }}}
|
||||
}
|
||||
|
||||
// Build an empty outline node: a new dict whose "children" key maps to a
// new, empty list. Returns a new reference owned by the caller, or NULL if
// any of the CPython calls fails.
static PyObject*
create_outline_node() {
    pyunique_ptr node(PyDict_New());
    if (!node) return NULL;
    pyunique_ptr kids(PyList_New(0));
    // The dict takes its own reference to the list; ours is dropped when
    // kids goes out of scope.
    if (!kids || PyDict_SetItemString(node.get(), "children", kids.get()) != 0) return NULL;
    return node.release();
}
|
||||
|
||||
static void
|
||||
convert_outline(PDFDoc *self, PyObject *parent, PdfOutlineItem *item) {
|
||||
pyunique_ptr title(podofo_convert_pdfstring(item->GetTitle()));
|
||||
if (!title) return;
|
||||
pyunique_ptr node(create_outline_node());
|
||||
if (!node) return;
|
||||
if (PyDict_SetItemString(node.get(), "title", title.get()) != 0) return;
|
||||
PdfDestination* dest = item->GetDestination(self->doc);
|
||||
if (dest) {
|
||||
PdfPage *page = dest->GetPage(self->doc);
|
||||
long pnum = page ? page->GetPageNumber() : -1;
|
||||
pyunique_ptr d(Py_BuildValue("{sl sd sd sd}", "page", pnum, "top", dest->GetTop(), "left", dest->GetLeft(), "zoom", dest->GetZoom()));
|
||||
if (!d) return;
|
||||
if (PyDict_SetItemString(node.get(), "dest", d.get()) != 0) return;
|
||||
}
|
||||
PyObject *children = PyDict_GetItemString(parent, "children");
|
||||
if (PyList_Append(children, node.get()) != 0) return;
|
||||
|
||||
if (item->First()) {
|
||||
convert_outline(self, node.get(), item->First());
|
||||
if (PyErr_Occurred()) return;
|
||||
}
|
||||
|
||||
if (item->Next()) {
|
||||
convert_outline(self, parent, item->Next());
|
||||
if (PyErr_Occurred()) return;
|
||||
}
|
||||
}
|
||||
|
||||
// get_outline() -> Return the document outline as a tree of dicts, or None
// when the PDF has no outline (or the outline has no first item). The
// returned root node is a dict whose "children" list holds the top-level
// outline items. Returns NULL with a Python exception set on failure.
// `args` is unused.
static PyObject *
get_outline(PDFDoc *self, PyObject *args) {
    // ePdfDontCreateObject: only look up an existing outline.
    PdfOutlines *root = self->doc->GetOutlines(PoDoFo::ePdfDontCreateObject);
    if (!root || !root->First()) Py_RETURN_NONE;

    // Own the result through pyunique_ptr, like the helpers above, so it is
    // released on every exit path.
    // NOTE(review): this also covers a PoDoFo call inside convert_outline()
    // throwing; presumably the PYWRAP wrapper translates such exceptions -
    // confirm.
    pyunique_ptr ans(create_outline_node());
    if (!ans) return NULL;

    convert_outline(self, ans.get(), root->First());
    if (PyErr_Occurred()) return NULL;

    return ans.release();
}
|
||||
|
||||
PYWRAP(create_outline)
|
||||
PYWRAP(get_outline)
|
||||
|
Loading…
x
Reference in New Issue
Block a user