Add ability to extract pages from PDF files to calibre podofo bindings

This commit is contained in:
Kovid Goyal 2012-05-28 20:30:29 +05:30
parent 4f03d28aa5
commit 4017e381a0
2 changed files with 51 additions and 14 deletions

View File

@ -181,20 +181,23 @@ def set_metadata_(path, opath, title, authors, bkp, tags):
return True
return False
def delete_all_but(path, pages):
    '''
    Delete all the pages in the PDF at ``path`` except for the specified ones.

    :param path: Path to the PDF file. The file is rewritten in place.
    :param pages: Iterable of zero-based page numbers to keep. Negative
        numbers are counted from the end of the PDF.

    If writing the trimmed document fails, the original file is restored
    from the working copy before the error is re-raised.
    '''
    with TemporaryFile('_podofo_in.pdf') as of:
        # The document is opened from a copy, so the file at path can be
        # removed and rewritten while the document is still loaded.
        shutil.copyfile(path, of)

        p = podofo.PDFDoc()
        p.open(of)
        total = p.page_count()
        keep = {total + x if x < 0 else x for x in pages}
        # Walk backwards so that deleting a page never shifts the index of a
        # page that has not been visited yet.
        for page in xrange(total-1, -1, -1):
            if page not in keep:
                p.delete_page(page)
        os.remove(path)
        try:
            p.save(path)
        except:
            # path has already been removed at this point; put the untouched
            # copy back so a failed save does not lose the user's file.
            shutil.copyfile(of, path)
            raise
if __name__ == '__main__':
    # Manual smoke test: reads /tmp/t.pdf, writes a copy with new metadata to
    # /tmp/x.pdf, then trims /tmp/t.pdf in place to its first two and last
    # two pages.
    f = '/tmp/t.pdf'
    import StringIO
    # PDF is binary data, so it must be read in binary mode. The with blocks
    # make sure the handles are closed before the file is rewritten below.
    with open(f, 'rb') as src:
        stream = StringIO.StringIO(src.read())
    with open(f, 'rb') as src:
        mi = get_metadata(src)
    print('')
    print('Original metadata:')
    print(mi)
    mi.title = 'Test title'
    mi.authors = ['Test author', 'author2']
    mi.book_producer = 'calibre'
    set_metadata(stream, mi)
    with open('/tmp/x.pdf', 'wb') as out:
        out.write(stream.getvalue())
    print('')
    print('New pdf written to /tmp/x.pdf')
    delete_all_but(f, [0, 1, -2, -1])

View File

@ -146,6 +146,33 @@ podofo_PDFDoc_extract_first_page(podofo_PDFDoc *self, PyObject *args, PyObject *
Py_RETURN_NONE;
}
static PyObject *
podofo_PDFDoc_page_count(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) {
    // Python method page_count(): returns the value of GetPageCount() on the
    // wrapped document as a Python integer. args and kwargs are not examined.
    try {
        const int n_pages = self->doc->GetPageCount();
        return Py_BuildValue("i", n_pages);
    } catch (const PdfError &err) {
        // Hand the PoDoFo error to podofo_set_exception; the NULL return
        // below signals the failure to the interpreter.
        podofo_set_exception(err);
    }
    return NULL;
}
static PyObject *
podofo_PDFDoc_delete_page(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) {
    // Python method delete_page(page_num): removes exactly one page, starting
    // at the given index, from the wrapped document. Returns None on success.
    int page_index = 0;

    // A single integer argument is required; on a parse failure
    // PyArg_ParseTuple has already set the Python exception.
    if (!PyArg_ParseTuple(args, "i", &page_index)) return NULL;

    try {
        self->doc->DeletePages(page_index, 1);
    } catch (const PdfError &err) {
        podofo_set_exception(err);
        return NULL;
    }

    Py_RETURN_NONE;
}
static PyObject *
podofo_convert_pdfstring(const PdfString &s) {
std::string raw = s.GetStringUtf8();
@ -321,6 +348,13 @@ static PyMethodDef podofo_PDFDoc_methods[] = {
{"extract_first_page", (PyCFunction)podofo_PDFDoc_extract_first_page, METH_VARARGS,
"extract_first_page() -> Remove all but the first page."
},
{"page_count", (PyCFunction)podofo_PDFDoc_page_count, METH_VARARGS,
"page_count() -> Number of pages in the PDF."
},
{"delete_page", (PyCFunction)podofo_PDFDoc_delete_page, METH_VARARGS,
"delete_page(page_num) -> Delete the specified page from the pdf (0 is the first page)."
},
{NULL} /* Sentinel */
};