From 4017e381a0116bf458511dd933ee8f7b4b26c045 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 28 May 2012 20:30:29 +0530
Subject: [PATCH] Add ability to extract pages from PDF files to calibre podofo bindings

---
Review notes (this section is ignored when the patch is applied):
  * delete_all_but() used to remove the original file before saving the
    trimmed document, so a failure while saving lost the PDF. It now saves
    to a temporary file first and only then replaces the original.
  * Comments were added to the new functions; hunk sizes and the diffstat
    were adjusted accordingly. The index lines still carry the blob ids of
    the original patch.

 src/calibre/utils/podofo/__init__.py | 44 +++++++++++++++++++---------
 src/calibre/utils/podofo/podofo.cpp  | 38 ++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 14 deletions(-)

diff --git a/src/calibre/utils/podofo/__init__.py b/src/calibre/utils/podofo/__init__.py
index 78f250dd0e..948962f438 100644
--- a/src/calibre/utils/podofo/__init__.py
+++ b/src/calibre/utils/podofo/__init__.py
@@ -181,20 +181,36 @@ def set_metadata_(path, opath, title, authors, bkp, tags):
         return True
     return False
 
+def delete_all_but(path, pages):
+    ''' Delete all the pages in the pdf except for the specified ones. Negative
+    numbers are counted from the end of the PDF.
+
+    :param path: Path to the PDF file, which is modified in place
+    :param pages: Iterable of zero-based page numbers to keep. Numbers that
+                  do not refer to a page in the PDF are ignored.
+    '''
+    with TemporaryFile('_podofo_in.pdf') as of, \
+            TemporaryFile('_podofo_out.pdf') as out:
+        # podofo only ever operates on temporary copies, path itself is not
+        # touched until the result has been written out successfully
+        shutil.copyfile(path, of)
+
+        p = podofo.PDFDoc()
+        p.open(of)
+        total = p.page_count()
+        pages = { total + x if x < 0 else x for x in pages }
+        # Iterate from the last page backwards so that deleting a page does
+        # not change the numbers of the pages that are still to be checked
+        for page in xrange(total-1, -1, -1):
+            if page not in pages:
+                p.delete_page(page)
+        # Save before replacing path, so that a failure while saving cannot
+        # destroy the original file
+        p.save(out)
+        shutil.copyfile(out, path)
+
 if __name__ == '__main__':
     f = '/tmp/t.pdf'
-    import StringIO
-    stream = StringIO.StringIO(open(f).read())
-    mi = get_metadata(open(f))
-    print
-    print 'Original metadata:'
-    print mi
-    mi.title = 'Test title'
-    mi.authors = ['Test author', 'author2']
-    mi.book_producer = 'calibre'
-    set_metadata(stream, mi)
-    open('/tmp/x.pdf', 'wb').write(stream.getvalue())
-    print
-    print 'New pdf written to /tmp/x.pdf'
-
+    # Keep only the first two and the last two pages (modifies f in place)
+    delete_all_but(f, [0, 1, -2, -1])
 
diff --git a/src/calibre/utils/podofo/podofo.cpp b/src/calibre/utils/podofo/podofo.cpp
index c1f9f84f61..3764fce829 100644
--- a/src/calibre/utils/podofo/podofo.cpp
+++ b/src/calibre/utils/podofo/podofo.cpp
@@ -146,6 +146,37 @@ podofo_PDFDoc_extract_first_page(podofo_PDFDoc *self, PyObject *args, PyObject *
     Py_RETURN_NONE;
 }
 
+/* page_count() -> int: the number of pages currently in the document. */
+static PyObject *
+podofo_PDFDoc_page_count(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) {
+    int count;
+    try {
+        count = self->doc->GetPageCount();
+    } catch(const PdfError & err) {
+        podofo_set_exception(err);
+        return NULL;
+    }
+    return Py_BuildValue("i", count);
+}
+
+/* delete_page(num) -> None: remove the single page with zero-based index num.
+ * num is handed to PoDoFo without a range check; a PdfError thrown by PoDoFo
+ * is reported through podofo_set_exception() and NULL is returned. */
+static PyObject *
+podofo_PDFDoc_delete_page(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) {
+    int num = 0;
+    if (PyArg_ParseTuple(args, "i", &num)) {
+        try {
+            self->doc->DeletePages(num, 1);
+        } catch(const PdfError & err) {
+            podofo_set_exception(err);
+            return NULL;
+        }
+    } else return NULL;
+
+    Py_RETURN_NONE;
+}
+
 static PyObject *
 podofo_convert_pdfstring(const PdfString &s) {
     std::string raw = s.GetStringUtf8();
@@ -321,6 +352,13 @@ static PyMethodDef podofo_PDFDoc_methods[] = {
     {"extract_first_page", (PyCFunction)podofo_PDFDoc_extract_first_page, METH_VARARGS,
      "extract_first_page() -> Remove all but the first page."
     },
+    {"page_count", (PyCFunction)podofo_PDFDoc_page_count, METH_VARARGS,
+     "page_count() -> Number of pages in the PDF."
+    },
+    {"delete_page", (PyCFunction)podofo_PDFDoc_delete_page, METH_VARARGS,
+     "delete_page(page_num) -> Delete the specified page from the pdf (0 is the first page)."
+    },
+
     {NULL}  /* Sentinel */
 };
 