podofo: Implement writing to python file objects

This commit is contained in:
Kovid Goyal 2012-09-02 14:59:20 +05:30
parent b3fd7d3e01
commit 5736706846
6 changed files with 200 additions and 11 deletions

View File

@ -139,6 +139,7 @@ extensions = [
Extension('podofo', Extension('podofo',
[ [
'calibre/utils/podofo/utils.cpp', 'calibre/utils/podofo/utils.cpp',
'calibre/utils/podofo/output.cpp',
'calibre/utils/podofo/doc.cpp', 'calibre/utils/podofo/doc.cpp',
'calibre/utils/podofo/outline.cpp', 'calibre/utils/podofo/outline.cpp',
'calibre/utils/podofo/podofo.cpp', 'calibre/utils/podofo/podofo.cpp',

View File

@ -15,12 +15,11 @@ from PyQt4.Qt import (QEventLoop, QObject, QPrinter, QSizeF, Qt, QPainter,
QPixmap, QTimer, pyqtProperty, QString, QSize) QPixmap, QTimer, pyqtProperty, QString, QSize)
from PyQt4.QtWebKit import QWebView, QWebPage, QWebSettings from PyQt4.QtWebKit import QWebView, QWebPage, QWebSettings
from calibre.constants import filesystem_encoding
from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.ebooks.pdf.pageoptions import (unit, paper_size, orientation) from calibre.ebooks.pdf.pageoptions import (unit, paper_size, orientation)
from calibre.ebooks.pdf.outline_writer import Outline from calibre.ebooks.pdf.outline_writer import Outline
from calibre.ebooks.metadata import authors_to_string from calibre.ebooks.metadata import authors_to_string
from calibre.ptempfile import PersistentTemporaryFile, TemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre import (__appname__, __version__, fit_image, isosx, force_unicode) from calibre import (__appname__, __version__, fit_image, isosx, force_unicode)
from calibre.ebooks.oeb.display.webview import load_html from calibre.ebooks.oeb.display.webview import load_html
@ -352,12 +351,7 @@ class PDFWriter(QObject): # {{{
if self.metadata.tags: if self.metadata.tags:
self.doc.keywords = self.metadata.tags self.doc.keywords = self.metadata.tags
self.outline(self.doc) self.outline(self.doc)
with TemporaryFile(u'pdf_out.pdf') as tf: self.doc.save_to_fileobj(self.out_stream)
if isinstance(tf, unicode):
tf = tf.encode(filesystem_encoding)
self.doc.save(tf)
with open(tf, 'rb') as src:
shutil.copyfileobj(src, self.out_stream)
self.render_succeeded = True self.render_succeeded = True
finally: finally:
self._delete_tmpdir() self._delete_tmpdir()

View File

@ -94,9 +94,8 @@ def delete_all_but(path, pages):
if page not in pages: if page not in pages:
p.delete_page(page) p.delete_page(page)
raw = p.write()
with open(path, 'wb') as f: with open(path, 'wb') as f:
f.write(raw) f.save_to_fileobj(path)
def test_outline(src): def test_outline(src):
podofo = get_podofo() podofo = get_podofo()
@ -114,7 +113,17 @@ def test_outline(src):
f.write(raw) f.write(raw)
print 'Outlined PDF:', out print 'Outlined PDF:', out
def test_save_to(src, dest):
    """Load the PDF at ``src`` and write it back out to ``dest``.

    Exercises ``PDFDoc.save_to_fileobj`` by streaming the document into an
    ordinary file object opened in binary mode, then reports how many bytes
    ended up in the output file.

    :param src: Path of the PDF file to read.
    :param dest: Path of the PDF file to create/overwrite.
    """
    podofo = get_podofo()
    p = podofo.PDFDoc()
    with open(src, 'rb') as f:
        raw = f.read()
    p.load(raw)
    with open(dest, 'wb') as out:
        p.save_to_fileobj(out)
        # Format into a single string: with the print statement,
        # ``print (a, b)`` would output the repr of a tuple rather than
        # the message. tell() must be called before the file is closed.
        print('Wrote PDF of size: %d' % out.tell())
if __name__ == '__main__': if __name__ == '__main__':
import sys import sys
test_outline(sys.argv[-1]) test_save_to(sys.argv[-2], sys.argv[-1])

View File

@ -104,6 +104,15 @@ PDFDoc_write(PDFDoc *self, PyObject *args) {
return ans; return ans;
} }
// Python method PDFDoc.save_to_fileobj(f): hands the wrapped document and
// the single positional argument to write_doc() and returns its result.
// Returns NULL (with the exception set by PyArg_ParseTuple) when the
// arguments cannot be parsed.
static PyObject *
PDFDoc_save_to_fileobj(PDFDoc *self, PyObject *args) {
    PyObject *fileobj = NULL;

    if (PyArg_ParseTuple(args, "O", &fileobj)) {
        return write_doc(self->doc, fileobj);
    }
    return NULL;
}
// }}} // }}}
// extract_first_page() {{{ // extract_first_page() {{{
@ -453,6 +462,9 @@ static PyMethodDef PDFDoc_methods[] = {
{"write", (PyCFunction)PDFDoc_write, METH_VARARGS, {"write", (PyCFunction)PDFDoc_write, METH_VARARGS,
"Return the PDF document as a bytestring." "Return the PDF document as a bytestring."
}, },
{"save_to_fileobj", (PyCFunction)PDFDoc_save_to_fileobj, METH_VARARGS,
"Write the PDF document to the specified file-like object."
},
{"extract_first_page", (PyCFunction)PDFDoc_extract_first_page, METH_VARARGS, {"extract_first_page", (PyCFunction)PDFDoc_extract_first_page, METH_VARARGS,
"extract_first_page() -> Remove all but the first page." "extract_first_page() -> Remove all but the first page."
}, },

View File

@ -41,6 +41,7 @@ extern void podofo_set_exception(const PdfError &err);
extern PyObject * podofo_convert_pdfstring(const PdfString &s); extern PyObject * podofo_convert_pdfstring(const PdfString &s);
extern PdfString * podofo_convert_pystring(PyObject *py); extern PdfString * podofo_convert_pystring(PyObject *py);
extern PdfString * podofo_convert_pystring_single_byte(PyObject *py); extern PdfString * podofo_convert_pystring_single_byte(PyObject *py);
extern PyObject* write_doc(PdfMemDocument *doc, PyObject *f);
} }

View File

@ -0,0 +1,172 @@
/*
* output.cpp
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#include "global.h"
using namespace PoDoFo;
// Marker exception carrying no data of its own. It is thrown in this file
// right after a Python error has been set (or found already set), so that
// the failure can unwind through the C++ call stack and be turned back
// into a NULL return by the catching code.
struct pyerr : std::exception {
};
class OutputDevice : public PdfOutputDevice {
private:
PyObject *file;
size_t written;
void update_written() {
size_t pos;
pos = Tell();
if (pos > written) written = pos;
}
public:
OutputDevice(PyObject *f) : file(f), written(0) { Py_XINCREF(file); }
~OutputDevice() { Py_XDECREF(file); file = NULL; }
size_t GetLength() const { return written; }
long PrintVLen(const char* pszFormat, va_list args) {
char buf[10];
int res;
if( !pszFormat ) { PODOFO_RAISE_ERROR( ePdfError_InvalidHandle ); }
res = PyOS_vsnprintf(buf, 1, pszFormat, args);
if (res < 0) {
PyErr_SetString(PyExc_Exception, "Something bad happend while calling PyOS_vsnprintf");
throw pyerr();
}
return static_cast<long>(res+1);
}
void PrintV( const char* pszFormat, long lBytes, va_list args ) {
char *buf;
int res;
if( !pszFormat ) { PODOFO_RAISE_ERROR( ePdfError_InvalidHandle ); }
buf = new (std::nothrow) char[lBytes+1];
if (buf == NULL) { PyErr_NoMemory(); throw pyerr(); }
res = PyOS_vsnprintf(buf, lBytes, pszFormat, args);
if (res < 0) {
PyErr_SetString(PyExc_Exception, "Something bad happend while calling PyOS_vsnprintf");
delete[] buf;
throw pyerr();
}
Write(buf, static_cast<size_t>(res));
delete[] buf;
}
void Print( const char* pszFormat, ... )
{
va_list args;
long lBytes;
va_start( args, pszFormat );
lBytes = PrintVLen(pszFormat, args);
va_end( args );
va_start( args, pszFormat );
PrintV(pszFormat, lBytes, args);
va_end( args );
}
size_t Read( char* pBuffer, size_t lLen ) {
PyObject *ret;
char *buf = NULL;
Py_ssize_t len = 0;
ret = PyObject_CallMethod(file, (char*)"read", (char*)"n", static_cast<Py_ssize_t>(lLen));
if (ret != NULL) {
if (PyBytes_AsStringAndSize(ret, &buf, &len) != -1) {
memcpy(pBuffer, buf, len);
Py_DECREF(ret);
return static_cast<size_t>(len);
}
Py_DECREF(ret);
}
if (PyErr_Occurred() == NULL)
PyErr_SetString(PyExc_Exception, "Failed to read data from python file object");
throw pyerr();
}
void Seek(size_t offset) {
PyObject *ret;
ret = PyObject_CallMethod(file, (char*)"seek", (char*)"n", static_cast<Py_ssize_t>(offset));
if (ret == NULL) {
if (PyErr_Occurred() == NULL)
PyErr_SetString(PyExc_Exception, "Failed to seek in python file object");
throw pyerr();
}
Py_DECREF(ret);
}
size_t Tell() const {
PyObject *ret;
unsigned long ans;
ret = PyObject_CallMethod(file, (char*)"tell", NULL);
if (ret == NULL) {
if (PyErr_Occurred() == NULL)
PyErr_SetString(PyExc_Exception, "Failed to call tell() on python file object");
throw pyerr();
}
if (!PyNumber_Check(ret)) {
Py_DECREF(ret);
PyErr_SetString(PyExc_Exception, "tell() method did not return a number");
throw pyerr();
}
ans = PyInt_AsUnsignedLongMask(ret);
Py_DECREF(ret);
if (PyErr_Occurred() != NULL) throw pyerr();
return static_cast<size_t>(ans);
}
void Write(const char* pBuffer, size_t lLen) {
PyObject *ret;
ret = PyObject_CallMethod(file, (char*)"write", (char*)"s#", pBuffer, (int)lLen);
if (ret == NULL) {
if (PyErr_Occurred() == NULL)
PyErr_SetString(PyExc_Exception, "Failed to call write() on python file object");
throw pyerr();
}
Py_DECREF(ret);
update_written();
}
void Flush() {
Py_XDECREF(PyObject_CallMethod(file, (char*)"flush", NULL));
}
};
// Serialize `doc` to the Python file-like object `f`.
//
// Returns a new reference to None on success. On failure returns NULL with
// a Python exception set: PdfError is converted with podofo_set_exception(),
// any other C++ exception keeps the Python error that is already pending or,
// if there is none, gets a generic message.
PyObject* pdf::write_doc(PdfMemDocument *doc, PyObject *f) {
    OutputDevice d(f);

    try {
        doc->Write(&d);
    } catch(const PdfError & err) {
        podofo_set_exception(err); return NULL;
    } catch (...) {
        if (PyErr_Occurred() == NULL)
            PyErr_SetString(PyExc_Exception, "An unknown error occurred while trying to write the pdf to the file object");
        return NULL;
    }

    // A callback into the file object may have left a Python exception
    // pending without throwing. Returning None with an error set would make
    // it surface at some unrelated later point, so report it now.
    if (PyErr_Occurred() != NULL) return NULL;

    Py_RETURN_NONE;
}