Avoid the extra malloc+copies entailed by BytesIO in the non websocket part of the server as well

Needed a replacement for the zlib module, as the Python 2 version does
not support memoryview objects.
This commit is contained in:
Kovid Goyal 2015-10-28 18:24:01 +05:30
parent e56703ebc2
commit 83151cc1f0
4 changed files with 450 additions and 13 deletions

View File

@ -15,7 +15,7 @@ from setup.build_environment import (
msvc, win_inc, win_lib, magick_inc_dirs, magick_lib_dirs, magick_libs, msvc, win_inc, win_lib, magick_inc_dirs, magick_lib_dirs, magick_libs,
chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs, icu_lib_dirs, ft_libs, chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs, icu_lib_dirs, ft_libs,
ft_lib_dirs, ft_inc_dirs, cpu_count, is64bit, glib_flags, fontconfig_flags, ft_lib_dirs, ft_inc_dirs, cpu_count, is64bit, glib_flags, fontconfig_flags,
openssl_inc_dirs, openssl_lib_dirs) openssl_inc_dirs, openssl_lib_dirs, zlib_inc_dirs, zlib_lib_dirs, zlib_libs)
from setup.parallel_build import create_job, parallel_build from setup.parallel_build import create_job, parallel_build
isunix = islinux or isosx or isbsd isunix = islinux or isosx or isbsd
@ -102,6 +102,12 @@ extensions = [
libraries=[] if iswindows else ['m'] libraries=[] if iswindows else ['m']
), ),
Extension('zlib2',
['calibre/utils/zlib2.c'],
inc_dirs=zlib_inc_dirs,
libraries=zlib_libs, lib_dirs=zlib_lib_dirs
),
Extension('certgen', Extension('certgen',
['calibre/utils/certgen.c'], ['calibre/utils/certgen.c'],
libraries=['libeay32'] if iswindows else ['crypto'], libraries=['libeay32'] if iswindows else ['crypto'],

View File

@ -136,6 +136,7 @@ class Plugins(collections.Mapping):
'icu', 'icu',
'speedup', 'speedup',
'monotonic', 'monotonic',
'zlib2',
'html', 'html',
'freetype', 'freetype',
'unrar', 'unrar',

View File

@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
import os, httplib, hashlib, uuid, zlib, time, struct, repr as reprlib import os, httplib, hashlib, uuid, time, struct, repr as reprlib
from collections import namedtuple from collections import namedtuple
from io import BytesIO, DEFAULT_BUFFER_SIZE from io import BytesIO, DEFAULT_BUFFER_SIZE
from itertools import chain, repeat, izip_longest from itertools import chain, repeat, izip_longest
@ -15,7 +15,7 @@ from functools import wraps
from future_builtins import map from future_builtins import map
from calibre import guess_type, force_unicode from calibre import guess_type, force_unicode
from calibre.constants import __version__ from calibre.constants import __version__, plugins
from calibre.srv.loop import WRITE from calibre.srv.loop import WRITE
from calibre.srv.errors import HTTPSimpleResponse from calibre.srv.errors import HTTPSimpleResponse
from calibre.srv.http_request import HTTPRequest, read_headers from calibre.srv.http_request import HTTPRequest, read_headers
@ -28,10 +28,14 @@ from calibre.utils.monotonic import monotonic
Range = namedtuple('Range', 'start stop size') Range = namedtuple('Range', 'start stop size')
MULTIPART_SEPARATOR = uuid.uuid4().hex.decode('ascii') MULTIPART_SEPARATOR = uuid.uuid4().hex.decode('ascii')
COMPRESSIBLE_TYPES = {'application/json', 'application/javascript', 'application/xml', 'application/oebps-package+xml'} COMPRESSIBLE_TYPES = {'application/json', 'application/javascript', 'application/xml', 'application/oebps-package+xml'}
zlib, zlib2_err = plugins['zlib2']
if zlib2_err:
raise RuntimeError('Failed to laod the zlib2 module with error: ' + zlib2_err)
del zlib2_err
def header_list_to_file(buf): # {{{ def header_list_to_file(buf): # {{{
buf.append('') buf.append('')
return BytesIO(b''.join((x + '\r\n').encode('ascii') for x in buf)) return ReadOnlyFileBuffer(b''.join((x + '\r\n').encode('ascii') for x in buf))
# }}} # }}}
def parse_multipart_byterange(buf, content_type): # {{{ def parse_multipart_byterange(buf, content_type): # {{{
@ -166,7 +170,7 @@ def compress_readable_output(src_file, compress_level=6):
prefix_written = True prefix_written = True
data = gzip_prefix(time.time()) + data data = gzip_prefix(time.time()) + data
yield data yield data
yield zobj.flush() + struct.pack(b"<L", crc & 0xFFFFFFFF) + struct.pack(b"<L", size & 0xFFFFFFFF) yield zobj.flush() + struct.pack(b"<L", crc) + struct.pack(b"<L", size)
# }}} # }}}
def get_range_parts(ranges, content_type, content_length): # {{{ def get_range_parts(ranges, content_type, content_length): # {{{
@ -290,7 +294,7 @@ def dynamic_output(output, outheaders):
ct = outheaders.get('Content-Type') ct = outheaders.get('Content-Type')
if not ct: if not ct:
outheaders.set('Content-Type', 'text/plain; charset=UTF-8', replace_all=True) outheaders.set('Content-Type', 'text/plain; charset=UTF-8', replace_all=True)
ans = ReadableOutput(BytesIO(data)) ans = ReadableOutput(ReadOnlyFileBuffer(data))
ans.accept_ranges = False ans.accept_ranges = False
return ans return ans
@ -376,7 +380,7 @@ class HTTPConnection(HTTPRequest):
buf = [(x + '\r\n').encode('ascii') for x in buf] buf = [(x + '\r\n').encode('ascii') for x in buf]
if self.method != 'HEAD': if self.method != 'HEAD':
buf.append(msg) buf.append(msg)
self.response_ready(BytesIO(b''.join(buf))) self.response_ready(ReadOnlyFileBuffer(b''.join(buf)))
def prepare_response(self, inheaders, request_body_file): def prepare_response(self, inheaders, request_body_file):
if self.method == 'TRACE': if self.method == 'TRACE':
@ -463,7 +467,7 @@ class HTTPConnection(HTTPRequest):
x = x.decode('ascii') x = x.decode('ascii')
buf.append(x) buf.append(x)
buf.append('') buf.append('')
self.response_ready(BytesIO(b''.join((x + '\r\n').encode('ascii') for x in buf)), output=output) self.response_ready(ReadOnlyFileBuffer(b''.join((x + '\r\n').encode('ascii') for x in buf)), output=output)
def response_ready(self, header_file, output=None): def response_ready(self, header_file, output=None):
self.response_started = True self.response_started = True
@ -503,10 +507,10 @@ class HTTPConnection(HTTPRequest):
r, range_part = next(ranges) r, range_part = next(ranges)
if r is None: if r is None:
# EOF range part # EOF range part
self.set_state(WRITE, self.write_buf, BytesIO(b'\r\n' + range_part)) self.set_state(WRITE, self.write_buf, ReadOnlyFileBuffer(b'\r\n' + range_part))
else: else:
buf.seek(r.start) buf.seek(r.start)
self.set_state(WRITE, self.write_range_part, BytesIO((b'' if first else b'\r\n') + range_part + b'\r\n'), buf, r.stop + 1, ranges) self.set_state(WRITE, self.write_range_part, ReadOnlyFileBuffer((b'' if first else b'\r\n') + range_part + b'\r\n'), buf, r.stop + 1, ranges)
def write_range_part(self, part_buf, buf, end, ranges, event): def write_range_part(self, part_buf, buf, end, ranges, event):
if self.write(part_buf): if self.write(part_buf):
@ -519,13 +523,13 @@ class HTTPConnection(HTTPRequest):
def write_iter(self, output, event): def write_iter(self, output, event):
chunk = next(output) chunk = next(output)
if chunk is None: if chunk is None:
self.set_state(WRITE, self.write_chunk, BytesIO(b'0\r\n\r\n'), output, last=True) self.set_state(WRITE, self.write_chunk, ReadOnlyFileBuffer(b'0\r\n\r\n'), output, last=True)
else: else:
if chunk: if chunk:
if not isinstance(chunk, bytes): if not isinstance(chunk, bytes):
chunk = chunk.encode('utf-8') chunk = chunk.encode('utf-8')
chunk = ('%X\r\n' % len(chunk)).encode('ascii') + chunk + b'\r\n' chunk = ('%X\r\n' % len(chunk)).encode('ascii') + chunk + b'\r\n'
self.set_state(WRITE, self.write_chunk, BytesIO(chunk), output) self.set_state(WRITE, self.write_chunk, ReadOnlyFileBuffer(chunk), output)
else: else:
# Empty chunk, ignore it # Empty chunk, ignore it
self.write_iter(output, event) self.write_iter(output, event)
@ -562,7 +566,7 @@ class HTTPConnection(HTTPRequest):
elif hasattr(output, 'read'): elif hasattr(output, 'read'):
output = ReadableOutput(output) output = ReadableOutput(output)
elif isinstance(output, StaticOutput): elif isinstance(output, StaticOutput):
output = ReadableOutput(BytesIO(output.data), etag=output.etag, content_length=output.content_length) output = ReadableOutput(ReadOnlyFileBuffer(output.data), etag=output.etag, content_length=output.content_length)
else: else:
output = GeneratedOutput(output) output = GeneratedOutput(output)
ct = outheaders.get('Content-Type', '').partition(';')[0] ct = outheaders.get('Content-Type', '').partition(';')[0]

426
src/calibre/utils/zlib2.c Normal file
View File

@ -0,0 +1,426 @@
/*
* zlib2.c
* Copyright (C) 2015 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#define UNICODE
#include <Python.h>
#include <zlib.h>
#define DEF_BUF_SIZE (16*1024)
/* The following parameters are copied from zutil.h, version 0.95 */
#define DEFLATED 8
#if MAX_MEM_LEVEL >= 8
# define DEF_MEM_LEVEL 8
#else
# define DEF_MEM_LEVEL MAX_MEM_LEVEL
#endif
static PyTypeObject Comptype;
static PyObject *ZlibError = NULL;
/* Instance layout of a zlib2.Compress object: the Python object header
 * followed by the zlib stream and a few bookkeeping fields. */
typedef struct
{
PyObject_HEAD
/* zlib stream state; set up by deflateInit2() in PyZlib_compressobj() */
z_stream zst;
/* created as an empty bytes object in newcompobject(), released in Dealloc() */
PyObject *unused_data;
/* created as an empty bytes object in newcompobject(), released in Dealloc() */
PyObject *unconsumed_tail;
/* set to 0 in newcompobject() */
char eof;
/* non-zero while zst holds a live deflate stream that still needs
 * deflateEnd(); checked by Comp_dealloc() */
int is_initialised;
/* set to NULL in newcompobject(), released (if non-NULL) in Dealloc() */
PyObject *zdict;
} compobject;
/*
 * Raise ZlibError describing a failed zlib call.
 *
 * zst: the stream the failing call operated on (its msg field is consulted)
 * err: the zlib return code
 * msg: short description of what was being attempted
 *
 * The exception text is "Error <err> <msg>", followed by ": <detail>" when a
 * detail string can be determined.
 */
static void
zlib_error(z_stream zst, int err, char *msg)
{
    const char *detail;

    /* On a version mismatch zst.msg was never initialised, so it must not
       be read; use a fixed description instead. */
    if (err == Z_VERSION_ERROR) {
        detail = "library version mismatch";
    } else {
        detail = zst.msg;
    }

    /* zlib gave no message of its own: fall back to a generic one for the
       error codes we know about. */
    if (detail == Z_NULL) {
        if (err == Z_BUF_ERROR) {
            detail = "incomplete or truncated stream";
        } else if (err == Z_STREAM_ERROR) {
            detail = "inconsistent stream state";
        } else if (err == Z_DATA_ERROR) {
            detail = "invalid input data";
        }
    }

    if (detail != Z_NULL) {
        PyErr_Format(ZlibError, "Error %d %s: %.200s", err, msg, detail);
        return;
    }
    PyErr_Format(ZlibError, "Error %d %s", err, msg);
}
/*
 * Allocate a compobject of the given type with all bookkeeping fields in a
 * known state. The embedded z_stream is NOT initialised here; the caller is
 * responsible for that.
 *
 * Returns a new reference, or NULL with a Python exception set.
 */
static compobject *
newcompobject(PyTypeObject *type)
{
    compobject *self = PyObject_New(compobject, type);
    if (self == NULL)
        return NULL;

    /* PyObject_New() does not zero the memory past the object header. Every
       pointer that Dealloc() will Py_XDECREF must therefore be NULLed
       *before* any step that can fail and trigger Py_DECREF(self),
       otherwise the deallocator would decref a garbage pointer. */
    self->eof = 0;
    self->is_initialised = 0;
    self->zdict = NULL;
    self->unused_data = NULL;
    self->unconsumed_tail = NULL;

    self->unused_data = PyBytes_FromStringAndSize("", 0);
    if (self->unused_data == NULL) {
        Py_DECREF(self);
        return NULL;
    }
    self->unconsumed_tail = PyBytes_FromStringAndSize("", 0);
    if (self->unconsumed_tail == NULL) {
        Py_DECREF(self);
        return NULL;
    }
    return self;
}
/*
 * compressobj([level[, method[, wbits[, memLevel[, strategy]]]]])
 *
 * Create a new Compress object wrapping a deflate stream configured with the
 * given (all optional, integer) parameters.
 *
 * Returns a new reference, or NULL with an exception set:
 *   MemoryError  - zlib could not allocate its state
 *   ValueError   - zlib rejected one of the parameters
 *   zlib2.error  - any other zlib failure
 */
static PyObject *
PyZlib_compressobj(PyObject *selfptr, PyObject *args)
{
    compobject *self = NULL;
    int level=Z_DEFAULT_COMPRESSION, method=DEFLATED;
    int wbits=MAX_WBITS, memLevel=DEF_MEM_LEVEL, strategy=Z_DEFAULT_STRATEGY, err;

    if (!PyArg_ParseTuple(args, "|iiiii:compressobj", &level, &method, &wbits,
                          &memLevel, &strategy))
        return NULL;

    self = newcompobject(&Comptype);
    if (self == NULL) return NULL;

    /* zlib requires zalloc, zfree and opaque to be initialised by the caller
       before deflateInit2(); the object memory is not zeroed for us. */
    self->zst.zalloc = (alloc_func)Z_NULL;
    self->zst.zfree = (free_func)Z_NULL;
    self->zst.opaque = (voidpf)Z_NULL;
    self->zst.next_in = Z_NULL;
    self->zst.avail_in = 0;

    err = deflateInit2(&self->zst, level, method, wbits, memLevel, strategy);
    if (err == Z_OK) {
        /* From here on the deallocator must call deflateEnd(). */
        self->is_initialised = 1;
        return (PyObject *)self;
    }

    /* Initialisation failed: report the reason, then discard the object.
       zlib_error() reads self->zst, so it must run before the DECREF. */
    switch (err) {
        case Z_MEM_ERROR:
            PyErr_SetString(PyExc_MemoryError,
                            "Can't allocate memory for compression object");
            break;
        case Z_STREAM_ERROR:
            PyErr_SetString(PyExc_ValueError, "Invalid initialization option");
            break;
        default:
            zlib_error(self->zst, err, "while creating compression object");
            break;
    }
    Py_DECREF(self);
    return NULL;
}
/*
 * Release the Python objects owned by a compobject and free the object
 * itself. Does not touch the zlib stream.
 */
static void
Dealloc(compobject *self)
{
    /* Any of these may be NULL, hence the X variants. */
    Py_XDECREF(self->zdict);
    Py_XDECREF(self->unconsumed_tail);
    Py_XDECREF(self->unused_data);

    PyObject_Del(self);
}
/*
 * tp_dealloc for Compress objects: shut down the deflate stream if it is
 * still live, then free the object via Dealloc().
 */
static void
Comp_dealloc(compobject *self)
{
    if (self->is_initialised != 0) {
        deflateEnd(&self->zst);
    }
    Dealloc(self);
}
/*
 * Compress.compress(data)
 *
 * Feed `data` (any object exporting a simple buffer) to the deflate stream
 * and return whatever compressed output zlib produces for it as a bytes
 * object. Output may be empty; remaining data is emitted by flush().
 *
 * Returns a new reference, or NULL with an exception set:
 *   OverflowError - the input is larger than zlib's unsigned int length
 *   MemoryError   - the output buffer could not be grown
 *   zlib2.error   - deflate() reported an error
 */
static PyObject *
Compress_compress(compobject *self, PyObject *data_obj)
{
    int err = 0;
    /* Must be Py_ssize_t: storing indata.len in an int silently truncates
       buffers of 2 GiB or more and defeats the UINT_MAX check below. */
    Py_ssize_t len = 0;
    unsigned int inplen = 0;
    unsigned int length = DEF_BUF_SIZE, new_length = 0;
    PyObject *RetVal = NULL;
    Py_buffer indata = {0};
    Byte *input = NULL;
    unsigned long start_total_out = 0;

    if (PyObject_GetBuffer(data_obj, &indata, PyBUF_SIMPLE) != 0) return NULL;
    input = indata.buf; len = indata.len;
    if ((size_t)len > UINT_MAX) {
        PyErr_SetString(PyExc_OverflowError, "Size does not fit in an unsigned int");
        goto done;
    }
    inplen = (unsigned int)len;

    if (!(RetVal = PyBytes_FromStringAndSize(NULL, length))) goto done;

    /* Remember where total_out stood so the final size can be computed as a
       difference once all output has been collected. */
    start_total_out = self->zst.total_out;
    self->zst.avail_in = inplen;
    self->zst.next_in = input;
    self->zst.avail_out = length;
    self->zst.next_out = (unsigned char *)PyBytes_AS_STRING(RetVal);

    Py_BEGIN_ALLOW_THREADS
    err = deflate(&(self->zst), Z_NO_FLUSH);
    Py_END_ALLOW_THREADS

    /* while Z_OK and the output buffer is full, there might be more output,
       so extend the output buffer and try again */
    while (err == Z_OK && self->zst.avail_out == 0) {
        if (length == UINT_MAX) {
            /* Cannot grow any further; bail out rather than drop input. */
            PyErr_NoMemory();
            Py_CLEAR(RetVal);
            goto done;
        }
        if (length <= (UINT_MAX >> 1))
            new_length = length << 1;
        else
            new_length = UINT_MAX;
        if (_PyBytes_Resize(&RetVal, new_length) < 0) {
            Py_CLEAR(RetVal);
            goto done;
        }
        self->zst.next_out =
            (unsigned char *)PyBytes_AS_STRING(RetVal) + length;
        /* Only the newly added space is available. This equals `length`
           when the buffer doubled, but is smaller when capped at UINT_MAX. */
        self->zst.avail_out = new_length - length;
        length = new_length;

        Py_BEGIN_ALLOW_THREADS
        err = deflate(&(self->zst), Z_NO_FLUSH);
        Py_END_ALLOW_THREADS
    }
    /* We will only get Z_BUF_ERROR if the output buffer was full but
       there wasn't more output when we tried again, so it is not an error
       condition.
    */
    if (err != Z_OK && err != Z_BUF_ERROR) {
        zlib_error(self->zst, err, "while compressing data");
        Py_CLEAR(RetVal);
        goto done;
    }
    /* Trim the result to the number of bytes actually produced. */
    if (_PyBytes_Resize(&RetVal, self->zst.total_out - start_total_out) < 0) {
        Py_CLEAR(RetVal);
    }

done:
    if (indata.obj) PyBuffer_Release(&indata);
    return RetVal;
}
/*
 * Compress.flush([mode])
 *
 * Flush pending output from the deflate stream using the given zlib flush
 * mode (default Z_FINISH) and return it as a bytes object. When a Z_FINISH
 * flush completes, the stream is ended and the object is marked as no longer
 * initialised.
 *
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *
Compress_flush(compobject *self, PyObject *args)
{
    int status = 0, mode = Z_FINISH;
    unsigned int capacity = DEF_BUF_SIZE, grown = 0;
    PyObject *result = NULL;
    unsigned long total_before = 0;

    if (!PyArg_ParseTuple(args, "|i:flush", &mode))
        return NULL;

    /* A Z_NO_FLUSH "flush" does nothing, so skip all the work and hand back
       an empty string. */
    if (mode == Z_NO_FLUSH)
        return PyBytes_FromStringAndSize(NULL, 0);

    result = PyBytes_FromStringAndSize(NULL, capacity);
    if (result == NULL)
        return NULL;

    total_before = self->zst.total_out;
    self->zst.avail_in = 0;
    self->zst.avail_out = capacity;
    self->zst.next_out = (unsigned char *)PyBytes_AS_STRING(result);

    Py_BEGIN_ALLOW_THREADS
    status = deflate(&(self->zst), mode);
    Py_END_ALLOW_THREADS

    /* As long as zlib reports Z_OK with a completely filled buffer there may
       be more to come: enlarge the buffer and call deflate() again. */
    while (status == Z_OK && self->zst.avail_out == 0) {
        grown = (capacity <= (UINT_MAX >> 1)) ? (capacity << 1) : UINT_MAX;
        if (_PyBytes_Resize(&result, grown) < 0) {
            Py_CLEAR(result);
            goto finished;
        }
        self->zst.next_out =
            (unsigned char *)PyBytes_AS_STRING(result) + capacity;
        self->zst.avail_out = capacity;
        capacity = grown;

        Py_BEGIN_ALLOW_THREADS
        status = deflate(&(self->zst), mode);
        Py_END_ALLOW_THREADS
    }

    if (status == Z_STREAM_END && mode == Z_FINISH) {
        /* The stream is complete: release zlib's internal structures.
           Z_STREAM_END should only occur with Z_FINISH, but both are
           checked to be safe. */
        status = deflateEnd(&(self->zst));
        if (status != Z_OK) {
            zlib_error(self->zst, status, "while finishing compression");
            Py_CLEAR(result);
            goto finished;
        }
        self->is_initialised = 0;
    } else if (status != Z_OK && status != Z_BUF_ERROR) {
        /* Z_BUF_ERROR only means the buffer was full and the retry produced
           nothing more, so it is not treated as a failure. */
        zlib_error(self->zst, status, "while flushing");
        Py_CLEAR(result);
        goto finished;
    }

    /* Shrink the result to the bytes actually written. */
    if (_PyBytes_Resize(&result, self->zst.total_out - total_before) < 0)
        Py_CLEAR(result);

finished:
    return result;
}
/* Method table for Compress objects, installed through Comptype.tp_methods.
 * compress takes exactly one argument (METH_O); flush parses an optional
 * integer mode from its argument tuple (METH_VARARGS). */
static PyMethodDef comp_methods[] =
{
{"compress", (PyCFunction)Compress_compress, METH_O, "compress(data) -- returns compressed data, dont forget to call flush when done."},
{"flush", (PyCFunction)Compress_flush, METH_VARARGS, "flush([mode]) -- returns any remaining data"},
{NULL}
};
/* Type object for zlib2.Compress. Apart from the name and instance size,
 * only tp_dealloc, tp_flags and tp_methods are given non-zero values here;
 * initzlib2() assigns tp_new before calling PyType_Ready(). */
static PyTypeObject Comptype = {
PyVarObject_HEAD_INIT(0, 0)
"zlib2.Compress",
sizeof(compobject),
0,
(destructor)Comp_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_reserved*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash*/
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
0, /*tp_doc*/
0, /*tp_traverse*/
0, /*tp_clear*/
0, /*tp_richcompare*/
0, /*tp_weaklistoffset*/
0, /*tp_iter*/
0, /*tp_iternext*/
comp_methods, /*tp_methods*/
};
/*
 * crc32(data[, state=0])
 *
 * Compute the CRC-32 of `data` (any object exporting a simple buffer),
 * continuing from the running checksum `state`.
 *
 * Returns a Python integer in the range [0, 2**32), or NULL with an
 * exception set if the arguments are invalid or `data` has no buffer.
 */
static PyObject *
zlib_crc32(PyObject *self, PyObject *args)
{
    unsigned int value = 0;
    /* Must be Py_ssize_t: an int would truncate the length of buffers of
       2 GiB or more (possibly to a negative number) and the chunking loop
       below could never trigger. */
    Py_ssize_t len = 0;
    unsigned char *buf = NULL;
    Py_buffer indata = {0};
    PyObject* obj = NULL;

    if(!PyArg_ParseTuple(args, "O|I:crc32", &obj, &value)) return NULL;
    if (PyObject_GetBuffer(obj, &indata, PyBUF_SIMPLE) != 0) return NULL;
    buf = indata.buf; len = indata.len;

    /* Releasing the GIL for very small buffers is inefficient
       and may lower performance */
    if (len > 1024*5) {
        Py_BEGIN_ALLOW_THREADS
        /* Avoid truncation of length for very large buffers. crc32() takes
           length as an unsigned int, which may be narrower than Py_ssize_t. */
        while ((size_t)len > UINT_MAX) {
            value = (unsigned int)crc32(value, buf, UINT_MAX);
            buf += (size_t) UINT_MAX;
            len -= (size_t) UINT_MAX;
        }
        value = (unsigned int)crc32(value, buf, (unsigned int)len);
        Py_END_ALLOW_THREADS
    } else {
        value = (unsigned int)crc32(value, buf, (unsigned int)len);
    }

    if (indata.obj) PyBuffer_Release(&indata);
    /* The checksum is kept unsigned throughout; mask to 32 bits in case
       unsigned int is wider on this platform. */
    return PyLong_FromUnsignedLong(value & 0xffffffffU);
}
/* Module-level function table, passed to Py_InitModule3() in initzlib2(). */
static PyMethodDef methods[] = {
{"crc32", zlib_crc32, METH_VARARGS,
"crc32(data, [, state=0)\n\nCalculate crc32 for the given data starting from the given state."
},
{"compressobj", (PyCFunction)PyZlib_compressobj, METH_VARARGS, "Create compression object"},
{NULL, NULL, 0, NULL}
};
/*
 * Module initialisation for zlib2: finalises the Compress type, creates the
 * module, and publishes the zlib constants, the compile-time and run-time
 * zlib version strings and the `error` exception on it.
 */
PyMODINIT_FUNC
initzlib2(void) {
    PyObject *module = NULL;
    PyObject *version = NULL;

    /* The static type has no tp_new of its own; give it the generic one
       before it is readied. */
    Comptype.tp_new = PyType_GenericNew;
    if (PyType_Ready(&Comptype) < 0) {
        return;
    }

    module = Py_InitModule3("zlib2", methods,
    "Implementation of zlib compression with support for the buffer protocol, which is missing in Python2. Code taken from the Python3 zlib module"
    );
    if (module == NULL) {
        return;
    }

    /* Stream parameters and defaults */
    PyModule_AddIntMacro(module, MAX_WBITS);
    PyModule_AddIntMacro(module, DEFLATED);
    PyModule_AddIntMacro(module, DEF_MEM_LEVEL);
    PyModule_AddIntMacro(module, DEF_BUF_SIZE);
    /* Compression levels */
    PyModule_AddIntMacro(module, Z_BEST_SPEED);
    PyModule_AddIntMacro(module, Z_BEST_COMPRESSION);
    PyModule_AddIntMacro(module, Z_DEFAULT_COMPRESSION);
    /* Strategies */
    PyModule_AddIntMacro(module, Z_FILTERED);
    PyModule_AddIntMacro(module, Z_HUFFMAN_ONLY);
    PyModule_AddIntMacro(module, Z_DEFAULT_STRATEGY);
    /* Flush modes */
    PyModule_AddIntMacro(module, Z_FINISH);
    PyModule_AddIntMacro(module, Z_NO_FLUSH);
    PyModule_AddIntMacro(module, Z_SYNC_FLUSH);
    PyModule_AddIntMacro(module, Z_FULL_FLUSH);

    /* Version of the zlib headers this module was compiled against */
    version = PyUnicode_FromString(ZLIB_VERSION);
    if (version != NULL) {
        PyModule_AddObject(module, "ZLIB_VERSION", version);
    }

    /* Version of the zlib library actually loaded at run time */
    version = PyUnicode_FromString(zlibVersion());
    if (version != NULL) {
        PyModule_AddObject(module, "ZLIB_RUNTIME_VERSION", version);
    }

    ZlibError = PyErr_NewException("zlib2.error", NULL, NULL);
    if (ZlibError != NULL) {
        /* PyModule_AddObject() takes over one reference; the extra INCREF
           keeps one for the ZlibError global. */
        Py_INCREF(ZlibError);
        PyModule_AddObject(module, "error", ZlibError);
    }
}