mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Python/C LZX Compressor good to go. Fixed a minor bug in LIT HTML ugly-printing.
This commit is contained in:
parent
ac9baea183
commit
052657e6af
1
setup.py
1
setup.py
@ -374,6 +374,7 @@ if __name__ == '__main__':
|
||||
ext_modules = [
|
||||
Extension('calibre.plugins.lzx',
|
||||
sources=['src/calibre/utils/lzx/lzxmodule.c',
|
||||
'src/calibre/utils/lzx/compressor.c',
|
||||
'src/calibre/utils/lzx/lzxd.c',
|
||||
'src/calibre/utils/lzx/lzc.c',
|
||||
'src/calibre/utils/lzx/lzxc.c'],
|
||||
|
27
src/calibre/ebooks/lit/lzx.py
Normal file
27
src/calibre/ebooks/lit/lzx.py
Normal file
@ -0,0 +1,27 @@
|
||||
'''
|
||||
LZX compression/decompression wrapper.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import sys
|
||||
from calibre import plugins
|
||||
_lzx, LZXError = plugins['lzx']
|
||||
|
||||
__all__ = ['Compressor', 'Decompressor', 'LZXError']
|
||||
|
||||
Compressor = _lzx.Compressor
|
||||
|
||||
class Decompressor(object):
    '''
    Object-style facade over the decompression functions of the `lzx` plugin.

    NOTE(review): the plugin functions are called without any per-instance
    handle, so decompression state presumably lives inside the plugin module
    and is shared between instances -- confirm before using more than one
    Decompressor at a time.
    '''

    def __init__(self, wbits):
        '''
        Initialise the plugin decompressor for a window of `wbits` bits and
        remember the window geometry on the instance.
        '''
        self.blocksize = 1 << wbits
        self.wbits = wbits
        _lzx.init(wbits)

    def decompress(self, data, outlen):
        '''
        Decompress `data`, passing `outlen` through to the plugin as the
        requested output length, and return the plugin's result.
        '''
        result = _lzx.decompress(data, outlen)
        return result

    def reset(self):
        '''Reset the plugin decompressor and return the plugin's result.'''
        result = _lzx.reset()
        return result
|
@ -1,173 +0,0 @@
|
||||
'''
|
||||
Higher-level LZX compression/decompression routines.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import sys
|
||||
import os
|
||||
from cStringIO import StringIO
|
||||
from ctypes import *
|
||||
from calibre import plugins
|
||||
_lzx, LzxError = plugins['lzx']
|
||||
|
||||
__all__ = ['Compressor']
|
||||
|
||||
class lzx_data(Structure):
|
||||
pass
|
||||
|
||||
lzx_get_bytes_t = CFUNCTYPE(c_int, c_voidp, c_int, c_voidp)
|
||||
lzx_put_bytes_t = CFUNCTYPE(c_int, c_voidp, c_int, c_voidp)
|
||||
lzx_mark_frame_t = CFUNCTYPE(None, c_voidp, c_uint32, c_uint32)
|
||||
lzx_at_eof_t = CFUNCTYPE(c_int, c_voidp)
|
||||
|
||||
class lzx_results(Structure):
|
||||
_fields_ = [('len_compressed_output', c_long),
|
||||
('len_uncompressed_input', c_long)]
|
||||
|
||||
# int lzx_init(struct lzx_data **lzxdp, int wsize_code,
|
||||
# lzx_get_bytes_t get_bytes, void *get_bytes_arg,
|
||||
# lzx_at_eof_t at_eof,
|
||||
# lzx_put_bytes_t put_bytes, void *put_bytes_arg,
|
||||
# lzx_mark_frame_t mark_frame, void *mark_frame_arg);
|
||||
lzx_init_t = CFUNCTYPE(
|
||||
c_int, POINTER(POINTER(lzx_data)), c_int, lzx_get_bytes_t, c_voidp,
|
||||
lzx_at_eof_t, lzx_put_bytes_t, c_voidp, lzx_mark_frame_t, c_voidp)
|
||||
lzx_init = lzx_init_t(_lzx._lzxc_init)
|
||||
|
||||
# void lzx_reset(lzx_data *lzxd);
|
||||
lzx_reset_t = CFUNCTYPE(None, POINTER(lzx_data))
|
||||
lzx_reset = lzx_reset_t(_lzx._lzxc_reset)
|
||||
|
||||
# int lzx_compress_block(lzx_data *lzxd, int block_size, int subdivide);
|
||||
lzx_compress_block_t = CFUNCTYPE(c_int, POINTER(lzx_data), c_int, c_int)
|
||||
lzx_compress_block = lzx_compress_block_t(_lzx._lzxc_compress_block)
|
||||
|
||||
# int lzx_finish(struct lzx_data *lzxd, struct lzx_results *lzxr);
|
||||
lzx_finish_t = CFUNCTYPE(c_int, POINTER(lzx_data), POINTER(lzx_results))
|
||||
lzx_finish = lzx_finish_t(_lzx._lzxc_finish)
|
||||
|
||||
|
||||
class Compressor(object):
|
||||
def __init__(self, wbits, reset=True):
|
||||
self._reset = reset
|
||||
self._blocksize = 1 << wbits
|
||||
self._buffered = 0
|
||||
self._input = StringIO()
|
||||
self._output = StringIO()
|
||||
self._flushing = False
|
||||
self._rtable = []
|
||||
self._get_bytes = lzx_get_bytes_t(self._get_bytes)
|
||||
self._at_eof = lzx_at_eof_t(self._at_eof)
|
||||
self._put_bytes = lzx_put_bytes_t(self._put_bytes)
|
||||
self._mark_frame = lzx_mark_frame_t(self._mark_frame)
|
||||
self._lzx = POINTER(lzx_data)()
|
||||
self._results = lzx_results()
|
||||
rv = lzx_init(self._lzx, wbits, self._get_bytes, c_voidp(),
|
||||
self._at_eof, self._put_bytes, c_voidp(),
|
||||
self._mark_frame, c_voidp())
|
||||
if rv != 0:
|
||||
raise LzxError("lzx_init() failed with %d" % rv)
|
||||
|
||||
def _add_input(self, data):
|
||||
self._input.seek(0, 2)
|
||||
self._input.write(data)
|
||||
self._input.seek(0)
|
||||
self._buffered += len(data)
|
||||
|
||||
def _reset_input(self):
|
||||
data = self._input.read()
|
||||
self._input.seek(0)
|
||||
self._input.truncate()
|
||||
self._input.write(data)
|
||||
self._input.seek(0)
|
||||
|
||||
def _reset_output(self):
|
||||
data = self._output.getvalue()
|
||||
self._output.seek(0)
|
||||
self._output.truncate()
|
||||
return data
|
||||
|
||||
def _reset_rtable(self):
|
||||
rtable = list(self._rtable)
|
||||
del self._rtable[:]
|
||||
return rtable
|
||||
|
||||
def _get_bytes(self, arg, n, buf):
|
||||
data = self._input.read(n)
|
||||
memmove(buf, data, len(data))
|
||||
self._buffered -= len(data)
|
||||
return len(data)
|
||||
|
||||
def _put_bytes(self, arg, n, buf):
|
||||
self._output.write(string_at(buf, n))
|
||||
return n
|
||||
|
||||
def _at_eof(self, arg):
|
||||
if self._flushing and self._buffered == 0:
|
||||
return 1
|
||||
return 0
|
||||
|
||||
def _mark_frame(self, arg, uncomp, comp):
|
||||
self._rtable.append((uncomp, comp))
|
||||
return
|
||||
|
||||
def _compress_block(self):
|
||||
rv = lzx_compress_block(self._lzx, self._blocksize, 1)
|
||||
if rv != 0:
|
||||
raise LzxError("lzx_compress_block() failed with %d" % rv)
|
||||
if self._reset:
|
||||
lzx_reset(self._lzx)
|
||||
|
||||
def compress(self, data, flush=False):
|
||||
self._add_input(data)
|
||||
self._flushing = flush
|
||||
while self._buffered >= self._blocksize:
|
||||
self._compress_block()
|
||||
if self._buffered > 0 and flush:
|
||||
self._compress_block()
|
||||
self._reset_input()
|
||||
data = self._reset_output()
|
||||
rtable = self._reset_rtable()
|
||||
return (data, rtable)
|
||||
|
||||
def flush(self):
|
||||
self._flushing = True
|
||||
if self._buffered > 0:
|
||||
self._compress_block()
|
||||
self._reset_input()
|
||||
data = self._reset_output()
|
||||
rtable = self._reset_rtable()
|
||||
return (data, rtable)
|
||||
|
||||
def close(self):
|
||||
if self._lzx:
|
||||
lzx_finish(self._lzx, self._results)
|
||||
self._lzx = None
|
||||
pass
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, *exc_info):
|
||||
self.close()
|
||||
|
||||
def __del__(self):
|
||||
self.close()
|
||||
|
||||
|
||||
def main(argv=sys.argv):
|
||||
wbits, inf, outf = argv[1:]
|
||||
with open(inf, 'rb') as f:
|
||||
data = f.read()
|
||||
with Compressor(int(wbits)) as lzx:
|
||||
data, rtable = lzx.compress(data, flush=True)
|
||||
print rtable
|
||||
with open(outf, 'wb') as f:
|
||||
f.write(data)
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
@ -783,7 +783,7 @@ class LitReader(object):
|
||||
try:
|
||||
result.append(
|
||||
lzx.decompress(content[base:size], window_bytes))
|
||||
except lzx.LzxError:
|
||||
except lzx.LZXError:
|
||||
self._warn("LZX decompression error; skipping chunk")
|
||||
bytes_remaining -= window_bytes
|
||||
base = size
|
||||
@ -793,7 +793,7 @@ class LitReader(object):
|
||||
lzx.reset()
|
||||
try:
|
||||
result.append(lzx.decompress(content[base:], bytes_remaining))
|
||||
except lzx.LzxError:
|
||||
except lzx.LZXError:
|
||||
self._warn("LZX decompression error; skipping chunk")
|
||||
bytes_remaining = 0
|
||||
if bytes_remaining > 0:
|
||||
|
@ -8,7 +8,6 @@ from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
from __future__ import with_statement
|
||||
import sys
|
||||
import os
|
||||
import locale
|
||||
|
@ -6,7 +6,6 @@ from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
from __future__ import with_statement
|
||||
import sys
|
||||
import os
|
||||
from cStringIO import StringIO
|
||||
@ -23,12 +22,12 @@ from urllib import unquote as urlunquote
|
||||
from lxml import etree
|
||||
from calibre.ebooks.lit.reader import msguid, DirectoryEntry
|
||||
import calibre.ebooks.lit.maps as maps
|
||||
from calibre.ebooks.lit.oeb import OEB_STYLES, OEB_CSS_MIME, CSS_MIME, \
|
||||
XHTML_MIME, OPF_MIME, XML_NS, XML
|
||||
from calibre.ebooks.lit.oeb import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \
|
||||
CSS_MIME, XHTML_MIME, OPF_MIME, XML_NS, XML
|
||||
from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize
|
||||
from calibre.ebooks.lit.oeb import OEBBook
|
||||
from calibre.ebooks.lit.stylizer import Stylizer
|
||||
from calibre.ebooks.lit.lzxcomp import Compressor
|
||||
from calibre.ebooks.lit.lzx import Compressor
|
||||
import calibre
|
||||
from calibre import plugins
|
||||
msdes, msdeserror = plugins['msdes']
|
||||
@ -104,7 +103,7 @@ LZXC_CONTROL = \
|
||||
"\x04\x00\x00\x00\x02\x00\x00\x00" \
|
||||
"\x00\x00\x00\x00\x00\x00\x00\x00"
|
||||
|
||||
COLLAPSE = re.compile(r'[ \r\n\v]+')
|
||||
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
|
||||
|
||||
def prefixname(name, nsrmap):
|
||||
prefix = nsrmap[namespace(name)]
|
||||
@ -228,8 +227,9 @@ class ReBinary(object):
|
||||
if elem.text:
|
||||
if preserve:
|
||||
self.write(elem.text)
|
||||
elif len(elem) > 0 or not elem.text.isspace():
|
||||
elif len(elem) == 0 or not elem.text.isspace():
|
||||
self.write(COLLAPSE.sub(' ', elem.text))
|
||||
# else: de nada
|
||||
parents.append(tag_offset)
|
||||
child = cstyle = nstyle = None
|
||||
for next in chain(elem, [None]):
|
||||
@ -423,6 +423,7 @@ class LitWriter(object):
|
||||
self._add_folder('/data')
|
||||
for item in self._oeb.manifest.values():
|
||||
if item.media_type not in LIT_MIMES:
|
||||
print "WARNING: excluding item %r" % item.href
|
||||
continue
|
||||
name = '/data/' + item.id
|
||||
data = item.data
|
||||
@ -563,8 +564,8 @@ class LitWriter(object):
|
||||
cdata = LZXC_CONTROL + cdata
|
||||
if not data: continue
|
||||
unlen = len(data)
|
||||
with Compressor(17) as lzx:
|
||||
data, rtable = lzx.compress(data, flush=True)
|
||||
lzx = Compressor(17)
|
||||
data, rtable = lzx.compress(data, flush=True)
|
||||
rdata = StringIO()
|
||||
rdata.write(pack('<IIIIQQQQ',
|
||||
3, len(rtable), 8, 0x28, unlen, len(data), 0x8000, 0))
|
||||
|
375
src/calibre/utils/lzx/compressor.c
Normal file
375
src/calibre/utils/lzx/compressor.c
Normal file
@ -0,0 +1,375 @@
|
||||
/* __license__ = 'GPL v3'
|
||||
* __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
*
|
||||
* Python/C implementation of an LZX compressor type.
|
||||
*/
|
||||
|
||||
#include <Python.h>
|
||||
#include <structmember.h>
|
||||
#include <lzxc.h>
|
||||
#include <lzxmodule.h>
|
||||
|
||||
#define BUFFER_INIT(buffer) \
|
||||
do { \
|
||||
(buffer).data = NULL; \
|
||||
(buffer).size = 0; \
|
||||
(buffer).offset = 0; \
|
||||
} while (0)
|
||||
|
||||
#define COMPRESSOR_REMAINING(compressor) \
|
||||
(((compressor)->residue.size - (compressor)->residue.offset) \
|
||||
+ ((compressor)->input.size - (compressor)->input.offset))
|
||||
|
||||
typedef struct buffer_t {
|
||||
void *data;
|
||||
unsigned int size;
|
||||
unsigned int offset;
|
||||
} buffer_t;
|
||||
|
||||
typedef struct Compressor {
|
||||
PyObject_HEAD
|
||||
int reset;
|
||||
int wbits;
|
||||
int blocksize;
|
||||
int flushing;
|
||||
struct lzxc_data *stream;
|
||||
buffer_t residue;
|
||||
buffer_t input;
|
||||
buffer_t output;
|
||||
PyObject *rtable;
|
||||
} Compressor;
|
||||
|
||||
static PyMemberDef Compressor_members[] = {
|
||||
{ "reset", T_INT, offsetof(Compressor, reset), READONLY,
|
||||
"whether or not the Compressor resets each block" },
|
||||
{ "wbits", T_INT, offsetof(Compressor, wbits), READONLY,
|
||||
"window size in bits" },
|
||||
{ "blocksize", T_INT, offsetof(Compressor, blocksize), READONLY,
|
||||
"block size in bytes" },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static int
|
||||
Compressor_traverse(Compressor *self, visitproc visit, void *arg)
|
||||
{
|
||||
Py_VISIT(self->rtable);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
Compressor_clear(Compressor *self)
|
||||
{
|
||||
Py_CLEAR(self->rtable);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
Compressor_dealloc(Compressor *self)
|
||||
{
|
||||
Compressor_clear(self);
|
||||
|
||||
if (self->stream) {
|
||||
lzxc_finish(self->stream, NULL);
|
||||
self->stream = NULL;
|
||||
}
|
||||
if (self->residue.data) {
|
||||
PyMem_Free(self->residue.data);
|
||||
self->residue.data = NULL;
|
||||
}
|
||||
if (self->output.data) {
|
||||
PyMem_Free(self->output.data);
|
||||
self->output.data = NULL;
|
||||
}
|
||||
|
||||
self->ob_type->tp_free((PyObject *)self);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
Compressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
Compressor *self = NULL;
|
||||
|
||||
self = (Compressor *)type->tp_alloc(type, 0);
|
||||
if (self != NULL) {
|
||||
self->rtable = PyList_New(0);
|
||||
if (self->rtable == NULL) {
|
||||
Py_DECREF(self);
|
||||
return NULL;
|
||||
}
|
||||
self->wbits = 0;
|
||||
self->blocksize = 0;
|
||||
self->flushing = 0;
|
||||
|
||||
BUFFER_INIT(self->residue);
|
||||
BUFFER_INIT(self->input);
|
||||
BUFFER_INIT(self->output);
|
||||
}
|
||||
|
||||
return (PyObject *)self;
|
||||
}
|
||||
|
||||
static int
|
||||
get_bytes(void *context, int nbytes, void *buf)
|
||||
{
|
||||
Compressor *self = (Compressor *)context;
|
||||
unsigned char *data = (unsigned char *)buf;
|
||||
buffer_t *residue = &self->residue;
|
||||
buffer_t *input = &self->input;
|
||||
int resrem = residue->size - residue->offset;
|
||||
int inrem = input->size - input->offset;
|
||||
|
||||
if (resrem > 0) {
|
||||
if (resrem <= nbytes) {
|
||||
memcpy(data, residue->data + residue->offset, nbytes);
|
||||
residue->offset += nbytes;
|
||||
return nbytes;
|
||||
} else {
|
||||
memcpy(data, residue->data + residue->offset, resrem);
|
||||
residue->offset += resrem;
|
||||
data += resrem;
|
||||
nbytes -= resrem;
|
||||
}
|
||||
}
|
||||
|
||||
if (inrem == 0) {
|
||||
return resrem;
|
||||
} else if (nbytes > inrem) {
|
||||
nbytes = inrem;
|
||||
}
|
||||
memcpy(data, input->data + input->offset, nbytes);
|
||||
input->offset += nbytes;
|
||||
|
||||
return nbytes + resrem;
|
||||
}
|
||||
|
||||
static int
|
||||
at_eof(void *context)
|
||||
{
|
||||
Compressor *self = (Compressor *)context;
|
||||
return (self->flushing && (COMPRESSOR_REMAINING(self) == 0));
|
||||
}
|
||||
|
||||
static int
|
||||
put_bytes(void *context, int nbytes, void *data)
|
||||
{
|
||||
Compressor *self = (Compressor *)context;
|
||||
buffer_t *output = &self->output;
|
||||
int remaining = output->size - output->offset;
|
||||
|
||||
if (nbytes > remaining) {
|
||||
PyErr_SetString(LZXError,
|
||||
"Attempt to write compressed data beyond end of buffer");
|
||||
nbytes = remaining;
|
||||
}
|
||||
|
||||
memcpy(output->data + output->offset, data, nbytes);
|
||||
output->offset += nbytes;
|
||||
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static void
|
||||
mark_frame(void *context, uint32_t uncomp, uint32_t comp)
|
||||
{
|
||||
Compressor *self = (Compressor *)context;
|
||||
PyObject *rtable = self->rtable;
|
||||
PyObject *entry = NULL;
|
||||
|
||||
entry = Py_BuildValue("(LL)", uncomp, comp);
|
||||
if (entry) {
|
||||
PyList_Append(rtable, entry);
|
||||
Py_DECREF(entry);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
Compressor_init(Compressor *self, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
static char *kwlist[] = {"wbits", "reset", NULL};
|
||||
int wbits = 0;
|
||||
int retval = 0;
|
||||
|
||||
self->reset = 1;
|
||||
|
||||
if (!PyArg_ParseTupleAndKeywords(
|
||||
args, kwds, "I|b", kwlist, &wbits, &self->reset)) {
|
||||
return -1;
|
||||
}
|
||||
/* TODO: check window size. */
|
||||
|
||||
self->wbits = wbits;
|
||||
self->blocksize = 1 << wbits;
|
||||
|
||||
self->residue.data = PyMem_Realloc(self->residue.data, self->blocksize);
|
||||
if (self->residue.data == NULL) {
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (self->stream != NULL) {
|
||||
lzxc_finish(self->stream, NULL);
|
||||
}
|
||||
retval = lzxc_init(&self->stream, wbits, get_bytes, self, at_eof,
|
||||
put_bytes, self, mark_frame, self);
|
||||
if (retval != 0) {
|
||||
self->stream = NULL;
|
||||
PyErr_SetString(LZXError, "Failed to create compression stream");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
Compressor_compress__(
|
||||
Compressor *self, unsigned char *data, unsigned int inlen, int flush)
|
||||
{
|
||||
buffer_t *residue = &self->residue;
|
||||
buffer_t *input = &self->input;
|
||||
buffer_t *output = &self->output;
|
||||
unsigned int outlen, remainder;
|
||||
int reset = self->reset;
|
||||
unsigned int blocksize = self->blocksize;
|
||||
int retval = 0;
|
||||
PyObject *cdata = NULL;
|
||||
PyObject *rtable = NULL;
|
||||
PyObject *result = NULL;
|
||||
|
||||
self->flushing = flush;
|
||||
input->data = data;
|
||||
input->size = inlen;
|
||||
input->offset = 0;
|
||||
|
||||
outlen = inlen;
|
||||
remainder = outlen % blocksize;
|
||||
if (remainder != 0) {
|
||||
outlen += (blocksize - remainder) + 1;
|
||||
}
|
||||
if (output->size < outlen) {
|
||||
output->data = PyMem_Realloc(output->data, outlen);
|
||||
if (output->data == NULL) {
|
||||
return PyErr_NoMemory();
|
||||
}
|
||||
output->size = outlen;
|
||||
}
|
||||
output->offset = 0;
|
||||
|
||||
while (COMPRESSOR_REMAINING(self) >= blocksize) {
|
||||
retval = lzxc_compress_block(self->stream, blocksize, 1);
|
||||
if (retval != 0) {
|
||||
PyErr_SetString(LZXError, "Error during compression");
|
||||
return NULL;
|
||||
}
|
||||
if (reset) {
|
||||
lzxc_reset(self->stream);
|
||||
}
|
||||
}
|
||||
if (flush && COMPRESSOR_REMAINING(self) > 0) {
|
||||
retval = lzxc_compress_block(self->stream, blocksize, 1);
|
||||
if (retval != 0) {
|
||||
PyErr_SetString(LZXError, "Error during compression");
|
||||
return NULL;
|
||||
}
|
||||
if (reset) {
|
||||
lzxc_reset(self->stream);
|
||||
}
|
||||
residue->size = 0;
|
||||
residue->offset = 0;
|
||||
} else {
|
||||
int reslen = input->size - input->offset;
|
||||
memcpy(residue->data, input->data + input->offset, reslen);
|
||||
residue->size = reslen;
|
||||
residue->offset = 0;
|
||||
}
|
||||
|
||||
rtable = self->rtable;
|
||||
self->rtable = PyList_New(0);
|
||||
if (self->rtable == NULL) {
|
||||
self->rtable = rtable;
|
||||
return NULL;
|
||||
}
|
||||
cdata = PyString_FromStringAndSize(output->data, output->offset);
|
||||
if (cdata == NULL) {
|
||||
Py_DECREF(rtable);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
result = Py_BuildValue("(OO)", cdata, rtable);
|
||||
Py_DECREF(rtable);
|
||||
Py_DECREF(cdata);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
Compressor_compress(Compressor *self, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
static char *kwlist[] = {"data", "flush", NULL};
|
||||
unsigned char *data = NULL;
|
||||
unsigned int inlen = 0;
|
||||
int flush = 0;
|
||||
|
||||
if (!PyArg_ParseTupleAndKeywords(
|
||||
args, kwds, "s#|b", kwlist, &data, &inlen, &flush)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return Compressor_compress__(self, data, inlen, flush);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
Compressor_flush(Compressor *self)
|
||||
{
|
||||
return Compressor_compress__(self, NULL, 0, 1);
|
||||
}
|
||||
|
||||
static PyMethodDef Compressor_methods[] = {
|
||||
{ "compress", (PyCFunction)Compressor_compress,
|
||||
METH_VARARGS | METH_KEYWORDS,
|
||||
"Return a string containing data LZX compressed." },
|
||||
{ "flush", (PyCFunction)Compressor_flush, METH_NOARGS,
|
||||
"Return a string containing any remaining LZX compressed data." },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
PyTypeObject CompressorType = {
|
||||
PyObject_HEAD_INIT(NULL)
|
||||
0, /*ob_size*/
|
||||
"lzx.Compressor", /*tp_name*/
|
||||
sizeof(Compressor), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
(destructor)Compressor_dealloc, /*tp_dealloc*/
|
||||
0, /*tp_print*/
|
||||
0, /*tp_getattr*/
|
||||
0, /*tp_setattr*/
|
||||
0, /*tp_compare*/
|
||||
0, /*tp_repr*/
|
||||
0, /*tp_as_number*/
|
||||
0, /*tp_as_sequence*/
|
||||
0, /*tp_as_mapping*/
|
||||
0, /*tp_hash */
|
||||
0, /*tp_call*/
|
||||
0, /*tp_str*/
|
||||
0, /*tp_getattro*/
|
||||
0, /*tp_setattro*/
|
||||
0, /*tp_as_buffer*/
|
||||
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
|
||||
"Compressor objects", /* tp_doc */
|
||||
(traverseproc)Compressor_traverse, /* tp_traverse */
|
||||
(inquiry)Compressor_clear, /* tp_clear */
|
||||
0, /* tp_richcompare */
|
||||
0, /* tp_weaklistoffset */
|
||||
0, /* tp_iter */
|
||||
0, /* tp_iternext */
|
||||
Compressor_methods, /* tp_methods */
|
||||
Compressor_members, /* tp_members */
|
||||
0, /* tp_getset */
|
||||
0, /* tp_base */
|
||||
0, /* tp_dict */
|
||||
0, /* tp_descr_get */
|
||||
0, /* tp_descr_set */
|
||||
0, /* tp_dictoffset */
|
||||
(initproc)Compressor_init, /* tp_init */
|
||||
0, /* tp_alloc */
|
||||
Compressor_new, /* tp_new */
|
||||
};
|
@ -16,6 +16,11 @@
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
/* Force using (actually working) non-sliding version. */
|
||||
#define NONSLIDE 1
|
||||
#define LZ_ONEBUFFER 1
|
||||
#define LAZY 1
|
||||
|
||||
/*
|
||||
* Document here
|
||||
*/
|
||||
@ -28,7 +33,7 @@
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
#endif
|
||||
#include <lzc.h>
|
||||
#include "lzc.h"
|
||||
|
||||
#define MAX_MATCH 253
|
||||
#define MIN_MATCH 2
|
||||
|
@ -15,6 +15,12 @@
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
/* Force using (actually working) non-sliding version. */
|
||||
#define NONSLIDE 1
|
||||
#define LZ_ONEBUFFER 1
|
||||
#define LAZY 1
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
@ -22,11 +28,17 @@
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <lzc.h>
|
||||
#include <lzxc.h>
|
||||
#if BYTE_ORDER == BIG_ENDIAN
|
||||
#define LZX_BIG_ENDIAN
|
||||
#endif
|
||||
|
||||
/* Force using (actually working) non-sliding version. */
|
||||
#define NONSLIDE
|
||||
#ifdef NONSLIDE
|
||||
#include "lzc.h"
|
||||
#else
|
||||
#include "hash_slide.h"
|
||||
#include "lz_slide.h"
|
||||
#endif
|
||||
#include "lzxc.h"
|
||||
|
||||
/* these named constants are from the Microsoft LZX documentation */
|
||||
#define MIN_MATCH 2
|
||||
@ -35,6 +47,16 @@
|
||||
#define NUM_PRIMARY_LENGTHS 7
|
||||
#define NUM_SECONDARY_LENGTHS 249
|
||||
|
||||
/* the names of these constants are specific to this library */
|
||||
#define LZX_MAX_CODE_LENGTH 16
|
||||
#define LZX_FRAME_SIZE 32768
|
||||
#define LZX_PRETREE_SIZE 20
|
||||
#define LZX_ALIGNED_BITS 3
|
||||
#define LZX_ALIGNED_SIZE 8
|
||||
|
||||
#define LZX_VERBATIM_BLOCK 1
|
||||
#define LZX_ALIGNED_OFFSET_BLOCK 2
|
||||
|
||||
/* Debugging defines useful during development. All add diagnostic output
|
||||
at various points in the system */
|
||||
|
||||
@ -393,15 +415,15 @@ static void lzx_init_static(void)
|
||||
}
|
||||
}
|
||||
|
||||
struct lzx_data
|
||||
struct lzxc_data
|
||||
{
|
||||
void *in_arg;
|
||||
void *out_arg;
|
||||
void *mark_frame_arg;
|
||||
lzx_get_bytes_t get_bytes;
|
||||
lzx_at_eof_t at_eof;
|
||||
lzx_put_bytes_t put_bytes;
|
||||
lzx_mark_frame_t mark_frame;
|
||||
lzxc_get_bytes_t get_bytes;
|
||||
lzxc_at_eof_t at_eof;
|
||||
lzxc_put_bytes_t put_bytes;
|
||||
lzxc_mark_frame_t mark_frame;
|
||||
struct lz_info *lzi;
|
||||
/* a 'frame' is an 0x8000 byte thing. Called that because otherwise
|
||||
I'd confuse myself overloading 'block' */
|
||||
@ -439,7 +461,7 @@ lzx_get_chars(lz_info *lzi, int n, u_char *buf)
|
||||
int chars_read;
|
||||
int chars_pad;
|
||||
|
||||
lzx_data *lzud = (lzx_data *)lzi->user_data;
|
||||
lzxc_data *lzud = (lzxc_data *)lzi->user_data;
|
||||
#ifdef OLDFRAMING
|
||||
if (lzud->subdivide < 0) return 0;
|
||||
if (n > lzud->left_in_frame)
|
||||
@ -534,7 +556,7 @@ static int find_match_at(lz_info *lzi, int loc, int match_len, int *match_locp)
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
static void check_entropy(lzx_data *lzud, int main_index)
|
||||
static void check_entropy(lzxc_data *lzud, int main_index)
|
||||
{
|
||||
/* entropy = - sum_alphabet P(x) * log2 P(x) */
|
||||
/* entropy = - sum_alphabet f(x)/N * log2 (f(x)/N) */
|
||||
@ -599,7 +621,7 @@ static void check_entropy(lzx_data *lzud, int main_index)
|
||||
static int
|
||||
lzx_output_match(lz_info *lzi, int match_pos, int match_len)
|
||||
{
|
||||
lzx_data *lzud = (lzx_data *)lzi->user_data;
|
||||
lzxc_data *lzud = (lzxc_data *)lzi->user_data;
|
||||
uint32_t formatted_offset;
|
||||
uint32_t position_footer;
|
||||
uint8_t length_footer;
|
||||
@ -774,7 +796,7 @@ lzx_output_match(lz_info *lzi, int match_pos, int match_len)
|
||||
static void
|
||||
lzx_output_literal(lz_info *lzi, u_char ch)
|
||||
{
|
||||
lzx_data *lzud = (lzx_data *)lzi->user_data;
|
||||
lzxc_data *lzud = (lzxc_data *)lzi->user_data;
|
||||
|
||||
#ifndef OLDFRAMING
|
||||
lzud->left_in_block--;
|
||||
@ -788,7 +810,7 @@ lzx_output_literal(lz_info *lzi, u_char ch)
|
||||
check_entropy(lzud, ch);
|
||||
}
|
||||
|
||||
static void lzx_write_bits(lzx_data *lzxd, int nbits, uint32_t bits)
|
||||
static void lzx_write_bits(lzxc_data *lzxd, int nbits, uint32_t bits)
|
||||
{
|
||||
int cur_bits;
|
||||
int shift_bits;
|
||||
@ -836,7 +858,7 @@ static void lzx_write_bits(lzx_data *lzxd, int nbits, uint32_t bits)
|
||||
lzxd->bits_in_buf = cur_bits;
|
||||
}
|
||||
|
||||
static void lzx_align_output(lzx_data *lzxd)
|
||||
static void lzx_align_output(lzxc_data *lzxd)
|
||||
{
|
||||
if (lzxd->bits_in_buf) {
|
||||
lzx_write_bits(lzxd, 16 - lzxd->bits_in_buf, 0);
|
||||
@ -846,7 +868,7 @@ static void lzx_align_output(lzx_data *lzxd)
|
||||
}
|
||||
|
||||
static void
|
||||
lzx_write_compressed_literals(lzx_data *lzxd, int block_type)
|
||||
lzx_write_compressed_literals(lzxc_data *lzxd, int block_type)
|
||||
{
|
||||
uint32_t *cursor = lzxd->block_codes;
|
||||
uint32_t *endp = lzxd->block_codesp;
|
||||
@ -931,7 +953,7 @@ lzx_write_compressed_literals(lzx_data *lzxd, int block_type)
|
||||
}
|
||||
|
||||
static int
|
||||
lzx_write_compressed_tree(struct lzx_data *lzxd,
|
||||
lzx_write_compressed_tree(struct lzxc_data *lzxd,
|
||||
struct huff_entry *tree, uint8_t *prevlengths,
|
||||
int treesize)
|
||||
{
|
||||
@ -1054,7 +1076,7 @@ lzx_write_compressed_tree(struct lzx_data *lzxd,
|
||||
}
|
||||
|
||||
void
|
||||
lzx_reset(lzx_data *lzxd)
|
||||
lzxc_reset(lzxc_data *lzxd)
|
||||
{
|
||||
lzxd->need_1bit_header = 1;
|
||||
lzxd->R0 = lzxd->R1 = lzxd->R2 = 1;
|
||||
@ -1063,7 +1085,7 @@ lzx_reset(lzx_data *lzxd)
|
||||
lz_reset(lzxd->lzi);
|
||||
}
|
||||
|
||||
int lzx_compress_block(lzx_data *lzxd, int block_size, int subdivide)
|
||||
int lzxc_compress_block(lzxc_data *lzxd, int block_size, int subdivide)
|
||||
{
|
||||
int i;
|
||||
uint32_t written_sofar = 0;
|
||||
@ -1190,14 +1212,14 @@ int lzx_compress_block(lzx_data *lzxd, int block_size, int subdivide)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int lzx_init(struct lzx_data **lzxdp, int wsize_code,
|
||||
lzx_get_bytes_t get_bytes, void *get_bytes_arg,
|
||||
lzx_at_eof_t at_eof,
|
||||
lzx_put_bytes_t put_bytes, void *put_bytes_arg,
|
||||
lzx_mark_frame_t mark_frame, void *mark_frame_arg)
|
||||
int lzxc_init(struct lzxc_data **lzxdp, int wsize_code,
|
||||
lzxc_get_bytes_t get_bytes, void *get_bytes_arg,
|
||||
lzxc_at_eof_t at_eof,
|
||||
lzxc_put_bytes_t put_bytes, void *put_bytes_arg,
|
||||
lzxc_mark_frame_t mark_frame, void *mark_frame_arg)
|
||||
{
|
||||
int wsize;
|
||||
struct lzx_data *lzxd;
|
||||
struct lzxc_data *lzxd;
|
||||
|
||||
if ((wsize_code < 15) || (wsize_code > 21)) {
|
||||
return -1;
|
||||
@ -1234,11 +1256,11 @@ int lzx_init(struct lzx_data **lzxdp, int wsize_code,
|
||||
lzx_get_chars, lzx_output_match, lzx_output_literal,lzxd);
|
||||
lzxd->len_uncompressed_input = 0;
|
||||
lzxd->len_compressed_output = 0;
|
||||
lzx_reset(lzxd);
|
||||
lzxc_reset(lzxd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int lzx_finish(struct lzx_data *lzxd, struct lzx_results *lzxr)
|
||||
int lzxc_finish(struct lzxc_data *lzxd, struct lzxc_results *lzxr)
|
||||
{
|
||||
/* lzx_align_output(lzxd); Not needed as long as frame padding is in place */
|
||||
if (lzxr) {
|
||||
|
@ -15,43 +15,28 @@
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
typedef struct lzxc_data lzxc_data;
|
||||
typedef int (*lzxc_get_bytes_t)(void *arg, int n, void *buf);
|
||||
typedef int (*lzxc_put_bytes_t)(void *arg, int n, void *buf);
|
||||
typedef void (*lzxc_mark_frame_t)(void *arg, uint32_t uncomp, uint32_t comp);
|
||||
typedef int (*lzxc_at_eof_t)(void *arg);
|
||||
|
||||
#if BYTE_ORDER == BIG_ENDIAN
|
||||
# define LZX_BIG_ENDIAN
|
||||
#endif
|
||||
|
||||
/* the names of these constants are specific to this library */
|
||||
#define LZX_MAX_CODE_LENGTH 16
|
||||
#define LZX_FRAME_SIZE 32768
|
||||
#define LZX_PRETREE_SIZE 20
|
||||
#define LZX_ALIGNED_BITS 3
|
||||
#define LZX_ALIGNED_SIZE 8
|
||||
|
||||
#define LZX_VERBATIM_BLOCK 1
|
||||
#define LZX_ALIGNED_OFFSET_BLOCK 2
|
||||
|
||||
typedef struct lzx_data lzx_data;
|
||||
typedef int (*lzx_get_bytes_t)(void *arg, int n, void *buf);
|
||||
typedef int (*lzx_put_bytes_t)(void *arg, int n, void *buf);
|
||||
typedef void (*lzx_mark_frame_t)(void *arg, uint32_t uncomp, uint32_t comp);
|
||||
typedef int (*lzx_at_eof_t)(void *arg);
|
||||
|
||||
typedef struct lzx_results
|
||||
typedef struct lzxc_results
|
||||
{
|
||||
/* add more here? Error codes, # blocks, # frames, etc? */
|
||||
long len_compressed_output;
|
||||
long len_uncompressed_input;
|
||||
} lzx_results;
|
||||
} lzxc_results;
|
||||
|
||||
int lzx_init(struct lzx_data **lzxdp, int wsize_code,
|
||||
lzx_get_bytes_t get_bytes, void *get_bytes_arg,
|
||||
lzx_at_eof_t at_eof,
|
||||
lzx_put_bytes_t put_bytes, void *put_bytes_arg,
|
||||
lzx_mark_frame_t mark_frame, void *mark_frame_arg);
|
||||
int lzxc_init(struct lzxc_data **lzxdp, int wsize_code,
|
||||
lzxc_get_bytes_t get_bytes, void *get_bytes_arg,
|
||||
lzxc_at_eof_t at_eof,
|
||||
lzxc_put_bytes_t put_bytes, void *put_bytes_arg,
|
||||
lzxc_mark_frame_t mark_frame, void *mark_frame_arg);
|
||||
|
||||
void lzx_reset(lzx_data *lzxd);
|
||||
void lzxc_reset(lzxc_data *lzxd);
|
||||
|
||||
int lzx_compress_block(lzx_data *lzxd, int block_size, int subdivide);
|
||||
int lzxc_compress_block(lzxc_data *lzxd, int block_size, int subdivide);
|
||||
|
||||
int lzx_finish(struct lzx_data *lzxd, struct lzx_results *lzxr);
|
||||
int lzxc_finish(struct lzxc_data *lzxd, struct lzxc_results *lzxr);
|
||||
|
||||
|
@ -8,22 +8,23 @@
|
||||
|
||||
#include <mspack.h>
|
||||
#include <lzxd.h>
|
||||
#include <lzxc.h>
|
||||
|
||||
#include <lzxmodule.h>
|
||||
|
||||
static char lzx_doc[] =
|
||||
"Provide basic LZX compression and decompression using the code from\n"
|
||||
"liblzxcomp and libmspack respectively.";
|
||||
|
||||
static PyObject *LzxError = NULL;
|
||||
PyObject *LZXError = NULL;
|
||||
|
||||
typedef struct memory_file {
|
||||
unsigned int magic; /* 0xB5 */
|
||||
void * buffer;
|
||||
void *buffer;
|
||||
int total_bytes;
|
||||
int current_bytes;
|
||||
} memory_file;
|
||||
|
||||
void *
|
||||
static void *
|
||||
glue_alloc(struct mspack_system *this, size_t bytes)
|
||||
{
|
||||
void *p = NULL;
|
||||
@ -34,33 +35,33 @@ glue_alloc(struct mspack_system *this, size_t bytes)
|
||||
return p;
|
||||
}
|
||||
|
||||
void
|
||||
static void
|
||||
glue_free(void *p)
|
||||
{
|
||||
free(p);
|
||||
}
|
||||
|
||||
void
|
||||
static void
|
||||
glue_copy(void *src, void *dest, size_t bytes)
|
||||
{
|
||||
memcpy(dest, src, bytes);
|
||||
}
|
||||
|
||||
struct mspack_file *
|
||||
static struct mspack_file *
|
||||
glue_open(struct mspack_system *this, char *filename, int mode)
|
||||
{
|
||||
PyErr_SetString(LzxError, "MSPACK_OPEN unsupported");
|
||||
PyErr_SetString(LZXError, "MSPACK_OPEN unsupported");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void
|
||||
static void
|
||||
glue_close(struct mspack_file *file)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
int
|
||||
glue_read(struct mspack_file *file, void * buffer, int bytes)
|
||||
static int
|
||||
glue_read(struct mspack_file *file, void *buffer, int bytes)
|
||||
{
|
||||
memory_file *mem;
|
||||
int remaining;
|
||||
@ -77,8 +78,8 @@ glue_read(struct mspack_file *file, void * buffer, int bytes)
|
||||
return bytes;
|
||||
}
|
||||
|
||||
int
|
||||
glue_write(struct mspack_file * file, void * buffer, int bytes)
|
||||
static int
|
||||
glue_write(struct mspack_file *file, void *buffer, int bytes)
|
||||
{
|
||||
memory_file *mem;
|
||||
int remaining;
|
||||
@ -87,9 +88,8 @@ glue_write(struct mspack_file * file, void * buffer, int bytes)
|
||||
if (mem->magic != 0xB5) return -1;
|
||||
|
||||
remaining = mem->total_bytes - mem->current_bytes;
|
||||
if (!remaining) return 0;
|
||||
if (bytes > remaining) {
|
||||
PyErr_SetString(LzxError,
|
||||
PyErr_SetString(LZXError,
|
||||
"MSPACK_WRITE tried to write beyond end of buffer");
|
||||
bytes = remaining;
|
||||
}
|
||||
@ -189,7 +189,7 @@ decompress(PyObject *self, PyObject *args)
|
||||
if (err != MSPACK_ERR_OK) {
|
||||
Py_DECREF(retval);
|
||||
retval = NULL;
|
||||
PyErr_SetString(LzxError, "LZX decompression failed");
|
||||
PyErr_SetString(LZXError, "LZX decompression failed");
|
||||
}
|
||||
|
||||
return retval;
|
||||
@ -199,7 +199,7 @@ static PyMethodDef lzx_methods[] = {
|
||||
{ "init", &init, METH_VARARGS, "Initialize the LZX decompressor" },
|
||||
{ "reset", &reset, METH_VARARGS, "Reset the LZX decompressor" },
|
||||
{ "decompress", &decompress, METH_VARARGS, "Run the LZX decompressor" },
|
||||
{ NULL, NULL }
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
PyMODINIT_FUNC
|
||||
@ -207,23 +207,21 @@ initlzx(void)
|
||||
{
|
||||
PyObject *m;
|
||||
|
||||
if (PyType_Ready(&CompressorType) < 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
m = Py_InitModule3("lzx", lzx_methods, lzx_doc);
|
||||
if (m == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
LzxError = PyErr_NewException("lzx.LzxError", NULL, NULL);
|
||||
Py_INCREF(LzxError);
|
||||
PyModule_AddObject(m, "LzxError", LzxError);
|
||||
LZXError = PyErr_NewException("lzx.LZXError", NULL, NULL);
|
||||
Py_INCREF(LZXError);
|
||||
PyModule_AddObject(m, "LZXError", LZXError);
|
||||
|
||||
Py_INCREF(&CompressorType);
|
||||
PyModule_AddObject(m, "Compressor", (PyObject *)&CompressorType);
|
||||
|
||||
PyModule_AddObject(m, "_lzxc_init",
|
||||
Py_BuildValue("k", (unsigned long)lzx_init));
|
||||
PyModule_AddObject(m, "_lzxc_reset",
|
||||
Py_BuildValue("k", (unsigned long)lzx_reset));
|
||||
PyModule_AddObject(m, "_lzxc_compress_block",
|
||||
Py_BuildValue("k", (unsigned long)lzx_compress_block));
|
||||
PyModule_AddObject(m, "_lzxc_finish",
|
||||
Py_BuildValue("k", (unsigned long)lzx_finish));
|
||||
|
||||
return;
|
||||
}
|
||||
|
15
src/calibre/utils/lzx/lzxmodule.h
Normal file
15
src/calibre/utils/lzx/lzxmodule.h
Normal file
@ -0,0 +1,15 @@
|
||||
/* __license__ = 'GPL v3'
|
||||
* __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
*
|
||||
* Common declarations for Python module C glue code.
|
||||
*/
|
||||
|
||||
#include <Python.h>
|
||||
|
||||
#ifndef LZXMODULE_H
|
||||
#define LZXMODULE_H
|
||||
|
||||
extern PyObject *LZXError;
|
||||
extern PyTypeObject CompressorType;
|
||||
|
||||
#endif /* LZXMODULE_H */
|
Loading…
x
Reference in New Issue
Block a user