Section decompression working

This commit is contained in:
Marshall T. Vandegrift 2008-07-18 18:03:28 -04:00
parent 1e78860f4f
commit 1367ba58f3
3 changed files with 90 additions and 191 deletions

View File

@ -15,13 +15,14 @@ from calibre.ebooks.lit import LitError
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
import calibre.ebooks.lit.mssha1 as mssha1 import calibre.ebooks.lit.mssha1 as mssha1
import calibre.ebooks.lit.msdes as msdes import calibre.ebooks.lit.msdes as msdes
import calibre.utils.lzx as lzx
OPF_DECL = """"<?xml version="1.0" encoding="UTF-8" ?> OPF_DECL = """"<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE package <!DOCTYPE package
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN" PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
"http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd"> "http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
""" """
XHTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?> HTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE html PUBLIC <!DOCTYPE html PUBLIC
"+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN" "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN"
"http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd"> "http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
@ -30,6 +31,14 @@ XHTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}" DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}"
LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}" LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}"
LZXC_TAG = 0x43585a4c
CONTROL_TAG = 4
CONTROL_WINDOW_SIZE = 12
RESET_NENTRIES = 4
RESET_HDRLEN = 12
RESET_UCLENGTH = 16
RESET_INTERVAL = 32
def u32(bytes): def u32(bytes):
return struct.unpack('<L', bytes[:4])[0] return struct.unpack('<L', bytes[:4])[0]
@ -114,10 +123,7 @@ class UnBinary(object):
offset += 4 offset += 4
def item_path(self, internal_id): def item_path(self, internal_id):
for i in self.manifest: return self.manifest.get(internal_id, internal_id)
if i == internal_id:
return i.path
raise LitError('Could not find item %s'%(internal_id,))
def __unicode__(self): def __unicode__(self):
return self.raw return self.raw
@ -555,7 +561,7 @@ class LitFile(object):
pos += size pos += size
def read_manifest(self, entry): def read_manifest(self, entry):
self.manifest = [] self.manifest = {}
raw = self._read_content(entry.offset, entry.size) raw = self._read_content(entry.offset, entry.size)
pos = 0 pos = 0
while pos < len(raw): while pos < len(raw):
@ -593,14 +599,14 @@ class LitFile(object):
mime_type = raw[pos:pos+slen].decode('utf8') mime_type = raw[pos:pos+slen].decode('utf8')
pos += slen + 1 pos += slen + 1
self.manifest.append( self.manifest[internal] = \
ManifestItem(original, internal, mime_type, ManifestItem(original, internal, mime_type,
offset, root, state)) offset, root, state)
i += 1 i += 1
def read_meta(self, entry): def read_meta(self, entry):
raw = self._read_content(entry.offset, entry.size) raw = self._read_content(entry.offset, entry.size)
xml = OPF_DECL + unicode(UnBinary(raw, self.manifest)) xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP))
self.meta = xml self.meta = xml
def read_drm(self): def read_drm(self):
@ -643,6 +649,13 @@ class LitFile(object):
for i in xrange(0, len(digest)): for i in xrange(0, len(digest)):
key[i % 8] ^= ord(digest[i]) key[i % 8] ^= ord(digest[i])
return ''.join(chr(x) for x in key) return ''.join(chr(x) for x in key)
def get_markup_file(self, name):
    """Return the markup stored under `name` as text with its declaration.

    The raw entry is fetched with self.get_file() and decoded by UnBinary
    using the manifest.  '/meta' is treated as the OPF package document
    (OPF_DECL / OPF_MAP); every other name is treated as an HTML document
    (HTML_DECL / HTML_MAP).
    """
    binary = self.get_file(name)
    if name == '/meta':
        header, tag_map = OPF_DECL, OPF_MAP
    else:
        header, tag_map = HTML_DECL, HTML_MAP
    return header + unicode(UnBinary(binary, self.manifest, tag_map))
def get_file(self, name): def get_file(self, name):
entry = self.entries[name] entry = self.entries[name]
@ -664,20 +677,20 @@ class LitFile(object):
transform = self.get_file(path + '/Transform/List') transform = self.get_file(path + '/Transform/List')
content = self.get_file(path + '/Content') content = self.get_file(path + '/Content')
control = self.get_file(path + '/ControlData') control = self.get_file(path + '/ControlData')
idx_transform = idx_control = 0 while len(transform) >= 16:
while (len(transform) - idx_transform) >= 16: csize = (int32(control) + 1) * 4
ndwords = int32(control[idx_control:]) + 1 if csize > len(control) or csize <= 0:
if (idx_control + (ndwords * 4)) > len(control) or ndwords <= 0:
raise LitError("ControlData is too short") raise LitError("ControlData is too short")
guid = msguid(transform[idx_transform:]) guid = msguid(transform)
if guid == DESENCRYPT_GUID: if guid == DESENCRYPT_GUID:
content = self._decrypt(content) content = self._decrypt(content)
idx_control += ndwords * 4 control = control[csize:]
elif guid == LZXCOMPRESS_GUID: elif guid == LZXCOMPRESS_GUID:
raise LitError("LZX decompression not implemented") content = self._decompress_section(name, control, content)
control = control[csize:]
else: else:
raise LitError("Unrecognized transform: %s." % repr(guid)) raise LitError("Unrecognized transform: %s." % repr(guid))
idx_transform += 16 transform = transform[16:]
return content return content
def _decrypt(self, content): def _decrypt(self, content):
@ -685,6 +698,59 @@ class LitFile(object):
raise LitError('Cannot extract content from a DRM protected ebook') raise LitError('Cannot extract content from a DRM protected ebook')
return msdes.new(self.bookkey).decrypt(content) return msdes.new(self.bookkey).decrypt(content)
def _decompress_section(self, name, control, content):
    """Decompress the LZX-compressed `content` of the section `name`.

    `control` holds the ControlData bytes for the LZX transform and
    `content` the compressed data.  The section's ResetTable is fetched
    with self.get_file() and walked to cut `content` into chunks that are
    each decompressed to one window's worth of output after an lzx.reset().

    Returns the decompressed data joined into a single string.

    Raises LitError when ControlData or the ResetTable fail validation, or
    when fewer bytes were produced than the ResetTable's uncompressed
    length announces.
    """
    # NOTE(review): the nesting of the reset-table loop below was rebuilt
    # from a listing that had lost its indentation -- confirm it against
    # the repository copy before merging.
    if len(control) < 32 or u32(control[CONTROL_TAG:]) != LZXC_TAG:
        raise LitError("Invalid ControlData tag value")
    result = []

    # window_size ends up as 14 plus the bit length of the ControlData
    # window field; only 15..21 is accepted.
    window_size = 14
    u = u32(control[CONTROL_WINDOW_SIZE:])
    while u > 0:
        u >>= 1
        window_size += 1
    if window_size < 15 or window_size > 21:
        raise LitError("Invalid window in ControlData")
    lzx.init(window_size)

    reset_table = self.get_file('/'.join(
        ['::DataSpace/Storage', name, 'Transform',
         LZXCOMPRESS_GUID, 'InstanceData/ResetTable']))
    if len(reset_table) < (RESET_INTERVAL + 8):
        raise LitError("Reset table is too short")
    # The length field is 64 bits wide; only the low 32 bits are supported.
    if u32(reset_table[RESET_UCLENGTH + 4:]) != 0:
        raise LitError("Reset table has 64bit value for UCLENGTH")

    ofs_entry = int32(reset_table[RESET_HDRLEN:]) + 8
    uclength = int32(reset_table[RESET_UCLENGTH:])
    # Loop invariant: read the interval once instead of on every entry.
    interval = int32(reset_table[RESET_INTERVAL:])
    accum = interval
    bytes_remaining = uclength
    window_bytes = (1 << window_size)
    # `base` is the offset (in the original content) of the bytes that
    # have already been sliced off the front of `content`.
    base = 0

    while ofs_entry < len(reset_table):
        if accum >= window_bytes:
            accum = 0
            size = int32(reset_table[ofs_entry:])
            u = int32(reset_table[ofs_entry + 4:])
            if u != 0:
                raise LitError("Reset table entry greater than 32 bits")
            if size >= (len(content) + base):
                # Was `raise("...")`, which raises a bare string rather
                # than a LitError.
                raise LitError("Reset table entry out of bounds")
            if bytes_remaining >= window_bytes:
                lzx.reset()
                result.append(lzx.decompress(content, window_bytes))
                bytes_remaining -= window_bytes
                content = content[size - base:]
                base = size
        accum += interval
        ofs_entry += 8

    # Whatever is left is a final chunk shorter than one window.
    if bytes_remaining < window_bytes and bytes_remaining > 0:
        lzx.reset()
        result.append(lzx.decompress(content, bytes_remaining))
        bytes_remaining = 0
    if bytes_remaining > 0:
        raise LitError("Failed to completely decompress section")
    return ''.join(result)
def get_metadata(stream): def get_metadata(stream):
try: try:
litfile = LitFile(stream) litfile = LitFile(stream)
@ -693,7 +759,7 @@ def get_metadata(stream):
cover_url, cover_item = mi.cover, None cover_url, cover_item = mi.cover, None
if cover_url: if cover_url:
cover_url = relpath(cover_url, os.getcwd()) cover_url = relpath(cover_url, os.getcwd())
for item in litfile.manifest: for item in litfile.manifest.values():
if item.path == cover_url: if item.path == cover_url:
cover_item = item.internal cover_item = item.internal
if cover_item is not None: if cover_item is not None:

View File

@ -1,172 +0,0 @@
/*--[lzxglue.c]----------------------------------------------------------------
| Copyright (C) 2004 DRS
|
| This file is part of the "openclit" library for processing .LIT files.
|
| "Openclit" is free software; you can redistribute it and/or modify
| it under the terms of the GNU General Public License as published by
| the Free Software Foundation; either version 2 of the License, or
| (at your option) any later version.
|
| This program is distributed in the hope that it will be useful,
| but WITHOUT ANY WARRANTY; without even the implied warranty of
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
| GNU General Public License for more details.
|
| You should have received a copy of the GNU General Public License
| along with this program; if not, write to the Free Software
| Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
| The GNU General Public License may also be available at the following
| URL: http://www.gnu.org/licenses/gpl.html
*/
/* This provides a "glue" between Stuart Caie's libmspack library and the
* Openclit calls to the earlier LZX library.
*
* This way, I should be able to use the files unmodified.
*/
#include <stdio.h>
#include <stdlib.h>
#include "litlib.h"
#include "mspack.h"
#include "lzx.h"
/* In-memory stand-in for a file handle: glue_read() and glue_write() cast
 * the struct mspack_file * they are given back to this type. */
typedef struct memory_file
{
unsigned int magic; /* 0xB5; checked by glue_read()/glue_write() before use */
void * buffer; /* start of the backing memory */
int total_bytes; /* size of buffer, in bytes */
int current_bytes; /* cursor: bytes already read from / written to buffer */
} memory_file;
/*
 * Allocation hook installed in lzxglue_system.
 *
 * this  - the owning mspack_system; not used.
 * bytes - number of bytes requested.
 *
 * Returns the pointer obtained from malloc(), or NULL after reporting the
 * failure through lit_error().
 */
void * glue_alloc(struct mspack_system *this, size_t bytes)
{
    void * p;

    (void)this;
    p = malloc(bytes);
    if (p == NULL) {
        /* size_t is not int: handing it to "%d" is undefined behaviour
         * wherever the two differ in width, so widen it explicitly.
         * NOTE(review): assumes lit_error() takes printf-style formats. */
        lit_error(ERR_R|ERR_LIBC,"Malloc(%lu) failed!", (unsigned long)bytes);
    }
    return p;
}
/* Deallocation hook installed in lzxglue_system: hands the pointer
 * straight to free(). */
void glue_free(void *ptr)
{
    free(ptr);
}
/*
 * Copy hook installed in lzxglue_system.
 *
 * Note the argument order: source first, destination second -- the
 * reverse of memcpy().  The regions must not overlap.
 */
void glue_copy(void *from, void *to, size_t count)
{
    memcpy(to, from, count);
}
/*
 * Open hook installed in lzxglue_system.
 *
 * Opening by filename is not supported by this glue: every call reports
 * that through lit_error() and returns NULL.  All arguments are ignored.
 */
struct mspack_file * glue_open(struct mspack_system *this, char *filename,
    int mode)
{
    (void)this;
    (void)filename;
    (void)mode;

    lit_error(0,"MSPACK_OPEN unsupported!");
    return NULL;
}
/* Close hook installed in lzxglue_system: does nothing with the handle. */
void glue_close(struct mspack_file * file)
{
    (void)file;
}
/*
 * Read hook installed in lzxglue_system: copy up to `bytes` bytes from an
 * in-memory source into `buffer` and advance the source cursor.
 *
 * `file` is expected to be a memory_file cast to struct mspack_file *
 * (as LZXdecompress() passes); its magic value is checked before use.
 *
 * Returns the number of bytes copied, 0 when the source is exhausted, or
 * -1 when the handle is NULL / not a memory_file or `bytes` is negative.
 */
int glue_read(struct mspack_file * file, void * buffer, int bytes)
{
    memory_file * mem;
    int remaining;

    mem = (memory_file *)file;
    if (mem == NULL || mem->magic != 0xB5) return -1;
    /* A negative count would reach memcpy() as a huge size_t. */
    if (bytes < 0) return -1;

    remaining = mem->total_bytes - mem->current_bytes;
    /* "<=" rather than "==": an overrun cursor must never produce a
     * negative copy size either. */
    if (remaining <= 0) return 0;
    if (bytes > remaining) bytes = remaining;

    memcpy(buffer, (unsigned char *)mem->buffer+mem->current_bytes, bytes);
    mem->current_bytes += bytes;
    return bytes;
}
/*
 * Write hook installed in lzxglue_system: copy up to `bytes` bytes from
 * `buffer` into an in-memory destination and advance its cursor.
 *
 * `file` is expected to be a memory_file cast to struct mspack_file *
 * (as LZXdecompress() passes); its magic value is checked before use.
 * A write larger than the space left is reported through lit_error() and
 * truncated to what fits.
 *
 * Returns the number of bytes stored, 0 when the destination is full, or
 * -1 when the handle is NULL / not a memory_file or `bytes` is negative.
 */
int glue_write(struct mspack_file * file, void * buffer, int bytes)
{
    memory_file * mem;
    int remaining;

    mem = (memory_file *)file;
    if (mem == NULL || mem->magic != 0xB5) return -1;
    /* A negative count would reach memcpy() as a huge size_t. */
    if (bytes < 0) return -1;

    remaining = mem->total_bytes - mem->current_bytes;
    /* "<=" rather than "==": an overrun cursor must never produce a
     * negative copy size either. */
    if (remaining <= 0) return 0;
    if (bytes > remaining) {
        /* The message used to say MSPACK_READ although this is the
         * write hook. */
        lit_error(0,"MSPACK_WRITE tried to write %d bytes, only %d left.",
            bytes, remaining);
        bytes = remaining;
    }

    memcpy((unsigned char *)mem->buffer+mem->current_bytes, buffer, bytes);
    mem->current_bytes += bytes;
    return bytes;
}
/* Callback table handed to lzxd_init() by LZXdecompress().
 * The initializer is positional, so the order of the entries must match
 * the declaration of struct mspack_system (declared outside this file --
 * presumably in mspack.h; verify before reordering).  Slots set to NULL
 * are not implemented by this glue. */
struct mspack_system lzxglue_system =
{
glue_open,
glue_close,
glue_read, /* Read */
glue_write, /* Write */
NULL, /* Seek */
NULL, /* Tell */
NULL, /* Message */
glue_alloc,
glue_free,
glue_copy,
NULL /* Termination */
};
/* Window value stored by LZXinit() and passed to lzxd_init() by
 * LZXdecompress(). */
int LZXwindow;
/* Decoder handle; set by LZXdecompress() for the duration of one call and
 * reset to NULL before it returns. */
struct lzxd_stream * lzx_stream = NULL;
/*
 * Remember the window value for later calls to LZXdecompress().
 *
 * No decoder can be created yet: lzxd_init() also needs the buffers and
 * output length that only LZXdecompress() receives.  Always returns 0.
 */
int LZXinit(int window)
{
    lzx_stream = NULL;
    LZXwindow = window;
    return 0;
}
/*
 * Deliberately a no-op: LZXdecompress() creates and frees a fresh decoder
 * on every call, so there is no state here to reset.
 */
void LZXreset(void)
{
}
/*
 * Decompress `inlen` bytes at `inbuf` into `outlen` bytes at `outbuf`.
 *
 * Both buffers are wrapped in memory_file handles so that the
 * glue_read()/glue_write() hooks can serve them.  A decoder is created
 * with the window stored by LZXinit(), run once, and freed again.
 *
 * Returns the result of lzxd_decompress(), or -1 when lzxd_init() did not
 * return a decoder.
 */
int LZXdecompress(unsigned char *inbuf, unsigned char *outbuf,
    unsigned int inlen, unsigned int outlen)
{
    memory_file src;
    memory_file dst;
    int status = -1;

    src.magic = 0xB5;
    src.buffer = inbuf;
    src.total_bytes = inlen;
    src.current_bytes = 0;

    dst.magic = 0xB5;
    dst.buffer = outbuf;
    dst.total_bytes = outlen;
    dst.current_bytes = 0;

    lzx_stream = lzxd_init(&lzxglue_system, (struct mspack_file *)&src,
        (struct mspack_file *)&dst, LZXwindow,
        0x7fff /* Never reset, I do it */, 4096, outlen);
    if (lzx_stream) {
        status = lzxd_decompress(lzx_stream, outlen);
    }

    /* Freed unconditionally, exactly as before, then cleared. */
    lzxd_free(lzx_stream);
    lzx_stream = NULL;
    return status;
}

View File

@ -199,8 +199,13 @@ initlzx(void)
PyObject *m; PyObject *m;
m = Py_InitModule3("lzx", lzx_methods, lzx_doc); m = Py_InitModule3("lzx", lzx_methods, lzx_doc);
if (m == NULL) return; if (m == NULL) {
return;
}
LzxError = PyErr_NewException("lzx.LzxError", NULL, NULL); LzxError = PyErr_NewException("lzx.LzxError", NULL, NULL);
Py_INCREF(LzxError); Py_INCREF(LzxError);
PyModule_AddObject(m, "LzxError", LzxError); PyModule_AddObject(m, "LzxError", LzxError);
return;
} }