mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Section decompression working
This commit is contained in:
parent
1e78860f4f
commit
1367ba58f3
@ -15,13 +15,14 @@ from calibre.ebooks.lit import LitError
|
||||
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
|
||||
import calibre.ebooks.lit.mssha1 as mssha1
|
||||
import calibre.ebooks.lit.msdes as msdes
|
||||
import calibre.utils.lzx as lzx
|
||||
|
||||
# XML prolog prepended to decoded OPF (package metadata) markup.
# The literal must open with exactly three quotes: a fourth quote becomes
# part of the string and puts a stray '"' in front of the XML declaration.
OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE package
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
"http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
"""
|
||||
XHTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
|
||||
HTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!DOCTYPE html PUBLIC
|
||||
"+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN"
|
||||
"http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
|
||||
@ -30,6 +31,14 @@ XHTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
|
||||
# GUIDs identifying the transforms that can appear in a section's
# Transform/List.
DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}"
LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}"

# The bytes 'LZXC' read as a little-endian 32-bit integer; compared against
# the value found at offset CONTROL_TAG of the ControlData.
LZXC_TAG = 0x43585a4c

# Byte offsets into ControlData.
CONTROL_TAG = 4
CONTROL_WINDOW_SIZE = 12

# Byte offsets into the LZX ResetTable.
RESET_NENTRIES = 4
RESET_HDRLEN = 12
RESET_UCLENGTH = 16
RESET_INTERVAL = 32
|
||||
|
||||
def u32(bytes):
    """Return the first four bytes of `bytes` as an unsigned 32-bit integer.

    The value is read little-endian. Anything after the fourth byte is
    ignored; fewer than four bytes raises struct.error.
    """
    return struct.unpack('<L', bytes[:4])[0]
|
||||
|
||||
@ -114,10 +123,7 @@ class UnBinary(object):
|
||||
offset += 4
|
||||
|
||||
def item_path(self, internal_id):
|
||||
for i in self.manifest:
|
||||
if i == internal_id:
|
||||
return i.path
|
||||
raise LitError('Could not find item %s'%(internal_id,))
|
||||
return self.manifest.get(internal_id, internal_id)
|
||||
|
||||
def __unicode__(self):
    """Return the object's `raw` attribute as its unicode representation."""
    return self.raw
|
||||
@ -555,7 +561,7 @@ class LitFile(object):
|
||||
pos += size
|
||||
|
||||
def read_manifest(self, entry):
|
||||
self.manifest = []
|
||||
self.manifest = {}
|
||||
raw = self._read_content(entry.offset, entry.size)
|
||||
pos = 0
|
||||
while pos < len(raw):
|
||||
@ -593,14 +599,14 @@ class LitFile(object):
|
||||
mime_type = raw[pos:pos+slen].decode('utf8')
|
||||
pos += slen + 1
|
||||
|
||||
self.manifest.append(
|
||||
self.manifest[internal] = \
|
||||
ManifestItem(original, internal, mime_type,
|
||||
offset, root, state))
|
||||
offset, root, state)
|
||||
i += 1
|
||||
|
||||
def read_meta(self, entry):
|
||||
raw = self._read_content(entry.offset, entry.size)
|
||||
xml = OPF_DECL + unicode(UnBinary(raw, self.manifest))
|
||||
xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP))
|
||||
self.meta = xml
|
||||
|
||||
def read_drm(self):
|
||||
@ -644,6 +650,13 @@ class LitFile(object):
|
||||
key[i % 8] ^= ord(digest[i])
|
||||
return ''.join(chr(x) for x in key)
|
||||
|
||||
def get_markup_file(self, name):
    """Return the file `name` decoded from binary markup into XML text.

    '/meta' is decoded with the OPF declaration and tag map; any other
    name is decoded with the HTML declaration and tag map.
    """
    raw = self.get_file(name)
    # `tag_map` rather than `map`, so the builtin is not shadowed.
    if name == '/meta':
        decl, tag_map = OPF_DECL, OPF_MAP
    else:
        decl, tag_map = HTML_DECL, HTML_MAP
    return decl + unicode(UnBinary(raw, self.manifest, tag_map))
|
||||
|
||||
def get_file(self, name):
|
||||
entry = self.entries[name]
|
||||
if entry.section == 0:
|
||||
@ -664,20 +677,20 @@ class LitFile(object):
|
||||
transform = self.get_file(path + '/Transform/List')
|
||||
content = self.get_file(path + '/Content')
|
||||
control = self.get_file(path + '/ControlData')
|
||||
idx_transform = idx_control = 0
|
||||
while (len(transform) - idx_transform) >= 16:
|
||||
ndwords = int32(control[idx_control:]) + 1
|
||||
if (idx_control + (ndwords * 4)) > len(control) or ndwords <= 0:
|
||||
while len(transform) >= 16:
|
||||
csize = (int32(control) + 1) * 4
|
||||
if csize > len(control) or csize <= 0:
|
||||
raise LitError("ControlData is too short")
|
||||
guid = msguid(transform[idx_transform:])
|
||||
guid = msguid(transform)
|
||||
if guid == DESENCRYPT_GUID:
|
||||
content = self._decrypt(content)
|
||||
idx_control += ndwords * 4
|
||||
control = control[csize:]
|
||||
elif guid == LZXCOMPRESS_GUID:
|
||||
raise LitError("LZX decompression not implemented")
|
||||
content = self._decompress_section(name, control, content)
|
||||
control = control[csize:]
|
||||
else:
|
||||
raise LitError("Unrecognized transform: %s." % repr(guid))
|
||||
idx_transform += 16
|
||||
transform = transform[16:]
|
||||
return content
|
||||
|
||||
def _decrypt(self, content):
|
||||
@ -685,6 +698,59 @@ class LitFile(object):
|
||||
raise LitError('Cannot extract content from a DRM protected ebook')
|
||||
return msdes.new(self.bookkey).decrypt(content)
|
||||
|
||||
def _decompress_section(self, name, control, content):
    """Return the LZX-decompressed data of section `name`.

    `control` is the ControlData for the LZX transform and `content` is
    the compressed section data. The section's ResetTable is read through
    self.get_file() and walked to decompress one window at a time.

    Raises LitError if ControlData or the ResetTable fails validation, or
    if the declared uncompressed length is not fully produced.
    """
    if len(control) < 32 or u32(control[CONTROL_TAG:]) != LZXC_TAG:
        raise LitError("Invalid ControlData tag value")
    result = []

    # window_size becomes 14 plus the number of significant bits in the
    # window field of ControlData.
    window_size = 14
    u = u32(control[CONTROL_WINDOW_SIZE:])
    while u > 0:
        u >>= 1
        window_size += 1
    if window_size < 15 or window_size > 21:
        raise LitError("Invalid window in ControlData")
    lzx.init(window_size)

    reset_table = self.get_file('/'.join(
        ['::DataSpace/Storage', name, 'Transform',
         LZXCOMPRESS_GUID, 'InstanceData/ResetTable']))
    if len(reset_table) < (RESET_INTERVAL + 8):
        raise LitError("Reset table is too short")
    # UCLENGTH is stored as 64 bits; only values that fit in the low 32
    # bits are accepted.
    if u32(reset_table[RESET_UCLENGTH + 4:]) != 0:
        raise LitError("Reset table has 64bit value for UCLENGTH")
    ofs_entry = int32(reset_table[RESET_HDRLEN:]) + 8
    uclength = int32(reset_table[RESET_UCLENGTH:])
    interval = int32(reset_table[RESET_INTERVAL:])
    accum = interval
    bytes_remaining = uclength
    window_bytes = (1 << window_size)
    base = 0

    # Each table entry is 8 bytes. An entry is only acted on once the
    # accumulated interval has reached a full window.
    while ofs_entry < len(reset_table):
        if accum >= window_bytes:
            accum = 0
            size = int32(reset_table[ofs_entry:])
            u = int32(reset_table[ofs_entry + 4:])
            if u != 0:
                raise LitError("Reset table entry greater than 32 bits")
            if size >= (len(content) + base):
                # Must be a LitError instance: raising a bare string does
                # not produce the exception type callers handle.
                raise LitError("Reset table entry out of bounds")
            if bytes_remaining >= window_bytes:
                lzx.reset()
                result.append(lzx.decompress(content, window_bytes))
                bytes_remaining -= window_bytes
                # `size` is measured from the start of the section, while
                # `content` has already been advanced by `base`.
                content = content[size - base:]
                base = size
        accum += interval
        ofs_entry += 8

    # Final, partial window.
    if bytes_remaining < window_bytes and bytes_remaining > 0:
        lzx.reset()
        result.append(lzx.decompress(content, bytes_remaining))
        bytes_remaining = 0
    if bytes_remaining > 0:
        raise LitError("Failed to completely decompress section")
    return ''.join(result)
|
||||
|
||||
def get_metadata(stream):
|
||||
try:
|
||||
litfile = LitFile(stream)
|
||||
@ -693,7 +759,7 @@ def get_metadata(stream):
|
||||
cover_url, cover_item = mi.cover, None
|
||||
if cover_url:
|
||||
cover_url = relpath(cover_url, os.getcwd())
|
||||
for item in litfile.manifest:
|
||||
for item in litfile.manifest.values():
|
||||
if item.path == cover_url:
|
||||
cover_item = item.internal
|
||||
if cover_item is not None:
|
||||
|
@ -1,172 +0,0 @@
|
||||
/*--[lzxglue.c]----------------------------------------------------------------
|
||||
| Copyright (C) 2004 DRS
|
||||
|
|
||||
| This file is part of the "openclit" library for processing .LIT files.
|
||||
|
|
||||
| "Openclit" is free software; you can redistribute it and/or modify
|
||||
| it under the terms of the GNU General Public License as published by
|
||||
| the Free Software Foundation; either version 2 of the License, or
|
||||
| (at your option) any later version.
|
||||
|
|
||||
| This program is distributed in the hope that it will be useful,
|
||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
| GNU General Public License for more details.
|
||||
|
|
||||
| You should have received a copy of the GNU General Public License
|
||||
| along with this program; if not, write to the Free Software
|
||||
| Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
|
||||
| The GNU General Public License may also be available at the following
|
||||
| URL: http://www.gnu.org/licenses/gpl.html
|
||||
*/
|
||||
|
||||
/* This provides a "glue" between Stuart Caie's libmspack library and the
|
||||
* Openclit calls to the earlier LZX library.
|
||||
*
|
||||
* This way, I should be able to use the files unmodified.
|
||||
*/
|
||||
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "litlib.h"
#include "mspack.h"
#include "lzx.h"
|
||||
|
||||
/* In-memory stand-in for a file handle: a fixed buffer plus a cursor.
 * The read/write callbacks cast their mspack_file pointer to this type and
 * check `magic` before touching the buffer. */
typedef struct memory_file
{
    unsigned int magic;     /* 0xB5 */
    void * buffer;          /* start of the backing buffer */
    int total_bytes;        /* size of the buffer in bytes */
    int current_bytes;      /* bytes already read from / written to it */
} memory_file;
|
||||
|
||||
|
||||
void * glue_alloc(struct mspack_system *this, size_t bytes)
|
||||
{
|
||||
void * p;
|
||||
p = (void *)malloc(bytes);
|
||||
if (p == NULL) {
|
||||
lit_error(ERR_R|ERR_LIBC,"Malloc(%d) failed!", bytes);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Free callback: release a block obtained from malloc(). NULL is allowed. */
void glue_free(void * p)
{
    free(p);
}
|
||||
|
||||
/* Copy callback: copy `bytes` bytes from `src` to `dest`.
 * Note the argument order is (src, dest), the reverse of memcpy().
 * The regions must not overlap. */
void glue_copy(void *src, void *dest, size_t bytes)
{
    memcpy(dest, src, bytes);
}
|
||||
|
||||
/* Open callback: opening by filename is not supported by this glue layer.
 * Always reports an error and returns NULL. */
struct mspack_file * glue_open(struct mspack_system *this, char *filename,
                               int mode)
{
    (void)this;
    (void)filename;
    (void)mode;
    lit_error(0,"MSPACK_OPEN unsupported!");
    return NULL;
}
|
||||
|
||||
/* Close callback: nothing to release, so this is a no-op. */
void glue_close(struct mspack_file * file)
{
    (void)file;
}
|
||||
|
||||
|
||||
int glue_read(struct mspack_file * file, void * buffer, int bytes)
|
||||
{
|
||||
memory_file * mem;
|
||||
int remaining;
|
||||
|
||||
mem = (memory_file *)file;
|
||||
if (mem->magic != 0xB5) return -1;
|
||||
|
||||
remaining = mem->total_bytes - mem->current_bytes;
|
||||
if (!remaining) return 0;
|
||||
if (bytes > remaining) bytes = remaining;
|
||||
memcpy(buffer, (unsigned char *)mem->buffer+mem->current_bytes, bytes);
|
||||
mem->current_bytes += bytes;
|
||||
return bytes;
|
||||
}
|
||||
|
||||
int glue_write(struct mspack_file * file, void * buffer, int bytes)
|
||||
{
|
||||
memory_file * mem;
|
||||
int remaining;
|
||||
|
||||
mem = (memory_file *)file;
|
||||
if (mem->magic != 0xB5) return -1;
|
||||
|
||||
remaining = mem->total_bytes - mem->current_bytes;
|
||||
if (!remaining) return 0;
|
||||
if (bytes > remaining) {
|
||||
lit_error(0,"MSPACK_READ tried to write %d bytes, only %d left.",
|
||||
bytes, remaining);
|
||||
bytes = remaining;
|
||||
}
|
||||
memcpy((unsigned char *)mem->buffer+mem->current_bytes, buffer, bytes);
|
||||
mem->current_bytes += bytes;
|
||||
return bytes;
|
||||
}
|
||||
|
||||
struct mspack_system lzxglue_system =
|
||||
{
|
||||
glue_open,
|
||||
glue_close,
|
||||
glue_read, /* Read */
|
||||
glue_write, /* Write */
|
||||
NULL, /* Seek */
|
||||
NULL, /* Tell */
|
||||
NULL, /* Message */
|
||||
glue_alloc,
|
||||
glue_free,
|
||||
glue_copy,
|
||||
NULL /* Termination */
|
||||
};
|
||||
|
||||
/* Window argument remembered by LZXinit() for later lzxd_init() calls. */
int LZXwindow;
/* Decoder stream currently in use; NULL whenever no decompression runs. */
struct lzxd_stream * lzx_stream = NULL;


/* Record the window setting. The decoder itself cannot be created here
 * because the input and output buffers are not known yet; that happens in
 * LZXdecompress(). Always returns 0. */
int LZXinit(int window)
{
    LZXwindow = window;
    lzx_stream = NULL;

    return 0;
}
|
||||
|
||||
/* No reset operation is needed: LZXdecompress() builds a fresh decoder
 * state on every call, so this is intentionally a no-op. */
void LZXreset(void)
{
}
|
||||
|
||||
int LZXdecompress(unsigned char *inbuf, unsigned char *outbuf,
|
||||
unsigned int inlen, unsigned int outlen)
|
||||
{
|
||||
int err;
|
||||
memory_file source;
|
||||
memory_file dest;
|
||||
|
||||
source.magic = 0xB5;
|
||||
source.buffer = inbuf;
|
||||
source.current_bytes = 0;
|
||||
source.total_bytes = inlen;
|
||||
|
||||
dest.magic = 0xB5;
|
||||
dest.buffer = outbuf;
|
||||
dest.current_bytes = 0;
|
||||
dest.total_bytes = outlen;
|
||||
|
||||
lzx_stream = lzxd_init(&lzxglue_system, (struct mspack_file *)&source,
|
||||
(struct mspack_file *)&dest, LZXwindow,
|
||||
0x7fff /* Never reset, I do it */, 4096, outlen);
|
||||
err = -1;
|
||||
if (lzx_stream) err = lzxd_decompress(lzx_stream, outlen);
|
||||
|
||||
lzxd_free(lzx_stream);
|
||||
lzx_stream = NULL;
|
||||
return err;
|
||||
}
|
@ -199,8 +199,13 @@ initlzx(void)
|
||||
PyObject *m;
|
||||
|
||||
m = Py_InitModule3("lzx", lzx_methods, lzx_doc);
|
||||
if (m == NULL) return;
|
||||
if (m == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
LzxError = PyErr_NewException("lzx.LzxError", NULL, NULL);
|
||||
Py_INCREF(LzxError);
|
||||
PyModule_AddObject(m, "LzxError", LzxError);
|
||||
|
||||
return;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user