Initial merge of native replacement for convertlit (thanks to llasram)

Kovid Goyal 2008-07-21 13:34:53 -07:00
commit c81e009e0d
15 changed files with 5602 additions and 722 deletions
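For orientation, a minimal usage sketch of the new native reader introduced by this commit. The package is calibre.ebooks.lit (per the imports below); the exact reader module path is an assumption here, and the reader also ships its own option_parser()/main() command-line entry point.

from calibre.ebooks.lit.reader import LitReader   # assumed module path

lr = LitReader('book.lit')        # accepts a file path or an open stream
lr.extract_content('out_dir')     # writes the OPF plus the HTML/CSS/image files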

View File

@ -0,0 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
class LitError(Exception):
pass

View File

@ -0,0 +1,9 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
"""
Microsoft LIT tag and attribute tables.
"""
from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP
from calibre.ebooks.lit.maps.html import MAP as HTML_MAP

File diff suppressed because it is too large

View File

@ -0,0 +1,81 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
"""
Microsoft LIT OPF tag and attribute tables, copied from ConvertLIT.
"""
TAGS = [
None,
"package",
"dc:Title",
"dc:Creator",
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
"manifest",
"item",
"spine",
"itemref",
"metadata",
"dc-metadata",
"dc:Subject",
"dc:Description",
"dc:Publisher",
"dc:Contributor",
"dc:Date",
"dc:Type",
"dc:Format",
"dc:Identifier",
"dc:Source",
"dc:Language",
"dc:Relation",
"dc:Coverage",
"dc:Rights",
"x-metadata",
"meta",
"tours",
"tour",
"site",
"guide",
"reference",
None,
]
ATTRS = {
0x0001: "href",
0x0002: "%never-used",
0x0003: "%guid",
0x0004: "%minimum_level",
0x0005: "%attr5",
0x0006: "id",
0x0007: "href",
0x0008: "media-type",
0x0009: "fallback",
0x000A: "idref",
0x000B: "xmlns:dc",
0x000C: "xmlns:oebpackage",
0x000D: "role",
0x000E: "file-as",
0x000F: "event",
0x0010: "scheme",
0x0011: "title",
0x0012: "type",
0x0013: "unique-identifier",
0x0014: "name",
0x0015: "content",
0x0016: "xml:lang",
}
TAGS_ATTRS = [{} for i in xrange(43)]
MAP = (TAGS, ATTRS, TAGS_ATTRS)
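A hedged illustration (not part of the commit) of how the UnBinary class in the reader below consumes this MAP: a tag byte indexes TAGS directly, and an attribute code looks up ATTRS.

TAGS, ATTRS, TAGS_ATTRS = MAP
assert TAGS[0x02] == "dc:Title"      # tag byte 0x02 opens a <dc:Title> element
assert ATTRS[0x0011] == "title"      # attribute code 0x11 emits title="..."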

View File

@ -0,0 +1,489 @@
# Re-modified for use in MS LIT decryption. Un-reversed the bytebit[] array.
# Substituted Microsoft's absurd modified S-boxes. Modified the
# encrypt/decrypt methods to handle more than one block at a time. Added a few
# speed-ups supported by modern versions of Python. Added option 'psyco' use.
#
# And lo, all the previous notices follow:
# Modified DES encryption for VNC password authentication.
# Ported from realvnc's java viewer by <cliechti@gmx.net>
# I chose this package name because it is not compatible with the
# original DES algorithm, e.g. found pycrypto.
#
# (C) 2003 chris <cliechti@gmx.net>
# Released as free software under the Python License.
#
# You're free to use it for commercial and noncommercial
# application, modify and redistribute it as long as the
# copyright notices are intact. There are no warranties, not
# even that it does what it says to do ;-)
#
# Original notice following:
# This DES class has been extracted from package Acme.Crypto for use in VNC.
# The bytebit[] array has been reversed so that the most significant bit
# in each byte of the key is ignored, not the least significant. Also the
# unnecessary odd parity code has been removed.
#
# These changes are:
# Copyright (C) 1999 AT&T Laboratories Cambridge. All Rights Reserved.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# DesCipher - the DES encryption method
#
# The meat of this code is by Dave Zimmerman <dzimm@widget.com>, and is:
#
# Copyright (c) 1996 Widget Workshop, Inc. All Rights Reserved.
#
# Permission to use, copy, modify, and distribute this software
# and its documentation for NON-COMMERCIAL or COMMERCIAL purposes and
# without fee is hereby granted, provided that this copyright notice is kept
# intact.
#
# WIDGET WORKSHOP MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY
# OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
# TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE, OR NON-INFRINGEMENT. WIDGET WORKSHOP SHALL NOT BE LIABLE
# FOR ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
# DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
#
# THIS SOFTWARE IS NOT DESIGNED OR INTENDED FOR USE OR RESALE AS ON-LINE
# CONTROL EQUIPMENT IN HAZARDOUS ENVIRONMENTS REQUIRING FAIL-SAFE
# PERFORMANCE, SUCH AS IN THE OPERATION OF NUCLEAR FACILITIES, AIRCRAFT
# NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL, DIRECT LIFE
# SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH THE FAILURE OF THE
# SOFTWARE COULD LEAD DIRECTLY TO DEATH, PERSONAL INJURY, OR SEVERE
# PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH RISK ACTIVITIES"). WIDGET WORKSHOP
# SPECIFICALLY DISCLAIMS ANY EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR
# HIGH RISK ACTIVITIES.
#
#
# The rest is:
#
# Copyright (C) 1996 by Jef Poskanzer <jef@acme.com>. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# Visit the ACME Labs Java page for up-to-date versions of this and other
# fine Java utilities: http://www.acme.com/java/
#/ The DES encryption method.
# <P>
# This is surprisingly fast, for pure Java. On a SPARC 20, wrapped
# in Acme.Crypto.EncryptedOutputStream or Acme.Crypto.EncryptedInputStream,
# it does around 7000 bytes/second.
# <P>
# Most of this code is by Dave Zimmerman <dzimm@widget.com>, and is
# Copyright (c) 1996 Widget Workshop, Inc. See the source file for details.
# <P>
# <A HREF="/resources/classes/Acme/Crypto/DesCipher.java">Fetch the software.</A><BR>
# <A HREF="/resources/classes/Acme.tar.Z">Fetch the entire Acme package.</A>
# <P>
# @see Des3Cipher
# @see EncryptedOutputStream
# @see EncryptedInputStream
import struct
class DesCipher:
# Constructor, byte-array key.
def __init__(self, key):
self.setKey(key)
#/ Set the key.
def setKey(self, key):
self.encryptKeys = self.deskey([ord(x) for x in key], 1)
self.decryptKeys = self.deskey([ord(x) for x in key], 0)
# Turn an 8-byte key into internal keys.
def deskey(self, keyBlock, encrypting):
#~ int i, j, l, m, n;
pc1m = [0]*56 #new int[56];
pcr = [0]*56 #new int[56];
kn = [0]*32 #new int[32];
for j in xrange(56):
l = pc1[j]
m = l & 07
pc1m[j] = ((keyBlock[l >> 3] & bytebit[m]) != 0)
for i in xrange(16):
if encrypting:
m = i << 1
else:
m = (15-i) << 1
n = m + 1
kn[m] = kn[n] = 0
for j in xrange(28):
l = j + totrot[i]
if l < 28:
pcr[j] = pc1m[l]
else:
pcr[j] = pc1m[l - 28]
for j in xrange(28, 56):
l = j + totrot[i]
if l < 56:
pcr[j] = pc1m[l]
else:
pcr[j] = pc1m[l - 28]
for j in xrange(24):
if pcr[pc2[j]] != 0:
kn[m] |= bigbyte[j]
if pcr[pc2[j+24]] != 0:
kn[n] |= bigbyte[j]
return self.cookey(kn)
def cookey(self, raw):
#~ int raw0, raw1;
#~ int rawi, KnLi;
#~ int i;
KnL = [0]*32
rawi = 0
KnLi = 0
for i in xrange(16):
raw0 = raw[rawi]
rawi += 1
raw1 = raw[rawi]
rawi += 1
KnL[KnLi] = (raw0 & 0x00fc0000L) << 6
KnL[KnLi] |= (raw0 & 0x00000fc0L) << 10
KnL[KnLi] |= (raw1 & 0x00fc0000L) >> 10
KnL[KnLi] |= (raw1 & 0x00000fc0L) >> 6
KnLi += 1
KnL[KnLi] = (raw0 & 0x0003f000L) << 12
KnL[KnLi] |= (raw0 & 0x0000003fL) << 16
KnL[KnLi] |= (raw1 & 0x0003f000L) >> 4
KnL[KnLi] |= (raw1 & 0x0000003fL)
KnLi += 1
return KnL
# Block encryption routines.
#/ Encrypt a block of eight bytes.
def encrypt(self, clearText):
if len(clearText) % 8 != 0:
raise TypeError, "length must be multiple of block size"
result = []
for base in xrange(0, len(clearText), 8):
result.append(struct.pack(
">LL", *self.des(struct.unpack(">LL", clearText[base:base+8]),
self.encryptKeys)))
return ''.join(result)
#/ Decrypt a block of eight bytes.
def decrypt(self, cipherText):
if len(cipherText) % 8 != 0:
raise TypeError, "length must be multiple of block size"
result = []
for base in xrange(0, len(cipherText), 8):
result.append(struct.pack(
">LL", *self.des(struct.unpack(">LL", cipherText[base:base+8]),
self.decryptKeys)))
return ''.join(result)
# The DES function.
def des(self, (leftt, right), keys):
#~ int fval, work, right, leftt;
#~ int round
keysi = 0
work = ((leftt >> 4) ^ right) & 0x0f0f0f0fL
right ^= work
leftt ^= (work << 4) & 0xffffffffL
work = ((leftt >> 16) ^ right) & 0x0000ffffL
right ^= work
leftt ^= (work << 16) & 0xffffffffL
work = ((right >> 2) ^ leftt) & 0x33333333L
leftt ^= work
right ^= (work << 2) & 0xffffffffL
work = ((right >> 8) ^ leftt) & 0x00ff00ffL
leftt ^= work
right ^= (work << 8) & 0xffffffffL
right = ((right << 1) | ((right >> 31) & 1)) & 0xffffffffL
work = (leftt ^ right) & 0xaaaaaaaaL
leftt ^= work
right ^= work
leftt = ((leftt << 1) | ((leftt >> 31) & 1)) & 0xffffffffL
for round in xrange(8):
work = ((right << 28) | (right >> 4)) & 0xffffffffL
work ^= keys[keysi]
keysi += 1
fval = SP7[ work & 0x0000003fL ]
fval |= SP5[(work >> 8) & 0x0000003fL ]
fval |= SP3[(work >> 16) & 0x0000003fL ]
fval |= SP1[(work >> 24) & 0x0000003fL ]
work = right ^ keys[keysi]
keysi += 1
fval |= SP8[ work & 0x0000003fL ]
fval |= SP6[(work >> 8) & 0x0000003fL ]
fval |= SP4[(work >> 16) & 0x0000003fL ]
fval |= SP2[(work >> 24) & 0x0000003fL ]
leftt ^= fval
work = ((leftt << 28) | (leftt >> 4)) & 0xffffffffL
work ^= keys[keysi]
keysi += 1
fval = SP7[ work & 0x0000003fL ]
fval |= SP5[(work >> 8) & 0x0000003fL ]
fval |= SP3[(work >> 16) & 0x0000003fL ]
fval |= SP1[(work >> 24) & 0x0000003fL ]
work = leftt ^ keys[keysi]
keysi += 1
fval |= SP8[ work & 0x0000003fL ]
fval |= SP6[(work >> 8) & 0x0000003fL ]
fval |= SP4[(work >> 16) & 0x0000003fL ]
fval |= SP2[(work >> 24) & 0x0000003fL ]
right ^= fval
right = ((right << 31) | (right >> 1)) & 0xffffffffL
work = (leftt ^ right) & 0xaaaaaaaaL
leftt ^= work
right ^= work
leftt = ((leftt << 31) | (leftt >> 1)) & 0xffffffffL
work = ((leftt >> 8) ^ right) & 0x00ff00ffL
right ^= work
leftt ^= (work << 8) & 0xffffffffL
work = ((leftt >> 2) ^ right) & 0x33333333L
right ^= work
leftt ^= (work << 2) & 0xffffffffL
work = ((right >> 16) ^ leftt) & 0x0000ffffL
leftt ^= work
right ^= (work << 16) & 0xffffffffL
work = ((right >> 4) ^ leftt) & 0x0f0f0f0fL
leftt ^= work
right ^= (work << 4) & 0xffffffffL
return right, leftt
# Tables, permutations, S-boxes, etc.
bytebit = [0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01]
bigbyte = [
0x800000, 0x400000, 0x200000, 0x100000,
0x080000, 0x040000, 0x020000, 0x010000,
0x008000, 0x004000, 0x002000, 0x001000,
0x000800, 0x000400, 0x000200, 0x000100,
0x000080, 0x000040, 0x000020, 0x000010,
0x000008, 0x000004, 0x000002, 0x000001
]
pc1 = [
56, 48, 40, 32, 24, 16, 8,
0, 57, 49, 41, 33, 25, 17,
9, 1, 58, 50, 42, 34, 26,
18, 10, 2, 59, 51, 43, 35,
62, 54, 46, 38, 30, 22, 14,
6, 61, 53, 45, 37, 29, 21,
13, 5, 60, 52, 44, 36, 28,
20, 12, 4, 27, 19, 11, 3
]
totrot = [
1, 2, 4, 6, 8, 10, 12, 14, 15, 17, 19, 21, 23, 25, 27, 28
]
pc2 = [
13, 16, 10, 23, 0, 4,
2, 27, 14, 5, 20, 9,
22, 18, 11, 3 , 25, 7,
15, 6, 26, 19, 12, 1,
40, 51, 30, 36, 46, 54,
29, 39, 50, 44, 32, 47,
43, 48, 38, 55, 33, 52,
45, 41, 49, 35, 28, 31,
]
# Microsoft's modified S-boxes for LIT file encryption
SP1 = [
0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L,
0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L,
0x00080802L, 0x02080800L, 0x02080000L, 0x00000802L,
0x02000802L, 0x02000000L, 0x00000000L, 0x00080002L,
0x00080000L, 0x00000002L, 0x02000800L, 0x00080800L,
0x02080802L, 0x02080000L, 0x00000802L, 0x02000800L,
0x00000002L, 0x00000800L, 0x00080800L, 0x02080002L,
0x00000800L, 0x02000802L, 0x02080002L, 0x00000000L,
0x00000000L, 0x02080802L, 0x02000800L, 0x00080002L,
0x02080800L, 0x00080000L, 0x00000802L, 0x02000800L,
0x02080002L, 0x00000800L, 0x00080800L, 0x02000002L,
0x00080802L, 0x00000002L, 0x02000002L, 0x02080000L,
0x02080802L, 0x00080800L, 0x02080000L, 0x02000802L,
0x02000000L, 0x00000802L, 0x00080002L, 0x00000000L,
0x00080000L, 0x02000000L, 0x02000802L, 0x02080800L,
0x00000002L, 0x02080002L, 0x00000800L, 0x00080802L
]
SP2 = [
0x40108010L, 0x00000000L, 0x00108000L, 0x40100000L,
0x40000010L, 0x00008010L, 0x40008000L, 0x00108000L,
0x00008000L, 0x40100010L, 0x00000010L, 0x40008000L,
0x00100010L, 0x40108000L, 0x40100000L, 0x00000010L,
0x00100000L, 0x40008010L, 0x40100010L, 0x00008000L,
0x00108010L, 0x40000000L, 0x00000000L, 0x00100010L,
0x40008010L, 0x00108010L, 0x40108000L, 0x40000010L,
0x40000000L, 0x00100000L, 0x00008010L, 0x40108010L,
0x00100010L, 0x40108000L, 0x40008000L, 0x00108010L,
0x40108010L, 0x00100010L, 0x40000010L, 0x00000000L,
0x40000000L, 0x00008010L, 0x00100000L, 0x40100010L,
0x00008000L, 0x40000000L, 0x00108010L, 0x40008010L,
0x40108000L, 0x00008000L, 0x00000000L, 0x40000010L,
0x00000010L, 0x40108010L, 0x00108000L, 0x40100000L,
0x40100010L, 0x00100000L, 0x00008010L, 0x40008000L,
0x40008010L, 0x00000010L, 0x40100000L, 0x00108000L
]
SP3 = [
0x04000001L, 0x04040100L, 0x00000100L, 0x04000101L,
0x00040001L, 0x04000000L, 0x04000101L, 0x00040100L,
0x04000100L, 0x00040000L, 0x04040000L, 0x00000001L,
0x04040101L, 0x00000101L, 0x00000001L, 0x04040001L,
0x00000000L, 0x00040001L, 0x04040100L, 0x00000100L,
0x00000101L, 0x04040101L, 0x00040000L, 0x04000001L,
0x04040001L, 0x04000100L, 0x00040101L, 0x04040000L,
0x00040100L, 0x00000000L, 0x04000000L, 0x00040101L,
0x04040100L, 0x00000100L, 0x00000001L, 0x00040000L,
0x00000101L, 0x00040001L, 0x04040000L, 0x04000101L,
0x00000000L, 0x04040100L, 0x00040100L, 0x04040001L,
0x00040001L, 0x04000000L, 0x04040101L, 0x00000001L,
0x00040101L, 0x04000001L, 0x04000000L, 0x04040101L,
0x00040000L, 0x04000100L, 0x04000101L, 0x00040100L,
0x04000100L, 0x00000000L, 0x04040001L, 0x00000101L,
0x04000001L, 0x00040101L, 0x00000100L, 0x04040000L
]
SP4 = [
0x00401008L, 0x10001000L, 0x00000008L, 0x10401008L,
0x00000000L, 0x10400000L, 0x10001008L, 0x00400008L,
0x10401000L, 0x10000008L, 0x10000000L, 0x00001008L,
0x10000008L, 0x00401008L, 0x00400000L, 0x10000000L,
0x10400008L, 0x00401000L, 0x00001000L, 0x00000008L,
0x00401000L, 0x10001008L, 0x10400000L, 0x00001000L,
0x00001008L, 0x00000000L, 0x00400008L, 0x10401000L,
0x10001000L, 0x10400008L, 0x10401008L, 0x00400000L,
0x10400008L, 0x00001008L, 0x00400000L, 0x10000008L,
0x00401000L, 0x10001000L, 0x00000008L, 0x10400000L,
0x10001008L, 0x00000000L, 0x00001000L, 0x00400008L,
0x00000000L, 0x10400008L, 0x10401000L, 0x00001000L,
0x10000000L, 0x10401008L, 0x00401008L, 0x00400000L,
0x10401008L, 0x00000008L, 0x10001000L, 0x00401008L,
0x00400008L, 0x00401000L, 0x10400000L, 0x10001008L,
0x00001008L, 0x10000000L, 0x10000008L, 0x10401000L
]
SP5 = [
0x08000000L, 0x00010000L, 0x00000400L, 0x08010420L,
0x08010020L, 0x08000400L, 0x00010420L, 0x08010000L,
0x00010000L, 0x00000020L, 0x08000020L, 0x00010400L,
0x08000420L, 0x08010020L, 0x08010400L, 0x00000000L,
0x00010400L, 0x08000000L, 0x00010020L, 0x00000420L,
0x08000400L, 0x00010420L, 0x00000000L, 0x08000020L,
0x00000020L, 0x08000420L, 0x08010420L, 0x00010020L,
0x08010000L, 0x00000400L, 0x00000420L, 0x08010400L,
0x08010400L, 0x08000420L, 0x00010020L, 0x08010000L,
0x00010000L, 0x00000020L, 0x08000020L, 0x08000400L,
0x08000000L, 0x00010400L, 0x08010420L, 0x00000000L,
0x00010420L, 0x08000000L, 0x00000400L, 0x00010020L,
0x08000420L, 0x00000400L, 0x00000000L, 0x08010420L,
0x08010020L, 0x08010400L, 0x00000420L, 0x00010000L,
0x00010400L, 0x08010020L, 0x08000400L, 0x00000420L,
0x00000020L, 0x00010420L, 0x08010000L, 0x08000020L
]
SP6 = [
0x80000040L, 0x00200040L, 0x00000000L, 0x80202000L,
0x00200040L, 0x00002000L, 0x80002040L, 0x00200000L,
0x00002040L, 0x80202040L, 0x00202000L, 0x80000000L,
0x80002000L, 0x80000040L, 0x80200000L, 0x00202040L,
0x00200000L, 0x80002040L, 0x80200040L, 0x00000000L,
0x00002000L, 0x00000040L, 0x80202000L, 0x80200040L,
0x80202040L, 0x80200000L, 0x80000000L, 0x00002040L,
0x00000040L, 0x00202000L, 0x00202040L, 0x80002000L,
0x00002040L, 0x80000000L, 0x80002000L, 0x00202040L,
0x80202000L, 0x00200040L, 0x00000000L, 0x80002000L,
0x80000000L, 0x00002000L, 0x80200040L, 0x00200000L,
0x00200040L, 0x80202040L, 0x00202000L, 0x00000040L,
0x80202040L, 0x00202000L, 0x00200000L, 0x80002040L,
0x80000040L, 0x80200000L, 0x00202040L, 0x00000000L,
0x00002000L, 0x80000040L, 0x80002040L, 0x80202000L,
0x80200000L, 0x00002040L, 0x00000040L, 0x80200040L,
]
SP7 = [
0x00004000L, 0x00000200L, 0x01000200L, 0x01000004L,
0x01004204L, 0x00004004L, 0x00004200L, 0x00000000L,
0x01000000L, 0x01000204L, 0x00000204L, 0x01004000L,
0x00000004L, 0x01004200L, 0x01004000L, 0x00000204L,
0x01000204L, 0x00004000L, 0x00004004L, 0x01004204L,
0x00000000L, 0x01000200L, 0x01000004L, 0x00004200L,
0x01004004L, 0x00004204L, 0x01004200L, 0x00000004L,
0x00004204L, 0x01004004L, 0x00000200L, 0x01000000L,
0x00004204L, 0x01004000L, 0x01004004L, 0x00000204L,
0x00004000L, 0x00000200L, 0x01000000L, 0x01004004L,
0x01000204L, 0x00004204L, 0x00004200L, 0x00000000L,
0x00000200L, 0x01000004L, 0x00000004L, 0x01000200L,
0x00000000L, 0x01000204L, 0x01000200L, 0x00004200L,
0x00000204L, 0x00004000L, 0x01004204L, 0x01000000L,
0x01004200L, 0x00000004L, 0x00004004L, 0x01004204L,
0x01000004L, 0x01004200L, 0x01004000L, 0x00004004L,
]
SP8 = [
0x20800080L, 0x20820000L, 0x00020080L, 0x00000000L,
0x20020000L, 0x00800080L, 0x20800000L, 0x20820080L,
0x00000080L, 0x20000000L, 0x00820000L, 0x00020080L,
0x00820080L, 0x20020080L, 0x20000080L, 0x20800000L,
0x00020000L, 0x00820080L, 0x00800080L, 0x20020000L,
0x20820080L, 0x20000080L, 0x00000000L, 0x00820000L,
0x20000000L, 0x00800000L, 0x20020080L, 0x20800080L,
0x00800000L, 0x00020000L, 0x20820000L, 0x00000080L,
0x00800000L, 0x00020000L, 0x20000080L, 0x20820080L,
0x00020080L, 0x20000000L, 0x00000000L, 0x00820000L,
0x20800080L, 0x20020080L, 0x20020000L, 0x00800080L,
0x20820000L, 0x00000080L, 0x00800080L, 0x20020000L,
0x20820080L, 0x00800000L, 0x20800000L, 0x20000080L,
0x00820000L, 0x00020080L, 0x20020080L, 0x20800000L,
0x00000080L, 0x20820000L, 0x00820080L, 0x00000000L,
0x20000000L, 0x20800080L, 0x00020000L, 0x00820080L,
]
def new(key):
return DesCipher(key)
block_size = 8
key_size = 8
try:
import psyco
psyco.bind(DesCipher.deskey)
psyco.bind(DesCipher.cookey)
psyco.bind(DesCipher.des)
except ImportError:
pass
#test only:
if __name__ == '__main__':
des = DesCipher("\x01\x23\x45\x67\x89\xab\xcd\xef")
print ''.join(
"%02x" % ord(x) for x in des.encrypt("Now is t"))

View File

@ -0,0 +1,337 @@
"""
Modified version of SHA-1 used in Microsoft LIT files.
Adapted from the PyPy pure-Python SHA-1 implementation.
"""
import struct, copy
# ======================================================================
# Bit-Manipulation helpers
#
# _long2bytes() was contributed by Barry Warsaw
# and is reused here with tiny modifications.
# ======================================================================
def _long2bytesBigEndian(n, blocksize=0):
"""Convert a long integer to a byte string.
If optional blocksize is given and greater than zero, pad the front
of the byte string with binary zeros so that the length is a multiple
of blocksize.
"""
# After much testing, this algorithm was deemed to be the fastest.
s = ''
pack = struct.pack
while n > 0:
s = pack('>I', n & 0xffffffffL) + s
n = n >> 32
# Strip off leading zeros.
for i in range(len(s)):
if s[i] != '\000':
break
else:
# Only happens when n == 0.
s = '\000'
i = 0
s = s[i:]
# Add back some pad bytes. This could be done more efficiently
# w.r.t. the de-padding being done above, but sigh...
if blocksize > 0 and len(s) % blocksize:
s = (blocksize - len(s) % blocksize) * '\000' + s
return s
def _bytelist2longBigEndian(list):
"Transform a list of characters into a list of longs."
imax = len(list)/4
hl = [0L] * imax
j = 0
i = 0
while i < imax:
b0 = long(ord(list[j])) << 24
b1 = long(ord(list[j+1])) << 16
b2 = long(ord(list[j+2])) << 8
b3 = long(ord(list[j+3]))
hl[i] = b0 | b1 | b2 | b3
i = i+1
j = j+4
return hl
def _rotateLeft(x, n):
"Rotate x (32 bit) left n bits circularly."
return (x << n) | (x >> (32-n))
# ======================================================================
# The SHA transformation functions
#
# ======================================================================
def f0_19(B, C, D):
return (B & (C ^ D)) ^ D
def f20_39(B, C, D):
return B ^ C ^ D
def f40_59(B, C, D):
return ((B | C) & D) | (B & C)
def f60_79(B, C, D):
return B ^ C ^ D
# Microsoft's lovely addition...
def f6_42(B, C, D):
return (B + C) ^ C
f = [f0_19]*20 + [f20_39]*20 + [f40_59]*20 + [f60_79]*20
# ...and delightful changes
f[3] = f20_39
f[6] = f6_42
f[10] = f20_39
f[15] = f20_39
f[26] = f0_19
f[31] = f40_59
f[42] = f6_42
f[51] = f20_39
f[68] = f0_19
# Constants to be used
K = [
0x5A827999L, # ( 0 <= t <= 19)
0x6ED9EBA1L, # (20 <= t <= 39)
0x8F1BBCDCL, # (40 <= t <= 59)
0xCA62C1D6L # (60 <= t <= 79)
]
class mssha1(object):
"An implementation of the MD5 hash function in pure Python."
def __init__(self):
"Initialisation."
# Initial message length in bits(!).
self.length = 0L
self.count = [0, 0]
# Initial empty message as a sequence of bytes (8 bit characters).
self.input = []
# Call a separate init function, that can be used repeatedly
# to start from scratch on the same object.
self.init()
def init(self):
"Initialize the message-digest and set all fields to zero."
self.length = 0L
self.input = []
# Initial 160 bit message digest (5 times 32 bit).
# Also changed by Microsoft from standard.
self.H0 = 0x32107654L
self.H1 = 0x23016745L
self.H2 = 0xC4E680A2L
self.H3 = 0xDC679823L
self.H4 = 0xD0857A34L
def _transform(self, W):
for t in range(16, 80):
W.append(_rotateLeft(
W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1) & 0xffffffffL)
A = self.H0
B = self.H1
C = self.H2
D = self.H3
E = self.H4
for t in xrange(0, 80):
TEMP = _rotateLeft(A, 5) + f[t](B, C, D) + E + W[t] + K[t/20]
E = D
D = C
C = _rotateLeft(B, 30) & 0xffffffffL
B = A
A = TEMP & 0xffffffffL
self.H0 = (self.H0 + A) & 0xffffffffL
self.H1 = (self.H1 + B) & 0xffffffffL
self.H2 = (self.H2 + C) & 0xffffffffL
self.H3 = (self.H3 + D) & 0xffffffffL
self.H4 = (self.H4 + E) & 0xffffffffL
# Down from here all methods follow the Python Standard Library
# API of the sha module.
def update(self, inBuf):
"""Add to the current message.
Update the mssha1 object with the string arg. Repeated calls
are equivalent to a single call with the concatenation of all
the arguments, i.e. s.update(a); s.update(b) is equivalent
to s.update(a+b).
The hash is immediately calculated for all full blocks. The final
calculation is made in digest(). It will calculate 1-2 blocks,
depending on how much padding we have to add. This allows us to
keep an intermediate value for the hash, so that we only need to
make minimal recalculation if we call update() to add more data
to the hashed string.
"""
leninBuf = long(len(inBuf))
# Compute number of bytes mod 64.
index = (self.count[1] >> 3) & 0x3FL
# Update number of bits.
self.count[1] = self.count[1] + (leninBuf << 3)
if self.count[1] < (leninBuf << 3):
self.count[0] = self.count[0] + 1
self.count[0] = self.count[0] + (leninBuf >> 29)
partLen = 64 - index
if leninBuf >= partLen:
self.input[index:] = list(inBuf[:partLen])
self._transform(_bytelist2longBigEndian(self.input))
i = partLen
while i + 63 < leninBuf:
self._transform(_bytelist2longBigEndian(list(inBuf[i:i+64])))
i = i + 64
else:
self.input = list(inBuf[i:leninBuf])
else:
i = 0
self.input = self.input + list(inBuf)
def digest(self):
"""Terminate the message-digest computation and return digest.
Return the digest of the strings passed to the update()
method so far. This is a 20-byte string which may contain
non-ASCII characters, including null bytes.
"""
H0 = self.H0
H1 = self.H1
H2 = self.H2
H3 = self.H3
H4 = self.H4
input = [] + self.input
count = [] + self.count
index = (self.count[1] >> 3) & 0x3fL
if index < 56:
padLen = 56 - index
else:
padLen = 120 - index
padding = ['\200'] + ['\000'] * 63
self.update(padding[:padLen])
# Append length (before padding).
bits = _bytelist2longBigEndian(self.input[:56]) + count
self._transform(bits)
# Store state in digest.
digest = _long2bytesBigEndian(self.H0, 4) + \
_long2bytesBigEndian(self.H1, 4) + \
_long2bytesBigEndian(self.H2, 4) + \
_long2bytesBigEndian(self.H3, 4) + \
_long2bytesBigEndian(self.H4, 4)
self.H0 = H0
self.H1 = H1
self.H2 = H2
self.H3 = H3
self.H4 = H4
self.input = input
self.count = count
return digest
def hexdigest(self):
"""Terminate and return digest in HEX form.
Like digest() except the digest is returned as a string of
length 40, containing only hexadecimal digits. This may be
used to exchange the value safely in email or other non-
binary environments.
"""
return ''.join(['%02x' % ord(c) for c in self.digest()])
def copy(self):
"""Return a clone object.
Return a copy ('clone') of the mssha1 object. This can be used
to efficiently compute the digests of strings that share
a common initial substring.
"""
return copy.deepcopy(self)
# ======================================================================
# Mimic Python top-level functions from standard library API
# for consistency with the md5 module of the standard library.
# ======================================================================
# These are mandatory variables in the module. They have constant values
# in the SHA standard.
digest_size = digestsize = 20
blocksize = 1
def new(arg=None):
"""Return a new mssha1 crypto object.
If arg is present, the method call update(arg) is made.
"""
crypto = mssha1()
if arg:
crypto.update(arg)
return crypto
if __name__ == '__main__':
def main():
import sys
file = None
if len(sys.argv) > 2:
print "usage: %s [FILE]" % sys.argv[0]
return
elif len(sys.argv) < 2:
file = sys.stdin
else:
file = open(sys.argv[1], 'rb')
context = new()
data = file.read(16384)
while data:
context.update(data)
data = file.read(16384)
file.close()
digest = context.hexdigest().upper()
for i in xrange(0, 40, 8):
print digest[i:i+8],
print
main()
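A hedged usage sketch (not part of the commit): the module mirrors the stdlib md5/sha API, but because of the altered initial state and round-function schedule above, its digests deliberately do not match standard SHA-1.

h = new("some data")
h.update(" and some more")
print h.hexdigest()       # 40 hex digits; digest() returns the 20 raw bytes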

View File

@ -0,0 +1,805 @@
'''
Support for reading LIT files.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
'and Marshall T. Vandegrift <llasram@gmail.com>'
import sys, struct, cStringIO, os
import functools
import codecs
from itertools import repeat
from calibre import relpath
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf import OPFReader
from calibre.ebooks.lit import LitError
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
import calibre.ebooks.lit.mssha1 as mssha1
import calibre.ebooks.lit.msdes as msdes
import calibre.utils.lzx as lzx
OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE package
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
"http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
"""
HTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE html PUBLIC
"+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN"
"http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
"""
DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}"
LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}"
CONTROL_TAG = 4
CONTROL_WINDOW_SIZE = 12
RESET_NENTRIES = 4
RESET_HDRLEN = 12
RESET_UCLENGTH = 16
RESET_INTERVAL = 32
FLAG_OPENING = (1 << 0)
FLAG_CLOSING = (1 << 1)
FLAG_BLOCK = (1 << 2)
FLAG_HEAD = (1 << 3)
FLAG_ATOM = (1 << 4)
XML_ENTITIES = ['&amp;', '&apos;', '&lt;', '&gt;', '&quot;']
def u32(bytes):
return struct.unpack('<L', bytes[:4])[0]
def u16(bytes):
return struct.unpack('<H', bytes[:2])[0]
def int32(bytes):
return struct.unpack('<l', bytes[:4])[0]
def encint(bytes, remaining):
pos, val = 0, 0
while remaining > 0:
b = ord(bytes[pos])
pos += 1
remaining -= 1
val <<= 7
val |= (b & 0x7f)
if b & 0x80 == 0: break
return val, bytes[pos:], remaining
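# Hedged worked example (not part of the commit): ENCINT values pack 7 bits
# per byte, most-significant group first, with the high bit set on every byte
# except the last.  300 is 0b10_0101100, so it is stored as '\x82\x2c':
#     encint('\x82\x2c', 2)  ->  (300, '', 0)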
def msguid(bytes):
values = struct.unpack("<LHHBBBBBBBB", bytes[:16])
return "{%08lX-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X}" % values
def read_utf8_char(bytes, pos):
c = ord(bytes[pos])
mask = 0x80
if (c & mask):
elsize = 0
while c & mask:
mask >>= 1
elsize += 1
if (mask <= 1) or (mask == 0x40):
raise LitError('Invalid UTF8 character: %s' % repr(bytes[pos]))
else:
elsize = 1
if elsize > 1:
if elsize + pos > len(bytes):
raise LitError('Invalid UTF8 character: %s' % repr(bytes[pos]))
c &= (mask - 1)
for i in range(1, elsize):
b = ord(bytes[pos+i])
if (b & 0xC0) != 0x80:
raise LitError(
'Invalid UTF8 character: %s' % repr(bytes[pos:pos+i]))
c = (c << 6) | (b & 0x3F)
return unichr(c), pos+elsize
class UnBinary(object):
def __init__(self, bin, manifest, map=OPF_MAP):
self.manifest = manifest
self.tag_map, self.attr_map, self.tag_to_attr_map = map
self.opf = map is OPF_MAP
self.bin = bin
self.buf = cStringIO.StringIO()
self.ampersands = []
self.binary_to_text()
self.raw = self.buf.getvalue().lstrip().decode('utf-8')
self.escape_ampersands()
def escape_ampersands(self):
offset = 0
for pos in self.ampersands:
test = self.raw[pos+offset:pos+offset+6]
if test.startswith('&#') and ';' in test:
continue
escape = True
for ent in XML_ENTITIES:
if test.startswith(ent):
escape = False
break
if not escape:
continue
self.raw = '&amp;'.join(
(self.raw[:pos+offset], self.raw[pos+offset+1:]))
offset += 4
def item_path(self, internal_id):
try:
return self.manifest[internal_id].path
except KeyError:
return internal_id
def __unicode__(self):
return self.raw
def binary_to_text(self, base=0, depth=0):
tag_name = current_map = None
dynamic_tag = errors = 0
in_censorship = is_goingdown = False
state = 'text'
index = base
flags = 0
while index < len(self.bin):
c, index = read_utf8_char(self.bin, index)
oc = ord(c)
if state == 'text':
if oc == 0:
state = 'get flags'
continue
elif c == '\v':
c = '\n'
elif c == '&':
self.ampersands.append(self.buf.tell()-1)
self.buf.write(c.encode('utf-8'))
elif state == 'get flags':
if oc == 0:
state = 'text'
continue
flags = oc
state = 'get tag'
elif state == 'get tag':
state = 'text' if oc == 0 else 'get attr'
if flags & FLAG_OPENING:
tag = oc
self.buf.write('<')
if not (flags & FLAG_CLOSING):
is_goingdown = True
if tag == 0x8000:
state = 'get custom length'
continue
if flags & FLAG_ATOM:
raise LitError('TODO: Atoms not yet implemented')
elif tag < len(self.tag_map):
tag_name = self.tag_map[tag]
current_map = self.tag_to_attr_map[tag]
else:
dynamic_tag += 1
errors += 1
tag_name = '?'+unichr(tag)+'?'
current_map = self.tag_to_attr_map[tag]
print 'WARNING: tag %s unknown' % unichr(tag)
self.buf.write(unicode(tag_name).encode('utf-8'))
elif flags & FLAG_CLOSING:
if depth == 0:
raise LitError('Extra closing tag')
return index
elif state == 'get attr':
in_censorship = False
if oc == 0:
if not is_goingdown:
tag_name = None
dynamic_tag = 0
self.buf.write(' />')
else:
self.buf.write('>')
index = self.binary_to_text(base=index, depth=depth+1)
is_goingdown = False
if not tag_name:
raise LitError('Tag ends before it begins.')
self.buf.write(u''.join(
('</', tag_name, '>')).encode('utf-8'))
dynamic_tag = 0
tag_name = None
state = 'text'
else:
if oc == 0x8000:
state = 'get attr length'
continue
attr = None
if oc in current_map and current_map[oc]:
attr = current_map[oc]
elif oc in self.attr_map:
attr = self.attr_map[oc]
if not attr or not isinstance(attr, basestring):
raise LitError(
'Unknown attribute %d in tag %s' % (oc, tag_name))
if attr.startswith('%'):
in_censorship = True
state = 'get value length'
continue
self.buf.write(' ' + unicode(attr).encode('utf-8') + '=')
if attr in ['href', 'src']:
state = 'get href length'
else:
state = 'get value length'
elif state == 'get value length':
if not in_censorship:
self.buf.write('"')
count = oc - 1
if count == 0:
if not in_censorship:
self.buf.write('"')
in_censorship = False
state = 'get attr'
continue
state = 'get value'
if oc == 0xffff:
continue
if count < 0 or count > (len(self.bin) - index):
raise LitError('Invalid character count %d' % count)
elif state == 'get value':
if count == 0xfffe:
if not in_censorship:
self.buf.write('%s"' % (oc - 1))
in_censorship = False
state = 'get attr'
elif count > 0:
if not in_censorship:
self.buf.write(unicode(c).encode('utf-8'))
count -= 1
if count == 0:
if not in_censorship:
self.buf.write('"')
in_censorship = False
state = 'get attr'
elif state == 'get custom length':
count = oc - 1
if count <= 0 or count > len(self.bin)-index:
raise LitError('Invalid character count %d' % count)
dynamic_tag += 1
state = 'get custom'
tag_name = ''
elif state == 'get custom':
tag_name += c
count -= 1
if count == 0:
self.buf.write(unicode(tag_name).encode('utf-8'))
state = 'get attr'
elif state == 'get attr length':
count = oc - 1
if count <= 0 or count > (len(self.bin) - index):
raise LitError('Invalid character count %d' % count)
self.buf.write(' ')
state = 'get custom attr'
elif state == 'get custom attr':
self.buf.write(unicode(c).encode('utf-8'))
count -= 1
if count == 0:
self.buf.write('=')
state = 'get value length'
elif state == 'get href length':
count = oc - 1
if count <= 0 or count > (len(self.bin) - index):
raise LitError('Invalid character count %d' % count)
href = ''
state = 'get href'
elif state == 'get href':
href += c
count -= 1
if count == 0:
doc, m, frag = href[1:].partition('#')
path = self.item_path(doc)
if m and frag:
path += m + frag
self.buf.write((u'"%s"' % path).encode('utf-8'))
state = 'get attr'
return index
class DirectoryEntry(object):
def __init__(self, name, section, offset, size):
self.name = name
self.section = section
self.offset = offset
self.size = size
def __repr__(self):
return "DirectoryEntry(name=%s, section=%d, offset=%d, size=%d)" \
% (repr(self.name), self.section, self.offset, self.size)
def __str__(self):
return repr(self)
class ManifestItem(object):
def __init__(self, original, internal, mime_type, offset, root, state):
self.original = original
self.internal = internal
self.mime_type = mime_type
self.offset = offset
self.root = root
self.state = state
# Some paths in Fictionwise "multiformat" LIT files contain '..' (!?)
nodes = original.split('/')
path = []
for node in nodes:
if node == '..':
if path: path.pop()
continue
path.append(node)
self.path = os.path.join(*path)
def __eq__(self, other):
if hasattr(other, 'internal'):
return self.internal == other.internal
return self.internal == other
def __repr__(self):
return "ManifestItem(internal=%s, path=%s)" \
% (repr(self.internal), repr(self.path))
def preserve(function):
def wrapper(self, *args, **kwargs):
opos = self._stream.tell()
try:
return function(self, *args, **kwargs)
finally:
self._stream.seek(opos)
functools.update_wrapper(wrapper, function)
return wrapper
class LitReader(object):
PIECE_SIZE = 16
def magic():
@preserve
def fget(self):
self._stream.seek(0)
return self._stream.read(8)
return property(fget=fget)
magic = magic()
def version():
def fget(self):
self._stream.seek(8)
return u32(self._stream.read(4))
return property(fget=fget)
version = version()
def hdr_len():
@preserve
def fget(self):
self._stream.seek(12)
return int32(self._stream.read(4))
return property(fget=fget)
hdr_len = hdr_len()
def num_pieces():
@preserve
def fget(self):
self._stream.seek(16)
return int32(self._stream.read(4))
return property(fget=fget)
num_pieces = num_pieces()
def sec_hdr_len():
@preserve
def fget(self):
self._stream.seek(20)
return int32(self._stream.read(4))
return property(fget=fget)
sec_hdr_len = sec_hdr_len()
def guid():
@preserve
def fget(self):
self._stream.seek(24)
return self._stream.read(16)
return property(fget=fget)
guid = guid()
def header():
@preserve
def fget(self):
size = self.hdr_len \
+ (self.num_pieces * self.PIECE_SIZE) \
+ self.sec_hdr_len
self._stream.seek(0)
return self._stream.read(size)
return property(fget=fget)
header = header()
def __init__(self, filename_or_stream):
if hasattr(filename_or_stream, 'read'):
self._stream = filename_or_stream
else:
self._stream = open(filename_or_stream, 'rb')
if self.magic != 'ITOLITLS':
raise LitError('Not a valid LIT file')
if self.version != 1:
raise LitError('Unknown LIT version %d'%(self.version,))
self.entries = {}
self._read_secondary_header()
self._read_header_pieces()
self._read_section_names()
self._read_manifest()
self._read_meta()
self._read_drm()
@preserve
def __len__(self):
self._stream.seek(0, 2)
return self._stream.tell()
@preserve
def _read_raw(self, offset, size):
self._stream.seek(offset)
return self._stream.read(size)
def _read_content(self, offset, size):
return self._read_raw(self.content_offset + offset, size)
def _read_secondary_header(self):
offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE)
bytes = self._read_raw(offset, self.sec_hdr_len)
offset = int32(bytes[4:])
while offset < len(bytes):
blocktype = bytes[offset:offset+4]
blockver = u32(bytes[offset+4:])
if blocktype == 'CAOL':
if blockver != 2:
raise LitError(
'Unknown CAOL block format %d' % blockver)
self.creator_id = u32(bytes[offset+12:])
self.entry_chunklen = u32(bytes[offset+20:])
self.count_chunklen = u32(bytes[offset+24:])
self.entry_unknown = u32(bytes[offset+28:])
self.count_unknown = u32(bytes[offset+32:])
offset += 48
elif blocktype == 'ITSF':
if blockver != 4:
raise LitError(
'Unknown ITSF block format %d' % blockver)
if u32(bytes[offset+4+16:]):
raise LitError('This file has a 64bit content offset')
self.content_offset = u32(bytes[offset+16:])
self.timestamp = u32(bytes[offset+24:])
self.language_id = u32(bytes[offset+28:])
offset += 48
if not hasattr(self, 'content_offset'):
raise LitError('Could not figure out the content offset')
def _read_header_pieces(self):
src = self.header[self.hdr_len:]
for i in range(self.num_pieces):
piece = src[i * self.PIECE_SIZE:(i + 1) * self.PIECE_SIZE]
if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
raise LitError('Piece %s has 64bit value' % repr(piece))
offset, size = u32(piece), int32(piece[8:])
piece = self._read_raw(offset, size)
if i == 0:
continue # Dont need this piece
elif i == 1:
if u32(piece[8:]) != self.entry_chunklen or \
u32(piece[12:]) != self.entry_unknown:
raise LitError('Secondary header does not match piece')
self._read_directory(piece)
elif i == 2:
if u32(piece[8:]) != self.count_chunklen or \
u32(piece[12:]) != self.count_unknown:
raise LitError('Secondary header does not match piece')
continue # No data needed from this piece
elif i == 3:
self.piece3_guid = piece
elif i == 4:
self.piece4_guid = piece
def _read_directory(self, piece):
if not piece.startswith('IFCM'):
raise LitError('Header piece #1 is not main directory.')
chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28])
if (32 + (num_chunks * chunk_size)) != len(piece):
raise LitError('IFCM HEADER has incorrect length')
for i in xrange(num_chunks):
offset = 32 + (i * chunk_size)
chunk = piece[offset:offset + chunk_size]
tag, chunk = chunk[:4], chunk[4:]
if tag != 'AOLL': continue
remaining, chunk = int32(chunk[:4]), chunk[4:]
if remaining >= chunk_size:
raise LitError('AOLL remaining count is negative')
remaining = chunk_size - (remaining + 48)
entries = u16(chunk[-2:])
if entries == 0:
# Hopefully will work even without a correct entries count
entries = (2 ** 16) - 1
chunk = chunk[40:]
for j in xrange(entries):
if remaining <= 0: break
namelen, chunk, remaining = encint(chunk, remaining)
if namelen != (namelen & 0x7fffffff):
raise LitError('Directory entry had 64bit name length.')
if namelen > remaining - 3:
raise LitError('Read past end of directory chunk')
name, chunk = chunk[:namelen], chunk[namelen:]
section, chunk, remaining = encint(chunk, remaining)
offset, chunk, remaining = encint(chunk, remaining)
size, chunk, remaining = encint(chunk, remaining)
entry = DirectoryEntry(name, section, offset, size)
self.entries[name] = entry
def _read_section_names(self):
if '::DataSpace/NameList' not in self.entries:
raise LitError('Lit file does not have a valid NameList')
raw = self.get_file('::DataSpace/NameList')
if len(raw) < 4:
raise LitError('Invalid Namelist section')
pos = 4
self.num_sections = u16(raw[2:pos])
self.section_names = [""]*self.num_sections
self.section_data = [None]*self.num_sections
for section in range(self.num_sections):
size = u16(raw[pos:pos+2])
pos += 2
size = size*2 + 2
if pos + size > len(raw):
raise LitError('Invalid Namelist section')
self.section_names[section] = \
raw[pos:pos+size].decode('utf-16-le').rstrip('\000')
pos += size
def _read_manifest(self):
if '/manifest' not in self.entries:
raise LitError('Lit file does not have a valid manifest')
raw = self.get_file('/manifest')
self.manifest = {}
while raw:
slen, raw = ord(raw[0]), raw[1:]
if slen == 0: break
root, raw = raw[:slen].decode('utf8'), raw[slen:]
if not raw:
raise LitError('Truncated manifest')
for state in ['spine', 'not spine', 'css', 'images']:
num_files, raw = int32(raw), raw[4:]
if num_files == 0: continue
for i in xrange(num_files):
if len(raw) < 5:
raise LitError('Truncated manifest')
offset, raw = u32(raw), raw[4:]
slen, raw = ord(raw[0]), raw[1:]
internal, raw = raw[:slen].decode('utf8'), raw[slen:]
slen, raw = ord(raw[0]), raw[1:]
original, raw = raw[:slen].decode('utf8'), raw[slen:]
slen, raw = ord(raw[0]), raw[1:]
mime_type, raw = raw[:slen].decode('utf8'), raw[slen+1:]
self.manifest[internal] = ManifestItem(
original, internal, mime_type, offset, root, state)
mlist = self.manifest.values()
shared = mlist[0].path
for item in mlist[1:]:
path = item.path
while not path.startswith(shared):
shared = shared[:-1]
if shared == '':
break
else:
slen = len(shared)
for item in mlist:
item.path = item.path[slen:]
def _read_meta(self):
raw = self.get_file('/meta')
try:
xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP))
except LitError:
if 'PENGUIN group' not in raw: raise
print "WARNING: attempting PENGUIN malformed OPF fix"
raw = raw.replace(
'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP))
self.meta = xml
def _read_drm(self):
self.drmlevel = 0
if '/DRMStorage/Licenses/EUL' in self.entries:
self.drmlevel = 5
elif '/DRMStorage/DRMBookplate' in self.entries:
self.drmlevel = 3
elif '/DRMStorage/DRMSealed' in self.entries:
self.drmlevel = 1
else:
return
des = msdes.new(self._calculate_deskey())
bookkey = des.decrypt(self.get_file('/DRMStorage/DRMSealed'))
if bookkey[0] != '\000':
raise LitError('Unable to decrypt title key!')
self.bookkey = bookkey[1:9]
def _calculate_deskey(self):
hashfiles = ['/meta', '/DRMStorage/DRMSource']
if self.drmlevel == 3:
hashfiles.append('/DRMStorage/DRMBookplate')
prepad = 2
hash = mssha1.new()
for name in hashfiles:
data = self.get_file(name)
if prepad > 0:
data = ("\000" * prepad) + data
prepad = 0
postpad = 64 - (len(data) % 64)
if postpad < 64:
data = data + ("\000" * postpad)
hash.update(data)
digest = hash.digest()
key = [0] * 8
for i in xrange(0, len(digest)):
key[i % 8] ^= ord(digest[i])
return ''.join(chr(x) for x in key)
def get_markup_file(self, name):
raw = self.get_file(name)
decl, map = (OPF_DECL, OPF_MAP) \
if name == '/meta' else (HTML_DECL, HTML_MAP)
xml = decl + unicode(UnBinary(raw, self.manifest, map))
return xml
def get_file(self, name):
entry = self.entries[name]
if entry.section == 0:
return self._read_content(entry.offset, entry.size)
section = self.get_section(entry.section)
return section[entry.offset:entry.offset+entry.size]
def get_section(self, section):
data = self.section_data[section]
if not data:
data = self._get_section(section)
self.section_data[section] = data
return data
def _get_section(self, section):
name = self.section_names[section]
path = '::DataSpace/Storage/' + name
transform = self.get_file(path + '/Transform/List')
content = self.get_file(path + '/Content')
control = self.get_file(path + '/ControlData')
while len(transform) >= 16:
csize = (int32(control) + 1) * 4
if csize > len(control) or csize <= 0:
raise LitError("ControlData is too short")
guid = msguid(transform)
if guid == DESENCRYPT_GUID:
content = self._decrypt(content)
control = control[csize:]
elif guid == LZXCOMPRESS_GUID:
reset_table = self.get_file(
'/'.join(('::DataSpace/Storage', name, 'Transform',
LZXCOMPRESS_GUID, 'InstanceData/ResetTable')))
content = self._decompress(content, control, reset_table)
control = control[csize:]
else:
raise LitError("Unrecognized transform: %s." % repr(guid))
transform = transform[16:]
return content
def _decrypt(self, content):
if self.drmlevel == 5:
raise LitError('Cannot extract content from a DRM protected ebook')
return msdes.new(self.bookkey).decrypt(content)
def _decompress(self, content, control, reset_table):
if len(control) < 32 or control[CONTROL_TAG:CONTROL_TAG+4] != "LZXC":
raise LitError("Invalid ControlData tag value")
if len(reset_table) < (RESET_INTERVAL + 8):
raise LitError("Reset table is too short")
if u32(reset_table[RESET_UCLENGTH + 4:]) != 0:
raise LitError("Reset table has 64bit value for UCLENGTH")
result = []
window_size = 14
u = u32(control[CONTROL_WINDOW_SIZE:])
while u > 0:
u >>= 1
window_size += 1
if window_size < 15 or window_size > 21:
raise LitError("Invalid window in ControlData")
lzx.init(window_size)
ofs_entry = int32(reset_table[RESET_HDRLEN:]) + 8
uclength = int32(reset_table[RESET_UCLENGTH:])
accum = int32(reset_table[RESET_INTERVAL:])
bytes_remaining = uclength
window_bytes = (1 << window_size)
base = 0
while ofs_entry < len(reset_table):
if accum >= window_bytes:
accum = 0
size = int32(reset_table[ofs_entry:])
u = int32(reset_table[ofs_entry + 4:])
if u != 0:
raise LitError("Reset table entry greater than 32 bits")
if size >= len(content):
raise("Reset table entry out of bounds")
if bytes_remaining >= window_bytes:
lzx.reset()
result.append(
lzx.decompress(content[base:size], window_bytes))
bytes_remaining -= window_bytes
base = size
accum += int32(reset_table[RESET_INTERVAL:])
ofs_entry += 8
if bytes_remaining < window_bytes and bytes_remaining > 0:
lzx.reset()
result.append(lzx.decompress(content[base:], bytes_remaining))
bytes_remaining = 0
if bytes_remaining > 0:
raise LitError("Failed to completely decompress section")
return ''.join(result)
def extract_content(self, output_dir=os.getcwdu()):
output_dir = os.path.abspath(output_dir)
try:
opf_path = os.path.splitext(
os.path.basename(self._stream.name))[0] + '.opf'
except AttributeError:
opf_path = 'content.opf'
opf_path = os.path.join(output_dir, opf_path)
self._ensure_dir(opf_path)
with open(opf_path, 'w') as f:
f.write(self.meta.encode('utf-8'))
for entry in self.manifest.values():
path = os.path.join(output_dir, entry.path)
self._ensure_dir(path)
with open(path, 'w') as f:
if 'spine' in entry.state:
name = '/'.join(('/data', entry.internal, 'content'))
f.write(self.get_markup_file(name).encode('utf-8'))
else:
name = '/'.join(('/data', entry.internal))
f.write(self.get_file(name))
def _ensure_dir(self, path):
dir = os.path.dirname(path)
if not os.path.isdir(dir):
os.makedirs(dir)
def option_parser():
from calibre import OptionParser
parser = OptionParser(usage=_('%prog [options] LITFILE'))
parser.add_option(
'-o', '--output-dir', default='.',
help=_('Output directory. Defaults to current directory.'))
parser.add_option(
'--verbose', default=False, action='store_true',
help='Useful for debugging.')
return parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
return 1
lr = LitReader(args[1])
lr.extract_content(opts.output_dir)
print _('OEB ebook created in'), opts.output_dir
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -1,734 +1,25 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Support for reading the metadata from a lit file.
Support for reading the metadata from a LIT file.
'''
import sys, struct, cStringIO, os
from itertools import repeat
import sys, cStringIO, os
from calibre import relpath
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf import OPFReader
OPF_ATTR_MAP = [
None,
"href",
"%never-used",
"%guid",
"%minimum_level",
"%attr5",
"id",
"href",
"media-type",
"fallback",
"idref",
"xmlns:dc",
"xmlns:oebpackage",
"role",
"file-as",
"event",
"scheme",
"title",
"type",
"unique-identifier",
"name",
"content",
"xml:lang",
]
OPF_TAG_MAP = [
None,
"package",
"dc:Title",
"dc:Creator",
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
"manifest",
"item",
"spine",
"itemref",
"metadata",
"dc-metadata",
"dc:Subject",
"dc:Description",
"dc:Publisher",
"dc:Contributor",
"dc:Date",
"dc:Type",
"dc:Format",
"dc:Identifier",
"dc:Source",
"dc:Language",
"dc:Relation",
"dc:Coverage",
"dc:Rights",
"x-metadata",
"meta",
"tours",
"tour",
"site",
"guide",
"reference",
None,
]
class DirectoryEntry(object):
def __init__(self, name, section, offset, size):
self.name = name
self.section = section
self.offset = offset
self.size = size
def __repr__(self):
return '%s\n\tSection: %d\n\tOffset: %d\n\tSize: %d\n'%(self.name,
self.section, self.offset, self.size)
def __str__(self):
return repr(self)
class LitReadError(Exception):
pass
def u32(bytes):
b = struct.unpack('BBBB', bytes[:4])
return b[0] + (b[1] << 8) + (b[2] << 16) + (b[3] << 32)
def u16(bytes):
b = struct.unpack('BB', bytes[:2])
return b[0] + (b[1] << 8)
def int32(bytes):
return u32(bytes)&0x7FFFFFFF
def encint(bytes, remaining):
pos, val = 0, 0
while remaining > 0:
b = ord(bytes[pos])
pos += 1
remaining -= 1
val <<= 7
val |= (b & 0x7f)
if b & 0x80 == 0: break
return val, bytes[pos:], remaining
def read_utf8_char(bytes, pos):
c = ord(bytes[pos])
mask = 0x80
if (c & mask):
elsize = 0
while c & mask:
mask >>= 1
elsize += 1
if (mask <= 1) or (mask == 0x40):
raise LitReadError('Invalid UTF8 character: %s'%(repr(bytes[pos])))
else:
elsize = 1
if elsize > 1:
if elsize + pos > len(bytes):
raise LitReadError('Invalid UTF8 character: %s'%(repr(bytes[pos])))
c &= (mask - 1)
for i in range(1, elsize):
b = ord(bytes[pos+i])
if (b & 0xC0) != 0x80:
raise LitReadError('Invalid UTF8 character: %s'%(repr(bytes[pos:pos+i])))
c = (c << 6) | (b & 0x3F)
return unichr(c), pos+elsize
FLAG_OPENING = 1
FLAG_CLOSING = 2
FLAG_BLOCK = 4
FLAG_HEAD = 8
FLAG_ATOM = 16
XML_ENTITIES = ['&amp;', '&apos;', '&lt;', '&gt;', '&quot;']
class UnBinary(object):
def __init__(self, bin, manifest, attr_map=OPF_ATTR_MAP, tag_map=OPF_TAG_MAP,
tag_to_attr_map=[[] for i in range(43)]):
self.manifest = manifest
self.pending_indent = 0
self.lingering_space = 0
self.was_in_text = 0
self.attr_map = attr_map
self.tag_map = tag_map
self.tag_to_attr_map = tag_to_attr_map
self.opf = self.attr_map is OPF_ATTR_MAP
self.bin = bin
self.buf = cStringIO.StringIO()
self.ampersands = []
self.binary_to_text()
self.raw = self.buf.getvalue().lstrip().decode('utf-8')
self.escape_ampersands()
def escape_ampersands(self):
offset = 0
for pos in self.ampersands:
test = self.raw[pos+offset:pos+offset+6]
if test.startswith('&#') and ';' in test:
continue
escape = True
for ent in XML_ENTITIES:
if test.startswith(ent):
escape = False
break
if not escape:
continue
self.raw = self.raw[:pos+offset] + '&amp;' + self.raw[pos+offset+1:]
offset += 4
def write_spaces(self, depth):
self.buf.write(u''.join(repeat(' ', depth)))
def item_path(self, internal_id):
for i in self.manifest:
if i == internal_id:
return i.path
raise LitReadError('Could not find item %s'%(internal_id,))
def __unicode__(self):
return self.raw
def binary_to_text(self, base=0, depth=0):
space_enabled, saved_space_enabled = 1, 0
was_indented, is_goingdown = 0, 0
tag_name = current_map = None
dynamic_tag = errors = in_censorship = 0
state = 'text'
index = base
flags = 0
while index < len(self.bin):
c, index = read_utf8_char(self.bin, index)
if state == 'text':
if ord(c) == 0:
state = 'get flags'
continue
if (not self.was_in_text) or space_enabled:
space_enabled = 0;
if c in (' ', '\t', '\n', '\r'):
space_enabled += 1
else:
self.was_in_text = 1
if c == '\v':
c = '\n'
pending_indent = 0
if c == '&':
self.ampersands.append(self.buf.tell()-1)
self.buf.write(c.encode('utf-8') if isinstance(c, unicode) else c)
elif state == 'get flags':
if ord(c) == 0:
state = 'text'
continue
flags = ord(c)
state = 'get tag'
elif state == 'get tag':
state = 'text' if ord(c) == 0 else 'get attr'
if flags & FLAG_OPENING:
if space_enabled and ((not self.was_in_text) or (flags &(FLAG_BLOCK|FLAG_HEAD))):
self.pending_indent += 1
if self.pending_indent or self.opf:
was_indented += 1
self.buf.write(u'\n')
self.write_spaces(depth)
pending_indent = 0
if (flags & FLAG_HEAD) or (flags & FLAG_BLOCK) or \
self.opf or depth == 0:
pending_indent = 1
tag = ord(c)
self.buf.write('<')
if not (flags & FLAG_CLOSING):
is_goingdown = 1
if tag == 0x8000:
state = 'get custom length'
continue
if flags & FLAG_ATOM:
raise LitReadError('TODO: Atoms not yet implemented')
elif tag < len(self.tag_map):
tag_name = self.tag_map[tag]
current_map = self.tag_to_attr_map[tag]
else:
dynamic_tag += 1
errors += 1
tag_name = '?'+unichr(tag)+'?'
current_map = self.tag_to_attr_map[tag]
print 'WARNING: tag %s unknown'%(unichr(tag),)
self.buf.write(unicode(tag_name).encode('utf-8'))
elif flags & FLAG_CLOSING:
#if depth == 0:
# raise LitReadError('Extra closing tag')
self.lingering_space = space_enabled
return index
elif state == 'get attr':
in_censorship = 0
if ord(c) == 0:
if not is_goingdown:
tag_name = None
dynamic_tag = 0
self.buf.write(' />')
else:
self.buf.write('>')
if not self.opf and (flags & (FLAG_BLOCK|FLAG_HEAD)):
pending_indent += 1
index = self.binary_to_text(base=index, depth=depth+1)
is_goingdown = 0
if not tag_name:
raise LitReadError('Tag ends before it begins.')
saved_space_enabled = space_enabled
space_enabled = self.lingering_space
if space_enabled and was_indented and not self.was_in_text:
self.buf.write('\n')
self.write_spaces(depth)
self.buf.write('</'+tag_name+'>')
if (space_enabled and self.opf) or (flags & (FLAG_BLOCK|FLAG_HEAD)):
self.pending_indent += 1
dynamic_tag = 0
tag_name = None
space_enabled = saved_space_enabled
self.was_in_text = 0
state = 'text'
else:
if ord(c) == 0x8000:
state = 'get attr length'
continue
attr = None
if ord(c) < len(current_map) and current_map[ord(c)]:
attr = current_map[ord(c)]
elif ord(c) < len(self.attr_map):
attr = self.attr_map[ord(c)]
if not attr or not isinstance(attr, basestring):
raise LitReadError('Unknown attribute %d in tag %s'%(ord(c), tag_name))
if attr.startswith('%'):
in_censorship = 1
state = 'get value length'
continue
self.buf.write(' ' + unicode(attr).encode('utf-8') + '=')
if attr in ['href', 'src']:
state = 'get href'
else:
state = 'get value length'
elif state == 'get value length':
if not in_censorship:
self.buf.write('"')
char_count = ord(c) - 1
if not char_count:
if not in_censorship:
self.buf.write('"')
in_censorship = 0
state = 'get attr'
state = 'get value'
if ord(c) == 0xffff:
continue
if char_count < 0 or char_count > len(self.bin)-index:
raise LitReadError('Invalid character count %d'%(char_count,))
elif state == 'get value':
if char_count == 0xfffe:
if not in_censorship:
self.buf.write(str(ord(c)-1))
in_censorship = 0
state = 'get attr'
elif char_count:
if not in_censorship:
self.buf.write(c)
char_count -= 1
if not char_count:
if not in_censorship:
self.buf.write('"')
in_censorship = 0
state = 'get attr'
elif state == 'get custom length':
char_count = ord(c) - 1
if char_count <= 0 or char_count > len(self.bin)-index:
raise LitReadError('Invalid character count %d'%(char_count,))
dynamic_tag += 1
state = 'get custom'
tag_name = ''
elif state == 'get custom':
tag += c
char_count -= 1
if not char_count:
self.buf.write(tag_name)
state = 'get attr'
elif state == 'get attr length':
char_count = ord(c) - 1
if char_count <= 0 or char_count > len(self.bin)-index:
raise LitReadError('Invalid character count %d'%(char_count,))
self.buf.write(' ')
state = 'get custom attr'
elif state == 'get custom attr':
self.buf.write(c)
char_count -= 1
if not char_count:
self.buf.write('=')
state = 'get value length'
elif state == 'get href':
char_count = ord(c) - 1
if char_count <= 0:
raise LitReadError('Invalid character count %d'%(char_count,))
href = self.bin[index+1:index+char_count].decode('ascii')
index += char_count
doc, m, frag = href.partition('#')
path = self.item_path(doc)
if m and frag:
path += m+frag
self.buf.write((u'"%s"'%(path,)).encode('utf-8'))
state = 'get attr'
self.lingering_space = space_enabled
return index
class ManifestItem(object):
def __init__(self, original, internal, mime_type, offset, root, state):
self.original = original
self.internal = internal
self.mime_type = mime_type
self.offset = offset
self.root = root
self.state = state
self.prefix = 'images' if state == 'images' else 'css' if state == 'css' else ''
self.prefix = self.prefix + os.sep if self.prefix else ''
self.path = self.prefix + self.original
def __eq__(self, other):
if hasattr(other, 'internal'):
return self.internal == other.internal
return self.internal == other
def __repr__(self):
return self.internal + u'->' + self.path
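# A minimal usage sketch (hypothetical values): items parsed from the 'images'
# or 'css' sections of the manifest get a matching directory prefix on their
# path, while spine items keep their original name.
#
#   >>> ManifestItem('cover.jpg', '~0', 'image/jpeg', 0, '/data/', 'images').path
#   'images/cover.jpg'                  # joined with os.sep on this platform
#   >>> ManifestItem('ch1.html', '~1', 'text/html', 0, '/data/', 'spine').path
#   'ch1.html'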
class LitFile(object):
PIECE_SIZE = 16
@apply
def magic():
def fget(self):
opos = self._stream.tell()
self._stream.seek(0)
val = self._stream.read(8)
self._stream.seek(opos)
return val
return property(fget=fget)
@apply
def version():
def fget(self):
opos = self._stream.tell()
self._stream.seek(8)
val = u32(self._stream.read(4))
self._stream.seek(opos)
return val
return property(fget=fget)
@apply
def hdr_len():
def fget(self):
opos = self._stream.tell()
self._stream.seek(12)
val = int32(self._stream.read(4))
self._stream.seek(opos)
return val
return property(fget=fget)
@apply
def num_pieces():
def fget(self):
opos = self._stream.tell()
self._stream.seek(16)
val = int32(self._stream.read(4))
self._stream.seek(opos)
return val
return property(fget=fget)
@apply
def sec_hdr_len():
def fget(self):
opos = self._stream.tell()
self._stream.seek(20)
val = int32(self._stream.read(4))
self._stream.seek(opos)
return val
return property(fget=fget)
@apply
def guid():
def fget(self):
opos = self._stream.tell()
self._stream.seek(24)
val = self._stream.read(16)
self._stream.seek(opos)
return val
return property(fget=fget)
@apply
def header():
def fget(self):
opos = self._stream.tell()
size = self.hdr_len + self.num_pieces*self.PIECE_SIZE + self.sec_hdr_len
self._stream.seek(0)
val = self._stream.read(size)
self._stream.seek(opos)
return val
return property(fget=fget)
def __init__(self, stream):
self._stream = stream
if self.magic != 'ITOLITLS':
raise LitReadError('Not a valid LIT file')
if self.version != 1:
raise LitReadError('Unknown LIT version %d'%(self.version,))
self.read_secondary_header()
self.read_header_pieces()
def read_secondary_header(self):
opos = self._stream.tell()
try:
self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE)
bytes = self._stream.read(self.sec_hdr_len)
offset = int32(bytes[4:])
while offset < len(bytes):
blocktype = bytes[offset:offset+4]
blockver = u32(bytes[offset+4:])
if blocktype == 'CAOL':
if blockver != 2:
raise LitReadError('Unknown CAOL block format %d'%(blockver,))
self.creator_id = u32(bytes[offset+12:])
self.entry_chunklen = u32(bytes[offset+20:])
self.count_chunklen = u32(bytes[offset+24:])
self.entry_unknown = u32(bytes[offset+28:])
self.count_unknown = u32(bytes[offset+32:])
offset += 48
elif blocktype == 'ITSF':
if blockver != 4:
raise LitReadError('Unknown ITSF block format %d'%(blockver,))
if u32(bytes[offset+4+16:]):
raise LitReadError('This file has a 64bit content offset')
self.content_offset = u32(bytes[offset+16:])
self.timestamp = u32(bytes[offset+24:])
self.language_id = u32(bytes[offset+28:])
offset += 48
if not hasattr(self, 'content_offset'):
raise LitReadError('Could not figure out the content offset')
finally:
self._stream.seek(opos)
def read_header_pieces(self):
opos = self._stream.tell()
try:
src = self.header[self.hdr_len:]
for i in range(self.num_pieces):
piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE]
if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
raise LitReadError('Piece %s has 64bit value'%(repr(piece),))
offset, size = u32(piece), int32(piece[8:])
self._stream.seek(offset)
piece = self._stream.read(size)
if i == 0:
continue # Don't need this piece
elif i == 1:
if u32(piece[8:]) != self.entry_chunklen or \
u32(piece[12:]) != self.entry_unknown:
raise LitReadError('Secondary header does not match piece')
self.read_directory(piece)
elif i == 2:
if u32(piece[8:]) != self.count_chunklen or \
u32(piece[12:]) != self.count_unknown:
raise LitReadError('Secondary header does not match piece')
continue # No data needed from this piece
elif i == 3:
self.piece3_guid = piece
elif i == 4:
self.piece4_guid = piece
finally:
self._stream.seek(opos)
def read_directory(self, piece):
self.entries = []
if not piece.startswith('IFCM'):
raise LitReadError('Header piece #1 is not main directory.')
chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28])
if 32 + chunk_size*num_chunks != len(piece):
raise LitReadError('IFCM HEADER has incorrect length')
for chunk in range(num_chunks):
p = 32 + chunk*chunk_size
if piece[p:p+4] != 'AOLL':
continue
remaining = chunk_size - int32(piece[p+4:p+8]) - 48
if remaining < 0:
raise LitReadError('AOLL remaining count is negative')
entries = u16(piece[p+chunk_size-2:])
if entries <= 0: # Hopefully everything will work even without a correct entries count
entries = (2**16)-1
piece = piece[p+48:]
i = 0
while i < entries:
if remaining <= 0: break
namelen, piece, remaining = encint(piece, remaining)
if namelen != (namelen & 0x7fffffff):
raise LitReadError('Directory entry had 64bit name length.')
if namelen > remaining - 3:
raise LitReadError('Read past end of directory chunk')
name = piece[:namelen]
piece = piece[namelen:]
section, piece, remaining = encint(piece, remaining)
offset, piece, remaining = encint(piece, remaining)
size, piece, remaining = encint(piece, remaining)
entry = DirectoryEntry(name, section, offset, size)
if name == '::DataSpace/NameList':
self.read_section_names(entry)
elif name == '/manifest':
self.read_manifest(entry)
elif name == '/meta':
self.read_meta(entry)
self.entries.append(entry)
i += 1
if not hasattr(self, 'sections'):
raise LitReadError('Lit file does not have a valid NameList')
if not hasattr(self, 'manifest'):
raise LitReadError('Lit file does not have a valid manifest')
def read_section_names(self, entry):
opos = self._stream.tell()
try:
self._stream.seek(self.content_offset + entry.offset)
raw = self._stream.read(entry.size)
if len(raw) < 4:
raise LitReadError('Invalid Namelist section')
pos = 4
self.num_sections = u16(raw[2:pos])
self.sections = {}
for section in range(self.num_sections):
size = u16(raw[pos:pos+2])
pos += 2
size = size*2 + 2
if pos + size > len(raw):
raise LitReadError('Invalid Namelist section')
self.sections[section] = raw[pos:pos+size].decode('utf-16-le')
pos += size
finally:
self._stream.seek(opos)
def read_manifest(self, entry):
opos = self._stream.tell()
try:
self.manifest = []
self._stream.seek(self.content_offset + entry.offset)
raw = self._stream.read(entry.size)
pos = 0
while pos < len(raw):
size = ord(raw[pos])
if size == 0: break
pos += 1
root = raw[pos:pos+size].decode('utf8')
pos += size
if pos >= len(raw):
raise LitReadError('Truncated manifest.')
for state in ['spine', 'not spine', 'css', 'images']:
num_files = int32(raw[pos:pos+4])
pos += 4
if num_files == 0: continue
i = 0
while i < num_files:
if pos+5 >= len(raw):
raise LitReadError('Truncated manifest.')
offset = u32(raw[pos:pos+4])
pos += 4
slen = ord(raw[pos])
pos += 1
internal = raw[pos:pos+slen].decode('utf8')
pos += slen
slen = ord(raw[pos])
pos += 1
original = raw[pos:pos+slen].decode('utf8')
pos += slen
slen = ord(raw[pos])
pos += 1
mime_type = raw[pos:pos+slen].decode('utf8')
pos += slen +1
self.manifest.append(ManifestItem(original, internal, mime_type, offset, root, state))
i += 1
finally:
self._stream.seek(opos)
def read_meta(self, entry):
opos = self._stream.tell()
try:
self._stream.seek(self.content_offset + entry.offset)
raw = self._stream.read(entry.size)
xml = \
'''\
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE package
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
"http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
'''+\
unicode(UnBinary(raw, self.manifest))
self.meta = xml
finally:
self._stream.seek(opos)
def read_image(self, internal_name):
cover_entry = None
for entry in self.entries:
if internal_name in entry.name:
cover_entry = entry
break
opos = self._stream.tell()
try:
self._stream.seek(self.content_offset + cover_entry.offset)
return self._stream.read(cover_entry.size)
finally:
self._stream.seek(opos)
from calibre.ebooks.lit.reader import LitReader
def get_metadata(stream):
try:
litfile = LitFile(stream)
litfile = LitReader(stream)
src = litfile.meta.encode('utf-8')
mi = OPFReader(cStringIO.StringIO(src), dir=os.getcwd())
cover_url, cover_item = mi.cover, None
if cover_url:
cover_url = relpath(cover_url, os.getcwd())
for item in litfile.manifest:
for item in litfile.manifest.values():
if item.path == cover_url:
cover_item = item.internal
if cover_item is not None:
@ -737,26 +28,28 @@ def get_metadata(stream):
ext = 'jpg'
else:
ext = ext.lower()
cd = litfile.read_image(cover_item)
mi.cover_data = (ext, cd) if cd else (None, None)
cd = litfile.get_file('/data/' + cover_item)
mi.cover_data = (ext, cd) if cd else (None, None)
except:
title = stream.name if hasattr(stream, 'name') and stream.name else 'Unknown'
mi = MetaInformation(title, ['Unknown'])
return mi
def main(args=sys.argv):
if len(args) != 2:
print >>sys.stderr, _('Usage: %s file.lit')%(args[0],)
print >>sys.stderr, _('Usage: %s file.lit') % args[0]
return 1
mi = get_metadata(open(args[1], 'rb'))
fname = args[1]
mi = get_metadata(open(fname, 'rb'))
print unicode(mi)
if mi.cover_data[1]:
cover = os.path.abspath(os.path.splitext(os.path.basename(args[1]))[0] + '.' + mi.cover_data[0])
cover = os.path.abspath(
'.'.join((os.path.splitext(os.path.basename(fname))[0],
mi.cover_data[0])))
open(cover, 'wb').write(mi.cover_data[1])
print _('Cover saved to'), cover
return 0
if __name__ == '__main__':
sys.exit(main())
sys.exit(main())
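For orientation, the six fields that the LitFile properties above read one at a time sit at fixed offsets in the primary header; a minimal sketch of pulling them in a single call, assuming the little-endian unsigned/signed 32-bit layout implied by the u32/int32 helpers:

import struct

def read_primary_header(stream):
    # Sketch only: mirrors LitFile.magic/version/hdr_len/num_pieces/
    # sec_hdr_len/guid -- 8-byte magic, u32 version, three signed 32-bit
    # counts, 16-byte GUID; 40 bytes total, little-endian.
    stream.seek(0)
    raw = stream.read(40)
    magic, version, hdr_len, num_pieces, sec_hdr_len, guid = \
        struct.unpack('<8sIiii16s', raw)
    if magic != 'ITOLITLS':
        raise ValueError('Not a LIT file')
    return version, hdr_len, num_pieces, sec_hdr_len, guid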

View File

@ -46,6 +46,7 @@ entry_points = {
'librarything = calibre.ebooks.metadata.library_thing:main',
'mobi2oeb = calibre.ebooks.mobi.reader:main',
'lrf2html = calibre.ebooks.lrf.html.convert_to:main',
'lit2oeb = calibre.ebooks.lit.reader:main',
'calibre-debug = calibre.debug:main',
'calibredb = calibre.library.cli:main',
'calibre-fontconfig = calibre.utils.fontconfig:main',

View File

@ -0,0 +1,5 @@
from distutils.core import setup, Extension
setup(name="lzx", version="1.0",
ext_modules=[Extension('lzx', sources=['lzx/lzxmodule.c', 'lzx/lzxd.c'],
include_dirs=['lzx'])])

169
src/calibre/utils/lzx/lzx.h Normal file
View File

@ -0,0 +1,169 @@
/* This file is part of libmspack.
* (C) 2003-2004 Stuart Caie.
*
* The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted
* by Microsoft Corporation.
*
* libmspack is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License (LGPL) version 2.1
*
* For further details, see the file COPYING.LIB distributed with libmspack
*/
#include <sys/types.h>
#ifndef MSPACK_LZX_H
#define MSPACK_LZX_H 1
/* LZX compression / decompression definitions */
/* some constants defined by the LZX specification */
#define LZX_MIN_MATCH (2)
#define LZX_MAX_MATCH (257)
#define LZX_NUM_CHARS (256)
#define LZX_BLOCKTYPE_INVALID (0) /* also blocktypes 4-7 invalid */
#define LZX_BLOCKTYPE_VERBATIM (1)
#define LZX_BLOCKTYPE_ALIGNED (2)
#define LZX_BLOCKTYPE_UNCOMPRESSED (3)
#define LZX_PRETREE_NUM_ELEMENTS (20)
#define LZX_ALIGNED_NUM_ELEMENTS (8) /* aligned offset tree #elements */
#define LZX_NUM_PRIMARY_LENGTHS (7) /* this one missing from spec! */
#define LZX_NUM_SECONDARY_LENGTHS (249) /* length tree #elements */
/* LZX huffman defines: tweak tablebits as desired */
#define LZX_PRETREE_MAXSYMBOLS (LZX_PRETREE_NUM_ELEMENTS)
#define LZX_PRETREE_TABLEBITS (6)
#define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 50*8)
#define LZX_MAINTREE_TABLEBITS (12)
#define LZX_LENGTH_MAXSYMBOLS (LZX_NUM_SECONDARY_LENGTHS+1)
#define LZX_LENGTH_TABLEBITS (12)
#define LZX_ALIGNED_MAXSYMBOLS (LZX_ALIGNED_NUM_ELEMENTS)
#define LZX_ALIGNED_TABLEBITS (7)
#define LZX_LENTABLE_SAFETY (64) /* table decoding overruns are allowed */
#define LZX_FRAME_SIZE (32768) /* the size of a frame in LZX */
struct lzxd_stream {
struct mspack_system *sys; /* I/O routines */
struct mspack_file *input; /* input file handle */
struct mspack_file *output; /* output file handle */
off_t offset; /* number of bytes actually output */
off_t length; /* overall decompressed length of stream */
unsigned char *window; /* decoding window */
unsigned int window_size; /* window size */
unsigned int window_posn; /* decompression offset within window */
unsigned int frame_posn; /* current frame offset within in window */
unsigned int frame; /* the number of 32kb frames processed */
unsigned int reset_interval; /* which frame do we reset the compressor? */
unsigned int R0, R1, R2; /* for the LRU offset system */
unsigned int block_length; /* uncompressed length of this LZX block */
unsigned int block_remaining; /* uncompressed bytes still left to decode */
signed int intel_filesize; /* magic header value used for transform */
signed int intel_curpos; /* current offset in transform space */
unsigned char intel_started; /* has intel E8 decoding started? */
unsigned char block_type; /* type of the current block */
unsigned char header_read; /* have we started decoding at all yet? */
unsigned char posn_slots; /* how many posn slots in stream? */
unsigned char input_end; /* have we reached the end of input? */
int error;
/* I/O buffering */
unsigned char *inbuf, *i_ptr, *i_end, *o_ptr, *o_end;
unsigned int bit_buffer, bits_left, inbuf_size;
/* huffman code lengths */
unsigned char PRETREE_len [LZX_PRETREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY];
unsigned char MAINTREE_len [LZX_MAINTREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY];
unsigned char LENGTH_len [LZX_LENGTH_MAXSYMBOLS + LZX_LENTABLE_SAFETY];
unsigned char ALIGNED_len [LZX_ALIGNED_MAXSYMBOLS + LZX_LENTABLE_SAFETY];
/* huffman decoding tables */
unsigned short PRETREE_table [(1 << LZX_PRETREE_TABLEBITS) +
(LZX_PRETREE_MAXSYMBOLS * 2)];
unsigned short MAINTREE_table[(1 << LZX_MAINTREE_TABLEBITS) +
(LZX_MAINTREE_MAXSYMBOLS * 2)];
unsigned short LENGTH_table [(1 << LZX_LENGTH_TABLEBITS) +
(LZX_LENGTH_MAXSYMBOLS * 2)];
unsigned short ALIGNED_table [(1 << LZX_ALIGNED_TABLEBITS) +
(LZX_ALIGNED_MAXSYMBOLS * 2)];
/* this is used purely for doing the intel E8 transform */
unsigned char e8_buf[LZX_FRAME_SIZE];
};
/* allocates LZX decompression state for decoding the given stream.
*
* - returns NULL if window_bits is outwith the range 15 to 21 (inclusive).
*
* - uses system->alloc() to allocate memory
*
* - returns NULL if not enough memory
*
* - window_bits is the size of the LZX window, from 32Kb (15) to 2Mb (21).
*
* - reset_interval is how often the bitstream is reset, measured in
* multiples of 32Kb bytes output. For CAB LZX streams, this is always 0
* (does not occur).
*
* - input_buffer_size is how many bytes to use as an input bitstream buffer
*
* - output_length is the length in bytes of the entirely decompressed
* output stream, if known in advance. It is used to correctly perform
* the Intel E8 transformation, which must stop 6 bytes before the very
* end of the decompressed stream. It is not otherwise used or adhered
* to. If the full decompressed length is known in advance, set it here.
* If it is NOT known, use the value 0, and call lzxd_set_output_length()
* once it is known. If never set, 4 of the final 6 bytes of the output
* stream may be incorrect.
*/
extern struct lzxd_stream *lzxd_init(struct mspack_system *system,
struct mspack_file *input,
struct mspack_file *output,
int window_bits,
int reset_interval,
int input_buffer_size,
off_t output_length);
/* see description of output_length in lzxd_init() */
extern void lzxd_set_output_length(struct lzxd_stream *lzx,
off_t output_length);
/* decompresses, or decompresses more of, an LZX stream.
*
* - out_bytes of data will be decompressed and the function will return
* with an MSPACK_ERR_OK return code.
*
* - decompressing will stop as soon as out_bytes is reached. if the true
* amount of bytes decoded spills over that amount, they will be kept for
* a later invocation of lzxd_decompress().
*
* - the output bytes will be passed to the system->write() function given in
* lzxd_init(), using the output file handle given in lzxd_init(). More
* than one call may be made to system->write().
*
* - LZX will read input bytes as necessary using the system->read() function
* given in lzxd_init(), using the input file handle given in lzxd_init().
* This will continue until system->read() returns 0 bytes, or an error.
* input streams should convey an "end of input stream" by refusing to
* supply all the bytes that LZX asks for when they reach the end of the
* stream, rather than return an error code.
*
* - if an error code other than MSPACK_ERR_OK is returned, the stream should
* be considered unusable and lzxd_decompress() should not be called again
* on this stream.
*/
extern int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes);
/* frees all state associated with an LZX data stream
*
* - calls system->free() using the system pointer given in lzxd_init()
*/
void lzxd_free(struct lzxd_stream *lzx);
#endif

View File

@ -0,0 +1,905 @@
/* This file is part of libmspack.
* (C) 2003-2004 Stuart Caie.
*
* The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted
* by Microsoft Corporation.
*
* libmspack is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License (LGPL) version 2.1
*
* For further details, see the file COPYING.LIB distributed with libmspack
*/
/* LZX decompression implementation */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <mspack.h>
#include <system.h>
#include <lzx.h>
/* Microsoft's LZX document and their implementation of the
* com.ms.util.cab Java package do not concur.
*
* In the LZX document, there is a table showing the correlation between
* window size and the number of position slots. It states that the 1MB
* window = 40 slots and the 2MB window = 42 slots. In the implementation,
* 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the
* first slot whose position base is equal to or more than the required
* window size'. This would explain why other tables in the document refer
* to 50 slots rather than 42.
*
* The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode
* is not defined in the specification.
*
* The LZX document does not state the uncompressed block has an
* uncompressed length field. Where does this length field come from, so
* we can know how large the block is? The implementation has it as the 24
* bits following after the 3 blocktype bits, before the alignment
* padding.
*
* The LZX document states that aligned offset blocks have their aligned
* offset huffman tree AFTER the main and length trees. The implementation
* suggests that the aligned offset tree is BEFORE the main and length
* trees.
*
* The LZX document decoding algorithm states that, in an aligned offset
* block, if an extra_bits value is 1, 2 or 3, then that number of bits
* should be read and the result added to the match offset. This is
* correct for 1 and 2, but not 3, where just a huffman symbol (using the
* aligned tree) should be read.
*
* Regarding the E8 preprocessing, the LZX document states 'No translation
* may be performed on the last 6 bytes of the input block'. This is
* correct. However, the pseudocode provided checks for the *E8 leader*
* up to the last 6 bytes. If the leader appears between -10 and -7 bytes
* from the end, this would cause the next four bytes to be modified, at
* least one of which would be in the last 6 bytes, which is not allowed
* according to the spec.
*
* The specification states that the huffman trees must always contain at
* least one element. However, many CAB files contain blocks where the
* length tree is completely empty (because there are no matches), and
* this is expected to succeed.
*/
/* LZX decompressor input macros
*
* STORE_BITS stores bitstream state in lzxd_stream structure
* RESTORE_BITS restores bitstream state from lzxd_stream structure
* READ_BITS(var,n) takes N bits from the buffer and puts them in var
* ENSURE_BITS(n) ensures there are at least N bits in the bit buffer.
* PEEK_BITS(n) extracts without removing N bits from the bit buffer
* REMOVE_BITS(n) removes N bits from the bit buffer
*
* These bit access routines work by using the area beyond the MSB and the
* LSB as a free source of zeroes when shifting. This avoids having to
* mask any bits. So we have to know the bit width of the bit buffer
* variable.
*
* The bit buffer datatype should be at least 32 bits wide: it must be
* possible to ENSURE_BITS(16), so it must be possible to add 16 new bits
* to the bit buffer when the bit buffer already has 1 to 15 bits left.
*/
#if HAVE_LIMITS_H
# include <limits.h>
#endif
#ifndef CHAR_BIT
# define CHAR_BIT (8)
#endif
#define BITBUF_WIDTH (sizeof(bit_buffer) * CHAR_BIT)
#define STORE_BITS do { \
lzx->i_ptr = i_ptr; \
lzx->i_end = i_end; \
lzx->bit_buffer = bit_buffer; \
lzx->bits_left = bits_left; \
} while (0)
#define RESTORE_BITS do { \
i_ptr = lzx->i_ptr; \
i_end = lzx->i_end; \
bit_buffer = lzx->bit_buffer; \
bits_left = lzx->bits_left; \
} while (0)
#define ENSURE_BITS(nbits) \
while (bits_left < (nbits)) { \
if (i_ptr >= i_end) { \
if (lzxd_read_input(lzx)) return lzx->error; \
i_ptr = lzx->i_ptr; \
i_end = lzx->i_end; \
} \
bit_buffer |= ((i_ptr[1] << 8) | i_ptr[0]) \
<< (BITBUF_WIDTH - 16 - bits_left); \
bits_left += 16; \
i_ptr += 2; \
}
#define PEEK_BITS(nbits) (bit_buffer >> (BITBUF_WIDTH - (nbits)))
#define REMOVE_BITS(nbits) ((bit_buffer <<= (nbits)), (bits_left -= (nbits)))
#define READ_BITS(val, nbits) do { \
ENSURE_BITS(nbits); \
(val) = PEEK_BITS(nbits); \
REMOVE_BITS(nbits); \
} while (0)
static int lzxd_read_input(struct lzxd_stream *lzx) {
int read = lzx->sys->read(lzx->input, &lzx->inbuf[0], (int)lzx->inbuf_size);
if (read < 0) return lzx->error = MSPACK_ERR_READ;
/* huff decode's ENSURE_BITS(16) might overrun the input stream, even
* if those bits aren't used, so fake 2 more bytes */
if (read == 0) {
if (lzx->input_end) {
D(("out of input bytes"))
return lzx->error = MSPACK_ERR_READ;
}
else {
read = 2;
lzx->inbuf[0] = lzx->inbuf[1] = 0;
lzx->input_end = 1;
}
}
lzx->i_ptr = &lzx->inbuf[0];
lzx->i_end = &lzx->inbuf[read];
return MSPACK_ERR_OK;
}
/* Huffman decoding macros */
/* READ_HUFFSYM(tablename, var) decodes one huffman symbol from the
* bitstream using the stated table and puts it in var.
*/
#define READ_HUFFSYM(tbl, var) do { \
/* huffman symbols can be up to 16 bits long */ \
ENSURE_BITS(16); \
/* immediate table lookup of [tablebits] bits of the code */ \
sym = lzx->tbl##_table[PEEK_BITS(LZX_##tbl##_TABLEBITS)]; \
/* is the symbol longer than [tablebits] bits? (i=node index) */ \
if (sym >= LZX_##tbl##_MAXSYMBOLS) { \
/* decode remaining bits by tree traversal */ \
i = 1 << (BITBUF_WIDTH - LZX_##tbl##_TABLEBITS); \
do { \
/* one less bit. error if we run out of bits before decode */ \
i >>= 1; \
if (i == 0) { \
D(("out of bits in huffman decode")) \
return lzx->error = MSPACK_ERR_DECRUNCH; \
} \
/* double node index and add 0 (left branch) or 1 (right) */ \
sym <<= 1; sym |= (bit_buffer & i) ? 1 : 0; \
/* hop to next node index / decoded symbol */ \
sym = lzx->tbl##_table[sym]; \
/* while we are still in node indices, not decoded symbols */ \
} while (sym >= LZX_##tbl##_MAXSYMBOLS); \
} \
/* result */ \
(var) = sym; \
/* look up the code length of that symbol and discard those bits */ \
i = lzx->tbl##_len[sym]; \
REMOVE_BITS(i); \
} while (0)
/* BUILD_TABLE(tbl) builds a huffman lookup table from code lengths */
#define BUILD_TABLE(tbl) \
if (make_decode_table(LZX_##tbl##_MAXSYMBOLS, LZX_##tbl##_TABLEBITS, \
&lzx->tbl##_len[0], &lzx->tbl##_table[0])) \
{ \
D(("failed to build %s table", #tbl)) \
return lzx->error = MSPACK_ERR_DECRUNCH; \
}
/* make_decode_table(nsyms, nbits, length[], table[])
*
* This function was coded by David Tritscher. It builds a fast huffman
* decoding table from a canonical huffman code lengths table.
*
* nsyms = total number of symbols in this huffman tree.
* nbits = any symbols with a code length of nbits or less can be decoded
* in one lookup of the table.
* length = A table to get code lengths from [0 to syms-1]
* table = The table to fill up with decoded symbols and pointers.
*
* Returns 0 for OK or 1 for error
*/
static int make_decode_table(unsigned int nsyms, unsigned int nbits,
unsigned char *length, unsigned short *table)
{
register unsigned short sym;
register unsigned int leaf, fill;
register unsigned char bit_num;
unsigned int pos = 0; /* the current position in the decode table */
unsigned int table_mask = 1 << nbits;
unsigned int bit_mask = table_mask >> 1; /* don't do 0 length codes */
unsigned int next_symbol = bit_mask; /* base of allocation for long codes */
/* fill entries for codes short enough for a direct mapping */
for (bit_num = 1; bit_num <= nbits; bit_num++) {
for (sym = 0; sym < nsyms; sym++) {
if (length[sym] != bit_num) continue;
leaf = pos;
if((pos += bit_mask) > table_mask) return 1; /* table overrun */
/* fill all possible lookups of this symbol with the symbol itself */
for (fill = bit_mask; fill-- > 0;) table[leaf++] = sym;
}
bit_mask >>= 1;
}
/* full table already? */
if (pos == table_mask) return 0;
/* clear the remainder of the table */
for (sym = pos; sym < table_mask; sym++) table[sym] = 0xFFFF;
/* allow codes to be up to nbits+16 long, instead of nbits */
pos <<= 16;
table_mask <<= 16;
bit_mask = 1 << 15;
for (bit_num = nbits+1; bit_num <= 16; bit_num++) {
for (sym = 0; sym < nsyms; sym++) {
if (length[sym] != bit_num) continue;
leaf = pos >> 16;
for (fill = 0; fill < bit_num - nbits; fill++) {
/* if this path hasn't been taken yet, 'allocate' two entries */
if (table[leaf] == 0xFFFF) {
table[(next_symbol << 1)] = 0xFFFF;
table[(next_symbol << 1) + 1] = 0xFFFF;
table[leaf] = next_symbol++;
}
/* follow the path and select either left or right for next bit */
leaf = table[leaf] << 1;
if ((pos >> (15-fill)) & 1) leaf++;
}
table[leaf] = sym;
if ((pos += bit_mask) > table_mask) return 1; /* table overflow */
}
bit_mask >>= 1;
}
/* full table? */
if (pos == table_mask) return 0;
/* either erroneous table, or all elements are 0 - let's find out. */
for (sym = 0; sym < nsyms; sym++) if (length[sym]) return 1;
return 0;
}
/* READ_LENGTHS(tablename, first, last) reads in code lengths for symbols
* first to last in the given table. The code lengths are stored in their
* own special LZX way.
*/
#define READ_LENGTHS(tbl, first, last) do { \
STORE_BITS; \
if (lzxd_read_lens(lzx, &lzx->tbl##_len[0], (first), \
(unsigned int)(last))) return lzx->error; \
RESTORE_BITS; \
} while (0)
static int lzxd_read_lens(struct lzxd_stream *lzx, unsigned char *lens,
unsigned int first, unsigned int last)
{
/* bit buffer and huffman symbol decode variables */
register unsigned int bit_buffer;
register int bits_left, i;
register unsigned short sym;
unsigned char *i_ptr, *i_end;
unsigned int x, y;
int z;
RESTORE_BITS;
/* read lengths for pretree (20 symbols, lengths stored in fixed 4 bits) */
for (x = 0; x < 20; x++) {
READ_BITS(y, 4);
lzx->PRETREE_len[x] = y;
}
BUILD_TABLE(PRETREE);
for (x = first; x < last; ) {
READ_HUFFSYM(PRETREE, z);
if (z == 17) {
/* code = 17, run of ([read 4 bits]+4) zeros */
READ_BITS(y, 4); y += 4;
while (y--) lens[x++] = 0;
}
else if (z == 18) {
/* code = 18, run of ([read 5 bits]+20) zeros */
READ_BITS(y, 5); y += 20;
while (y--) lens[x++] = 0;
}
else if (z == 19) {
/* code = 19, run of ([read 1 bit]+4) [read huffman symbol] */
READ_BITS(y, 1); y += 4;
READ_HUFFSYM(PRETREE, z);
z = lens[x] - z; if (z < 0) z += 17;
while (y--) lens[x++] = z;
}
else {
/* code = 0 to 16, delta current length entry */
z = lens[x] - z; if (z < 0) z += 17;
lens[x++] = z;
}
}
STORE_BITS;
return MSPACK_ERR_OK;
}
/* LZX static data tables:
*
* LZX uses 'position slots' to represent match offsets. For every match,
* a small 'position slot' number and a small offset from that slot are
* encoded instead of one large offset.
*
* position_base[] is an index to the position slot bases
*
* extra_bits[] states how many bits of offset-from-base data is needed.
*/
static unsigned int position_base[51];
static unsigned char extra_bits[51];
static void lzxd_static_init(void) {
int i, j;
for (i = 0, j = 0; i < 51; i += 2) {
extra_bits[i] = j; /* 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7... */
extra_bits[i+1] = j;
if ((i != 0) && (j < 17)) j++; /* 0,0,1,2,3,4...15,16,17,17,17,17... */
}
for (i = 0, j = 0; i < 51; i++) {
position_base[i] = j; /* 0,1,2,3,4,6,8,12,16,24,32,... */
j += 1 << extra_bits[i]; /* 1,1,1,1,2,2,4,4,8,8,16,16,32,32,... */
}
}
static void lzxd_reset_state(struct lzxd_stream *lzx) {
int i;
lzx->R0 = 1;
lzx->R1 = 1;
lzx->R2 = 1;
lzx->header_read = 0;
lzx->block_remaining = 0;
lzx->block_type = LZX_BLOCKTYPE_INVALID;
/* initialise tables to 0 (because deltas will be applied to them) */
for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) lzx->MAINTREE_len[i] = 0;
for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++) lzx->LENGTH_len[i] = 0;
}
/*-------- main LZX code --------*/
struct lzxd_stream *lzxd_init(struct mspack_system *system,
struct mspack_file *input,
struct mspack_file *output,
int window_bits,
int reset_interval,
int input_buffer_size,
off_t output_length)
{
unsigned int window_size = 1 << window_bits;
struct lzxd_stream *lzx;
if (!system) return NULL;
/* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */
if (window_bits < 15 || window_bits > 21) return NULL;
input_buffer_size = (input_buffer_size + 1) & -2;
if (!input_buffer_size) return NULL;
/* initialise static data */
lzxd_static_init();
/* allocate decompression state */
if (!(lzx = system->alloc(system, sizeof(struct lzxd_stream)))) {
return NULL;
}
/* allocate decompression window and input buffer */
lzx->window = system->alloc(system, (size_t) window_size);
lzx->inbuf = system->alloc(system, (size_t) input_buffer_size);
if (!lzx->window || !lzx->inbuf) {
system->free(lzx->window);
system->free(lzx->inbuf);
system->free(lzx);
return NULL;
}
/* initialise decompression state */
lzx->sys = system;
lzx->input = input;
lzx->output = output;
lzx->offset = 0;
lzx->length = output_length;
lzx->inbuf_size = input_buffer_size;
lzx->window_size = 1 << window_bits;
lzx->window_posn = 0;
lzx->frame_posn = 0;
lzx->frame = 0;
lzx->reset_interval = reset_interval;
lzx->intel_filesize = 0;
lzx->intel_curpos = 0;
/* window bits: 15 16 17 18 19 20 21
* position slots: 30 32 34 36 38 42 50 */
lzx->posn_slots = ((window_bits == 21) ? 50 :
((window_bits == 20) ? 42 : (window_bits << 1)));
lzx->intel_started = 0;
lzx->input_end = 0;
lzx->error = MSPACK_ERR_OK;
lzx->i_ptr = lzx->i_end = &lzx->inbuf[0];
lzx->o_ptr = lzx->o_end = &lzx->e8_buf[0];
lzx->bit_buffer = lzx->bits_left = 0;
lzxd_reset_state(lzx);
return lzx;
}
void lzxd_set_output_length(struct lzxd_stream *lzx, off_t out_bytes) {
if (lzx) lzx->length = out_bytes;
}
int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
/* bitstream reading and huffman variables */
register unsigned int bit_buffer;
register int bits_left, i=0;
register unsigned short sym;
unsigned char *i_ptr, *i_end;
int match_length, length_footer, extra, verbatim_bits, bytes_todo;
int this_run, main_element, aligned_bits, j;
unsigned char *window, *runsrc, *rundest, buf[12];
unsigned int frame_size=0, end_frame, match_offset, window_posn;
unsigned int R0, R1, R2;
/* easy answers */
if (!lzx || (out_bytes < 0)) return MSPACK_ERR_ARGS;
if (lzx->error) return lzx->error;
/* flush out any stored-up bytes before we begin */
i = lzx->o_end - lzx->o_ptr;
if ((off_t) i > out_bytes) i = (int) out_bytes;
if (i) {
if (lzx->sys->write(lzx->output, lzx->o_ptr, i) != i) {
return lzx->error = MSPACK_ERR_WRITE;
}
lzx->o_ptr += i;
lzx->offset += i;
out_bytes -= i;
}
if (out_bytes == 0) return MSPACK_ERR_OK;
/* restore local state */
RESTORE_BITS;
window = lzx->window;
window_posn = lzx->window_posn;
R0 = lzx->R0;
R1 = lzx->R1;
R2 = lzx->R2;
end_frame = (unsigned int)((lzx->offset + out_bytes) / LZX_FRAME_SIZE) + 1;
while (lzx->frame < end_frame) {
/* have we reached the reset interval? (if there is one?) */
if (lzx->reset_interval && ((lzx->frame % lzx->reset_interval) == 0)) {
if (lzx->block_remaining) {
D(("%d bytes remaining at reset interval", lzx->block_remaining))
return lzx->error = MSPACK_ERR_DECRUNCH;
}
/* re-read the intel header and reset the huffman lengths */
lzxd_reset_state(lzx);
}
/* read header if necessary */
if (!lzx->header_read) {
/* read 1 bit. if bit=0, intel filesize = 0.
* if bit=1, read intel filesize (32 bits) */
j = 0; READ_BITS(i, 1); if (i) { READ_BITS(i, 16); READ_BITS(j, 16); }
lzx->intel_filesize = (i << 16) | j;
lzx->header_read = 1;
}
/* calculate size of frame: all frames are 32k except the final frame
* which is 32kb or less. this can only be calculated when lzx->length
* has been filled in. */
frame_size = LZX_FRAME_SIZE;
if (lzx->length && (lzx->length - lzx->offset) < (off_t)frame_size) {
frame_size = lzx->length - lzx->offset;
}
/* decode until one more frame is available */
bytes_todo = lzx->frame_posn + frame_size - window_posn;
while (bytes_todo > 0) {
/* initialise new block, if one is needed */
if (lzx->block_remaining == 0) {
/* realign if previous block was an odd-sized UNCOMPRESSED block */
if ((lzx->block_type == LZX_BLOCKTYPE_UNCOMPRESSED) &&
(lzx->block_length & 1))
{
if (i_ptr == i_end) {
if (lzxd_read_input(lzx)) return lzx->error;
i_ptr = lzx->i_ptr;
i_end = lzx->i_end;
}
i_ptr++;
}
/* read block type (3 bits) and block length (24 bits) */
READ_BITS(lzx->block_type, 3);
READ_BITS(i, 16); READ_BITS(j, 8);
lzx->block_remaining = lzx->block_length = (i << 8) | j;
/*D(("new block t%d len %u", lzx->block_type, lzx->block_length))*/
/* read individual block headers */
switch (lzx->block_type) {
case LZX_BLOCKTYPE_ALIGNED:
/* read lengths of and build aligned huffman decoding tree */
for (i = 0; i < 8; i++) { READ_BITS(j, 3); lzx->ALIGNED_len[i] = j; }
BUILD_TABLE(ALIGNED);
/* no break -- rest of aligned header is same as verbatim */
case LZX_BLOCKTYPE_VERBATIM:
/* read lengths of and build main huffman decoding tree */
READ_LENGTHS(MAINTREE, 0, 256);
READ_LENGTHS(MAINTREE, 256, LZX_NUM_CHARS + (lzx->posn_slots << 3));
BUILD_TABLE(MAINTREE);
/* if the literal 0xE8 is anywhere in the block... */
if (lzx->MAINTREE_len[0xE8] != 0) lzx->intel_started = 1;
/* read lengths of and build lengths huffman decoding tree */
READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS);
BUILD_TABLE(LENGTH);
break;
case LZX_BLOCKTYPE_UNCOMPRESSED:
/* because we can't assume otherwise */
lzx->intel_started = 1;
/* read 1-16 (not 0-15) bits to align to bytes */
ENSURE_BITS(16);
if (bits_left > 16) i_ptr -= 2;
bits_left = 0; bit_buffer = 0;
/* read 12 bytes of stored R0 / R1 / R2 values */
for (rundest = &buf[0], i = 0; i < 12; i++) {
if (i_ptr == i_end) {
if (lzxd_read_input(lzx)) return lzx->error;
i_ptr = lzx->i_ptr;
i_end = lzx->i_end;
}
*rundest++ = *i_ptr++;
}
R0 = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
R1 = buf[4] | (buf[5] << 8) | (buf[6] << 16) | (buf[7] << 24);
R2 = buf[8] | (buf[9] << 8) | (buf[10] << 16) | (buf[11] << 24);
break;
default:
D(("bad block type"))
return lzx->error = MSPACK_ERR_DECRUNCH;
}
}
/* decode more of the block:
* run = min(what's available, what's needed) */
this_run = lzx->block_remaining;
if (this_run > bytes_todo) this_run = bytes_todo;
/* assume we decode exactly this_run bytes, for now */
bytes_todo -= this_run;
lzx->block_remaining -= this_run;
/* decode at least this_run bytes */
switch (lzx->block_type) {
case LZX_BLOCKTYPE_VERBATIM:
while (this_run > 0) {
READ_HUFFSYM(MAINTREE, main_element);
if (main_element < LZX_NUM_CHARS) {
/* literal: 0 to LZX_NUM_CHARS-1 */
window[window_posn++] = main_element;
this_run--;
}
else {
/* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
main_element -= LZX_NUM_CHARS;
/* get match length */
match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
READ_HUFFSYM(LENGTH, length_footer);
match_length += length_footer;
}
match_length += LZX_MIN_MATCH;
/* get match offset */
switch ((match_offset = (main_element >> 3))) {
case 0: match_offset = R0; break;
case 1: match_offset = R1; R1=R0; R0 = match_offset; break;
case 2: match_offset = R2; R2=R0; R0 = match_offset; break;
case 3: match_offset = 1; R2=R1; R1=R0; R0 = match_offset; break;
default:
extra = extra_bits[match_offset];
READ_BITS(verbatim_bits, extra);
match_offset = position_base[match_offset] - 2 + verbatim_bits;
R2 = R1; R1 = R0; R0 = match_offset;
}
if ((window_posn + match_length) > lzx->window_size) {
D(("match ran over window wrap"))
return lzx->error = MSPACK_ERR_DECRUNCH;
}
/* copy match */
rundest = &window[window_posn];
i = match_length;
/* does match offset wrap the window? */
if (match_offset > window_posn) {
/* j = length from match offset to end of window */
j = match_offset - window_posn;
if (j > (int) lzx->window_size) {
D(("match offset beyond window boundaries"))
return lzx->error = MSPACK_ERR_DECRUNCH;
}
runsrc = &window[lzx->window_size - j];
if (j < i) {
/* if match goes over the window edge, do two copy runs */
i -= j; while (j-- > 0) *rundest++ = *runsrc++;
runsrc = window;
}
while (i-- > 0) *rundest++ = *runsrc++;
}
else {
runsrc = rundest - match_offset;
while (i-- > 0) *rundest++ = *runsrc++;
}
this_run -= match_length;
window_posn += match_length;
}
} /* while (this_run > 0) */
break;
case LZX_BLOCKTYPE_ALIGNED:
while (this_run > 0) {
READ_HUFFSYM(MAINTREE, main_element);
if (main_element < LZX_NUM_CHARS) {
/* literal: 0 to LZX_NUM_CHARS-1 */
window[window_posn++] = main_element;
this_run--;
}
else {
/* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
main_element -= LZX_NUM_CHARS;
/* get match length */
match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
READ_HUFFSYM(LENGTH, length_footer);
match_length += length_footer;
}
match_length += LZX_MIN_MATCH;
/* get match offset */
switch ((match_offset = (main_element >> 3))) {
case 0: match_offset = R0; break;
case 1: match_offset = R1; R1 = R0; R0 = match_offset; break;
case 2: match_offset = R2; R2 = R0; R0 = match_offset; break;
default:
extra = extra_bits[match_offset];
match_offset = position_base[match_offset] - 2;
if (extra > 3) {
/* verbatim and aligned bits */
extra -= 3;
READ_BITS(verbatim_bits, extra);
match_offset += (verbatim_bits << 3);
READ_HUFFSYM(ALIGNED, aligned_bits);
match_offset += aligned_bits;
}
else if (extra == 3) {
/* aligned bits only */
READ_HUFFSYM(ALIGNED, aligned_bits);
match_offset += aligned_bits;
}
else if (extra > 0) { /* extra==1, extra==2 */
/* verbatim bits only */
READ_BITS(verbatim_bits, extra);
match_offset += verbatim_bits;
}
else /* extra == 0 */ {
/* ??? not defined in LZX specification! */
match_offset = 1;
}
/* update repeated offset LRU queue */
R2 = R1; R1 = R0; R0 = match_offset;
}
if ((window_posn + match_length) > lzx->window_size) {
D(("match ran over window wrap"))
return lzx->error = MSPACK_ERR_DECRUNCH;
}
/* copy match */
rundest = &window[window_posn];
i = match_length;
/* does match offset wrap the window? */
if (match_offset > window_posn) {
/* j = length from match offset to end of window */
j = match_offset - window_posn;
if (j > (int) lzx->window_size) {
D(("match offset beyond window boundaries"))
return lzx->error = MSPACK_ERR_DECRUNCH;
}
runsrc = &window[lzx->window_size - j];
if (j < i) {
/* if match goes over the window edge, do two copy runs */
i -= j; while (j-- > 0) *rundest++ = *runsrc++;
runsrc = window;
}
while (i-- > 0) *rundest++ = *runsrc++;
}
else {
runsrc = rundest - match_offset;
while (i-- > 0) *rundest++ = *runsrc++;
}
this_run -= match_length;
window_posn += match_length;
}
} /* while (this_run > 0) */
break;
case LZX_BLOCKTYPE_UNCOMPRESSED:
/* as this_run is limited not to wrap a frame, this also means it
* won't wrap the window (as the window is a multiple of 32k) */
rundest = &window[window_posn];
window_posn += this_run;
while (this_run > 0) {
if ((i = i_end - i_ptr)) {
if (i > this_run) i = this_run;
lzx->sys->copy(i_ptr, rundest, (size_t) i);
rundest += i;
i_ptr += i;
this_run -= i;
}
else {
if (lzxd_read_input(lzx)) return lzx->error;
i_ptr = lzx->i_ptr;
i_end = lzx->i_end;
}
}
break;
default:
D(("Default Here."));
return lzx->error = MSPACK_ERR_DECRUNCH; /* might as well */
}
/* did the final match overrun our desired this_run length? */
if (this_run < 0) {
if ((unsigned int)(-this_run) > lzx->block_remaining) {
D(("overrun went past end of block by %d (%d remaining)",
-this_run, lzx->block_remaining ))
return lzx->error = MSPACK_ERR_DECRUNCH;
}
lzx->block_remaining -= -this_run;
}
} /* while (bytes_todo > 0) */
/* streams don't extend over frame boundaries */
if ((window_posn - lzx->frame_posn) != frame_size) {
D(("decode beyond output frame limits! %d != %d",
window_posn - lzx->frame_posn, frame_size))
/* Ignored */
#if 0
return lzx->error = MSPACK_ERR_DECRUNCH;
#endif
}
/* re-align input bitstream */
if (bits_left > 0) ENSURE_BITS(16);
if (bits_left & 15) REMOVE_BITS(bits_left & 15);
/* check that we've used all of the previous frame first */
if (lzx->o_ptr != lzx->o_end) {
D(("%d avail bytes, new %d frame", lzx->o_end-lzx->o_ptr, frame_size))
return lzx->error = MSPACK_ERR_DECRUNCH;
}
/* does this intel block _really_ need decoding? */
if (lzx->intel_started && lzx->intel_filesize &&
(lzx->frame <= 32768) && (frame_size > 10))
{
unsigned char *data = &lzx->e8_buf[0];
unsigned char *dataend = &lzx->e8_buf[frame_size - 10];
signed int curpos = lzx->intel_curpos;
signed int filesize = lzx->intel_filesize;
signed int abs_off, rel_off;
/* copy e8 block to the e8 buffer and tweak if needed */
lzx->o_ptr = data;
lzx->sys->copy(&lzx->window[lzx->frame_posn], data, frame_size);
while (data < dataend) {
if (*data++ != 0xE8) { curpos++; continue; }
abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24);
if ((abs_off >= -curpos) && (abs_off < filesize)) {
rel_off = (abs_off >= 0) ? abs_off - curpos : abs_off + filesize;
data[0] = (unsigned char) rel_off;
data[1] = (unsigned char) (rel_off >> 8);
data[2] = (unsigned char) (rel_off >> 16);
data[3] = (unsigned char) (rel_off >> 24);
}
data += 4;
curpos += 5;
}
lzx->intel_curpos += frame_size;
}
else {
lzx->o_ptr = &lzx->window[lzx->frame_posn];
if (lzx->intel_filesize) lzx->intel_curpos += frame_size;
}
lzx->o_end = &lzx->o_ptr[frame_size];
/* write a frame */
i = (out_bytes < (off_t)frame_size) ? (unsigned int)out_bytes : frame_size;
if (lzx->sys->write(lzx->output, lzx->o_ptr, i) != i) {
return lzx->error = MSPACK_ERR_WRITE;
}
lzx->o_ptr += i;
lzx->offset += i;
out_bytes -= i;
/* advance frame start position */
lzx->frame_posn += frame_size;
lzx->frame++;
/* wrap window / frame position pointers */
if (window_posn == lzx->window_size) window_posn = 0;
if (lzx->frame_posn == lzx->window_size) lzx->frame_posn = 0;
} /* while (lzx->frame < end_frame) */
if (out_bytes) {
D(("bytes left to output"))
return lzx->error = MSPACK_ERR_DECRUNCH;
}
/* store local state */
STORE_BITS;
lzx->window_posn = window_posn;
lzx->R0 = R0;
lzx->R1 = R1;
lzx->R2 = R2;
return MSPACK_ERR_OK;
}
void lzxd_free(struct lzxd_stream *lzx) {
struct mspack_system *sys;
if (lzx) {
sys = lzx->sys;
sys->free(lzx->inbuf);
sys->free(lzx->window);
sys->free(lzx);
}
}

View File

@ -0,0 +1,218 @@
/* __license__ = 'GPL v3'
* __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
*
* Python module C glue code.
*/
#include <Python.h>
#include <mspack.h>
#include <lzx.h>
static char lzx_doc[] =
"Provide basic LZX decompression using the code from libmspack.";
static PyObject *LzxError = NULL;
typedef struct memory_file {
unsigned int magic; /* 0xB5 */
void * buffer;
int total_bytes;
int current_bytes;
} memory_file;
void *
glue_alloc(struct mspack_system *this, size_t bytes)
{
void *p = NULL;
p = (void *)malloc(bytes);
if (p == NULL) {
return (void *)PyErr_NoMemory();
}
return p;
}
void
glue_free(void *p)
{
free(p);
}
void
glue_copy(void *src, void *dest, size_t bytes)
{
memcpy(dest, src, bytes);
}
struct mspack_file *
glue_open(struct mspack_system *this, char *filename, int mode)
{
PyErr_SetString(LzxError, "MSPACK_OPEN unsupported");
return NULL;
}
void
glue_close(struct mspack_file *file)
{
return;
}
int
glue_read(struct mspack_file *file, void * buffer, int bytes)
{
memory_file *mem;
int remaining;
mem = (memory_file *)file;
if (mem->magic != 0xB5) return -1;
remaining = mem->total_bytes - mem->current_bytes;
if (!remaining) return 0;
if (bytes > remaining) bytes = remaining;
memcpy(buffer, (unsigned char *)mem->buffer + mem->current_bytes, bytes);
mem->current_bytes += bytes;
return bytes;
}
int
glue_write(struct mspack_file * file, void * buffer, int bytes)
{
memory_file *mem;
int remaining;
mem = (memory_file *)file;
if (mem->magic != 0xB5) return -1;
remaining = mem->total_bytes - mem->current_bytes;
if (!remaining) return 0;
if (bytes > remaining) {
PyErr_SetString(LzxError,
"MSPACK_WRITE tried to write beyond end of buffer");
bytes = remaining;
}
memcpy((unsigned char *)mem->buffer + mem->current_bytes, buffer, bytes);
mem->current_bytes += bytes;
return bytes;
}
struct mspack_system lzxglue_system = {
glue_open,
glue_close,
glue_read, /* Read */
glue_write, /* Write */
NULL, /* Seek */
NULL, /* Tell */
NULL, /* Message */
glue_alloc,
glue_free,
glue_copy,
NULL /* Termination */
};
int LZXwindow = 0;
struct lzxd_stream * lzx_stream = NULL;
/* Can't really init here, don't know enough */
static PyObject *
init(PyObject *self, PyObject *args)
{
int window = 0;
if (!PyArg_ParseTuple(args, "i", &window)) {
return NULL;
}
LZXwindow = window;
lzx_stream = NULL;
Py_RETURN_NONE;
}
/* Doesn't exist. Oh well, reinitialize state every time anyway */
static PyObject *
reset(PyObject *self, PyObject *args)
{
if (!PyArg_ParseTuple(args, "")) {
return NULL;
}
Py_RETURN_NONE;
}
//int LZXdecompress(unsigned char *inbuf, unsigned char *outbuf,
// unsigned int inlen, unsigned int outlen)
static PyObject *
decompress(PyObject *self, PyObject *args)
{
unsigned char *inbuf;
unsigned char *outbuf;
unsigned int inlen;
unsigned int outlen;
int err;
memory_file source;
memory_file dest;
PyObject *retval = NULL;
if (!PyArg_ParseTuple(args, "s#I", &inbuf, &inlen, &outlen)) {
return NULL;
}
retval = PyString_FromStringAndSize(NULL, outlen);
if (retval == NULL) {
return NULL;
}
outbuf = (unsigned char *)PyString_AS_STRING(retval);
source.magic = 0xB5;
source.buffer = inbuf;
source.current_bytes = 0;
source.total_bytes = inlen;
dest.magic = 0xB5;
dest.buffer = outbuf;
dest.current_bytes = 0;
dest.total_bytes = outlen;
lzx_stream = lzxd_init(&lzxglue_system, (struct mspack_file *)&source,
(struct mspack_file *)&dest, LZXwindow,
0x7fff /* Never reset, I do it */, 4096, outlen);
err = -1;
if (lzx_stream) err = lzxd_decompress(lzx_stream, outlen);
lzxd_free(lzx_stream);
lzx_stream = NULL;
if (err != MSPACK_ERR_OK) {
    Py_DECREF(retval);
    PyErr_SetString(LzxError, "LZX decompression failed");
    return NULL; /* don't hand back the freed buffer once an error is set */
}
return retval;
}
static PyMethodDef lzx_methods[] = {
{ "init", &init, METH_VARARGS, "Initialize the LZX decompressor" },
{ "reset", &reset, METH_VARARGS, "Reset the LZX decompressor" },
{ "decompress", &decompress, METH_VARARGS, "Run the LZX decompressor" },
{ NULL, NULL }
};
PyMODINIT_FUNC
initlzx(void)
{
PyObject *m;
m = Py_InitModule3("lzx", lzx_methods, lzx_doc);
if (m == NULL) {
return;
}
LzxError = PyErr_NewException("lzx.LzxError", NULL, NULL);
Py_INCREF(LzxError);
PyModule_AddObject(m, "LzxError", LzxError);
return;
}
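A rough sketch of how the extension above might be driven from Python once built; the helper and the 17-bit window are hypothetical (lzxd_init() accepts 15 to 21 bits), decompress() takes the compressed bytes plus the expected uncompressed length, and failure is signalled through lzx.LzxError:

import lzx

def decompress_block(data, uncompressed_len, window_bits=17):
    # Re-init before each independently reset stream, per the
    # 'Never reset, I do it' note in decompress() above.
    lzx.init(window_bits)      # LZX window of 2**window_bits bytes
    return lzx.decompress(data, uncompressed_len)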

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,66 @@
/* This file is part of libmspack.
* (C) 2003-2004 Stuart Caie.
*
* libmspack is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License (LGPL) version 2.1
*
* For further details, see the file COPYING.LIB distributed with libmspack
*/
#ifndef MSPACK_SYSTEM_H
#define MSPACK_SYSTEM_H 1
#ifdef _MSC_VER
#define inline
#endif
#ifdef DEBUG
# include <stdio.h>
# define D(x) do { printf("%s:%d (%s) ",__FILE__, __LINE__, __FUNCTION__); \
printf x ; fputc('\n', stdout); fflush(stdout);} while (0);
#else
# define D(x)
#endif
/* endian-neutral reading of little-endian data */
#define __egi32(a,n) ( (((a)[n+3]) << 24) | (((a)[n+2]) << 16) | \
(((a)[n+1]) << 8) | ((a)[n+0]) )
#define EndGetI64(a) ((((unsigned long long int) __egi32(a,4)) << 32) | \
((unsigned int) __egi32(a,0)))
#define EndGetI32(a) __egi32(a,0)
#define EndGetI16(a) ((((a)[1])<<8)|((a)[0]))
/* endian-neutral reading of big-endian data */
#define EndGetM32(a) ((((a)[0])<<24)|(((a)[1])<<16)|(((a)[2])<<8)|((a)[3]))
#define EndGetM16(a) ((((a)[0])<<8)|((a)[1]))
extern struct mspack_system *mspack_default_system;
/* returns the length of a file opened for reading */
extern int mspack_sys_filelen(struct mspack_system *system,
struct mspack_file *file, off_t *length);
/* validates a system structure */
extern int mspack_valid_system(struct mspack_system *sys);
/* Can't redefine intrinsics in Microsoft Visual C */
#ifndef _MSC_VER
/* inline memcmp() */
static inline int memcmp(const void *s1, const void *s2, size_t n) {
unsigned char *c1 = (unsigned char *) s1;
unsigned char *c2 = (unsigned char *) s2;
if (n == 0) return 0;
while (--n && (*c1 == *c2)) c1++, c2++;
return *c1 - *c2;
}
/* inline strlen() */
static inline size_t strlen(const char *s) {
const char *e = s;
while (*e) e++;
return e - s;
}
#endif
#endif