mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Initial merge of native replacement for convertlit (thanks to llasram)
This commit is contained in:
commit
c81e009e0d
5
src/calibre/ebooks/lit/__init__.py
Normal file
5
src/calibre/ebooks/lit/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
class LitError(Exception):
|
||||
pass
|
9
src/calibre/ebooks/lit/maps/__init__.py
Normal file
9
src/calibre/ebooks/lit/maps/__init__.py
Normal file
@ -0,0 +1,9 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
"""
|
||||
Microsoft LIT tag and attribute tables.
|
||||
"""
|
||||
|
||||
from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP
|
||||
from calibre.ebooks.lit.maps.html import MAP as HTML_MAP
|
1015
src/calibre/ebooks/lit/maps/html.py
Normal file
1015
src/calibre/ebooks/lit/maps/html.py
Normal file
File diff suppressed because it is too large
Load Diff
81
src/calibre/ebooks/lit/maps/opf.py
Normal file
81
src/calibre/ebooks/lit/maps/opf.py
Normal file
@ -0,0 +1,81 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
"""
|
||||
Microsoft LIT OPF tag and attribute tables, copied from ConvertLIT.
|
||||
"""
|
||||
|
||||
TAGS = [
|
||||
None,
|
||||
"package",
|
||||
"dc:Title",
|
||||
"dc:Creator",
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
"manifest",
|
||||
"item",
|
||||
"spine",
|
||||
"itemref",
|
||||
"metadata",
|
||||
"dc-metadata",
|
||||
"dc:Subject",
|
||||
"dc:Description",
|
||||
"dc:Publisher",
|
||||
"dc:Contributor",
|
||||
"dc:Date",
|
||||
"dc:Type",
|
||||
"dc:Format",
|
||||
"dc:Identifier",
|
||||
"dc:Source",
|
||||
"dc:Language",
|
||||
"dc:Relation",
|
||||
"dc:Coverage",
|
||||
"dc:Rights",
|
||||
"x-metadata",
|
||||
"meta",
|
||||
"tours",
|
||||
"tour",
|
||||
"site",
|
||||
"guide",
|
||||
"reference",
|
||||
None,
|
||||
]
|
||||
|
||||
ATTRS = {
|
||||
0x0001: "href",
|
||||
0x0002: "%never-used",
|
||||
0x0003: "%guid",
|
||||
0x0004: "%minimum_level",
|
||||
0x0005: "%attr5",
|
||||
0x0006: "id",
|
||||
0x0007: "href",
|
||||
0x0008: "media-type",
|
||||
0x0009: "fallback",
|
||||
0x000A: "idref",
|
||||
0x000B: "xmlns:dc",
|
||||
0x000C: "xmlns:oebpackage",
|
||||
0x000D: "role",
|
||||
0x000E: "file-as",
|
||||
0x000F: "event",
|
||||
0x0010: "scheme",
|
||||
0x0011: "title",
|
||||
0x0012: "type",
|
||||
0x0013: "unique-identifier",
|
||||
0x0014: "name",
|
||||
0x0015: "content",
|
||||
0x0016: "xml:lang",
|
||||
}
|
||||
|
||||
TAGS_ATTRS = [{} for i in xrange(43)]
|
||||
|
||||
MAP = (TAGS, ATTRS, TAGS_ATTRS)
|
489
src/calibre/ebooks/lit/msdes.py
Normal file
489
src/calibre/ebooks/lit/msdes.py
Normal file
@ -0,0 +1,489 @@
|
||||
# Re-modified for use in MS LIT decryption. Un-reversed the bytebit[] array.
|
||||
# Substituted Microsoft's absurd modified S-boxes. Modified the
|
||||
# encrypt/decrypt methods to handle more than one block at a time. Added a few
|
||||
# speed-ups supported by modern versions of Python. Added option 'psyco' use.
|
||||
#
|
||||
# And lo, all the previous notices follow:
|
||||
|
||||
# Modified DES encryption for VNC password authentication.
|
||||
# Ported from realvnc's java viewer by <cliechti@gmx.net>
|
||||
# I chose this package name because it is not compatible with the
|
||||
# original DES algorithm, e.g. found pycrypto.
|
||||
#
|
||||
# (C) 2003 chris <cliechti@gmx.net>
|
||||
# Released as free software under the Python License.
|
||||
#
|
||||
# You're free to use it for commercial and noncommercial
|
||||
# application, modify and redistribute it as long as the
|
||||
# copyright notices are intact. There are no warranties, not
|
||||
# even that it does what it says to do ;-)
|
||||
#
|
||||
# Original notice following:
|
||||
|
||||
# This DES class has been extracted from package Acme.Crypto for use in VNC.
|
||||
# The bytebit[] array has been reversed so that the most significant bit
|
||||
# in each byte of the key is ignored, not the least significant. Also the
|
||||
# unnecessary odd parity code has been removed.
|
||||
#
|
||||
# These changes are:
|
||||
# Copyright (C) 1999 AT&T Laboratories Cambridge. All Rights Reserved.
|
||||
#
|
||||
# This software is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
#
|
||||
|
||||
# DesCipher - the DES encryption method
|
||||
#
|
||||
# The meat of this code is by Dave Zimmerman <dzimm@widget.com>, and is:
|
||||
#
|
||||
# Copyright (c) 1996 Widget Workshop, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software
|
||||
# and its documentation for NON-COMMERCIAL or COMMERCIAL purposes and
|
||||
# without fee is hereby granted, provided that this copyright notice is kept
|
||||
# intact.
|
||||
#
|
||||
# WIDGET WORKSHOP MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY
|
||||
# OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
# TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
# PARTICULAR PURPOSE, OR NON-INFRINGEMENT. WIDGET WORKSHOP SHALL NOT BE LIABLE
|
||||
# FOR ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
|
||||
# DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
|
||||
#
|
||||
# THIS SOFTWARE IS NOT DESIGNED OR INTENDED FOR USE OR RESALE AS ON-LINE
|
||||
# CONTROL EQUIPMENT IN HAZARDOUS ENVIRONMENTS REQUIRING FAIL-SAFE
|
||||
# PERFORMANCE, SUCH AS IN THE OPERATION OF NUCLEAR FACILITIES, AIRCRAFT
|
||||
# NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL, DIRECT LIFE
|
||||
# SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH THE FAILURE OF THE
|
||||
# SOFTWARE COULD LEAD DIRECTLY TO DEATH, PERSONAL INJURY, OR SEVERE
|
||||
# PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH RISK ACTIVITIES"). WIDGET WORKSHOP
|
||||
# SPECIFICALLY DISCLAIMS ANY EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR
|
||||
# HIGH RISK ACTIVITIES.
|
||||
#
|
||||
#
|
||||
# The rest is:
|
||||
#
|
||||
# Copyright (C) 1996 by Jef Poskanzer <jef@acme.com>. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
# SUCH DAMAGE.
|
||||
#
|
||||
# Visit the ACME Labs Java page for up-to-date versions of this and other
|
||||
# fine Java utilities: http://www.acme.com/java/
|
||||
|
||||
|
||||
#/ The DES encryption method.
|
||||
# <P>
|
||||
# This is surprisingly fast, for pure Java. On a SPARC 20, wrapped
|
||||
# in Acme.Crypto.EncryptedOutputStream or Acme.Crypto.EncryptedInputStream,
|
||||
# it does around 7000 bytes/second.
|
||||
# <P>
|
||||
# Most of this code is by Dave Zimmerman <dzimm@widget.com>, and is
|
||||
# Copyright (c) 1996 Widget Workshop, Inc. See the source file for details.
|
||||
# <P>
|
||||
# <A HREF="/resources/classes/Acme/Crypto/DesCipher.java">Fetch the software.</A><BR>
|
||||
# <A HREF="/resources/classes/Acme.tar.Z">Fetch the entire Acme package.</A>
|
||||
# <P>
|
||||
# @see Des3Cipher
|
||||
# @see EncryptedOutputStream
|
||||
# @see EncryptedInputStream
|
||||
|
||||
import struct
|
||||
|
||||
class DesCipher:
|
||||
# Constructor, byte-array key.
|
||||
def __init__(self, key):
|
||||
self.setKey(key)
|
||||
|
||||
#/ Set the key.
|
||||
def setKey(self, key):
|
||||
self.encryptKeys = self.deskey([ord(x) for x in key], 1)
|
||||
self.decryptKeys = self.deskey([ord(x) for x in key], 0)
|
||||
|
||||
# Turn an 8-byte key into internal keys.
|
||||
def deskey(self, keyBlock, encrypting):
|
||||
#~ int i, j, l, m, n;
|
||||
pc1m = [0]*56 #new int[56];
|
||||
pcr = [0]*56 #new int[56];
|
||||
kn = [0]*32 #new int[32];
|
||||
|
||||
for j in xrange(56):
|
||||
l = pc1[j]
|
||||
m = l & 07
|
||||
pc1m[j] = ((keyBlock[l >> 3] & bytebit[m]) != 0)
|
||||
for i in xrange(16):
|
||||
if encrypting:
|
||||
m = i << 1
|
||||
else:
|
||||
m = (15-i) << 1
|
||||
n = m + 1
|
||||
kn[m] = kn[n] = 0
|
||||
for j in xrange(28):
|
||||
l = j + totrot[i]
|
||||
if l < 28:
|
||||
pcr[j] = pc1m[l]
|
||||
else:
|
||||
pcr[j] = pc1m[l - 28]
|
||||
for j in xrange(28, 56):
|
||||
l = j + totrot[i]
|
||||
if l < 56:
|
||||
pcr[j] = pc1m[l]
|
||||
else:
|
||||
pcr[j] = pc1m[l - 28]
|
||||
for j in xrange(24):
|
||||
if pcr[pc2[j]] != 0:
|
||||
kn[m] |= bigbyte[j]
|
||||
if pcr[pc2[j+24]] != 0:
|
||||
kn[n] |= bigbyte[j]
|
||||
return self.cookey(kn)
|
||||
|
||||
def cookey(self, raw):
|
||||
#~ int raw0, raw1;
|
||||
#~ int rawi, KnLi;
|
||||
#~ int i;
|
||||
KnL = [0]*32
|
||||
|
||||
rawi = 0
|
||||
KnLi = 0
|
||||
for i in xrange(16):
|
||||
raw0 = raw[rawi]
|
||||
rawi += 1
|
||||
raw1 = raw[rawi]
|
||||
rawi += 1
|
||||
KnL[KnLi] = (raw0 & 0x00fc0000L) << 6
|
||||
KnL[KnLi] |= (raw0 & 0x00000fc0L) << 10
|
||||
KnL[KnLi] |= (raw1 & 0x00fc0000L) >> 10
|
||||
KnL[KnLi] |= (raw1 & 0x00000fc0L) >> 6
|
||||
KnLi += 1
|
||||
KnL[KnLi] = (raw0 & 0x0003f000L) << 12
|
||||
KnL[KnLi] |= (raw0 & 0x0000003fL) << 16
|
||||
KnL[KnLi] |= (raw1 & 0x0003f000L) >> 4
|
||||
KnL[KnLi] |= (raw1 & 0x0000003fL)
|
||||
KnLi += 1
|
||||
return KnL
|
||||
|
||||
# Block encryption routines.
|
||||
|
||||
#/ Encrypt a block of eight bytes.
|
||||
def encrypt(self, clearText):
|
||||
if len(clearText) % 8 != 0:
|
||||
raise TypeError, "length must be multiple of block size"
|
||||
result = []
|
||||
for base in xrange(0, len(clearText), 8):
|
||||
result.append(struct.pack(
|
||||
">LL", *self.des(struct.unpack(">LL", clearText[base:base+8]),
|
||||
self.encryptKeys)))
|
||||
return ''.join(result)
|
||||
|
||||
#/ Decrypt a block of eight bytes.
|
||||
def decrypt(self, cipherText):
|
||||
if len(cipherText) % 8 != 0:
|
||||
raise TypeError, "length must be multiple of block size"
|
||||
result = []
|
||||
for base in xrange(0, len(cipherText), 8):
|
||||
result.append(struct.pack(
|
||||
">LL", *self.des(struct.unpack(">LL", cipherText[base:base+8]),
|
||||
self.decryptKeys)))
|
||||
return ''.join(result)
|
||||
|
||||
# The DES function.
|
||||
def des(self, (leftt, right), keys):
|
||||
#~ int fval, work, right, leftt;
|
||||
#~ int round
|
||||
keysi = 0
|
||||
|
||||
work = ((leftt >> 4) ^ right) & 0x0f0f0f0fL
|
||||
right ^= work
|
||||
leftt ^= (work << 4) & 0xffffffffL
|
||||
|
||||
work = ((leftt >> 16) ^ right) & 0x0000ffffL
|
||||
right ^= work
|
||||
leftt ^= (work << 16) & 0xffffffffL
|
||||
|
||||
work = ((right >> 2) ^ leftt) & 0x33333333L
|
||||
leftt ^= work
|
||||
right ^= (work << 2) & 0xffffffffL
|
||||
|
||||
work = ((right >> 8) ^ leftt) & 0x00ff00ffL
|
||||
leftt ^= work
|
||||
right ^= (work << 8) & 0xffffffffL
|
||||
right = ((right << 1) | ((right >> 31) & 1)) & 0xffffffffL
|
||||
|
||||
work = (leftt ^ right) & 0xaaaaaaaaL
|
||||
leftt ^= work
|
||||
right ^= work
|
||||
leftt = ((leftt << 1) | ((leftt >> 31) & 1)) & 0xffffffffL
|
||||
|
||||
for round in xrange(8):
|
||||
work = ((right << 28) | (right >> 4)) & 0xffffffffL
|
||||
work ^= keys[keysi]
|
||||
keysi += 1
|
||||
fval = SP7[ work & 0x0000003fL ]
|
||||
fval |= SP5[(work >> 8) & 0x0000003fL ]
|
||||
fval |= SP3[(work >> 16) & 0x0000003fL ]
|
||||
fval |= SP1[(work >> 24) & 0x0000003fL ]
|
||||
work = right ^ keys[keysi]
|
||||
keysi += 1
|
||||
fval |= SP8[ work & 0x0000003fL ]
|
||||
fval |= SP6[(work >> 8) & 0x0000003fL ]
|
||||
fval |= SP4[(work >> 16) & 0x0000003fL ]
|
||||
fval |= SP2[(work >> 24) & 0x0000003fL ]
|
||||
leftt ^= fval
|
||||
work = ((leftt << 28) | (leftt >> 4)) & 0xffffffffL
|
||||
work ^= keys[keysi]
|
||||
keysi += 1
|
||||
fval = SP7[ work & 0x0000003fL ]
|
||||
fval |= SP5[(work >> 8) & 0x0000003fL ]
|
||||
fval |= SP3[(work >> 16) & 0x0000003fL ]
|
||||
fval |= SP1[(work >> 24) & 0x0000003fL ]
|
||||
work = leftt ^ keys[keysi]
|
||||
keysi += 1
|
||||
fval |= SP8[ work & 0x0000003fL ]
|
||||
fval |= SP6[(work >> 8) & 0x0000003fL ]
|
||||
fval |= SP4[(work >> 16) & 0x0000003fL ]
|
||||
fval |= SP2[(work >> 24) & 0x0000003fL ]
|
||||
right ^= fval
|
||||
|
||||
right = ((right << 31) | (right >> 1)) & 0xffffffffL
|
||||
work = (leftt ^ right) & 0xaaaaaaaaL
|
||||
leftt ^= work
|
||||
right ^= work
|
||||
leftt = ((leftt << 31) | (leftt >> 1)) & 0xffffffffL
|
||||
work = ((leftt >> 8) ^ right) & 0x00ff00ffL
|
||||
right ^= work
|
||||
leftt ^= (work << 8) & 0xffffffffL
|
||||
work = ((leftt >> 2) ^ right) & 0x33333333L
|
||||
right ^= work
|
||||
leftt ^= (work << 2) & 0xffffffffL
|
||||
work = ((right >> 16) ^ leftt) & 0x0000ffffL
|
||||
leftt ^= work
|
||||
right ^= (work << 16) & 0xffffffffL
|
||||
work = ((right >> 4) ^ leftt) & 0x0f0f0f0fL
|
||||
leftt ^= work
|
||||
right ^= (work << 4) & 0xffffffffL
|
||||
return right, leftt
|
||||
|
||||
# Tables, permutations, S-boxes, etc.
|
||||
|
||||
bytebit = [0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01]
|
||||
|
||||
bigbyte = [
|
||||
0x800000, 0x400000, 0x200000, 0x100000,
|
||||
0x080000, 0x040000, 0x020000, 0x010000,
|
||||
0x008000, 0x004000, 0x002000, 0x001000,
|
||||
0x000800, 0x000400, 0x000200, 0x000100,
|
||||
0x000080, 0x000040, 0x000020, 0x000010,
|
||||
0x000008, 0x000004, 0x000002, 0x000001
|
||||
]
|
||||
|
||||
pc1 = [
|
||||
56, 48, 40, 32, 24, 16, 8,
|
||||
0, 57, 49, 41, 33, 25, 17,
|
||||
9, 1, 58, 50, 42, 34, 26,
|
||||
18, 10, 2, 59, 51, 43, 35,
|
||||
62, 54, 46, 38, 30, 22, 14,
|
||||
6, 61, 53, 45, 37, 29, 21,
|
||||
13, 5, 60, 52, 44, 36, 28,
|
||||
20, 12, 4, 27, 19, 11, 3
|
||||
]
|
||||
|
||||
totrot = [
|
||||
1, 2, 4, 6, 8, 10, 12, 14, 15, 17, 19, 21, 23, 25, 27, 28
|
||||
]
|
||||
|
||||
pc2 = [
|
||||
13, 16, 10, 23, 0, 4,
|
||||
2, 27, 14, 5, 20, 9,
|
||||
22, 18, 11, 3 , 25, 7,
|
||||
15, 6, 26, 19, 12, 1,
|
||||
40, 51, 30, 36, 46, 54,
|
||||
29, 39, 50, 44, 32, 47,
|
||||
43, 48, 38, 55, 33, 52,
|
||||
45, 41, 49, 35, 28, 31,
|
||||
]
|
||||
|
||||
# Microsoft's modified S-boxes for LIT file encryption
|
||||
SP1 = [
|
||||
0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L,
|
||||
0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L,
|
||||
0x00080802L, 0x02080800L, 0x02080000L, 0x00000802L,
|
||||
0x02000802L, 0x02000000L, 0x00000000L, 0x00080002L,
|
||||
0x00080000L, 0x00000002L, 0x02000800L, 0x00080800L,
|
||||
0x02080802L, 0x02080000L, 0x00000802L, 0x02000800L,
|
||||
0x00000002L, 0x00000800L, 0x00080800L, 0x02080002L,
|
||||
0x00000800L, 0x02000802L, 0x02080002L, 0x00000000L,
|
||||
0x00000000L, 0x02080802L, 0x02000800L, 0x00080002L,
|
||||
0x02080800L, 0x00080000L, 0x00000802L, 0x02000800L,
|
||||
0x02080002L, 0x00000800L, 0x00080800L, 0x02000002L,
|
||||
0x00080802L, 0x00000002L, 0x02000002L, 0x02080000L,
|
||||
0x02080802L, 0x00080800L, 0x02080000L, 0x02000802L,
|
||||
0x02000000L, 0x00000802L, 0x00080002L, 0x00000000L,
|
||||
0x00080000L, 0x02000000L, 0x02000802L, 0x02080800L,
|
||||
0x00000002L, 0x02080002L, 0x00000800L, 0x00080802L
|
||||
]
|
||||
SP2 = [
|
||||
0x40108010L, 0x00000000L, 0x00108000L, 0x40100000L,
|
||||
0x40000010L, 0x00008010L, 0x40008000L, 0x00108000L,
|
||||
0x00008000L, 0x40100010L, 0x00000010L, 0x40008000L,
|
||||
0x00100010L, 0x40108000L, 0x40100000L, 0x00000010L,
|
||||
0x00100000L, 0x40008010L, 0x40100010L, 0x00008000L,
|
||||
0x00108010L, 0x40000000L, 0x00000000L, 0x00100010L,
|
||||
0x40008010L, 0x00108010L, 0x40108000L, 0x40000010L,
|
||||
0x40000000L, 0x00100000L, 0x00008010L, 0x40108010L,
|
||||
0x00100010L, 0x40108000L, 0x40008000L, 0x00108010L,
|
||||
0x40108010L, 0x00100010L, 0x40000010L, 0x00000000L,
|
||||
0x40000000L, 0x00008010L, 0x00100000L, 0x40100010L,
|
||||
0x00008000L, 0x40000000L, 0x00108010L, 0x40008010L,
|
||||
0x40108000L, 0x00008000L, 0x00000000L, 0x40000010L,
|
||||
0x00000010L, 0x40108010L, 0x00108000L, 0x40100000L,
|
||||
0x40100010L, 0x00100000L, 0x00008010L, 0x40008000L,
|
||||
0x40008010L, 0x00000010L, 0x40100000L, 0x00108000L
|
||||
]
|
||||
SP3 = [
|
||||
0x04000001L, 0x04040100L, 0x00000100L, 0x04000101L,
|
||||
0x00040001L, 0x04000000L, 0x04000101L, 0x00040100L,
|
||||
0x04000100L, 0x00040000L, 0x04040000L, 0x00000001L,
|
||||
0x04040101L, 0x00000101L, 0x00000001L, 0x04040001L,
|
||||
0x00000000L, 0x00040001L, 0x04040100L, 0x00000100L,
|
||||
0x00000101L, 0x04040101L, 0x00040000L, 0x04000001L,
|
||||
0x04040001L, 0x04000100L, 0x00040101L, 0x04040000L,
|
||||
0x00040100L, 0x00000000L, 0x04000000L, 0x00040101L,
|
||||
0x04040100L, 0x00000100L, 0x00000001L, 0x00040000L,
|
||||
0x00000101L, 0x00040001L, 0x04040000L, 0x04000101L,
|
||||
0x00000000L, 0x04040100L, 0x00040100L, 0x04040001L,
|
||||
0x00040001L, 0x04000000L, 0x04040101L, 0x00000001L,
|
||||
0x00040101L, 0x04000001L, 0x04000000L, 0x04040101L,
|
||||
0x00040000L, 0x04000100L, 0x04000101L, 0x00040100L,
|
||||
0x04000100L, 0x00000000L, 0x04040001L, 0x00000101L,
|
||||
0x04000001L, 0x00040101L, 0x00000100L, 0x04040000L
|
||||
]
|
||||
SP4 = [
|
||||
0x00401008L, 0x10001000L, 0x00000008L, 0x10401008L,
|
||||
0x00000000L, 0x10400000L, 0x10001008L, 0x00400008L,
|
||||
0x10401000L, 0x10000008L, 0x10000000L, 0x00001008L,
|
||||
0x10000008L, 0x00401008L, 0x00400000L, 0x10000000L,
|
||||
0x10400008L, 0x00401000L, 0x00001000L, 0x00000008L,
|
||||
0x00401000L, 0x10001008L, 0x10400000L, 0x00001000L,
|
||||
0x00001008L, 0x00000000L, 0x00400008L, 0x10401000L,
|
||||
0x10001000L, 0x10400008L, 0x10401008L, 0x00400000L,
|
||||
0x10400008L, 0x00001008L, 0x00400000L, 0x10000008L,
|
||||
0x00401000L, 0x10001000L, 0x00000008L, 0x10400000L,
|
||||
0x10001008L, 0x00000000L, 0x00001000L, 0x00400008L,
|
||||
0x00000000L, 0x10400008L, 0x10401000L, 0x00001000L,
|
||||
0x10000000L, 0x10401008L, 0x00401008L, 0x00400000L,
|
||||
0x10401008L, 0x00000008L, 0x10001000L, 0x00401008L,
|
||||
0x00400008L, 0x00401000L, 0x10400000L, 0x10001008L,
|
||||
0x00001008L, 0x10000000L, 0x10000008L, 0x10401000L
|
||||
]
|
||||
SP5 = [
|
||||
0x08000000L, 0x00010000L, 0x00000400L, 0x08010420L,
|
||||
0x08010020L, 0x08000400L, 0x00010420L, 0x08010000L,
|
||||
0x00010000L, 0x00000020L, 0x08000020L, 0x00010400L,
|
||||
0x08000420L, 0x08010020L, 0x08010400L, 0x00000000L,
|
||||
0x00010400L, 0x08000000L, 0x00010020L, 0x00000420L,
|
||||
0x08000400L, 0x00010420L, 0x00000000L, 0x08000020L,
|
||||
0x00000020L, 0x08000420L, 0x08010420L, 0x00010020L,
|
||||
0x08010000L, 0x00000400L, 0x00000420L, 0x08010400L,
|
||||
0x08010400L, 0x08000420L, 0x00010020L, 0x08010000L,
|
||||
0x00010000L, 0x00000020L, 0x08000020L, 0x08000400L,
|
||||
0x08000000L, 0x00010400L, 0x08010420L, 0x00000000L,
|
||||
0x00010420L, 0x08000000L, 0x00000400L, 0x00010020L,
|
||||
0x08000420L, 0x00000400L, 0x00000000L, 0x08010420L,
|
||||
0x08010020L, 0x08010400L, 0x00000420L, 0x00010000L,
|
||||
0x00010400L, 0x08010020L, 0x08000400L, 0x00000420L,
|
||||
0x00000020L, 0x00010420L, 0x08010000L, 0x08000020L
|
||||
]
|
||||
SP6 = [
|
||||
0x80000040L, 0x00200040L, 0x00000000L, 0x80202000L,
|
||||
0x00200040L, 0x00002000L, 0x80002040L, 0x00200000L,
|
||||
0x00002040L, 0x80202040L, 0x00202000L, 0x80000000L,
|
||||
0x80002000L, 0x80000040L, 0x80200000L, 0x00202040L,
|
||||
0x00200000L, 0x80002040L, 0x80200040L, 0x00000000L,
|
||||
0x00002000L, 0x00000040L, 0x80202000L, 0x80200040L,
|
||||
0x80202040L, 0x80200000L, 0x80000000L, 0x00002040L,
|
||||
0x00000040L, 0x00202000L, 0x00202040L, 0x80002000L,
|
||||
0x00002040L, 0x80000000L, 0x80002000L, 0x00202040L,
|
||||
0x80202000L, 0x00200040L, 0x00000000L, 0x80002000L,
|
||||
0x80000000L, 0x00002000L, 0x80200040L, 0x00200000L,
|
||||
0x00200040L, 0x80202040L, 0x00202000L, 0x00000040L,
|
||||
0x80202040L, 0x00202000L, 0x00200000L, 0x80002040L,
|
||||
0x80000040L, 0x80200000L, 0x00202040L, 0x00000000L,
|
||||
0x00002000L, 0x80000040L, 0x80002040L, 0x80202000L,
|
||||
0x80200000L, 0x00002040L, 0x00000040L, 0x80200040L,
|
||||
]
|
||||
SP7 = [
|
||||
0x00004000L, 0x00000200L, 0x01000200L, 0x01000004L,
|
||||
0x01004204L, 0x00004004L, 0x00004200L, 0x00000000L,
|
||||
0x01000000L, 0x01000204L, 0x00000204L, 0x01004000L,
|
||||
0x00000004L, 0x01004200L, 0x01004000L, 0x00000204L,
|
||||
0x01000204L, 0x00004000L, 0x00004004L, 0x01004204L,
|
||||
0x00000000L, 0x01000200L, 0x01000004L, 0x00004200L,
|
||||
0x01004004L, 0x00004204L, 0x01004200L, 0x00000004L,
|
||||
0x00004204L, 0x01004004L, 0x00000200L, 0x01000000L,
|
||||
0x00004204L, 0x01004000L, 0x01004004L, 0x00000204L,
|
||||
0x00004000L, 0x00000200L, 0x01000000L, 0x01004004L,
|
||||
0x01000204L, 0x00004204L, 0x00004200L, 0x00000000L,
|
||||
0x00000200L, 0x01000004L, 0x00000004L, 0x01000200L,
|
||||
0x00000000L, 0x01000204L, 0x01000200L, 0x00004200L,
|
||||
0x00000204L, 0x00004000L, 0x01004204L, 0x01000000L,
|
||||
0x01004200L, 0x00000004L, 0x00004004L, 0x01004204L,
|
||||
0x01000004L, 0x01004200L, 0x01004000L, 0x00004004L,
|
||||
]
|
||||
SP8 = [
|
||||
0x20800080L, 0x20820000L, 0x00020080L, 0x00000000L,
|
||||
0x20020000L, 0x00800080L, 0x20800000L, 0x20820080L,
|
||||
0x00000080L, 0x20000000L, 0x00820000L, 0x00020080L,
|
||||
0x00820080L, 0x20020080L, 0x20000080L, 0x20800000L,
|
||||
0x00020000L, 0x00820080L, 0x00800080L, 0x20020000L,
|
||||
0x20820080L, 0x20000080L, 0x00000000L, 0x00820000L,
|
||||
0x20000000L, 0x00800000L, 0x20020080L, 0x20800080L,
|
||||
0x00800000L, 0x00020000L, 0x20820000L, 0x00000080L,
|
||||
0x00800000L, 0x00020000L, 0x20000080L, 0x20820080L,
|
||||
0x00020080L, 0x20000000L, 0x00000000L, 0x00820000L,
|
||||
0x20800080L, 0x20020080L, 0x20020000L, 0x00800080L,
|
||||
0x20820000L, 0x00000080L, 0x00800080L, 0x20020000L,
|
||||
0x20820080L, 0x00800000L, 0x20800000L, 0x20000080L,
|
||||
0x00820000L, 0x00020080L, 0x20020080L, 0x20800000L,
|
||||
0x00000080L, 0x20820000L, 0x00820080L, 0x00000000L,
|
||||
0x20000000L, 0x20800080L, 0x00020000L, 0x00820080L,
|
||||
]
|
||||
|
||||
def new(key):
|
||||
return DesCipher(key)
|
||||
|
||||
block_size = 8
|
||||
key_size = 8
|
||||
|
||||
try:
|
||||
import psyco
|
||||
psyco.bind(DesCipher.deskey)
|
||||
psyco.bind(DesCipher.cookey)
|
||||
psyco.bind(DesCipher.des)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
#test only:
|
||||
if __name__ == '__main__':
|
||||
des = DesCipher("\x01\x23\x45\x67\x89\xab\xcd\xef")
|
||||
print ''.join(
|
||||
"%02x" % ord(x) for x in des.encrypt("Now is t"))
|
||||
|
337
src/calibre/ebooks/lit/mssha1.py
Normal file
337
src/calibre/ebooks/lit/mssha1.py
Normal file
@ -0,0 +1,337 @@
|
||||
"""
|
||||
Modified version of SHA-1 used in Microsoft LIT files.
|
||||
|
||||
Adapted from the PyPy pure-Python SHA-1 implementation.
|
||||
"""
|
||||
|
||||
import struct, copy
|
||||
|
||||
# ======================================================================
|
||||
# Bit-Manipulation helpers
|
||||
#
|
||||
# _long2bytes() was contributed by Barry Warsaw
|
||||
# and is reused here with tiny modifications.
|
||||
# ======================================================================
|
||||
|
||||
def _long2bytesBigEndian(n, blocksize=0):
|
||||
"""Convert a long integer to a byte string.
|
||||
|
||||
If optional blocksize is given and greater than zero, pad the front
|
||||
of the byte string with binary zeros so that the length is a multiple
|
||||
of blocksize.
|
||||
"""
|
||||
|
||||
# After much testing, this algorithm was deemed to be the fastest.
|
||||
s = ''
|
||||
pack = struct.pack
|
||||
while n > 0:
|
||||
s = pack('>I', n & 0xffffffffL) + s
|
||||
n = n >> 32
|
||||
|
||||
# Strip off leading zeros.
|
||||
for i in range(len(s)):
|
||||
if s[i] != '\000':
|
||||
break
|
||||
else:
|
||||
# Only happens when n == 0.
|
||||
s = '\000'
|
||||
i = 0
|
||||
|
||||
s = s[i:]
|
||||
|
||||
# Add back some pad bytes. This could be done more efficiently
|
||||
# w.r.t. the de-padding being done above, but sigh...
|
||||
if blocksize > 0 and len(s) % blocksize:
|
||||
s = (blocksize - len(s) % blocksize) * '\000' + s
|
||||
|
||||
return s
|
||||
|
||||
|
||||
def _bytelist2longBigEndian(list):
|
||||
"Transform a list of characters into a list of longs."
|
||||
|
||||
imax = len(list)/4
|
||||
hl = [0L] * imax
|
||||
|
||||
j = 0
|
||||
i = 0
|
||||
while i < imax:
|
||||
b0 = long(ord(list[j])) << 24
|
||||
b1 = long(ord(list[j+1])) << 16
|
||||
b2 = long(ord(list[j+2])) << 8
|
||||
b3 = long(ord(list[j+3]))
|
||||
hl[i] = b0 | b1 | b2 | b3
|
||||
i = i+1
|
||||
j = j+4
|
||||
|
||||
return hl
|
||||
|
||||
|
||||
def _rotateLeft(x, n):
|
||||
"Rotate x (32 bit) left n bits circularly."
|
||||
|
||||
return (x << n) | (x >> (32-n))
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# The SHA transformation functions
|
||||
#
|
||||
# ======================================================================
|
||||
|
||||
def f0_19(B, C, D):
|
||||
return (B & (C ^ D)) ^ D
|
||||
|
||||
def f20_39(B, C, D):
|
||||
return B ^ C ^ D
|
||||
|
||||
def f40_59(B, C, D):
|
||||
return ((B | C) & D) | (B & C)
|
||||
|
||||
def f60_79(B, C, D):
|
||||
return B ^ C ^ D
|
||||
|
||||
# Microsoft's lovely addition...
|
||||
def f6_42(B, C, D):
|
||||
return (B + C) ^ C
|
||||
|
||||
f = [f0_19]*20 + [f20_39]*20 + [f40_59]*20 + [f60_79]*20
|
||||
|
||||
# ...and delightful changes
|
||||
f[3] = f20_39
|
||||
f[6] = f6_42
|
||||
f[10] = f20_39
|
||||
f[15] = f20_39
|
||||
f[26] = f0_19
|
||||
f[31] = f40_59
|
||||
f[42] = f6_42
|
||||
f[51] = f20_39
|
||||
f[68] = f0_19
|
||||
|
||||
|
||||
# Constants to be used
|
||||
K = [
|
||||
0x5A827999L, # ( 0 <= t <= 19)
|
||||
0x6ED9EBA1L, # (20 <= t <= 39)
|
||||
0x8F1BBCDCL, # (40 <= t <= 59)
|
||||
0xCA62C1D6L # (60 <= t <= 79)
|
||||
]
|
||||
|
||||
class mssha1(object):
|
||||
"An implementation of the MD5 hash function in pure Python."
|
||||
|
||||
def __init__(self):
|
||||
"Initialisation."
|
||||
|
||||
# Initial message length in bits(!).
|
||||
self.length = 0L
|
||||
self.count = [0, 0]
|
||||
|
||||
# Initial empty message as a sequence of bytes (8 bit characters).
|
||||
self.input = []
|
||||
|
||||
# Call a separate init function, that can be used repeatedly
|
||||
# to start from scratch on the same object.
|
||||
self.init()
|
||||
|
||||
|
||||
def init(self):
|
||||
"Initialize the message-digest and set all fields to zero."
|
||||
|
||||
self.length = 0L
|
||||
self.input = []
|
||||
|
||||
# Initial 160 bit message digest (5 times 32 bit).
|
||||
# Also changed by Microsoft from standard.
|
||||
self.H0 = 0x32107654L
|
||||
self.H1 = 0x23016745L
|
||||
self.H2 = 0xC4E680A2L
|
||||
self.H3 = 0xDC679823L
|
||||
self.H4 = 0xD0857A34L
|
||||
|
||||
def _transform(self, W):
|
||||
for t in range(16, 80):
|
||||
W.append(_rotateLeft(
|
||||
W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1) & 0xffffffffL)
|
||||
|
||||
A = self.H0
|
||||
B = self.H1
|
||||
C = self.H2
|
||||
D = self.H3
|
||||
E = self.H4
|
||||
|
||||
for t in xrange(0, 80):
|
||||
TEMP = _rotateLeft(A, 5) + f[t](B, C, D) + E + W[t] + K[t/20]
|
||||
E = D
|
||||
D = C
|
||||
C = _rotateLeft(B, 30) & 0xffffffffL
|
||||
B = A
|
||||
A = TEMP & 0xffffffffL
|
||||
|
||||
self.H0 = (self.H0 + A) & 0xffffffffL
|
||||
self.H1 = (self.H1 + B) & 0xffffffffL
|
||||
self.H2 = (self.H2 + C) & 0xffffffffL
|
||||
self.H3 = (self.H3 + D) & 0xffffffffL
|
||||
self.H4 = (self.H4 + E) & 0xffffffffL
|
||||
|
||||
|
||||
# Down from here all methods follow the Python Standard Library
|
||||
# API of the sha module.
|
||||
|
||||
def update(self, inBuf):
|
||||
"""Add to the current message.
|
||||
|
||||
Update the mssha1 object with the string arg. Repeated calls
|
||||
are equivalent to a single call with the concatenation of all
|
||||
the arguments, i.e. s.update(a); s.update(b) is equivalent
|
||||
to s.update(a+b).
|
||||
|
||||
The hash is immediately calculated for all full blocks. The final
|
||||
calculation is made in digest(). It will calculate 1-2 blocks,
|
||||
depending on how much padding we have to add. This allows us to
|
||||
keep an intermediate value for the hash, so that we only need to
|
||||
make minimal recalculation if we call update() to add more data
|
||||
to the hashed string.
|
||||
"""
|
||||
|
||||
leninBuf = long(len(inBuf))
|
||||
|
||||
# Compute number of bytes mod 64.
|
||||
index = (self.count[1] >> 3) & 0x3FL
|
||||
|
||||
# Update number of bits.
|
||||
self.count[1] = self.count[1] + (leninBuf << 3)
|
||||
if self.count[1] < (leninBuf << 3):
|
||||
self.count[0] = self.count[0] + 1
|
||||
self.count[0] = self.count[0] + (leninBuf >> 29)
|
||||
|
||||
partLen = 64 - index
|
||||
|
||||
if leninBuf >= partLen:
|
||||
self.input[index:] = list(inBuf[:partLen])
|
||||
self._transform(_bytelist2longBigEndian(self.input))
|
||||
i = partLen
|
||||
while i + 63 < leninBuf:
|
||||
self._transform(_bytelist2longBigEndian(list(inBuf[i:i+64])))
|
||||
i = i + 64
|
||||
else:
|
||||
self.input = list(inBuf[i:leninBuf])
|
||||
else:
|
||||
i = 0
|
||||
self.input = self.input + list(inBuf)
|
||||
|
||||
|
||||
def digest(self):
|
||||
"""Terminate the message-digest computation and return digest.
|
||||
|
||||
Return the digest of the strings passed to the update()
|
||||
method so far. This is a 16-byte string which may contain
|
||||
non-ASCII characters, including null bytes.
|
||||
"""
|
||||
|
||||
H0 = self.H0
|
||||
H1 = self.H1
|
||||
H2 = self.H2
|
||||
H3 = self.H3
|
||||
H4 = self.H4
|
||||
input = [] + self.input
|
||||
count = [] + self.count
|
||||
|
||||
index = (self.count[1] >> 3) & 0x3fL
|
||||
|
||||
if index < 56:
|
||||
padLen = 56 - index
|
||||
else:
|
||||
padLen = 120 - index
|
||||
|
||||
padding = ['\200'] + ['\000'] * 63
|
||||
self.update(padding[:padLen])
|
||||
|
||||
# Append length (before padding).
|
||||
bits = _bytelist2longBigEndian(self.input[:56]) + count
|
||||
|
||||
self._transform(bits)
|
||||
|
||||
# Store state in digest.
|
||||
digest = _long2bytesBigEndian(self.H0, 4) + \
|
||||
_long2bytesBigEndian(self.H1, 4) + \
|
||||
_long2bytesBigEndian(self.H2, 4) + \
|
||||
_long2bytesBigEndian(self.H3, 4) + \
|
||||
_long2bytesBigEndian(self.H4, 4)
|
||||
|
||||
self.H0 = H0
|
||||
self.H1 = H1
|
||||
self.H2 = H2
|
||||
self.H3 = H3
|
||||
self.H4 = H4
|
||||
self.input = input
|
||||
self.count = count
|
||||
|
||||
return digest
|
||||
|
||||
|
||||
def hexdigest(self):
|
||||
"""Terminate and return digest in HEX form.
|
||||
|
||||
Like digest() except the digest is returned as a string of
|
||||
length 32, containing only hexadecimal digits. This may be
|
||||
used to exchange the value safely in email or other non-
|
||||
binary environments.
|
||||
"""
|
||||
return ''.join(['%02x' % ord(c) for c in self.digest()])
|
||||
|
||||
def copy(self):
|
||||
"""Return a clone object.
|
||||
|
||||
Return a copy ('clone') of the md5 object. This can be used
|
||||
to efficiently compute the digests of strings that share
|
||||
a common initial substring.
|
||||
"""
|
||||
|
||||
return copy.deepcopy(self)
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# Mimic Python top-level functions from standard library API
|
||||
# for consistency with the md5 module of the standard library.
|
||||
# ======================================================================
|
||||
|
||||
# These are mandatory variables in the module. They have constant values
|
||||
# in the SHA standard.
|
||||
|
||||
digest_size = digestsize = 20
|
||||
blocksize = 1
|
||||
|
||||
def new(arg=None):
|
||||
"""Return a new mssha1 crypto object.
|
||||
|
||||
If arg is present, the method call update(arg) is made.
|
||||
"""
|
||||
|
||||
crypto = mssha1()
|
||||
if arg:
|
||||
crypto.update(arg)
|
||||
|
||||
return crypto
|
||||
|
||||
if __name__ == '__main__':
|
||||
def main():
|
||||
import sys
|
||||
file = None
|
||||
if len(sys.argv) > 2:
|
||||
print "usage: %s [FILE]" % sys.argv[0]
|
||||
return
|
||||
elif len(sys.argv) < 2:
|
||||
file = sys.stdin
|
||||
else:
|
||||
file = open(sys.argv[1], 'rb')
|
||||
context = new()
|
||||
data = file.read(16384)
|
||||
while data:
|
||||
context.update(data)
|
||||
data = file.read(16384)
|
||||
file.close()
|
||||
digest = context.hexdigest().upper()
|
||||
for i in xrange(0, 40, 8):
|
||||
print digest[i:i+8],
|
||||
print
|
||||
main()
|
805
src/calibre/ebooks/lit/reader.py
Normal file
805
src/calibre/ebooks/lit/reader.py
Normal file
@ -0,0 +1,805 @@
|
||||
'''
|
||||
Support for reading LIT files.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
|
||||
'and Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import sys, struct, cStringIO, os
|
||||
import functools
|
||||
import codecs
|
||||
from itertools import repeat
|
||||
|
||||
from calibre import relpath
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.ebooks.metadata.opf import OPFReader
|
||||
from calibre.ebooks.lit import LitError
|
||||
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
|
||||
import calibre.ebooks.lit.mssha1 as mssha1
|
||||
import calibre.ebooks.lit.msdes as msdes
|
||||
import calibre.utils.lzx as lzx
|
||||
|
||||
OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!DOCTYPE package
|
||||
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
|
||||
"http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
|
||||
"""
|
||||
HTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!DOCTYPE html PUBLIC
|
||||
"+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN"
|
||||
"http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
|
||||
"""
|
||||
|
||||
DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}"
|
||||
LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}"
|
||||
|
||||
CONTROL_TAG = 4
|
||||
CONTROL_WINDOW_SIZE = 12
|
||||
RESET_NENTRIES = 4
|
||||
RESET_HDRLEN = 12
|
||||
RESET_UCLENGTH = 16
|
||||
RESET_INTERVAL = 32
|
||||
|
||||
FLAG_OPENING = (1 << 0)
|
||||
FLAG_CLOSING = (1 << 1)
|
||||
FLAG_BLOCK = (1 << 2)
|
||||
FLAG_HEAD = (1 << 3)
|
||||
FLAG_ATOM = (1 << 4)
|
||||
XML_ENTITIES = ['&', ''', '<', '>', '"']
|
||||
|
||||
def u32(bytes):
|
||||
return struct.unpack('<L', bytes[:4])[0]
|
||||
|
||||
def u16(bytes):
|
||||
return struct.unpack('<H', bytes[:2])[0]
|
||||
|
||||
def int32(bytes):
|
||||
return struct.unpack('<l', bytes[:4])[0]
|
||||
|
||||
def encint(bytes, remaining):
|
||||
pos, val = 0, 0
|
||||
while remaining > 0:
|
||||
b = ord(bytes[pos])
|
||||
pos += 1
|
||||
remaining -= 1
|
||||
val <<= 7
|
||||
val |= (b & 0x7f)
|
||||
if b & 0x80 == 0: break
|
||||
return val, bytes[pos:], remaining
|
||||
|
||||
def msguid(bytes):
|
||||
values = struct.unpack("<LHHBBBBBBBB", bytes[:16])
|
||||
return "{%08lX-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X}" % values
|
||||
|
||||
def read_utf8_char(bytes, pos):
|
||||
c = ord(bytes[pos])
|
||||
mask = 0x80
|
||||
if (c & mask):
|
||||
elsize = 0
|
||||
while c & mask:
|
||||
mask >>= 1
|
||||
elsize += 1
|
||||
if (mask <= 1) or (mask == 0x40):
|
||||
raise LitError('Invalid UTF8 character: %s' % repr(bytes[pos]))
|
||||
else:
|
||||
elsize = 1
|
||||
if elsize > 1:
|
||||
if elsize + pos > len(bytes):
|
||||
raise LitError('Invalid UTF8 character: %s' % repr(bytes[pos]))
|
||||
c &= (mask - 1)
|
||||
for i in range(1, elsize):
|
||||
b = ord(bytes[pos+i])
|
||||
if (b & 0xC0) != 0x80:
|
||||
raise LitError(
|
||||
'Invalid UTF8 character: %s' % repr(bytes[pos:pos+i]))
|
||||
c = (c << 6) | (b & 0x3F)
|
||||
return unichr(c), pos+elsize
|
||||
|
||||
class UnBinary(object):
|
||||
def __init__(self, bin, manifest, map=OPF_MAP):
|
||||
self.manifest = manifest
|
||||
self.tag_map, self.attr_map, self.tag_to_attr_map = map
|
||||
self.opf = map is OPF_MAP
|
||||
self.bin = bin
|
||||
self.buf = cStringIO.StringIO()
|
||||
self.ampersands = []
|
||||
self.binary_to_text()
|
||||
self.raw = self.buf.getvalue().lstrip().decode('utf-8')
|
||||
self.escape_ampersands()
|
||||
|
||||
def escape_ampersands(self):
|
||||
offset = 0
|
||||
for pos in self.ampersands:
|
||||
test = self.raw[pos+offset:pos+offset+6]
|
||||
if test.startswith('&#') and ';' in test:
|
||||
continue
|
||||
escape = True
|
||||
for ent in XML_ENTITIES:
|
||||
if test.startswith(ent):
|
||||
escape = False
|
||||
break
|
||||
if not escape:
|
||||
continue
|
||||
self.raw = '&'.join(
|
||||
(self.raw[:pos+offset], self.raw[pos+offset+1:]))
|
||||
offset += 4
|
||||
|
||||
def item_path(self, internal_id):
|
||||
try:
|
||||
return self.manifest[internal_id].path
|
||||
except KeyError:
|
||||
return internal_id
|
||||
|
||||
def __unicode__(self):
|
||||
return self.raw
|
||||
|
||||
def binary_to_text(self, base=0, depth=0):
|
||||
tag_name = current_map = None
|
||||
dynamic_tag = errors = 0
|
||||
in_censorship = is_goingdown = False
|
||||
state = 'text'
|
||||
index = base
|
||||
flags = 0
|
||||
|
||||
while index < len(self.bin):
|
||||
c, index = read_utf8_char(self.bin, index)
|
||||
oc = ord(c)
|
||||
|
||||
if state == 'text':
|
||||
if oc == 0:
|
||||
state = 'get flags'
|
||||
continue
|
||||
elif c == '\v':
|
||||
c = '\n'
|
||||
elif c == '&':
|
||||
self.ampersands.append(self.buf.tell()-1)
|
||||
self.buf.write(c.encode('utf-8'))
|
||||
|
||||
elif state == 'get flags':
|
||||
if oc == 0:
|
||||
state = 'text'
|
||||
continue
|
||||
flags = oc
|
||||
state = 'get tag'
|
||||
|
||||
elif state == 'get tag':
|
||||
state = 'text' if oc == 0 else 'get attr'
|
||||
if flags & FLAG_OPENING:
|
||||
tag = oc
|
||||
self.buf.write('<')
|
||||
if not (flags & FLAG_CLOSING):
|
||||
is_goingdown = True
|
||||
if tag == 0x8000:
|
||||
state = 'get custom length'
|
||||
continue
|
||||
if flags & FLAG_ATOM:
|
||||
raise LitError('TODO: Atoms not yet implemented')
|
||||
elif tag < len(self.tag_map):
|
||||
tag_name = self.tag_map[tag]
|
||||
current_map = self.tag_to_attr_map[tag]
|
||||
else:
|
||||
dynamic_tag += 1
|
||||
errors += 1
|
||||
tag_name = '?'+unichr(tag)+'?'
|
||||
current_map = self.tag_to_attr_map[tag]
|
||||
print 'WARNING: tag %s unknown' % unichr(tag)
|
||||
self.buf.write(unicode(tag_name).encode('utf-8'))
|
||||
elif flags & FLAG_CLOSING:
|
||||
if depth == 0:
|
||||
raise LitError('Extra closing tag')
|
||||
return index
|
||||
|
||||
elif state == 'get attr':
|
||||
in_censorship = False
|
||||
if oc == 0:
|
||||
if not is_goingdown:
|
||||
tag_name = None
|
||||
dynamic_tag = 0
|
||||
self.buf.write(' />')
|
||||
else:
|
||||
self.buf.write('>')
|
||||
index = self.binary_to_text(base=index, depth=depth+1)
|
||||
is_goingdown = False
|
||||
if not tag_name:
|
||||
raise LitError('Tag ends before it begins.')
|
||||
self.buf.write(u''.join(
|
||||
('</', tag_name, '>')).encode('utf-8'))
|
||||
dynamic_tag = 0
|
||||
tag_name = None
|
||||
state = 'text'
|
||||
else:
|
||||
if oc == 0x8000:
|
||||
state = 'get attr length'
|
||||
continue
|
||||
attr = None
|
||||
if oc in current_map and current_map[oc]:
|
||||
attr = current_map[oc]
|
||||
elif oc in self.attr_map:
|
||||
attr = self.attr_map[oc]
|
||||
if not attr or not isinstance(attr, basestring):
|
||||
raise LitError(
|
||||
'Unknown attribute %d in tag %s' % (oc, tag_name))
|
||||
if attr.startswith('%'):
|
||||
in_censorship = True
|
||||
state = 'get value length'
|
||||
continue
|
||||
self.buf.write(' ' + unicode(attr).encode('utf-8') + '=')
|
||||
if attr in ['href', 'src']:
|
||||
state = 'get href length'
|
||||
else:
|
||||
state = 'get value length'
|
||||
|
||||
elif state == 'get value length':
|
||||
if not in_censorship:
|
||||
self.buf.write('"')
|
||||
count = oc - 1
|
||||
if count == 0:
|
||||
if not in_censorship:
|
||||
self.buf.write('"')
|
||||
in_censorship = False
|
||||
state = 'get attr'
|
||||
continue
|
||||
state = 'get value'
|
||||
if oc == 0xffff:
|
||||
continue
|
||||
if count < 0 or count > (len(self.bin) - index):
|
||||
raise LitError('Invalid character count %d' % count)
|
||||
|
||||
elif state == 'get value':
|
||||
if count == 0xfffe:
|
||||
if not in_censorship:
|
||||
self.buf.write('%s"' % (oc - 1))
|
||||
in_censorship = False
|
||||
state = 'get attr'
|
||||
elif count > 0:
|
||||
if not in_censorship:
|
||||
self.buf.write(unicode(c).encode('utf-8'))
|
||||
count -= 1
|
||||
if count == 0:
|
||||
if not in_censorship:
|
||||
self.buf.write('"')
|
||||
in_censorship = False
|
||||
state = 'get attr'
|
||||
|
||||
elif state == 'get custom length':
|
||||
count = oc - 1
|
||||
if count <= 0 or count > len(self.bin)-index:
|
||||
raise LitError('Invalid character count %d' % count)
|
||||
dynamic_tag += 1
|
||||
state = 'get custom'
|
||||
tag_name = ''
|
||||
|
||||
elif state == 'get custom':
|
||||
tag_name += c
|
||||
count -= 1
|
||||
if count == 0:
|
||||
self.buf.write(unicode(tag_name).encode('utf-8'))
|
||||
state = 'get attr'
|
||||
|
||||
elif state == 'get attr length':
|
||||
count = oc - 1
|
||||
if count <= 0 or count > (len(self.bin) - index):
|
||||
raise LitError('Invalid character count %d' % count)
|
||||
self.buf.write(' ')
|
||||
state = 'get custom attr'
|
||||
|
||||
elif state == 'get custom attr':
|
||||
self.buf.write(unicode(c).encode('utf-8'))
|
||||
count -= 1
|
||||
if count == 0:
|
||||
self.buf.write('=')
|
||||
state = 'get value length'
|
||||
|
||||
elif state == 'get href length':
|
||||
count = oc - 1
|
||||
if count <= 0 or count > (len(self.bin) - index):
|
||||
raise LitError('Invalid character count %d' % count)
|
||||
href = ''
|
||||
state = 'get href'
|
||||
|
||||
elif state == 'get href':
|
||||
href += c
|
||||
count -= 1
|
||||
if count == 0:
|
||||
doc, m, frag = href[1:].partition('#')
|
||||
path = self.item_path(doc)
|
||||
if m and frag:
|
||||
path += m + frag
|
||||
self.buf.write((u'"%s"' % path).encode('utf-8'))
|
||||
state = 'get attr'
|
||||
return index
|
||||
|
||||
class DirectoryEntry(object):
|
||||
def __init__(self, name, section, offset, size):
|
||||
self.name = name
|
||||
self.section = section
|
||||
self.offset = offset
|
||||
self.size = size
|
||||
|
||||
def __repr__(self):
|
||||
return "DirectoryEntry(name=%s, section=%d, offset=%d, size=%d)" \
|
||||
% (repr(self.name), self.section, self.offset, self.size)
|
||||
|
||||
def __str__(self):
|
||||
return repr(self)
|
||||
|
||||
class ManifestItem(object):
|
||||
def __init__(self, original, internal, mime_type, offset, root, state):
|
||||
self.original = original
|
||||
self.internal = internal
|
||||
self.mime_type = mime_type
|
||||
self.offset = offset
|
||||
self.root = root
|
||||
self.state = state
|
||||
# Some paths in Fictionwise "multiformat" LIT files contain '..' (!?)
|
||||
nodes = original.split('/')
|
||||
path = []
|
||||
for node in nodes:
|
||||
if node == '..':
|
||||
if path: path.pop()
|
||||
continue
|
||||
path.append(node)
|
||||
self.path = os.path.join(*path)
|
||||
|
||||
def __eq__(self, other):
|
||||
if hasattr(other, 'internal'):
|
||||
return self.internal == other.internal
|
||||
return self.internal == other
|
||||
|
||||
def __repr__(self):
|
||||
return "ManifestItem(internal=%s, path=%s)" \
|
||||
% (repr(self.internal), repr(self.path))
|
||||
|
||||
def preserve(function):
|
||||
def wrapper(self, *args, **kwargs):
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
return function(self, *args, **kwargs)
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
functools.update_wrapper(wrapper, function)
|
||||
return wrapper
|
||||
|
||||
class LitReader(object):
|
||||
PIECE_SIZE = 16
|
||||
|
||||
def magic():
|
||||
@preserve
|
||||
def fget(self):
|
||||
self._stream.seek(0)
|
||||
return self._stream.read(8)
|
||||
return property(fget=fget)
|
||||
magic = magic()
|
||||
|
||||
def version():
|
||||
def fget(self):
|
||||
self._stream.seek(8)
|
||||
return u32(self._stream.read(4))
|
||||
return property(fget=fget)
|
||||
version = version()
|
||||
|
||||
def hdr_len():
|
||||
@preserve
|
||||
def fget(self):
|
||||
self._stream.seek(12)
|
||||
return int32(self._stream.read(4))
|
||||
return property(fget=fget)
|
||||
hdr_len = hdr_len()
|
||||
|
||||
def num_pieces():
|
||||
@preserve
|
||||
def fget(self):
|
||||
self._stream.seek(16)
|
||||
return int32(self._stream.read(4))
|
||||
return property(fget=fget)
|
||||
num_pieces = num_pieces()
|
||||
|
||||
def sec_hdr_len():
|
||||
@preserve
|
||||
def fget(self):
|
||||
self._stream.seek(20)
|
||||
return int32(self._stream.read(4))
|
||||
return property(fget=fget)
|
||||
sec_hdr_len = sec_hdr_len()
|
||||
|
||||
def guid():
|
||||
@preserve
|
||||
def fget(self):
|
||||
self._stream.seek(24)
|
||||
return self._stream.read(16)
|
||||
return property(fget=fget)
|
||||
guid = guid()
|
||||
|
||||
def header():
|
||||
@preserve
|
||||
def fget(self):
|
||||
size = self.hdr_len \
|
||||
+ (self.num_pieces * self.PIECE_SIZE) \
|
||||
+ self.sec_hdr_len
|
||||
self._stream.seek(0)
|
||||
return self._stream.read(size)
|
||||
return property(fget=fget)
|
||||
header = header()
|
||||
|
||||
def __init__(self, filename_or_stream):
|
||||
if hasattr(filename_or_stream, 'read'):
|
||||
self._stream = filename_or_stream
|
||||
else:
|
||||
self._stream = open(filename_or_stream, 'rb')
|
||||
if self.magic != 'ITOLITLS':
|
||||
raise LitError('Not a valid LIT file')
|
||||
if self.version != 1:
|
||||
raise LitError('Unknown LIT version %d'%(self.version,))
|
||||
self.entries = {}
|
||||
self._read_secondary_header()
|
||||
self._read_header_pieces()
|
||||
self._read_section_names()
|
||||
self._read_manifest()
|
||||
self._read_meta()
|
||||
self._read_drm()
|
||||
|
||||
@preserve
|
||||
def __len__(self):
|
||||
self._stream.seek(0, 2)
|
||||
return self._stream.tell()
|
||||
|
||||
@preserve
|
||||
def _read_raw(self, offset, size):
|
||||
self._stream.seek(offset)
|
||||
return self._stream.read(size)
|
||||
|
||||
def _read_content(self, offset, size):
|
||||
return self._read_raw(self.content_offset + offset, size)
|
||||
|
||||
def _read_secondary_header(self):
|
||||
offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE)
|
||||
bytes = self._read_raw(offset, self.sec_hdr_len)
|
||||
offset = int32(bytes[4:])
|
||||
while offset < len(bytes):
|
||||
blocktype = bytes[offset:offset+4]
|
||||
blockver = u32(bytes[offset+4:])
|
||||
if blocktype == 'CAOL':
|
||||
if blockver != 2:
|
||||
raise LitError(
|
||||
'Unknown CAOL block format %d' % blockver)
|
||||
self.creator_id = u32(bytes[offset+12:])
|
||||
self.entry_chunklen = u32(bytes[offset+20:])
|
||||
self.count_chunklen = u32(bytes[offset+24:])
|
||||
self.entry_unknown = u32(bytes[offset+28:])
|
||||
self.count_unknown = u32(bytes[offset+32:])
|
||||
offset += 48
|
||||
elif blocktype == 'ITSF':
|
||||
if blockver != 4:
|
||||
raise LitError(
|
||||
'Unknown ITSF block format %d' % blockver)
|
||||
if u32(bytes[offset+4+16:]):
|
||||
raise LitError('This file has a 64bit content offset')
|
||||
self.content_offset = u32(bytes[offset+16:])
|
||||
self.timestamp = u32(bytes[offset+24:])
|
||||
self.language_id = u32(bytes[offset+28:])
|
||||
offset += 48
|
||||
if not hasattr(self, 'content_offset'):
|
||||
raise LitError('Could not figure out the content offset')
|
||||
|
||||
def _read_header_pieces(self):
|
||||
src = self.header[self.hdr_len:]
|
||||
for i in range(self.num_pieces):
|
||||
piece = src[i * self.PIECE_SIZE:(i + 1) * self.PIECE_SIZE]
|
||||
if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
|
||||
raise LitError('Piece %s has 64bit value' % repr(piece))
|
||||
offset, size = u32(piece), int32(piece[8:])
|
||||
piece = self._read_raw(offset, size)
|
||||
if i == 0:
|
||||
continue # Dont need this piece
|
||||
elif i == 1:
|
||||
if u32(piece[8:]) != self.entry_chunklen or \
|
||||
u32(piece[12:]) != self.entry_unknown:
|
||||
raise LitError('Secondary header does not match piece')
|
||||
self._read_directory(piece)
|
||||
elif i == 2:
|
||||
if u32(piece[8:]) != self.count_chunklen or \
|
||||
u32(piece[12:]) != self.count_unknown:
|
||||
raise LitError('Secondary header does not match piece')
|
||||
continue # No data needed from this piece
|
||||
elif i == 3:
|
||||
self.piece3_guid = piece
|
||||
elif i == 4:
|
||||
self.piece4_guid = piece
|
||||
|
||||
def _read_directory(self, piece):
|
||||
if not piece.startswith('IFCM'):
|
||||
raise LitError('Header piece #1 is not main directory.')
|
||||
chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28])
|
||||
if (32 + (num_chunks * chunk_size)) != len(piece):
|
||||
raise LitError('IFCM HEADER has incorrect length')
|
||||
for i in xrange(num_chunks):
|
||||
offset = 32 + (i * chunk_size)
|
||||
chunk = piece[offset:offset + chunk_size]
|
||||
tag, chunk = chunk[:4], chunk[4:]
|
||||
if tag != 'AOLL': continue
|
||||
remaining, chunk = int32(chunk[:4]), chunk[4:]
|
||||
if remaining >= chunk_size:
|
||||
raise LitError('AOLL remaining count is negative')
|
||||
remaining = chunk_size - (remaining + 48)
|
||||
entries = u16(chunk[-2:])
|
||||
if entries == 0:
|
||||
# Hopefully will work even without a correct entries count
|
||||
entries = (2 ** 16) - 1
|
||||
chunk = chunk[40:]
|
||||
for j in xrange(entries):
|
||||
if remaining <= 0: break
|
||||
namelen, chunk, remaining = encint(chunk, remaining)
|
||||
if namelen != (namelen & 0x7fffffff):
|
||||
raise LitError('Directory entry had 64bit name length.')
|
||||
if namelen > remaining - 3:
|
||||
raise LitError('Read past end of directory chunk')
|
||||
name, chunk = chunk[:namelen], chunk[namelen:]
|
||||
section, chunk, remaining = encint(chunk, remaining)
|
||||
offset, chunk, remaining = encint(chunk, remaining)
|
||||
size, chunk, remaining = encint(chunk, remaining)
|
||||
entry = DirectoryEntry(name, section, offset, size)
|
||||
self.entries[name] = entry
|
||||
|
||||
def _read_section_names(self):
|
||||
if '::DataSpace/NameList' not in self.entries:
|
||||
raise LitError('Lit file does not have a valid NameList')
|
||||
raw = self.get_file('::DataSpace/NameList')
|
||||
if len(raw) < 4:
|
||||
raise LitError('Invalid Namelist section')
|
||||
pos = 4
|
||||
self.num_sections = u16(raw[2:pos])
|
||||
self.section_names = [""]*self.num_sections
|
||||
self.section_data = [None]*self.num_sections
|
||||
for section in range(self.num_sections):
|
||||
size = u16(raw[pos:pos+2])
|
||||
pos += 2
|
||||
size = size*2 + 2
|
||||
if pos + size > len(raw):
|
||||
raise LitError('Invalid Namelist section')
|
||||
self.section_names[section] = \
|
||||
raw[pos:pos+size].decode('utf-16-le').rstrip('\000')
|
||||
pos += size
|
||||
|
||||
def _read_manifest(self):
|
||||
if '/manifest' not in self.entries:
|
||||
raise LitError('Lit file does not have a valid manifest')
|
||||
raw = self.get_file('/manifest')
|
||||
self.manifest = {}
|
||||
while raw:
|
||||
slen, raw = ord(raw[0]), raw[1:]
|
||||
if slen == 0: break
|
||||
root, raw = raw[:slen].decode('utf8'), raw[slen:]
|
||||
if not raw:
|
||||
raise LitError('Truncated manifest')
|
||||
for state in ['spine', 'not spine', 'css', 'images']:
|
||||
num_files, raw = int32(raw), raw[4:]
|
||||
if num_files == 0: continue
|
||||
for i in xrange(num_files):
|
||||
if len(raw) < 5:
|
||||
raise LitError('Truncated manifest')
|
||||
offset, raw = u32(raw), raw[4:]
|
||||
slen, raw = ord(raw[0]), raw[1:]
|
||||
internal, raw = raw[:slen].decode('utf8'), raw[slen:]
|
||||
slen, raw = ord(raw[0]), raw[1:]
|
||||
original, raw = raw[:slen].decode('utf8'), raw[slen:]
|
||||
slen, raw = ord(raw[0]), raw[1:]
|
||||
mime_type, raw = raw[:slen].decode('utf8'), raw[slen+1:]
|
||||
self.manifest[internal] = ManifestItem(
|
||||
original, internal, mime_type, offset, root, state)
|
||||
mlist = self.manifest.values()
|
||||
shared = mlist[0].path
|
||||
for item in mlist[1:]:
|
||||
path = item.path
|
||||
while not path.startswith(shared):
|
||||
shared = shared[:-1]
|
||||
if shared == '':
|
||||
break
|
||||
else:
|
||||
slen = len(shared)
|
||||
for item in mlist:
|
||||
item.path = item.path[slen:]
|
||||
|
||||
def _read_meta(self):
|
||||
raw = self.get_file('/meta')
|
||||
try:
|
||||
xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP))
|
||||
except LitError:
|
||||
if 'PENGUIN group' not in raw: raise
|
||||
print "WARNING: attempting PENGUIN malformed OPF fix"
|
||||
raw = raw.replace(
|
||||
'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
|
||||
xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP))
|
||||
self.meta = xml
|
||||
|
||||
def _read_drm(self):
|
||||
self.drmlevel = 0
|
||||
if '/DRMStorage/Licenses/EUL' in self.entries:
|
||||
self.drmlevel = 5
|
||||
elif '/DRMStorage/DRMBookplate' in self.entries:
|
||||
self.drmlevel = 3
|
||||
elif '/DRMStorage/DRMSealed' in self.entries:
|
||||
self.drmlevel = 1
|
||||
else:
|
||||
return
|
||||
des = msdes.new(self._calculate_deskey())
|
||||
bookkey = des.decrypt(self.get_file('/DRMStorage/DRMSealed'))
|
||||
if bookkey[0] != '\000':
|
||||
raise LitError('Unable to decrypt title key!')
|
||||
self.bookkey = bookkey[1:9]
|
||||
|
||||
def _calculate_deskey(self):
|
||||
hashfiles = ['/meta', '/DRMStorage/DRMSource']
|
||||
if self.drmlevel == 3:
|
||||
hashfiles.append('/DRMStorage/DRMBookplate')
|
||||
prepad = 2
|
||||
hash = mssha1.new()
|
||||
for name in hashfiles:
|
||||
data = self.get_file(name)
|
||||
if prepad > 0:
|
||||
data = ("\000" * prepad) + data
|
||||
prepad = 0
|
||||
postpad = 64 - (len(data) % 64)
|
||||
if postpad < 64:
|
||||
data = data + ("\000" * postpad)
|
||||
hash.update(data)
|
||||
digest = hash.digest()
|
||||
key = [0] * 8
|
||||
for i in xrange(0, len(digest)):
|
||||
key[i % 8] ^= ord(digest[i])
|
||||
return ''.join(chr(x) for x in key)
|
||||
|
||||
def get_markup_file(self, name):
|
||||
raw = self.get_file(name)
|
||||
decl, map = (OPF_DECL, OPF_MAP) \
|
||||
if name == '/meta' else (HTML_DECL, HTML_MAP)
|
||||
xml = decl + unicode(UnBinary(raw, self.manifest, map))
|
||||
return xml
|
||||
|
||||
def get_file(self, name):
|
||||
entry = self.entries[name]
|
||||
if entry.section == 0:
|
||||
return self._read_content(entry.offset, entry.size)
|
||||
section = self.get_section(entry.section)
|
||||
return section[entry.offset:entry.offset+entry.size]
|
||||
|
||||
def get_section(self, section):
|
||||
data = self.section_data[section]
|
||||
if not data:
|
||||
data = self._get_section(section)
|
||||
self.section_data[section] = data
|
||||
return data
|
||||
|
||||
def _get_section(self, section):
|
||||
name = self.section_names[section]
|
||||
path = '::DataSpace/Storage/' + name
|
||||
transform = self.get_file(path + '/Transform/List')
|
||||
content = self.get_file(path + '/Content')
|
||||
control = self.get_file(path + '/ControlData')
|
||||
while len(transform) >= 16:
|
||||
csize = (int32(control) + 1) * 4
|
||||
if csize > len(control) or csize <= 0:
|
||||
raise LitError("ControlData is too short")
|
||||
guid = msguid(transform)
|
||||
if guid == DESENCRYPT_GUID:
|
||||
content = self._decrypt(content)
|
||||
control = control[csize:]
|
||||
elif guid == LZXCOMPRESS_GUID:
|
||||
reset_table = self.get_file(
|
||||
'/'.join(('::DataSpace/Storage', name, 'Transform',
|
||||
LZXCOMPRESS_GUID, 'InstanceData/ResetTable')))
|
||||
content = self._decompress(content, control, reset_table)
|
||||
control = control[csize:]
|
||||
else:
|
||||
raise LitError("Unrecognized transform: %s." % repr(guid))
|
||||
transform = transform[16:]
|
||||
return content
|
||||
|
||||
def _decrypt(self, content):
|
||||
if self.drmlevel == 5:
|
||||
raise LitError('Cannot extract content from a DRM protected ebook')
|
||||
return msdes.new(self.bookkey).decrypt(content)
|
||||
|
||||
def _decompress(self, content, control, reset_table):
|
||||
if len(control) < 32 or control[CONTROL_TAG:CONTROL_TAG+4] != "LZXC":
|
||||
raise LitError("Invalid ControlData tag value")
|
||||
if len(reset_table) < (RESET_INTERVAL + 8):
|
||||
raise LitError("Reset table is too short")
|
||||
if u32(reset_table[RESET_UCLENGTH + 4:]) != 0:
|
||||
raise LitError("Reset table has 64bit value for UCLENGTH")
|
||||
|
||||
result = []
|
||||
|
||||
window_size = 14
|
||||
u = u32(control[CONTROL_WINDOW_SIZE:])
|
||||
while u > 0:
|
||||
u >>= 1
|
||||
window_size += 1
|
||||
if window_size < 15 or window_size > 21:
|
||||
raise LitError("Invalid window in ControlData")
|
||||
lzx.init(window_size)
|
||||
|
||||
ofs_entry = int32(reset_table[RESET_HDRLEN:]) + 8
|
||||
uclength = int32(reset_table[RESET_UCLENGTH:])
|
||||
accum = int32(reset_table[RESET_INTERVAL:])
|
||||
bytes_remaining = uclength
|
||||
window_bytes = (1 << window_size)
|
||||
base = 0
|
||||
|
||||
while ofs_entry < len(reset_table):
|
||||
if accum >= window_bytes:
|
||||
accum = 0
|
||||
size = int32(reset_table[ofs_entry:])
|
||||
u = int32(reset_table[ofs_entry + 4:])
|
||||
if u != 0:
|
||||
raise LitError("Reset table entry greater than 32 bits")
|
||||
if size >= len(content):
|
||||
raise("Reset table entry out of bounds")
|
||||
if bytes_remaining >= window_bytes:
|
||||
lzx.reset()
|
||||
result.append(
|
||||
lzx.decompress(content[base:size], window_bytes))
|
||||
bytes_remaining -= window_bytes
|
||||
base = size
|
||||
accum += int32(reset_table[RESET_INTERVAL:])
|
||||
ofs_entry += 8
|
||||
if bytes_remaining < window_bytes and bytes_remaining > 0:
|
||||
lzx.reset()
|
||||
result.append(lzx.decompress(content[base:], bytes_remaining))
|
||||
bytes_remaining = 0
|
||||
if bytes_remaining > 0:
|
||||
raise LitError("Failed to completely decompress section")
|
||||
return ''.join(result)
|
||||
|
||||
def extract_content(self, output_dir=os.getcwdu()):
|
||||
output_dir = os.path.abspath(output_dir)
|
||||
try:
|
||||
opf_path = os.path.splitext(
|
||||
os.path.basename(self._stream.name))[0] + '.opf'
|
||||
except AttributeError:
|
||||
opf_path = 'content.opf'
|
||||
opf_path = os.path.join(output_dir, opf_path)
|
||||
self._ensure_dir(opf_path)
|
||||
with open(opf_path, 'w') as f:
|
||||
f.write(self.meta.encode('utf-8'))
|
||||
for entry in self.manifest.values():
|
||||
path = os.path.join(output_dir, entry.path)
|
||||
self._ensure_dir(path)
|
||||
with open(path, 'w') as f:
|
||||
if 'spine' in entry.state:
|
||||
name = '/'.join(('/data', entry.internal, 'content'))
|
||||
f.write(self.get_markup_file(name).encode('utf-8'))
|
||||
else:
|
||||
name = '/'.join(('/data', entry.internal))
|
||||
f.write(self.get_file(name))
|
||||
|
||||
def _ensure_dir(self, path):
|
||||
dir = os.path.dirname(path)
|
||||
if not os.path.isdir(dir):
|
||||
os.makedirs(dir)
|
||||
|
||||
def option_parser():
|
||||
from calibre import OptionParser
|
||||
parser = OptionParser(usage=_('%prog [options] LITFILE'))
|
||||
parser.add_option(
|
||||
'-o', '--output-dir', default='.',
|
||||
help=_('Output directory. Defaults to current directory.'))
|
||||
parser.add_option(
|
||||
'--verbose', default=False, action='store_true',
|
||||
help='Useful for debugging.')
|
||||
return parser
|
||||
|
||||
def main(args=sys.argv):
|
||||
parser = option_parser()
|
||||
opts, args = parser.parse_args(args)
|
||||
if len(args) != 2:
|
||||
parser.print_help()
|
||||
return 1
|
||||
lr = LitReader(args[1])
|
||||
lr.extract_content(opts.output_dir)
|
||||
print _('OEB ebook created in'), opts.output_dir
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
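A minimal usage sketch (not part of the commit itself): the reader above can be driven directly from Python; the file name and output directory below are placeholders.

from calibre.ebooks.lit.reader import LitReader

lr = LitReader('book.lit')      # parses the header, directory, manifest and DRM level
lr.extract_content('oeb_out')   # writes content.opf plus every manifest item

This is the same flow the lit2oeb entry point registered later in this commit reaches through main().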
|
@ -1,734 +1,25 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
'''
|
||||
Support for reading the metadata from a lit file.
|
||||
Support for reading the metadata from a LIT file.
|
||||
'''
|
||||
|
||||
import sys, struct, cStringIO, os
|
||||
from itertools import repeat
|
||||
import sys, cStringIO, os
|
||||
|
||||
from calibre import relpath
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.ebooks.metadata.opf import OPFReader
|
||||
|
||||
OPF_ATTR_MAP = [
|
||||
None,
|
||||
"href",
|
||||
"%never-used",
|
||||
"%guid",
|
||||
"%minimum_level",
|
||||
"%attr5",
|
||||
"id",
|
||||
"href",
|
||||
"media-type",
|
||||
"fallback",
|
||||
"idref",
|
||||
"xmlns:dc",
|
||||
"xmlns:oebpackage",
|
||||
"role",
|
||||
"file-as",
|
||||
"event",
|
||||
"scheme",
|
||||
"title",
|
||||
"type",
|
||||
"unique-identifier",
|
||||
"name",
|
||||
"content",
|
||||
"xml:lang",
|
||||
]
|
||||
|
||||
OPF_TAG_MAP = [
|
||||
None,
|
||||
"package",
|
||||
"dc:Title",
|
||||
"dc:Creator",
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
"manifest",
|
||||
"item",
|
||||
"spine",
|
||||
"itemref",
|
||||
"metadata",
|
||||
"dc-metadata",
|
||||
"dc:Subject",
|
||||
"dc:Description",
|
||||
"dc:Publisher",
|
||||
"dc:Contributor",
|
||||
"dc:Date",
|
||||
"dc:Type",
|
||||
"dc:Format",
|
||||
"dc:Identifier",
|
||||
"dc:Source",
|
||||
"dc:Language",
|
||||
"dc:Relation",
|
||||
"dc:Coverage",
|
||||
"dc:Rights",
|
||||
"x-metadata",
|
||||
"meta",
|
||||
"tours",
|
||||
"tour",
|
||||
"site",
|
||||
"guide",
|
||||
"reference",
|
||||
None,
|
||||
]
|
||||
|
||||
class DirectoryEntry(object):
|
||||
def __init__(self, name, section, offset, size):
|
||||
self.name = name
|
||||
self.section = section
|
||||
self.offset = offset
|
||||
self.size = size
|
||||
|
||||
def __repr__(self):
|
||||
return '%s\n\tSection: %d\n\tOffset: %d\n\tSize: %d\n'%(self.name,
|
||||
self.section, self.offset, self.size)
|
||||
|
||||
def __str__(self):
|
||||
return repr(self)
|
||||
|
||||
class LitReadError(Exception):
|
||||
pass
|
||||
|
||||
def u32(bytes):
|
||||
b = struct.unpack('BBBB', bytes[:4])
|
||||
return b[0] + (b[1] << 8) + (b[2] << 16) + (b[3] << 24)
|
||||
|
||||
def u16(bytes):
|
||||
b = struct.unpack('BB', bytes[:2])
|
||||
return b[0] + (b[1] << 8)
|
||||
|
||||
def int32(bytes):
|
||||
return u32(bytes)&0x7FFFFFFF
|
||||
|
||||
def encint(bytes, remaining):
|
||||
pos, val = 0, 0
|
||||
while remaining > 0:
|
||||
b = ord(bytes[pos])
|
||||
pos += 1
|
||||
remaining -= 1
|
||||
val <<= 7
|
||||
val |= (b & 0x7f)
|
||||
if b & 0x80 == 0: break
|
||||
return val, bytes[pos:], remaining
|
||||
|
||||
def read_utf8_char(bytes, pos):
|
||||
c = ord(bytes[pos])
|
||||
mask = 0x80
|
||||
if (c & mask):
|
||||
elsize = 0
|
||||
while c & mask:
|
||||
mask >>= 1
|
||||
elsize += 1
|
||||
if (mask <= 1) or (mask == 0x40):
|
||||
raise LitReadError('Invalid UTF8 character: %s'%(repr(bytes[pos])))
|
||||
else:
|
||||
elsize = 1
|
||||
|
||||
|
||||
if elsize > 1:
|
||||
if elsize + pos > len(bytes):
|
||||
raise LitReadError('Invalid UTF8 character: %s'%(repr(bytes[pos])))
|
||||
c &= (mask - 1)
|
||||
for i in range(1, elsize):
|
||||
b = ord(bytes[pos+i])
|
||||
if (b & 0xC0) != 0x80:
|
||||
raise LitReadError('Invalid UTF8 character: %s'%(repr(bytes[pos:pos+i])))
|
||||
c = (c << 6) | (b & 0x3F)
|
||||
return unichr(c), pos+elsize
|
||||
|
||||
FLAG_OPENING = 1
|
||||
FLAG_CLOSING = 2
|
||||
FLAG_BLOCK = 4
|
||||
FLAG_HEAD = 8
|
||||
FLAG_ATOM = 16
|
||||
XML_ENTITIES = ['&amp;', '&apos;', '&lt;', '&gt;', '&quot;']
|
||||
|
||||
class UnBinary(object):
|
||||
def __init__(self, bin, manifest, attr_map=OPF_ATTR_MAP, tag_map=OPF_TAG_MAP,
|
||||
tag_to_attr_map=[[] for i in range(43)]):
|
||||
self.manifest = manifest
|
||||
self.pending_indent = 0
|
||||
self.lingering_space = 0
|
||||
self.was_in_text = 0
|
||||
self.attr_map = attr_map
|
||||
self.tag_map = tag_map
|
||||
self.tag_to_attr_map = tag_to_attr_map
|
||||
self.opf = self.attr_map is OPF_ATTR_MAP
|
||||
self.bin = bin
|
||||
self.buf = cStringIO.StringIO()
|
||||
self.ampersands = []
|
||||
self.binary_to_text()
|
||||
self.raw = self.buf.getvalue().lstrip().decode('utf-8')
|
||||
self.escape_ampersands()
|
||||
|
||||
def escape_ampersands(self):
|
||||
offset = 0
|
||||
for pos in self.ampersands:
|
||||
test = self.raw[pos+offset:pos+offset+6]
|
||||
if test.startswith('&#') and ';' in test:
|
||||
continue
|
||||
escape = True
|
||||
for ent in XML_ENTITIES:
|
||||
if test.startswith(ent):
|
||||
escape = False
|
||||
break
|
||||
if not escape:
|
||||
continue
|
||||
self.raw = self.raw[:pos+offset] + '&amp;' + self.raw[pos+offset+1:]
|
||||
offset += 4
|
||||
|
||||
|
||||
def write_spaces(self, depth):
|
||||
self.buf.write(u''.join(repeat(' ', depth)))
|
||||
|
||||
def item_path(self, internal_id):
|
||||
for i in self.manifest:
|
||||
if i == internal_id:
|
||||
return i.path
|
||||
raise LitReadError('Could not find item %s'%(internal_id,))
|
||||
|
||||
def __unicode__(self):
|
||||
return self.raw
|
||||
|
||||
def binary_to_text(self, base=0, depth=0):
|
||||
space_enabled, saved_space_enabled = 1, 0
|
||||
was_indented, is_goingdown = 0, 0
|
||||
tag_name = current_map = None
|
||||
dynamic_tag = errors = in_censorship = 0
|
||||
|
||||
state = 'text'
|
||||
index = base
|
||||
flags = 0
|
||||
|
||||
while index < len(self.bin):
|
||||
c, index = read_utf8_char(self.bin, index)
|
||||
if state == 'text':
|
||||
if ord(c) == 0:
|
||||
state = 'get flags'
|
||||
continue
|
||||
if (not self.was_in_text) or space_enabled:
|
||||
space_enabled = 0;
|
||||
if c in (' ', '\t', '\n', '\r'):
|
||||
space_enabled += 1
|
||||
else:
|
||||
self.was_in_text = 1
|
||||
if c == '\v':
|
||||
c = '\n'
|
||||
pending_indent = 0
|
||||
if c == '&':
|
||||
self.ampersands.append(self.buf.tell()-1)
|
||||
self.buf.write(c.encode('utf-8') if isinstance(c, unicode) else c)
|
||||
elif state == 'get flags':
|
||||
if ord(c) == 0:
|
||||
state = 'text'
|
||||
continue
|
||||
flags = ord(c)
|
||||
state = 'get tag'
|
||||
elif state == 'get tag':
|
||||
state = 'text' if ord(c) == 0 else 'get attr'
|
||||
if flags & FLAG_OPENING:
|
||||
if space_enabled and ((not self.was_in_text) or (flags &(FLAG_BLOCK|FLAG_HEAD))):
|
||||
self.pending_indent += 1
|
||||
if self.pending_indent or self.opf:
|
||||
was_indented += 1
|
||||
self.buf.write(u'\n')
|
||||
self.write_spaces(depth)
|
||||
pending_indent = 0
|
||||
if (flags & FLAG_HEAD) or (flags & FLAG_BLOCK) or \
|
||||
self.opf or depth == 0:
|
||||
pending_indent = 1
|
||||
tag = ord(c)
|
||||
self.buf.write('<')
|
||||
if not (flags & FLAG_CLOSING):
|
||||
is_goingdown = 1
|
||||
if tag == 0x8000:
|
||||
state = 'get custom length'
|
||||
continue
|
||||
if flags & FLAG_ATOM:
|
||||
raise LitReadError('TODO: Atoms not yet implemented')
|
||||
elif tag < len(self.tag_map):
|
||||
tag_name = self.tag_map[tag]
|
||||
current_map = self.tag_to_attr_map[tag]
|
||||
else:
|
||||
dynamic_tag += 1
|
||||
errors += 1
|
||||
tag_name = '?'+unichr(tag)+'?'
|
||||
current_map = self.tag_to_attr_map[tag]
|
||||
print 'WARNING: tag %s unknown'%(unichr(tag),)
|
||||
|
||||
self.buf.write(unicode(tag_name).encode('utf-8'))
|
||||
elif flags & FLAG_CLOSING:
|
||||
#if depth == 0:
|
||||
# raise LitReadError('Extra closing tag')
|
||||
self.lingering_space = space_enabled
|
||||
return index
|
||||
elif state == 'get attr':
|
||||
in_censorship = 0
|
||||
if ord(c) == 0:
|
||||
if not is_goingdown:
|
||||
tag_name = None
|
||||
dynamic_tag = 0
|
||||
self.buf.write(' />')
|
||||
else:
|
||||
self.buf.write('>')
|
||||
if not self.opf and (flags & (FLAG_BLOCK|FLAG_HEAD)):
|
||||
pending_indent += 1
|
||||
index = self.binary_to_text(base=index, depth=depth+1)
|
||||
is_goingdown = 0
|
||||
if not tag_name:
|
||||
raise LitReadError('Tag ends before it begins.')
|
||||
saved_space_enabled = space_enabled
|
||||
space_enabled = self.lingering_space
|
||||
if space_enabled and was_indented and not self.was_in_text:
|
||||
self.buf.write('\n')
|
||||
self.write_spaces(depth)
|
||||
self.buf.write('</'+tag_name+'>')
|
||||
if (space_enabled and self.opf) or (flags & (FLAG_BLOCK|FLAG_HEAD)):
|
||||
self.pending_indent += 1
|
||||
dynamic_tag = 0
|
||||
tag_name = None
|
||||
space_enabled = saved_space_enabled
|
||||
|
||||
self.was_in_text = 0
|
||||
state = 'text'
|
||||
else:
|
||||
if ord(c) == 0x8000:
|
||||
state = 'get attr length'
|
||||
continue
|
||||
attr = None
|
||||
if ord(c) < len(current_map) and current_map[ord(c)]:
|
||||
attr = current_map[ord(c)]
|
||||
elif ord(c) < len(self.attr_map):
|
||||
attr = self.attr_map[ord(c)]
|
||||
|
||||
if not attr or not isinstance(attr, basestring):
|
||||
raise LitReadError('Unknown attribute %d in tag %s'%(ord(c), tag_name))
|
||||
|
||||
if attr.startswith('%'):
|
||||
in_censorship = 1
|
||||
state = 'get value length'
|
||||
continue
|
||||
|
||||
self.buf.write(' ' + unicode(attr).encode('utf-8') + '=')
|
||||
if attr in ['href', 'src']:
|
||||
state = 'get href'
|
||||
else:
|
||||
state = 'get value length'
|
||||
elif state == 'get value length':
|
||||
if not in_censorship:
|
||||
self.buf.write('"')
|
||||
char_count = ord(c) - 1
|
||||
if not char_count:
|
||||
if not in_censorship:
|
||||
self.buf.write('"')
|
||||
in_censorship = 0
|
||||
state = 'get attr'
|
||||
state = 'get value'
|
||||
if ord(c) == 0xffff:
|
||||
continue
|
||||
if char_count < 0 or char_count > len(self.bin)-index:
|
||||
raise LitReadError('Invalid character count %d'%(char_count,))
|
||||
elif state == 'get value':
|
||||
if char_count == 0xfffe:
|
||||
if not in_censorship:
|
||||
self.buf.write(str(ord(c)-1))
|
||||
in_censorship = 0
|
||||
state = 'get attr'
|
||||
elif char_count:
|
||||
if not in_censorship:
|
||||
self.buf.write(c)
|
||||
char_count -= 1
|
||||
if not char_count:
|
||||
if not in_censorship:
|
||||
self.buf.write('"')
|
||||
in_censorship = 0
|
||||
state = 'get attr'
|
||||
elif state == 'get custom length':
|
||||
char_count = ord(c) - 1
|
||||
if char_count <= 0 or char_count > len(self.bin)-index:
|
||||
raise LitReadError('Invalid character count %d'%(char_count,))
|
||||
dynamic_tag += 1
|
||||
state = 'get custom'
|
||||
tag_name = ''
|
||||
elif state == 'get custom':
|
||||
tag += c
|
||||
char_count -= 1
|
||||
if not char_count:
|
||||
self.buf.write(tag_name)
|
||||
state = 'get attr'
|
||||
elif state == 'get attr length':
|
||||
char_count = ord(c) - 1
|
||||
if char_count <= 0 or char_count > len(self.bin)-index:
|
||||
raise LitReadError('Invalid character count %d'%(char_count,))
|
||||
self.buf.write(' ')
|
||||
state = 'get custom attr'
|
||||
elif state == 'get custom attr':
|
||||
self.buf.write(c)
|
||||
char_count -= 1
|
||||
if not char_count:
|
||||
self.buf.write('=')
|
||||
state = 'get value length'
|
||||
elif state == 'get href':
|
||||
char_count = ord(c) - 1
|
||||
if char_count <= 0:
|
||||
raise LitReadError('Invalid character count %d'%(char_count,))
|
||||
href = self.bin[index+1:index+char_count].decode('ascii')
|
||||
index += char_count
|
||||
doc, m, frag = href.partition('#')
|
||||
path = self.item_path(doc)
|
||||
if m and frag:
|
||||
path += m+frag
|
||||
self.buf.write((u'"%s"'%(path,)).encode('utf-8'))
|
||||
state = 'get attr'
|
||||
|
||||
self.lingering_space = space_enabled
|
||||
return index
|
||||
|
||||
class ManifestItem(object):
|
||||
|
||||
def __init__(self, original, internal, mime_type, offset, root, state):
|
||||
self.original = original
|
||||
self.internal = internal
|
||||
self.mime_type = mime_type
|
||||
self.offset = offset
|
||||
self.root = root
|
||||
self.state = state
|
||||
self.prefix = 'images' if state == 'images' else 'css' if state == 'css' else ''
|
||||
self.prefix = self.prefix + os.sep if self.prefix else ''
|
||||
self.path = self.prefix + self.original
|
||||
|
||||
def __eq__(self, other):
|
||||
if hasattr(other, 'internal'):
|
||||
return self.internal == other.internal
|
||||
return self.internal == other
|
||||
|
||||
def __repr__(self):
|
||||
return self.internal + u'->' + self.path
|
||||
|
||||
class LitFile(object):
|
||||
|
||||
PIECE_SIZE = 16
|
||||
|
||||
@apply
|
||||
def magic():
|
||||
def fget(self):
|
||||
opos = self._stream.tell()
|
||||
self._stream.seek(0)
|
||||
val = self._stream.read(8)
|
||||
self._stream.seek(opos)
|
||||
return val
|
||||
return property(fget=fget)
|
||||
|
||||
@apply
|
||||
def version():
|
||||
def fget(self):
|
||||
opos = self._stream.tell()
|
||||
self._stream.seek(8)
|
||||
val = u32(self._stream.read(4))
|
||||
self._stream.seek(opos)
|
||||
return val
|
||||
return property(fget=fget)
|
||||
|
||||
@apply
|
||||
def hdr_len():
|
||||
def fget(self):
|
||||
opos = self._stream.tell()
|
||||
self._stream.seek(12)
|
||||
val = int32(self._stream.read(4))
|
||||
self._stream.seek(opos)
|
||||
return val
|
||||
return property(fget=fget)
|
||||
|
||||
@apply
|
||||
def num_pieces():
|
||||
def fget(self):
|
||||
opos = self._stream.tell()
|
||||
self._stream.seek(16)
|
||||
val = int32(self._stream.read(4))
|
||||
self._stream.seek(opos)
|
||||
return val
|
||||
return property(fget=fget)
|
||||
|
||||
@apply
|
||||
def sec_hdr_len():
|
||||
def fget(self):
|
||||
opos = self._stream.tell()
|
||||
self._stream.seek(20)
|
||||
val = int32(self._stream.read(4))
|
||||
self._stream.seek(opos)
|
||||
return val
|
||||
return property(fget=fget)
|
||||
|
||||
@apply
|
||||
def guid():
|
||||
def fget(self):
|
||||
opos = self._stream.tell()
|
||||
self._stream.seek(24)
|
||||
val = self._stream.read(16)
|
||||
self._stream.seek(opos)
|
||||
return val
|
||||
return property(fget=fget)
|
||||
|
||||
@apply
|
||||
def header():
|
||||
def fget(self):
|
||||
opos = self._stream.tell()
|
||||
size = self.hdr_len + self.num_pieces*self.PIECE_SIZE + self.sec_hdr_len
|
||||
self._stream.seek(0)
|
||||
val = self._stream.read(size)
|
||||
self._stream.seek(opos)
|
||||
return val
|
||||
return property(fget=fget)
|
||||
|
||||
def __init__(self, stream):
|
||||
self._stream = stream
|
||||
if self.magic != 'ITOLITLS':
|
||||
raise LitReadError('Not a valid LIT file')
|
||||
if self.version != 1:
|
||||
raise LitReadError('Unknown LIT version %d'%(self.version,))
|
||||
self.read_secondary_header()
|
||||
self.read_header_pieces()
|
||||
|
||||
|
||||
def read_secondary_header(self):
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE)
|
||||
bytes = self._stream.read(self.sec_hdr_len)
|
||||
offset = int32(bytes[4:])
|
||||
|
||||
while offset < len(bytes):
|
||||
blocktype = bytes[offset:offset+4]
|
||||
blockver = u32(bytes[offset+4:])
|
||||
|
||||
if blocktype == 'CAOL':
|
||||
if blockver != 2:
|
||||
raise LitReadError('Unknown CAOL block format %d'%(blockver,))
|
||||
self.creator_id = u32(bytes[offset+12:])
|
||||
self.entry_chunklen = u32(bytes[offset+20:])
|
||||
self.count_chunklen = u32(bytes[offset+24:])
|
||||
self.entry_unknown = u32(bytes[offset+28:])
|
||||
self.count_unknown = u32(bytes[offset+32:])
|
||||
offset += 48
|
||||
elif blocktype == 'ITSF':
|
||||
if blockver != 4:
|
||||
raise LitReadError('Unknown ITSF block format %d'%(blockver,))
|
||||
if u32(bytes[offset+4+16:]):
|
||||
raise LitReadError('This file has a 64bit content offset')
|
||||
self.content_offset = u32(bytes[offset+16:])
|
||||
self.timestamp = u32(bytes[offset+24:])
|
||||
self.language_id = u32(bytes[offset+28:])
|
||||
offset += 48
|
||||
|
||||
if not hasattr(self, 'content_offset'):
|
||||
raise LitReadError('Could not figure out the content offset')
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
|
||||
def read_header_pieces(self):
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
src = self.header[self.hdr_len:]
|
||||
for i in range(self.num_pieces):
|
||||
piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE]
|
||||
if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
|
||||
raise LitReadError('Piece %s has 64bit value'%(repr(piece),))
|
||||
offset, size = u32(piece), int32(piece[8:])
|
||||
self._stream.seek(offset)
|
||||
piece = self._stream.read(size)
|
||||
if i == 0:
|
||||
continue # Dont need this piece
|
||||
elif i == 1:
|
||||
if u32(piece[8:]) != self.entry_chunklen or \
|
||||
u32(piece[12:]) != self.entry_unknown:
|
||||
raise LitReadError('Secondary header does not match piece')
|
||||
self.read_directory(piece)
|
||||
elif i == 2:
|
||||
if u32(piece[8:]) != self.count_chunklen or \
|
||||
u32(piece[12:]) != self.count_unknown:
|
||||
raise LitReadError('Secondary header does not match piece')
|
||||
continue # No data needed from this piece
|
||||
elif i == 3:
|
||||
self.piece3_guid = piece
|
||||
elif i == 4:
|
||||
self.piece4_guid = piece
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
|
||||
def read_directory(self, piece):
|
||||
self.entries = []
|
||||
if not piece.startswith('IFCM'):
|
||||
raise LitReadError('Header piece #1 is not main directory.')
|
||||
chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28])
|
||||
|
||||
if 32 + chunk_size*num_chunks != len(piece):
|
||||
raise LitReadError('IFCM HEADER has incorrect length')
|
||||
|
||||
for chunk in range(num_chunks):
|
||||
p = 32 + chunk*chunk_size
|
||||
if piece[p:p+4] != 'AOLL':
|
||||
continue
|
||||
remaining = chunk_size - int32(piece[p+4:p+8]) - 48
|
||||
if remaining < 0:
|
||||
raise LitReadError('AOLL remaining count is negative')
|
||||
|
||||
entries = u16(piece[p+chunk_size-2:])
|
||||
|
||||
if entries <= 0: # Hopefully everything will work even without a correct entries count
|
||||
entries = (2**16)-1
|
||||
|
||||
piece = piece[p+48:]
|
||||
i = 0
|
||||
while i < entries:
|
||||
if remaining <= 0: break
|
||||
namelen, piece, remaining = encint(piece, remaining)
|
||||
if namelen != (namelen & 0x7fffffff):
|
||||
raise LitReadError('Directory entry had 64bit name length.')
|
||||
if namelen > remaining - 3:
|
||||
raise LitReadError('Read past end of directory chunk')
|
||||
name = piece[:namelen]
|
||||
piece = piece[namelen:]
|
||||
section, piece, remaining = encint(piece, remaining)
|
||||
offset, piece, remaining = encint(piece, remaining)
|
||||
size, piece, remaining = encint(piece, remaining)
|
||||
|
||||
entry = DirectoryEntry(name, section, offset, size)
|
||||
|
||||
if name == '::DataSpace/NameList':
|
||||
self.read_section_names(entry)
|
||||
elif name == '/manifest':
|
||||
self.read_manifest(entry)
|
||||
elif name == '/meta':
|
||||
self.read_meta(entry)
|
||||
self.entries.append(entry)
|
||||
i += 1
|
||||
|
||||
if not hasattr(self, 'sections'):
|
||||
raise LitReadError('Lit file does not have a valid NameList')
|
||||
|
||||
if not hasattr(self, 'manifest'):
|
||||
raise LitReadError('Lit file does not have a valid manifest')
|
||||
|
||||
def read_section_names(self, entry):
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
self._stream.seek(self.content_offset + entry.offset)
|
||||
raw = self._stream.read(entry.size)
|
||||
if len(raw) < 4:
|
||||
raise LitReadError('Invalid Namelist section')
|
||||
pos = 4
|
||||
self.num_sections = u16(raw[2:pos])
|
||||
|
||||
self.sections = {}
|
||||
for section in range(self.num_sections):
|
||||
size = u16(raw[pos:pos+2])
|
||||
pos += 2
|
||||
size = size*2 + 2
|
||||
if pos + size > len(raw):
|
||||
raise LitReadError('Invalid Namelist section')
|
||||
self.sections[section] = raw[pos:pos+size].decode('utf-16-le')
|
||||
pos += size
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
|
||||
def read_manifest(self, entry):
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
self.manifest = []
|
||||
self._stream.seek(self.content_offset + entry.offset)
|
||||
raw = self._stream.read(entry.size)
|
||||
pos = 0
|
||||
while pos < len(raw):
|
||||
size = ord(raw[pos])
|
||||
if size == 0: break
|
||||
pos += 1
|
||||
root = raw[pos:pos+size].decode('utf8')
|
||||
pos += size
|
||||
if pos >= len(raw):
|
||||
raise LitReadError('Truncated manifest.')
|
||||
for state in ['spine', 'not spine', 'css', 'images']:
|
||||
num_files = int32(raw[pos:pos+4])
|
||||
pos += 4
|
||||
if num_files == 0: continue
|
||||
|
||||
i = 0
|
||||
while i < num_files:
|
||||
if pos+5 >= len(raw):
|
||||
raise LitReadError('Truncated manifest.')
|
||||
offset = u32(raw[pos:pos+4])
|
||||
pos += 4
|
||||
|
||||
slen = ord(raw[pos])
|
||||
pos += 1
|
||||
internal = raw[pos:pos+slen].decode('utf8')
|
||||
pos += slen
|
||||
|
||||
slen = ord(raw[pos])
|
||||
pos += 1
|
||||
original = raw[pos:pos+slen].decode('utf8')
|
||||
pos += slen
|
||||
|
||||
slen = ord(raw[pos])
|
||||
pos += 1
|
||||
mime_type = raw[pos:pos+slen].decode('utf8')
|
||||
pos += slen +1
|
||||
|
||||
self.manifest.append(ManifestItem(original, internal, mime_type, offset, root, state))
|
||||
i += 1
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
|
||||
def read_meta(self, entry):
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
self._stream.seek(self.content_offset + entry.offset)
|
||||
raw = self._stream.read(entry.size)
|
||||
|
||||
xml = \
|
||||
'''\
|
||||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!DOCTYPE package
|
||||
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
|
||||
"http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
|
||||
'''+\
|
||||
unicode(UnBinary(raw, self.manifest))
|
||||
self.meta = xml
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
|
||||
def read_image(self, internal_name):
|
||||
cover_entry = None
|
||||
for entry in self.entries:
|
||||
if internal_name in entry.name:
|
||||
cover_entry = entry
|
||||
break
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
self._stream.seek(self.content_offset + cover_entry.offset)
|
||||
return self._stream.read(cover_entry.size)
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
from calibre.ebooks.lit.reader import LitReader
|
||||
|
||||
def get_metadata(stream):
|
||||
try:
|
||||
litfile = LitFile(stream)
|
||||
litfile = LitReader(stream)
|
||||
src = litfile.meta.encode('utf-8')
|
||||
mi = OPFReader(cStringIO.StringIO(src), dir=os.getcwd())
|
||||
cover_url, cover_item = mi.cover, None
|
||||
if cover_url:
|
||||
cover_url = relpath(cover_url, os.getcwd())
|
||||
for item in litfile.manifest:
|
||||
for item in litfile.manifest.values():
|
||||
if item.path == cover_url:
|
||||
cover_item = item.internal
|
||||
if cover_item is not None:
|
||||
@ -737,26 +28,28 @@ def get_metadata(stream):
|
||||
ext = 'jpg'
|
||||
else:
|
||||
ext = ext.lower()
|
||||
cd = litfile.read_image(cover_item)
|
||||
cd = litfile.get_file('/data/' + cover_item)
|
||||
mi.cover_data = (ext, cd) if cd else (None, None)
|
||||
except:
|
||||
title = stream.name if hasattr(stream, 'name') and stream.name else 'Unknown'
|
||||
mi = MetaInformation(title, ['Unknown'])
|
||||
return mi
|
||||
|
||||
|
||||
|
||||
def main(args=sys.argv):
|
||||
if len(args) != 2:
|
||||
print >>sys.stderr, _('Usage: %s file.lit')%(args[0],)
|
||||
print >>sys.stderr, _('Usage: %s file.lit') % args[0]
|
||||
return 1
|
||||
mi = get_metadata(open(args[1], 'rb'))
|
||||
fname = args[1]
|
||||
mi = get_metadata(open(fname, 'rb'))
|
||||
print unicode(mi)
|
||||
if mi.cover_data[1]:
|
||||
cover = os.path.abspath(os.path.splitext(os.path.basename(args[1]))[0] + '.' + mi.cover_data[0])
|
||||
cover = os.path.abspath(
|
||||
'.'.join((os.path.splitext(os.path.basename(fname))[0],
|
||||
mi.cover_data[0])))
|
||||
open(cover, 'wb').write(mi.cover_data[1])
|
||||
print _('Cover saved to'), cover
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
||||
|
@ -46,6 +46,7 @@ entry_points = {
|
||||
'librarything = calibre.ebooks.metadata.library_thing:main',
|
||||
'mobi2oeb = calibre.ebooks.mobi.reader:main',
|
||||
'lrf2html = calibre.ebooks.lrf.html.convert_to:main',
|
||||
'lit2oeb = calibre.ebooks.lit.reader:main',
|
||||
'calibre-debug = calibre.debug:main',
|
||||
'calibredb = calibre.library.cli:main',
|
||||
'calibre-fontconfig = calibre.utils.fontconfig:main',
|
||||
|
5
src/calibre/utils/lzx-setup.py
Normal file
@ -0,0 +1,5 @@
|
||||
from distutils.core import setup, Extension
|
||||
|
||||
setup(name="lzx", version="1.0",
|
||||
ext_modules=[Extension('lzx', sources=['lzx/lzxmodule.c', 'lzx/lzxd.c'],
|
||||
include_dirs=['lzx'])])
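For orientation, a rough sketch of how the reader drives the extension built by this setup script, based on the calls made in LitReader._decompress above (lzx.init, lzx.reset, lzx.decompress). The block list and sizes are placeholders, and the interface is inferred from that caller rather than documented here.

import lzx   # the C extension compiled from lzx/lzxmodule.c and lzx/lzxd.c

def decompress_section(blocks, window_bits):
    # One LZX state per section: initialise for the window size (15-21 bits),
    # then reset before each independently compressed block, exactly as the
    # reset-table loop in LitReader._decompress does.
    lzx.init(window_bits)
    out = []
    for compressed, uncompressed_len in blocks:
        lzx.reset()
        out.append(lzx.decompress(compressed, uncompressed_len))
    return ''.join(out)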
|
169
src/calibre/utils/lzx/lzx.h
Normal file
@ -0,0 +1,169 @@
|
||||
/* This file is part of libmspack.
|
||||
* (C) 2003-2004 Stuart Caie.
|
||||
*
|
||||
* The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted
|
||||
* by Microsoft Corporation.
|
||||
*
|
||||
* libmspack is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License (LGPL) version 2.1
|
||||
*
|
||||
* For further details, see the file COPYING.LIB distributed with libmspack
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#ifndef MSPACK_LZX_H
|
||||
#define MSPACK_LZX_H 1
|
||||
|
||||
/* LZX compression / decompression definitions */
|
||||
|
||||
/* some constants defined by the LZX specification */
|
||||
#define LZX_MIN_MATCH (2)
|
||||
#define LZX_MAX_MATCH (257)
|
||||
#define LZX_NUM_CHARS (256)
|
||||
#define LZX_BLOCKTYPE_INVALID (0) /* also blocktypes 4-7 invalid */
|
||||
#define LZX_BLOCKTYPE_VERBATIM (1)
|
||||
#define LZX_BLOCKTYPE_ALIGNED (2)
|
||||
#define LZX_BLOCKTYPE_UNCOMPRESSED (3)
|
||||
#define LZX_PRETREE_NUM_ELEMENTS (20)
|
||||
#define LZX_ALIGNED_NUM_ELEMENTS (8) /* aligned offset tree #elements */
|
||||
#define LZX_NUM_PRIMARY_LENGTHS (7) /* this one missing from spec! */
|
||||
#define LZX_NUM_SECONDARY_LENGTHS (249) /* length tree #elements */
|
||||
|
||||
/* LZX huffman defines: tweak tablebits as desired */
|
||||
#define LZX_PRETREE_MAXSYMBOLS (LZX_PRETREE_NUM_ELEMENTS)
|
||||
#define LZX_PRETREE_TABLEBITS (6)
|
||||
#define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 50*8)
|
||||
#define LZX_MAINTREE_TABLEBITS (12)
|
||||
#define LZX_LENGTH_MAXSYMBOLS (LZX_NUM_SECONDARY_LENGTHS+1)
|
||||
#define LZX_LENGTH_TABLEBITS (12)
|
||||
#define LZX_ALIGNED_MAXSYMBOLS (LZX_ALIGNED_NUM_ELEMENTS)
|
||||
#define LZX_ALIGNED_TABLEBITS (7)
|
||||
#define LZX_LENTABLE_SAFETY (64) /* table decoding overruns are allowed */
|
||||
|
||||
#define LZX_FRAME_SIZE (32768) /* the size of a frame in LZX */
|
||||
|
||||
struct lzxd_stream {
|
||||
struct mspack_system *sys; /* I/O routines */
|
||||
struct mspack_file *input; /* input file handle */
|
||||
struct mspack_file *output; /* output file handle */
|
||||
|
||||
off_t offset; /* number of bytes actually output */
|
||||
off_t length; /* overall decompressed length of stream */
|
||||
|
||||
unsigned char *window; /* decoding window */
|
||||
unsigned int window_size; /* window size */
|
||||
unsigned int window_posn; /* decompression offset within window */
|
||||
unsigned int frame_posn; /* current frame offset within the window */
|
||||
unsigned int frame; /* the number of 32kb frames processed */
|
||||
unsigned int reset_interval; /* which frame do we reset the compressor? */
|
||||
|
||||
unsigned int R0, R1, R2; /* for the LRU offset system */
|
||||
unsigned int block_length; /* uncompressed length of this LZX block */
|
||||
unsigned int block_remaining; /* uncompressed bytes still left to decode */
|
||||
|
||||
signed int intel_filesize; /* magic header value used for transform */
|
||||
signed int intel_curpos; /* current offset in transform space */
|
||||
|
||||
unsigned char intel_started; /* has intel E8 decoding started? */
|
||||
unsigned char block_type; /* type of the current block */
|
||||
unsigned char header_read; /* have we started decoding at all yet? */
|
||||
unsigned char posn_slots; /* how many posn slots in stream? */
|
||||
unsigned char input_end; /* have we reached the end of input? */
|
||||
|
||||
int error;
|
||||
|
||||
/* I/O buffering */
|
||||
unsigned char *inbuf, *i_ptr, *i_end, *o_ptr, *o_end;
|
||||
unsigned int bit_buffer, bits_left, inbuf_size;
|
||||
|
||||
/* huffman code lengths */
|
||||
unsigned char PRETREE_len [LZX_PRETREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY];
|
||||
unsigned char MAINTREE_len [LZX_MAINTREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY];
|
||||
unsigned char LENGTH_len [LZX_LENGTH_MAXSYMBOLS + LZX_LENTABLE_SAFETY];
|
||||
unsigned char ALIGNED_len [LZX_ALIGNED_MAXSYMBOLS + LZX_LENTABLE_SAFETY];
|
||||
|
||||
/* huffman decoding tables */
|
||||
unsigned short PRETREE_table [(1 << LZX_PRETREE_TABLEBITS) +
|
||||
(LZX_PRETREE_MAXSYMBOLS * 2)];
|
||||
unsigned short MAINTREE_table[(1 << LZX_MAINTREE_TABLEBITS) +
|
||||
(LZX_MAINTREE_MAXSYMBOLS * 2)];
|
||||
unsigned short LENGTH_table [(1 << LZX_LENGTH_TABLEBITS) +
|
||||
(LZX_LENGTH_MAXSYMBOLS * 2)];
|
||||
unsigned short ALIGNED_table [(1 << LZX_ALIGNED_TABLEBITS) +
|
||||
(LZX_ALIGNED_MAXSYMBOLS * 2)];
|
||||
|
||||
/* this is used purely for doing the intel E8 transform */
|
||||
unsigned char e8_buf[LZX_FRAME_SIZE];
|
||||
};
|
||||
|
||||
/* allocates LZX decompression state for decoding the given stream.
|
||||
*
|
||||
* - returns NULL if window_bits is outwith the range 15 to 21 (inclusive).
|
||||
*
|
||||
* - uses system->alloc() to allocate memory
|
||||
*
|
||||
* - returns NULL if not enough memory
|
||||
*
|
||||
* - window_bits is the size of the LZX window, from 32Kb (15) to 2Mb (21).
|
||||
*
|
||||
* - reset_interval is how often the bitstream is reset, measured in
|
||||
* multiples of 32Kb bytes output. For CAB LZX streams, this is always 0
|
||||
* (does not occur).
|
||||
*
|
||||
* - input_buffer_size is how many bytes to use as an input bitstream buffer
|
||||
*
|
||||
* - output_length is the length in bytes of the entirely decompressed
|
||||
* output stream, if known in advance. It is used to correctly perform
|
||||
* the Intel E8 transformation, which must stop 6 bytes before the very
|
||||
* end of the decompressed stream. It is not otherwise used or adhered
|
||||
* to. If the full decompressed length is known in advance, set it here.
|
||||
* If it is NOT known, use the value 0, and call lzxd_set_output_length()
|
||||
* once it is known. If never set, 4 of the final 6 bytes of the output
|
||||
* stream may be incorrect.
|
||||
*/
|
||||
extern struct lzxd_stream *lzxd_init(struct mspack_system *system,
|
||||
struct mspack_file *input,
|
||||
struct mspack_file *output,
|
||||
int window_bits,
|
||||
int reset_interval,
|
||||
int input_buffer_size,
|
||||
off_t output_length);
|
||||
|
||||
/* see description of output_length in lzxd_init() */
|
||||
extern void lzxd_set_output_length(struct lzxd_stream *lzx,
|
||||
off_t output_length);
|
||||
|
||||
/* decompresses, or decompresses more of, an LZX stream.
|
||||
*
|
||||
* - out_bytes of data will be decompressed and the function will return
|
||||
* with an MSPACK_ERR_OK return code.
|
||||
*
|
||||
* - decompressing will stop as soon as out_bytes is reached. if the true
|
||||
* amount of bytes decoded spills over that amount, they will be kept for
|
||||
* a later invocation of lzxd_decompress().
|
||||
*
|
||||
* - the output bytes will be passed to the system->write() function given in
|
||||
* lzxd_init(), using the output file handle given in lzxd_init(). More
|
||||
* than one call may be made to system->write().
|
||||
*
|
||||
* - LZX will read input bytes as necessary using the system->read() function
|
||||
* given in lzxd_init(), using the input file handle given in lzxd_init().
|
||||
* This will continue until system->read() returns 0 bytes, or an error.
|
||||
* input streams should convey an "end of input stream" by refusing to
|
||||
* supply all the bytes that LZX asks for when they reach the end of the
|
||||
* stream, rather than return an error code.
|
||||
*
|
||||
* - if an error code other than MSPACK_ERR_OK is returned, the stream should
|
||||
* be considered unusable and lzxd_decompress() should not be called again
|
||||
* on this stream.
|
||||
*/
|
||||
extern int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes);
|
||||
|
||||
/* frees all state associated with an LZX data stream
|
||||
*
|
||||
* - calls system->free() using the system pointer given in lzxd_init()
|
||||
*/
|
||||
void lzxd_free(struct lzxd_stream *lzx);
|
||||
|
||||
#endif
|
905
src/calibre/utils/lzx/lzxd.c
Normal file
@ -0,0 +1,905 @@
|
||||
/* This file is part of libmspack.
|
||||
* (C) 2003-2004 Stuart Caie.
|
||||
*
|
||||
* The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted
|
||||
* by Microsoft Corporation.
|
||||
*
|
||||
* libmspack is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License (LGPL) version 2.1
|
||||
*
|
||||
* For further details, see the file COPYING.LIB distributed with libmspack
|
||||
*/
|
||||
|
||||
/* LZX decompression implementation */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include <mspack.h>
|
||||
#include <system.h>
|
||||
#include <lzx.h>
|
||||
|
||||
/* Microsoft's LZX document and their implementation of the
|
||||
* com.ms.util.cab Java package do not concur.
|
||||
*
|
||||
* In the LZX document, there is a table showing the correlation between
|
||||
* window size and the number of position slots. It states that the 1MB
|
||||
* window = 40 slots and the 2MB window = 42 slots. In the implementation,
|
||||
* 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the
|
||||
* first slot whose position base is equal to or more than the required
|
||||
* window size'. This would explain why other tables in the document refer
|
||||
* to 50 slots rather than 42.
|
||||
*
|
||||
* The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode
|
||||
* is not defined in the specification.
|
||||
*
|
||||
* The LZX document does not state the uncompressed block has an
|
||||
* uncompressed length field. Where does this length field come from, so
|
||||
* we can know how large the block is? The implementation has it as the 24
|
||||
* bits following after the 3 blocktype bits, before the alignment
|
||||
* padding.
|
||||
*
|
||||
* The LZX document states that aligned offset blocks have their aligned
|
||||
* offset huffman tree AFTER the main and length trees. The implementation
|
||||
* suggests that the aligned offset tree is BEFORE the main and length
|
||||
* trees.
|
||||
*
|
||||
* The LZX document decoding algorithm states that, in an aligned offset
|
||||
* block, if an extra_bits value is 1, 2 or 3, then that number of bits
|
||||
* should be read and the result added to the match offset. This is
|
||||
* correct for 1 and 2, but not 3, where just a huffman symbol (using the
|
||||
* aligned tree) should be read.
|
||||
*
|
||||
* Regarding the E8 preprocessing, the LZX document states 'No translation
|
||||
* may be performed on the last 6 bytes of the input block'. This is
|
||||
* correct. However, the pseudocode provided checks for the *E8 leader*
|
||||
* up to the last 6 bytes. If the leader appears between -10 and -7 bytes
|
||||
* from the end, this would cause the next four bytes to be modified, at
|
||||
* least one of which would be in the last 6 bytes, which is not allowed
|
||||
* according to the spec.
|
||||
*
|
||||
* The specification states that the huffman trees must always contain at
|
||||
* least one element. However, many CAB files contain blocks where the
|
||||
* length tree is completely empty (because there are no matches), and
|
||||
* this is expected to succeed.
|
||||
*/
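The position-slot rule quoted above is easy to check numerically. The short Python sketch below (an illustration only) rebuilds the extra_bits/position_base tables the same way lzxd_static_init() does further down, then counts slots as "the first slot whose position base is equal to or more than the required window size", reproducing 42 slots for a 1MB window and 50 for 2MB.

def num_position_slots(window_size):
    # extra_bits: 0,0,0,0,1,1,2,2,3,3,... capped at 17 (see lzxd_static_init)
    extra_bits, j = [], 0
    for i in range(0, 52, 2):
        extra_bits += [j, j]
        if i != 0 and j < 17:
            j += 1
    # position_base: running sum of 1 << extra_bits -> 0,1,2,3,4,6,8,12,16,24,...
    position_base, total = [], 0
    for bits in extra_bits[:51]:
        position_base.append(total)
        total += 1 << bits
    # the rule quoted in the comment above
    for slot, base in enumerate(position_base):
        if base >= window_size:
            return slot

assert num_position_slots(1 << 15) == 30   # matches the table in lzxd_init()
assert num_position_slots(1 << 20) == 42   # 1MB window
assert num_position_slots(1 << 21) == 50   # 2MB window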
|
||||
|
||||
|
||||
/* LZX decompressor input macros
|
||||
*
|
||||
* STORE_BITS stores bitstream state in lzxd_stream structure
|
||||
* RESTORE_BITS restores bitstream state from lzxd_stream structure
|
||||
* READ_BITS(var,n) takes N bits from the buffer and puts them in var
|
||||
* ENSURE_BITS(n) ensures there are at least N bits in the bit buffer.
|
||||
* PEEK_BITS(n) extracts without removing N bits from the bit buffer
|
||||
* REMOVE_BITS(n) removes N bits from the bit buffer
|
||||
*
|
||||
* These bit access routines work by using the area beyond the MSB and the
|
||||
* LSB as a free source of zeroes when shifting. This avoids having to
|
||||
* mask any bits. So we have to know the bit width of the bit buffer
|
||||
* variable.
|
||||
*
|
||||
* The bit buffer datatype should be at least 32 bits wide: it must be
|
||||
* possible to ENSURE_BITS(16), so it must be possible to add 16 new bits
|
||||
* to the bit buffer when the bit buffer already has 1 to 15 bits left.
|
||||
*/
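As a concrete model of the scheme described above (illustration only, not part of libmspack): the valid bits sit left-justified in a 32-bit word, each refill splices a little-endian 16-bit unit in just below them, PEEK reads from the top and REMOVE shifts the used bits out.

class BitReader(object):
    WIDTH = 32                                   # BITBUF_WIDTH for a 32-bit buffer

    def __init__(self, data):                    # data is a byte string
        self.data, self.pos = data, 0
        self.bit_buffer, self.bits_left = 0, 0

    def ensure_bits(self, nbits):                # ENSURE_BITS(n)
        while self.bits_left < nbits:
            lo, hi = ord(self.data[self.pos]), ord(self.data[self.pos + 1])
            self.pos += 2
            self.bit_buffer |= ((hi << 8) | lo) << (self.WIDTH - 16 - self.bits_left)
            self.bits_left += 16

    def peek_bits(self, nbits):                  # PEEK_BITS(n)
        return self.bit_buffer >> (self.WIDTH - nbits)

    def remove_bits(self, nbits):                # REMOVE_BITS(n)
        self.bit_buffer = (self.bit_buffer << nbits) & ((1 << self.WIDTH) - 1)
        self.bits_left -= nbits

    def read_bits(self, nbits):                  # READ_BITS(var, n)
        self.ensure_bits(nbits)
        val = self.peek_bits(nbits)
        self.remove_bits(nbits)
        return val

For example, BitReader('\xe8\x03\x00\x00').read_bits(3) returns 0 and leaves the remaining 13 bits of 0x03E8 queued at the top of the buffer.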
|
||||
|
||||
#if HAVE_LIMITS_H
|
||||
# include <limits.h>
|
||||
#endif
|
||||
#ifndef CHAR_BIT
|
||||
# define CHAR_BIT (8)
|
||||
#endif
|
||||
#define BITBUF_WIDTH (sizeof(bit_buffer) * CHAR_BIT)
|
||||
|
||||
#define STORE_BITS do { \
|
||||
lzx->i_ptr = i_ptr; \
|
||||
lzx->i_end = i_end; \
|
||||
lzx->bit_buffer = bit_buffer; \
|
||||
lzx->bits_left = bits_left; \
|
||||
} while (0)
|
||||
|
||||
#define RESTORE_BITS do { \
|
||||
i_ptr = lzx->i_ptr; \
|
||||
i_end = lzx->i_end; \
|
||||
bit_buffer = lzx->bit_buffer; \
|
||||
bits_left = lzx->bits_left; \
|
||||
} while (0)
|
||||
|
||||
#define ENSURE_BITS(nbits) \
|
||||
while (bits_left < (nbits)) { \
|
||||
if (i_ptr >= i_end) { \
|
||||
if (lzxd_read_input(lzx)) return lzx->error; \
|
||||
i_ptr = lzx->i_ptr; \
|
||||
i_end = lzx->i_end; \
|
||||
} \
|
||||
bit_buffer |= ((i_ptr[1] << 8) | i_ptr[0]) \
|
||||
<< (BITBUF_WIDTH - 16 - bits_left); \
|
||||
bits_left += 16; \
|
||||
i_ptr += 2; \
|
||||
}
|
||||
|
||||
#define PEEK_BITS(nbits) (bit_buffer >> (BITBUF_WIDTH - (nbits)))
|
||||
|
||||
#define REMOVE_BITS(nbits) ((bit_buffer <<= (nbits)), (bits_left -= (nbits)))
|
||||
|
||||
#define READ_BITS(val, nbits) do { \
|
||||
ENSURE_BITS(nbits); \
|
||||
(val) = PEEK_BITS(nbits); \
|
||||
REMOVE_BITS(nbits); \
|
||||
} while (0)
|
||||
|
||||
static int lzxd_read_input(struct lzxd_stream *lzx) {
|
||||
int read = lzx->sys->read(lzx->input, &lzx->inbuf[0], (int)lzx->inbuf_size);
|
||||
if (read < 0) return lzx->error = MSPACK_ERR_READ;
|
||||
|
||||
/* huff decode's ENSURE_BITS(16) might overrun the input stream, even
|
||||
* if those bits aren't used, so fake 2 more bytes */
|
||||
if (read == 0) {
|
||||
if (lzx->input_end) {
|
||||
D(("out of input bytes"))
|
||||
return lzx->error = MSPACK_ERR_READ;
|
||||
}
|
||||
else {
|
||||
read = 2;
|
||||
lzx->inbuf[0] = lzx->inbuf[1] = 0;
|
||||
lzx->input_end = 1;
|
||||
}
|
||||
}
|
||||
|
||||
lzx->i_ptr = &lzx->inbuf[0];
|
||||
lzx->i_end = &lzx->inbuf[read];
|
||||
|
||||
return MSPACK_ERR_OK;
|
||||
}
|
||||
|
||||
/* Huffman decoding macros */
|
||||
|
||||
/* READ_HUFFSYM(tablename, var) decodes one huffman symbol from the
|
||||
* bitstream using the stated table and puts it in var.
|
||||
*/
|
||||
#define READ_HUFFSYM(tbl, var) do { \
|
||||
/* huffman symbols can be up to 16 bits long */ \
|
||||
ENSURE_BITS(16); \
|
||||
/* immediate table lookup of [tablebits] bits of the code */ \
|
||||
sym = lzx->tbl##_table[PEEK_BITS(LZX_##tbl##_TABLEBITS)]; \
|
||||
/* is the symbol longer than [tablebits] bits? (i=node index) */ \
|
||||
if (sym >= LZX_##tbl##_MAXSYMBOLS) { \
|
||||
/* decode remaining bits by tree traversal */ \
|
||||
i = 1 << (BITBUF_WIDTH - LZX_##tbl##_TABLEBITS); \
|
||||
do { \
|
||||
/* one less bit. error if we run out of bits before decode */ \
|
||||
i >>= 1; \
|
||||
if (i == 0) { \
|
||||
D(("out of bits in huffman decode")) \
|
||||
return lzx->error = MSPACK_ERR_DECRUNCH; \
|
||||
} \
|
||||
/* double node index and add 0 (left branch) or 1 (right) */ \
|
||||
sym <<= 1; sym |= (bit_buffer & i) ? 1 : 0; \
|
||||
/* hop to next node index / decoded symbol */ \
|
||||
sym = lzx->tbl##_table[sym]; \
|
||||
/* while we are still in node indices, not decoded symbols */ \
|
||||
} while (sym >= LZX_##tbl##_MAXSYMBOLS); \
|
||||
} \
|
||||
/* result */ \
|
||||
(var) = sym; \
|
||||
/* look up the code length of that symbol and discard those bits */ \
|
||||
i = lzx->tbl##_len[sym]; \
|
||||
REMOVE_BITS(i); \
|
||||
} while (0)
|
||||
|
||||
/* BUILD_TABLE(tbl) builds a huffman lookup table from code lengths */
|
||||
#define BUILD_TABLE(tbl) \
|
||||
if (make_decode_table(LZX_##tbl##_MAXSYMBOLS, LZX_##tbl##_TABLEBITS, \
|
||||
&lzx->tbl##_len[0], &lzx->tbl##_table[0])) \
|
||||
{ \
|
||||
D(("failed to build %s table", #tbl)) \
|
||||
return lzx->error = MSPACK_ERR_DECRUNCH; \
|
||||
}
|
||||
|
||||
/* make_decode_table(nsyms, nbits, length[], table[])
|
||||
*
|
||||
* This function was coded by David Tritscher. It builds a fast huffman
|
||||
* decoding table from a canonical huffman code lengths table.
|
||||
*
|
||||
* nsyms = total number of symbols in this huffman tree.
|
||||
* nbits = any symbols with a code length of nbits or less can be decoded
|
||||
* in one lookup of the table.
|
||||
* length = A table to get code lengths from [0 to syms-1]
|
||||
* table = The table to fill up with decoded symbols and pointers.
|
||||
*
|
||||
* Returns 0 for OK or 1 for error
|
||||
*/
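To make the first stage concrete, here is a simplified Python rendition (illustration only) of the direct-mapping fill for codes no longer than nbits; the second stage, which spills longer codes into tree nodes stored after the table, is left out.

def make_direct_table(nbits, lengths):
    # lengths[sym] is the canonical code length of symbol sym (all <= nbits here).
    # Symbols are assigned codes in (length, symbol) order, and every table slot
    # whose top 'length' bits equal that code maps straight to the symbol.
    table = [None] * (1 << nbits)
    pos = 0                               # next free code, left-justified in nbits
    for bit_num in range(1, nbits + 1):
        fill = 1 << (nbits - bit_num)     # slots covered by one code of this length
        for sym, length in enumerate(lengths):
            if length != bit_num:
                continue
            if pos + fill > len(table):
                raise ValueError('over-subscribed code lengths')
            table[pos:pos + fill] = [sym] * fill
            pos += fill
    return table

# Lengths 1,2,3,3 give codes 0, 10, 110, 111; with nbits=3 the table reads
# [0,0,0,0,1,1,2,3], so any 3 peeked bits resolve a symbol in one lookup.
assert make_direct_table(3, [1, 2, 3, 3]) == [0, 0, 0, 0, 1, 1, 2, 3]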
|
||||
|
||||
static int make_decode_table(unsigned int nsyms, unsigned int nbits,
|
||||
unsigned char *length, unsigned short *table)
|
||||
{
|
||||
register unsigned short sym;
|
||||
register unsigned int leaf, fill;
|
||||
register unsigned char bit_num;
|
||||
unsigned int pos = 0; /* the current position in the decode table */
|
||||
unsigned int table_mask = 1 << nbits;
|
||||
unsigned int bit_mask = table_mask >> 1; /* don't do 0 length codes */
|
||||
unsigned int next_symbol = bit_mask; /* base of allocation for long codes */
|
||||
|
||||
/* fill entries for codes short enough for a direct mapping */
|
||||
for (bit_num = 1; bit_num <= nbits; bit_num++) {
|
||||
for (sym = 0; sym < nsyms; sym++) {
|
||||
if (length[sym] != bit_num) continue;
|
||||
leaf = pos;
|
||||
if((pos += bit_mask) > table_mask) return 1; /* table overrun */
|
||||
/* fill all possible lookups of this symbol with the symbol itself */
|
||||
for (fill = bit_mask; fill-- > 0;) table[leaf++] = sym;
|
||||
}
|
||||
bit_mask >>= 1;
|
||||
}
|
||||
|
||||
/* full table already? */
|
||||
if (pos == table_mask) return 0;
|
||||
|
||||
/* clear the remainder of the table */
|
||||
for (sym = pos; sym < table_mask; sym++) table[sym] = 0xFFFF;
|
||||
|
||||
/* allow codes to be up to nbits+16 long, instead of nbits */
|
||||
pos <<= 16;
|
||||
table_mask <<= 16;
|
||||
bit_mask = 1 << 15;
|
||||
|
||||
for (bit_num = nbits+1; bit_num <= 16; bit_num++) {
|
||||
for (sym = 0; sym < nsyms; sym++) {
|
||||
if (length[sym] != bit_num) continue;
|
||||
|
||||
leaf = pos >> 16;
|
||||
for (fill = 0; fill < bit_num - nbits; fill++) {
|
||||
/* if this path hasn't been taken yet, 'allocate' two entries */
|
||||
if (table[leaf] == 0xFFFF) {
|
||||
table[(next_symbol << 1)] = 0xFFFF;
|
||||
table[(next_symbol << 1) + 1] = 0xFFFF;
|
||||
table[leaf] = next_symbol++;
|
||||
}
|
||||
/* follow the path and select either left or right for next bit */
|
||||
leaf = table[leaf] << 1;
|
||||
if ((pos >> (15-fill)) & 1) leaf++;
|
||||
}
|
||||
table[leaf] = sym;
|
||||
|
||||
if ((pos += bit_mask) > table_mask) return 1; /* table overflow */
|
||||
}
|
||||
bit_mask >>= 1;
|
||||
}
|
||||
|
||||
/* full table? */
|
||||
if (pos == table_mask) return 0;
|
||||
|
||||
/* either erroneous table, or all elements are 0 - let's find out. */
|
||||
for (sym = 0; sym < nsyms; sym++) if (length[sym]) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* READ_LENGTHS(tablename, first, last) reads in code lengths for symbols
|
||||
* first to last in the given table. The code lengths are stored in their
|
||||
* own special LZX way.
|
||||
*/
|
||||
#define READ_LENGTHS(tbl, first, last) do { \
|
||||
STORE_BITS; \
|
||||
if (lzxd_read_lens(lzx, &lzx->tbl##_len[0], (first), \
|
||||
(unsigned int)(last))) return lzx->error; \
|
||||
RESTORE_BITS; \
|
||||
} while (0)
|
||||
|
||||
static int lzxd_read_lens(struct lzxd_stream *lzx, unsigned char *lens,
|
||||
unsigned int first, unsigned int last)
|
||||
{
|
||||
/* bit buffer and huffman symbol decode variables */
|
||||
register unsigned int bit_buffer;
|
||||
register int bits_left, i;
|
||||
register unsigned short sym;
|
||||
unsigned char *i_ptr, *i_end;
|
||||
|
||||
unsigned int x, y;
|
||||
int z;
|
||||
|
||||
RESTORE_BITS;
|
||||
|
||||
/* read lengths for pretree (20 symbols, lengths stored in fixed 4 bits) */
|
||||
for (x = 0; x < 20; x++) {
|
||||
READ_BITS(y, 4);
|
||||
lzx->PRETREE_len[x] = y;
|
||||
}
|
||||
BUILD_TABLE(PRETREE);
|
||||
|
||||
for (x = first; x < last; ) {
|
||||
READ_HUFFSYM(PRETREE, z);
|
||||
if (z == 17) {
|
||||
/* code = 17, run of ([read 4 bits]+4) zeros */
|
||||
READ_BITS(y, 4); y += 4;
|
||||
while (y--) lens[x++] = 0;
|
||||
}
|
||||
else if (z == 18) {
|
||||
/* code = 18, run of ([read 5 bits]+20) zeros */
|
||||
READ_BITS(y, 5); y += 20;
|
||||
while (y--) lens[x++] = 0;
|
||||
}
|
||||
else if (z == 19) {
|
||||
/* code = 19, run of ([read 1 bit]+4) [read huffman symbol] */
|
||||
READ_BITS(y, 1); y += 4;
|
||||
READ_HUFFSYM(PRETREE, z);
|
||||
z = lens[x] - z; if (z < 0) z += 17;
|
||||
while (y--) lens[x++] = z;
|
||||
}
|
||||
else {
|
||||
/* code = 0 to 16, delta current length entry */
|
||||
z = lens[x] - z; if (z < 0) z += 17;
|
||||
lens[x++] = z;
|
||||
}
|
||||
}
|
||||
|
||||
STORE_BITS;
|
||||
|
||||
return MSPACK_ERR_OK;
|
||||
}
|
||||
|
||||
/* LZX static data tables:
|
||||
*
|
||||
* LZX uses 'position slots' to represent match offsets. For every match,
|
||||
* a small 'position slot' number and a small offset from that slot are
|
||||
* encoded instead of one large offset.
|
||||
*
|
||||
* position_base[] is an index to the position slot bases
|
||||
*
|
||||
* extra_bits[] states how many bits of offset-from-base data is needed.
|
||||
*/
|
||||
static unsigned int position_base[51];
|
||||
static unsigned char extra_bits[51];
|
||||
|
||||
static void lzxd_static_init(void) {
|
||||
int i, j;
|
||||
|
||||
for (i = 0, j = 0; i < 51; i += 2) {
|
||||
extra_bits[i] = j; /* 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7... */
|
||||
if (i + 1 < 51) extra_bits[i+1] = j; /* don't write one entry past the 51-element table */
|
||||
if ((i != 0) && (j < 17)) j++; /* 0,0,1,2,3,4...15,16,17,17,17,17... */
|
||||
}
|
||||
|
||||
for (i = 0, j = 0; i < 51; i++) {
|
||||
position_base[i] = j; /* 0,1,2,3,4,6,8,12,16,24,32,... */
|
||||
j += 1 << extra_bits[i]; /* 1,1,1,1,2,2,4,4,8,8,16,16,32,32,... */
|
||||
}
|
||||
}
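Tying these tables to the match-decoding loop below: a hedged Python sketch of how a verbatim-block match offset comes out of a position slot plus its extra bits. The three repeat-offset cases mirror the switch statement in lzxd_decompress(); the "- 2" in the last branch is the usual LZX convention (slots 0-2 stand for repeats), stated here as an assumption since that arithmetic falls just past the end of this excerpt.

def decode_match_offset(slot, extra_value, lru, position_base):
    # lru is [R0, R1, R2], most recently used offset first.
    R0, R1, R2 = lru
    if slot == 0:
        return R0, [R0, R1, R2]          # repeat the most recent offset
    if slot == 1:
        return R1, [R1, R0, R2]          # second most recent, moved to the front
    if slot == 2:
        return R2, [R2, R1, R0]          # third most recent, moved to the front
    # slot 3 has no extra bits, so this yields offset 1, matching 'case 3' below
    offset = position_base[slot] - 2 + extra_value
    return offset, [offset, R0, R1]      # brand-new offset pushed onto the LRU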
|
||||
|
||||
static void lzxd_reset_state(struct lzxd_stream *lzx) {
|
||||
int i;
|
||||
|
||||
lzx->R0 = 1;
|
||||
lzx->R1 = 1;
|
||||
lzx->R2 = 1;
|
||||
lzx->header_read = 0;
|
||||
lzx->block_remaining = 0;
|
||||
lzx->block_type = LZX_BLOCKTYPE_INVALID;
|
||||
|
||||
/* initialise tables to 0 (because deltas will be applied to them) */
|
||||
for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) lzx->MAINTREE_len[i] = 0;
|
||||
for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++) lzx->LENGTH_len[i] = 0;
|
||||
}
|
||||
|
||||
/*-------- main LZX code --------*/
|
||||
|
||||
struct lzxd_stream *lzxd_init(struct mspack_system *system,
|
||||
struct mspack_file *input,
|
||||
struct mspack_file *output,
|
||||
int window_bits,
|
||||
int reset_interval,
|
||||
int input_buffer_size,
|
||||
off_t output_length)
|
||||
{
|
||||
unsigned int window_size = 1 << window_bits;
|
||||
struct lzxd_stream *lzx;
|
||||
|
||||
if (!system) return NULL;
|
||||
|
||||
/* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */
|
||||
if (window_bits < 15 || window_bits > 21) return NULL;
|
||||
|
||||
input_buffer_size = (input_buffer_size + 1) & -2;
|
||||
if (!input_buffer_size) return NULL;
|
||||
|
||||
/* initialise static data */
|
||||
lzxd_static_init();
|
||||
|
||||
/* allocate decompression state */
|
||||
if (!(lzx = system->alloc(system, sizeof(struct lzxd_stream)))) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* allocate decompression window and input buffer */
|
||||
lzx->window = system->alloc(system, (size_t) window_size);
|
||||
lzx->inbuf = system->alloc(system, (size_t) input_buffer_size);
|
||||
if (!lzx->window || !lzx->inbuf) {
|
||||
system->free(lzx->window);
|
||||
system->free(lzx->inbuf);
|
||||
system->free(lzx);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* initialise decompression state */
|
||||
lzx->sys = system;
|
||||
lzx->input = input;
|
||||
lzx->output = output;
|
||||
lzx->offset = 0;
|
||||
lzx->length = output_length;
|
||||
|
||||
lzx->inbuf_size = input_buffer_size;
|
||||
lzx->window_size = 1 << window_bits;
|
||||
lzx->window_posn = 0;
|
||||
lzx->frame_posn = 0;
|
||||
lzx->frame = 0;
|
||||
lzx->reset_interval = reset_interval;
|
||||
lzx->intel_filesize = 0;
|
||||
lzx->intel_curpos = 0;
|
||||
|
||||
/* window bits: 15 16 17 18 19 20 21
|
||||
* position slots: 30 32 34 36 38 42 50 */
|
||||
lzx->posn_slots = ((window_bits == 21) ? 50 :
|
||||
((window_bits == 20) ? 42 : (window_bits << 1)));
|
||||
lzx->intel_started = 0;
|
||||
lzx->input_end = 0;
|
||||
|
||||
lzx->error = MSPACK_ERR_OK;
|
||||
|
||||
lzx->i_ptr = lzx->i_end = &lzx->inbuf[0];
|
||||
lzx->o_ptr = lzx->o_end = &lzx->e8_buf[0];
|
||||
lzx->bit_buffer = lzx->bits_left = 0;
|
||||
|
||||
lzxd_reset_state(lzx);
|
||||
return lzx;
|
||||
}
|
||||
|
||||
void lzxd_set_output_length(struct lzxd_stream *lzx, off_t out_bytes) {
|
||||
if (lzx) lzx->length = out_bytes;
|
||||
}
|
||||
|
||||
int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
  /* bitstream reading and huffman variables */
  register unsigned int bit_buffer;
  register int bits_left, i=0;
  register unsigned short sym;
  unsigned char *i_ptr, *i_end;

  int match_length, length_footer, extra, verbatim_bits, bytes_todo;
  int this_run, main_element, aligned_bits, j;
  unsigned char *window, *runsrc, *rundest, buf[12];
  unsigned int frame_size=0, end_frame, match_offset, window_posn;
  unsigned int R0, R1, R2;

  /* easy answers */
  if (!lzx || (out_bytes < 0)) return MSPACK_ERR_ARGS;
  if (lzx->error) return lzx->error;

  /* flush out any stored-up bytes before we begin */
  i = lzx->o_end - lzx->o_ptr;
  if ((off_t) i > out_bytes) i = (int) out_bytes;
  if (i) {
    if (lzx->sys->write(lzx->output, lzx->o_ptr, i) != i) {
      return lzx->error = MSPACK_ERR_WRITE;
    }
    lzx->o_ptr  += i;
    lzx->offset += i;
    out_bytes   -= i;
  }
  if (out_bytes == 0) return MSPACK_ERR_OK;

  /* restore local state */
  RESTORE_BITS;
  window = lzx->window;
  window_posn = lzx->window_posn;
  R0 = lzx->R0;
  R1 = lzx->R1;
  R2 = lzx->R2;

  end_frame = (unsigned int)((lzx->offset + out_bytes) / LZX_FRAME_SIZE) + 1;

  while (lzx->frame < end_frame) {
    /* have we reached the reset interval? (if there is one?) */
    if (lzx->reset_interval && ((lzx->frame % lzx->reset_interval) == 0)) {
      if (lzx->block_remaining) {
        D(("%d bytes remaining at reset interval", lzx->block_remaining))
        return lzx->error = MSPACK_ERR_DECRUNCH;
      }

      /* re-read the intel header and reset the huffman lengths */
      lzxd_reset_state(lzx);
    }

    /* read header if necessary */
    if (!lzx->header_read) {
      /* read 1 bit. if bit=0, intel filesize = 0.
       * if bit=1, read intel filesize (32 bits) */
      j = 0; READ_BITS(i, 1); if (i) { READ_BITS(i, 16); READ_BITS(j, 16); }
      lzx->intel_filesize = (i << 16) | j;
      lzx->header_read = 1;
    }

    /* calculate size of frame: all frames are 32k except the final frame
     * which is 32kb or less. this can only be calculated when lzx->length
     * has been filled in. */
    frame_size = LZX_FRAME_SIZE;
    if (lzx->length && (lzx->length - lzx->offset) < (off_t)frame_size) {
      frame_size = lzx->length - lzx->offset;
    }

    /* decode until one more frame is available */
    bytes_todo = lzx->frame_posn + frame_size - window_posn;
    while (bytes_todo > 0) {
      /* initialise new block, if one is needed */
      if (lzx->block_remaining == 0) {
        /* realign if previous block was an odd-sized UNCOMPRESSED block */
        if ((lzx->block_type == LZX_BLOCKTYPE_UNCOMPRESSED) &&
            (lzx->block_length & 1))
        {
          if (i_ptr == i_end) {
            if (lzxd_read_input(lzx)) return lzx->error;
            i_ptr = lzx->i_ptr;
            i_end = lzx->i_end;
          }
          i_ptr++;
        }

        /* read block type (3 bits) and block length (24 bits) */
        READ_BITS(lzx->block_type, 3);
        READ_BITS(i, 16); READ_BITS(j, 8);
        lzx->block_remaining = lzx->block_length = (i << 8) | j;
        /*D(("new block t%d len %u", lzx->block_type, lzx->block_length))*/

        /* read individual block headers */
        switch (lzx->block_type) {
        case LZX_BLOCKTYPE_ALIGNED:
          /* read lengths of and build aligned huffman decoding tree */
          for (i = 0; i < 8; i++) { READ_BITS(j, 3); lzx->ALIGNED_len[i] = j; }
          BUILD_TABLE(ALIGNED);
          /* no break -- rest of aligned header is same as verbatim */
        case LZX_BLOCKTYPE_VERBATIM:
          /* read lengths of and build main huffman decoding tree */
          READ_LENGTHS(MAINTREE, 0, 256);
          READ_LENGTHS(MAINTREE, 256, LZX_NUM_CHARS + (lzx->posn_slots << 3));
          BUILD_TABLE(MAINTREE);
          /* if the literal 0xE8 is anywhere in the block... */
          if (lzx->MAINTREE_len[0xE8] != 0) lzx->intel_started = 1;
          /* read lengths of and build lengths huffman decoding tree */
          READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS);
          BUILD_TABLE(LENGTH);
          break;

        case LZX_BLOCKTYPE_UNCOMPRESSED:
          /* because we can't assume otherwise */
          lzx->intel_started = 1;

          /* read 1-16 (not 0-15) bits to align to bytes */
          ENSURE_BITS(16);
          if (bits_left > 16) i_ptr -= 2;
          bits_left = 0; bit_buffer = 0;

          /* read 12 bytes of stored R0 / R1 / R2 values */
          for (rundest = &buf[0], i = 0; i < 12; i++) {
            if (i_ptr == i_end) {
              if (lzxd_read_input(lzx)) return lzx->error;
              i_ptr = lzx->i_ptr;
              i_end = lzx->i_end;
            }
            *rundest++ = *i_ptr++;
          }
          R0 = buf[0] | (buf[1] << 8) | (buf[2]  << 16) | (buf[3]  << 24);
          R1 = buf[4] | (buf[5] << 8) | (buf[6]  << 16) | (buf[7]  << 24);
          R2 = buf[8] | (buf[9] << 8) | (buf[10] << 16) | (buf[11] << 24);
          break;

        default:
          D(("bad block type"))
          return lzx->error = MSPACK_ERR_DECRUNCH;
        }
      }

      /* decode more of the block:
       * run = min(what's available, what's needed) */
      this_run = lzx->block_remaining;
      if (this_run > bytes_todo) this_run = bytes_todo;

      /* assume we decode exactly this_run bytes, for now */
      bytes_todo           -= this_run;
      lzx->block_remaining -= this_run;

      /* decode at least this_run bytes */
      switch (lzx->block_type) {
      case LZX_BLOCKTYPE_VERBATIM:
        while (this_run > 0) {
          READ_HUFFSYM(MAINTREE, main_element);
          if (main_element < LZX_NUM_CHARS) {
            /* literal: 0 to LZX_NUM_CHARS-1 */
            window[window_posn++] = main_element;
            this_run--;
          }
          else {
            /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
            main_element -= LZX_NUM_CHARS;

            /* get match length */
            match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
            if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
              READ_HUFFSYM(LENGTH, length_footer);
              match_length += length_footer;
            }
            match_length += LZX_MIN_MATCH;

            /* get match offset */
            switch ((match_offset = (main_element >> 3))) {
            case 0: match_offset = R0;                                  break;
            case 1: match_offset = R1; R1=R0;        R0 = match_offset; break;
            case 2: match_offset = R2; R2=R0;        R0 = match_offset; break;
            case 3: match_offset = 1;  R2=R1; R1=R0; R0 = match_offset; break;
            default:
              extra = extra_bits[match_offset];
              READ_BITS(verbatim_bits, extra);
              match_offset = position_base[match_offset] - 2 + verbatim_bits;
              R2 = R1; R1 = R0; R0 = match_offset;
            }

            if ((window_posn + match_length) > lzx->window_size) {
              D(("match ran over window wrap"))
              return lzx->error = MSPACK_ERR_DECRUNCH;
            }

            /* copy match */
            rundest = &window[window_posn];
            i = match_length;
            /* does match offset wrap the window? */
            if (match_offset > window_posn) {
              /* j = length from match offset to end of window */
              j = match_offset - window_posn;
              if (j > (int) lzx->window_size) {
                D(("match offset beyond window boundaries"))
                return lzx->error = MSPACK_ERR_DECRUNCH;
              }
              runsrc = &window[lzx->window_size - j];
              if (j < i) {
                /* if match goes over the window edge, do two copy runs */
                i -= j; while (j-- > 0) *rundest++ = *runsrc++;
                runsrc = window;
              }
              while (i-- > 0) *rundest++ = *runsrc++;
            }
            else {
              runsrc = rundest - match_offset;
              while (i-- > 0) *rundest++ = *runsrc++;
            }

            this_run    -= match_length;
            window_posn += match_length;
          }
        } /* while (this_run > 0) */
        break;

      case LZX_BLOCKTYPE_ALIGNED:
        while (this_run > 0) {
          READ_HUFFSYM(MAINTREE, main_element);
          if (main_element < LZX_NUM_CHARS) {
            /* literal: 0 to LZX_NUM_CHARS-1 */
            window[window_posn++] = main_element;
            this_run--;
          }
          else {
            /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
            main_element -= LZX_NUM_CHARS;

            /* get match length */
            match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
            if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
              READ_HUFFSYM(LENGTH, length_footer);
              match_length += length_footer;
            }
            match_length += LZX_MIN_MATCH;

            /* get match offset */
            switch ((match_offset = (main_element >> 3))) {
            case 0: match_offset = R0;                             break;
            case 1: match_offset = R1; R1 = R0; R0 = match_offset; break;
            case 2: match_offset = R2; R2 = R0; R0 = match_offset; break;
            default:
              extra = extra_bits[match_offset];
              match_offset = position_base[match_offset] - 2;
              if (extra > 3) {
                /* verbatim and aligned bits */
                extra -= 3;
                READ_BITS(verbatim_bits, extra);
                match_offset += (verbatim_bits << 3);
                READ_HUFFSYM(ALIGNED, aligned_bits);
                match_offset += aligned_bits;
              }
              else if (extra == 3) {
                /* aligned bits only */
                READ_HUFFSYM(ALIGNED, aligned_bits);
                match_offset += aligned_bits;
              }
              else if (extra > 0) { /* extra==1, extra==2 */
                /* verbatim bits only */
                READ_BITS(verbatim_bits, extra);
                match_offset += verbatim_bits;
              }
              else /* extra == 0 */ {
                /* ??? not defined in LZX specification! */
                match_offset = 1;
              }
              /* update repeated offset LRU queue */
              R2 = R1; R1 = R0; R0 = match_offset;
            }

            if ((window_posn + match_length) > lzx->window_size) {
              D(("match ran over window wrap"))
              return lzx->error = MSPACK_ERR_DECRUNCH;
            }

            /* copy match */
            rundest = &window[window_posn];
            i = match_length;
            /* does match offset wrap the window? */
            if (match_offset > window_posn) {
              /* j = length from match offset to end of window */
              j = match_offset - window_posn;
              if (j > (int) lzx->window_size) {
                D(("match offset beyond window boundaries"))
                return lzx->error = MSPACK_ERR_DECRUNCH;
              }
              runsrc = &window[lzx->window_size - j];
              if (j < i) {
                /* if match goes over the window edge, do two copy runs */
                i -= j; while (j-- > 0) *rundest++ = *runsrc++;
                runsrc = window;
              }
              while (i-- > 0) *rundest++ = *runsrc++;
            }
            else {
              runsrc = rundest - match_offset;
              while (i-- > 0) *rundest++ = *runsrc++;
            }

            this_run    -= match_length;
            window_posn += match_length;
          }
        } /* while (this_run > 0) */
        break;

      case LZX_BLOCKTYPE_UNCOMPRESSED:
        /* as this_run is limited not to wrap a frame, this also means it
         * won't wrap the window (as the window is a multiple of 32k) */
        rundest = &window[window_posn];
        window_posn += this_run;
        while (this_run > 0) {
          if ((i = i_end - i_ptr)) {
            if (i > this_run) i = this_run;
            lzx->sys->copy(i_ptr, rundest, (size_t) i);
            rundest  += i;
            i_ptr    += i;
            this_run -= i;
          }
          else {
            if (lzxd_read_input(lzx)) return lzx->error;
            i_ptr = lzx->i_ptr;
            i_end = lzx->i_end;
          }
        }
        break;

      default:
        D(("Default Here."))
        return lzx->error = MSPACK_ERR_DECRUNCH; /* might as well */
      }

      /* did the final match overrun our desired this_run length? */
      if (this_run < 0) {
        if ((unsigned int)(-this_run) > lzx->block_remaining) {
          D(("overrun went past end of block by %d (%d remaining)",
             -this_run, lzx->block_remaining ))
          return lzx->error = MSPACK_ERR_DECRUNCH;
        }
        lzx->block_remaining -= -this_run;
      }
    } /* while (bytes_todo > 0) */

    /* streams don't extend over frame boundaries */
    if ((window_posn - lzx->frame_posn) != frame_size) {
      D(("decode beyond output frame limits! %d != %d",
         window_posn - lzx->frame_posn, frame_size))
      /* Ignored */
#if 0
      return lzx->error = MSPACK_ERR_DECRUNCH;
#endif
    }

    /* re-align input bitstream */
    if (bits_left > 0) ENSURE_BITS(16);
    if (bits_left & 15) REMOVE_BITS(bits_left & 15);

    /* check that we've used all of the previous frame first */
    if (lzx->o_ptr != lzx->o_end) {
      D(("%d avail bytes, new %d frame", lzx->o_end-lzx->o_ptr, frame_size))
      return lzx->error = MSPACK_ERR_DECRUNCH;
    }

    /* does this intel block _really_ need decoding? */
    if (lzx->intel_started && lzx->intel_filesize &&
        (lzx->frame <= 32768) && (frame_size > 10))
    {
      unsigned char *data    = &lzx->e8_buf[0];
      unsigned char *dataend = &lzx->e8_buf[frame_size - 10];
      signed int curpos      = lzx->intel_curpos;
      signed int filesize    = lzx->intel_filesize;
      signed int abs_off, rel_off;

      /* copy e8 block to the e8 buffer and tweak if needed */
      lzx->o_ptr = data;
      lzx->sys->copy(&lzx->window[lzx->frame_posn], data, frame_size);

      while (data < dataend) {
        if (*data++ != 0xE8) { curpos++; continue; }
        abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24);
        if ((abs_off >= -curpos) && (abs_off < filesize)) {
          rel_off = (abs_off >= 0) ? abs_off - curpos : abs_off + filesize;
          data[0] = (unsigned char) rel_off;
          data[1] = (unsigned char) (rel_off >> 8);
          data[2] = (unsigned char) (rel_off >> 16);
          data[3] = (unsigned char) (rel_off >> 24);
        }
        data += 4;
        curpos += 5;
      }
      lzx->intel_curpos += frame_size;
    }
    else {
      lzx->o_ptr = &lzx->window[lzx->frame_posn];
      if (lzx->intel_filesize) lzx->intel_curpos += frame_size;
    }
    lzx->o_end = &lzx->o_ptr[frame_size];

    /* write a frame */
    i = (out_bytes < (off_t)frame_size) ? (unsigned int)out_bytes : frame_size;
    if (lzx->sys->write(lzx->output, lzx->o_ptr, i) != i) {
      return lzx->error = MSPACK_ERR_WRITE;
    }
    lzx->o_ptr  += i;
    lzx->offset += i;
    out_bytes   -= i;

    /* advance frame start position */
    lzx->frame_posn += frame_size;
    lzx->frame++;

    /* wrap window / frame position pointers */
    if (window_posn == lzx->window_size)     window_posn = 0;
    if (lzx->frame_posn == lzx->window_size) lzx->frame_posn = 0;

  } /* while (lzx->frame < end_frame) */

  if (out_bytes) {
    D(("bytes left to output"))
    return lzx->error = MSPACK_ERR_DECRUNCH;
  }

  /* store local state */
  STORE_BITS;
  lzx->window_posn = window_posn;
  lzx->R0 = R0;
  lzx->R1 = R1;
  lzx->R2 = R2;

  return MSPACK_ERR_OK;
}

void lzxd_free(struct lzxd_stream *lzx) {
  struct mspack_system *sys;
  if (lzx) {
    sys = lzx->sys;
    sys->free(lzx->inbuf);
    sys->free(lzx->window);
    sys->free(lzx);
  }
}
218
src/calibre/utils/lzx/lzxmodule.c
Normal file
218
src/calibre/utils/lzx/lzxmodule.c
Normal file
@ -0,0 +1,218 @@
/* __license__ = 'GPL v3'
 * __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
 *
 * Python module C glue code.
 */

#include <Python.h>

#include <mspack.h>
#include <lzx.h>

static char lzx_doc[] =
    "Provide basic LZX decompression using the code from libmspack.";

static PyObject *LzxError = NULL;

typedef struct memory_file {
    unsigned int magic;  /* 0xB5 */
    void * buffer;
    int total_bytes;
    int current_bytes;
} memory_file;

void *
glue_alloc(struct mspack_system *this, size_t bytes)
{
    void *p = NULL;
    p = (void *)malloc(bytes);
    if (p == NULL) {
        return (void *)PyErr_NoMemory();
    }
    return p;
}

void
glue_free(void *p)
{
    free(p);
}

void
glue_copy(void *src, void *dest, size_t bytes)
{
    memcpy(dest, src, bytes);
}

struct mspack_file *
glue_open(struct mspack_system *this, char *filename, int mode)
{
    PyErr_SetString(LzxError, "MSPACK_OPEN unsupported");
    return NULL;
}

void
glue_close(struct mspack_file *file)
{
    return;
}

int
glue_read(struct mspack_file *file, void * buffer, int bytes)
{
    memory_file *mem;
    int remaining;

    mem = (memory_file *)file;
    if (mem->magic != 0xB5) return -1;

    remaining = mem->total_bytes - mem->current_bytes;
    if (!remaining) return 0;
    if (bytes > remaining) bytes = remaining;
    memcpy(buffer, (unsigned char *)mem->buffer + mem->current_bytes, bytes);
    mem->current_bytes += bytes;

    return bytes;
}

int
glue_write(struct mspack_file * file, void * buffer, int bytes)
{
    memory_file *mem;
    int remaining;

    mem = (memory_file *)file;
    if (mem->magic != 0xB5) return -1;

    remaining = mem->total_bytes - mem->current_bytes;
    if (!remaining) return 0;
    if (bytes > remaining) {
        PyErr_SetString(LzxError,
            "MSPACK_WRITE tried to write beyond end of buffer");
        bytes = remaining;
    }
    memcpy((unsigned char *)mem->buffer + mem->current_bytes, buffer, bytes);
    mem->current_bytes += bytes;
    return bytes;
}

struct mspack_system lzxglue_system = {
    glue_open,
    glue_close,
    glue_read,   /* Read */
    glue_write,  /* Write */
    NULL,        /* Seek */
    NULL,        /* Tell */
    NULL,        /* Message */
    glue_alloc,
    glue_free,
    glue_copy,
    NULL         /* Termination */
};

int LZXwindow = 0;
struct lzxd_stream * lzx_stream = NULL;

/* Can't really init here, don't know enough */
static PyObject *
init(PyObject *self, PyObject *args)
{
    int window = 0;

    if (!PyArg_ParseTuple(args, "i", &window)) {
        return NULL;
    }

    LZXwindow = window;
    lzx_stream = NULL;

    Py_RETURN_NONE;
}

/* Doesn't exist. Oh well, reinitialize state every time anyway */
static PyObject *
reset(PyObject *self, PyObject *args)
{
    if (!PyArg_ParseTuple(args, "")) {
        return NULL;
    }

    Py_RETURN_NONE;
}

//int LZXdecompress(unsigned char *inbuf, unsigned char *outbuf,
//                  unsigned int inlen, unsigned int outlen)
static PyObject *
decompress(PyObject *self, PyObject *args)
{
    unsigned char *inbuf;
    unsigned char *outbuf;
    unsigned int inlen;
    unsigned int outlen;
    int err;
    memory_file source;
    memory_file dest;
    PyObject *retval = NULL;

    if (!PyArg_ParseTuple(args, "s#I", &inbuf, &inlen, &outlen)) {
        return NULL;
    }

    retval = PyString_FromStringAndSize(NULL, outlen);
    if (retval == NULL) {
        return NULL;
    }
    outbuf = (unsigned char *)PyString_AS_STRING(retval);

    source.magic = 0xB5;
    source.buffer = inbuf;
    source.current_bytes = 0;
    source.total_bytes = inlen;

    dest.magic = 0xB5;
    dest.buffer = outbuf;
    dest.current_bytes = 0;
    dest.total_bytes = outlen;

    lzx_stream = lzxd_init(&lzxglue_system, (struct mspack_file *)&source,
                           (struct mspack_file *)&dest, LZXwindow,
                           0x7fff /* Never reset, I do it */, 4096, outlen);
    err = -1;
    if (lzx_stream) err = lzxd_decompress(lzx_stream, outlen);

    lzxd_free(lzx_stream);
    lzx_stream = NULL;

    if (err != MSPACK_ERR_OK) {
        Py_DECREF(retval);
        PyErr_SetString(LzxError, "LZX decompression failed");
        retval = NULL; /* return NULL so the LzxError exception propagates */
    }

    return retval;
}

static PyMethodDef lzx_methods[] = {
    { "init", &init, METH_VARARGS, "Initialize the LZX decompressor" },
    { "reset", &reset, METH_VARARGS, "Reset the LZX decompressor" },
    { "decompress", &decompress, METH_VARARGS, "Run the LZX decompressor" },
    { NULL, NULL }
};

PyMODINIT_FUNC
initlzx(void)
{
    PyObject *m;

    m = Py_InitModule3("lzx", lzx_methods, lzx_doc);
    if (m == NULL) {
        return;
    }

    LzxError = PyErr_NewException("lzx.LzxError", NULL, NULL);
    Py_INCREF(LzxError);
    PyModule_AddObject(m, "LzxError", LzxError);

    return;
}
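[Editor's note: a minimal sketch of how the lzx extension above might be driven from Python once built. It is not part of this commit; the window value, buffer names, and expected length are illustrative assumptions only, based on the init()/decompress() signatures and the LzxError exception defined above.]

    # Hypothetical usage sketch for the lzx extension module.
    # `compressed_block` and `uncompressed_len` are assumed to come from the
    # LIT container being read; the window value 16 is purely illustrative.
    import lzx

    lzx.init(16)      # window parameter, passed straight through to lzxd_init()
    lzx.reset()       # currently a no-op; decompressor state is rebuilt per call
    try:
        raw = lzx.decompress(compressed_block, uncompressed_len)
    except lzx.LzxError:
        # raised when lzxd_init() or lzxd_decompress() reports an error
        raise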
1482
src/calibre/utils/lzx/mspack.h
Normal file
1482
src/calibre/utils/lzx/mspack.h
Normal file
File diff suppressed because it is too large
Load Diff
66
src/calibre/utils/lzx/system.h
Normal file
66
src/calibre/utils/lzx/system.h
Normal file
@ -0,0 +1,66 @@
/* This file is part of libmspack.
 * (C) 2003-2004 Stuart Caie.
 *
 * libmspack is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License (LGPL) version 2.1
 *
 * For further details, see the file COPYING.LIB distributed with libmspack
 */

#ifndef MSPACK_SYSTEM_H
#define MSPACK_SYSTEM_H 1

#ifdef _MSC_VER
#define inline
#endif

#ifdef DEBUG
# include <stdio.h>
# define D(x) do { printf("%s:%d (%s) ",__FILE__, __LINE__, __FUNCTION__); \
                   printf x ; fputc('\n', stdout); fflush(stdout);} while (0);
#else
# define D(x)
#endif

/* endian-neutral reading of little-endian data */
#define __egi32(a,n) ( (((a)[n+3]) << 24) | (((a)[n+2]) << 16) | \
                       (((a)[n+1]) <<  8) |  ((a)[n+0]) )
#define EndGetI64(a) ((((unsigned long long int) __egi32(a,4)) << 32) | \
                      ((unsigned int) __egi32(a,0)))
#define EndGetI32(a) __egi32(a,0)
#define EndGetI16(a) ((((a)[1])<<8)|((a)[0]))

/* endian-neutral reading of big-endian data */
#define EndGetM32(a) ((((a)[0])<<24)|(((a)[1])<<16)|(((a)[2])<<8)|((a)[3]))
#define EndGetM16(a) ((((a)[0])<<8)|((a)[1]))

extern struct mspack_system *mspack_default_system;

/* returns the length of a file opened for reading */
extern int mspack_sys_filelen(struct mspack_system *system,
                              struct mspack_file *file, off_t *length);

/* validates a system structure */
extern int mspack_valid_system(struct mspack_system *sys);

/* Can't redefine intrinsics in Microsoft Visual C */
#ifndef _MSC_VER

/* inline memcmp() */
static inline int memcmp(const void *s1, const void *s2, size_t n) {
  unsigned char *c1 = (unsigned char *) s1;
  unsigned char *c2 = (unsigned char *) s2;
  if (n == 0) return 0;
  while (--n && (*c1 == *c2)) c1++, c2++;
  return *c1 - *c2;
}

/* inline strlen() */
static inline size_t strlen(const char *s) {
  const char *e = s;
  while (*e) e++;
  return e - s;
}
#endif

#endif
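[Editor's note: for comparison only, and not part of this commit, the little-endian EndGetI16/EndGetI32/EndGetI64 macros above correspond to the following sketch in Python's struct module; the helper names are invented for illustration.]

    # Python equivalents of the EndGetI* little-endian read macros (sketch).
    import struct

    def end_get_i16(buf, off=0):
        # read an unsigned 16-bit little-endian value at offset `off`
        return struct.unpack_from('<H', buf, off)[0]

    def end_get_i32(buf, off=0):
        # read an unsigned 32-bit little-endian value at offset `off`
        return struct.unpack_from('<I', buf, off)[0]

    def end_get_i64(buf, off=0):
        # read an unsigned 64-bit little-endian value at offset `off`
        return struct.unpack_from('<Q', buf, off)[0]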