Integrated state from home

This commit is contained in:
Marshall T. Vandegrift 2008-07-18 16:33:48 -04:00
commit f1aa90da36
8 changed files with 2776 additions and 0 deletions

View File

@ -0,0 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
class LitError(Exception):
    """Error raised by the LIT support code when input cannot be handled."""

View File

@ -0,0 +1,138 @@
import copy
# some constants defined by the LZX specification
MIN_MATCH = 2
MAX_MATCH = 257
NUM_CHARS = 256
BLOCKTYPE_INVALID = 0 # also blocktypes 4-7 invalid
BLOCKTYPE_VERBATIM = 1
BLOCKTYPE_ALIGNED = 2
BLOCKTYPE_UNCOMPRESSED = 3
PRETREE_NUM_ELEMENTS = 20
ALIGNED_NUM_ELEMENTS = 8 # aligned offset tree #elements
NUM_PRIMARY_LENGTHS = 7 # this one missing from spec!
NUM_SECONDARY_LENGTHS = 249 # length tree #elements
# LZX huffman defines: tweak tablebits as desired.
# The *_MAXSYMBOLS values are derived from the constants above.  They used
# to reference LZX_-prefixed names that are not defined in this module,
# which made the module fail with NameError at import time.
PRETREE_MAXSYMBOLS = PRETREE_NUM_ELEMENTS
PRETREE_TABLEBITS = 6
MAINTREE_MAXSYMBOLS = NUM_CHARS + 50*8
MAINTREE_TABLEBITS = 12
LENGTH_MAXSYMBOLS = NUM_SECONDARY_LENGTHS+1
LENGTH_TABLEBITS = 12
ALIGNED_MAXSYMBOLS = ALIGNED_NUM_ELEMENTS
ALIGNED_TABLEBITS = 7
LENTABLE_SAFETY = 64 # table decoding overruns are allowed
FRAME_SIZE = 32768 # the size of a frame in LZX
class BitReader(object):
    """Bit-level cursor over a byte string, most significant bit first.

    ``pos`` is the current offset in bits and ``nbits`` the number of bits
    in the data originally supplied.
    """

    def __init__(self, data):
        # Four zero bytes of slack are appended so peek() can index a little
        # past the supplied data without running off the end of the string.
        self.data = data + "\x00\x00\x00\x00"
        self.pos = 0
        self.nbits = len(data) * 8

    def peek(self, n):
        """Return the next *n* bits as an integer without consuming them."""
        acc = 0
        gathered = 0
        while gathered < n:
            cursor = self.pos + gathered
            acc = (acc << 8) | ord(self.data[cursor >> 3])
            # The first byte may be entered mid-way; only its remaining
            # bits count toward the total gathered.
            gathered += 8 - (cursor & 7)
        # Drop surplus low bits, then mask away any bits of the first byte
        # that lie before the cursor.
        return (acc >> (gathered - n)) & ((1 << n) - 1)

    def remove(self, n):
        """Advance by *n* bits; return False once past the supplied data."""
        self.pos += n
        return self.pos <= self.nbits

    def left(self):
        """Return how many bits of the supplied data remain unread."""
        return self.nbits - self.pos

    def read(self, n):
        """Return the next *n* bits as an integer and consume them."""
        value = self.peek(n)
        self.remove(n)
        return value
class LzxError(Exception):
    """Error raised by the LZX decompressor for invalid parameters."""
# Per-position-slot lookup tables, filled in once by _static_init() below.
POSITION_BASE = [0]*51
EXTRA_BITS = [0]*51
def _static_init():
    """Populate the EXTRA_BITS and POSITION_BASE tables in place.

    EXTRA_BITS becomes 0,0,0,0,1,1,2,2,...,16,16,17,17,... (capped at 17)
    and POSITION_BASE the running sum of ``1 << EXTRA_BITS[i]``, i.e.
    0,1,2,3,4,6,8,12,16,24,32,...
    """
    size = len(EXTRA_BITS)
    j = 0
    for i in range(0, size, 2):
        EXTRA_BITS[i] = j
        # The table has an odd number of entries, so the final pair is
        # only half present; writing i + 1 unguarded raised IndexError.
        if i + 1 < size:
            EXTRA_BITS[i + 1] = j
        # The first two pairs share the value 0 and the value saturates at
        # 17, hence "and" (the previous "or" condition was also a syntax
        # error because of an unbalanced parenthesis).
        if i != 0 and j < 17:
            j += 1
    j = 0
    for i in range(size):
        POSITION_BASE[i] = j
        j += 1 << EXTRA_BITS[i]
_static_init()
class LzxDecompressor(object):
    """Holds the state of an LZX decompressor.

    Only construction and state reset are implemented in this revision;
    the decoding loop in _decompress() is still unfinished.
    """

    def __init__(self, window_bits, reset_interval=0x7fff):
        """Set up the window parameters and Huffman length/decoding tables.

        window_bits -- log2 of the window size; must be in 15..21.
        reset_interval -- stored on the instance as ``reset_interval``.

        Raises LzxError when window_bits is out of range.
        """
        # LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb)
        if window_bits < 15 or window_bits > 21:
            raise LzxError("Invalid window size")
        self.window_size = 1 << window_bits
        self.window_posn = 0
        self.frame_posn = 0
        self.frame = 0
        self.reset_interval = reset_interval
        self.intel_filesize = 0
        self.intel_curpos = 0
        # window bits: 15 16 17 18 19 20 21
        # position slots: 30 32 34 36 38 42 50
        if window_bits == 21:
            slots = 50
        elif window_bits == 20:
            slots = 42
        else:
            slots = window_bits << 1
        self.posn_slots = slots
        # The attribute was originally misspelled; the old name is kept as
        # an alias so any code already reading it continues to work.
        self.posn_solts = slots
        self.intel_started = 0
        self.input_end = 0
        # huffman code lengths
        self.PRETREE_len = [0] * (PRETREE_MAXSYMBOLS + LENTABLE_SAFETY)
        self.MAINTREE_len = [0] * (MAINTREE_MAXSYMBOLS + LENTABLE_SAFETY)
        self.LENGTH_len = [0] * (LENGTH_MAXSYMBOLS + LENTABLE_SAFETY)
        self.ALIGNED_len = [0] * (ALIGNED_MAXSYMBOLS + LENTABLE_SAFETY)
        # huffman decoding tables
        self.PRETREE_table = \
            [0] * ((1 << PRETREE_TABLEBITS) + (PRETREE_MAXSYMBOLS * 2))
        self.MAINTREE_table = \
            [0] * ((1 << MAINTREE_TABLEBITS) + (MAINTREE_MAXSYMBOLS * 2))
        self.LENGTH_table = \
            [0] * ((1 << LENGTH_TABLEBITS) + (LENGTH_MAXSYMBOLS * 2))
        self.ALIGNED_table = \
            [0] * ((1 << ALIGNED_TABLEBITS) + (ALIGNED_MAXSYMBOLS * 2))
        self.o_buf = self.i_buf = ''
        self._reset_state()

    def _reset_state(self):
        """Reset the R0-R2 registers, block bookkeeping and length tables."""
        self.R0 = 1
        self.R1 = 1
        self.R2 = 1
        self.header_read = 0
        self.block_remaining = 0
        self.block_type = BLOCKTYPE_INVALID
        # initialise tables to 0 (because deltas will be applied to them).
        # Slice assignment zeroes the same leading entries in place, so the
        # list objects and their safety tails are left untouched.
        self.MAINTREE_len[:MAINTREE_MAXSYMBOLS] = [0] * MAINTREE_MAXSYMBOLS
        self.LENGTH_len[:LENGTH_MAXSYMBOLS] = [0] * LENGTH_MAXSYMBOLS

    def decompress(self, data, out_bytes):
        """Return the decompressed output for *data* joined into one string.

        NOTE(review): _decompress() currently returns None, so this call
        fails with TypeError until the decoding loop is implemented.
        """
        return ''.join(self._decompress(data, out_bytes))

    def _decompress(self, data, out_bytes):
        """Validate arguments and set up input/output for decoding.

        Raises LzxError when out_bytes is negative.
        """
        # easy answers
        if out_bytes < 0:
            raise LzxError('Negative desired output bytes')
        # Initialize input and output
        input = BitReader(data)
        output = []
        # NOTE(review): the decoding loop is missing; nothing is returned.

View File

@ -0,0 +1,2 @@
from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP
from calibre.ebooks.lit.maps.html import MAP as HTML_MAP

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,74 @@
# Tag names, indexed by numeric tag code; None marks a code with no name.
TAGS = [
    None,               # 0
    "package",
    "dc:Title",
    "dc:Creator",
    None,               # 4
    None,
    None,
    None,
    None,
    None,
    None,
    None,
    None,
    None,
    None,
    None,               # 15
    "manifest",         # 16
    "item",
    "spine",
    "itemref",
    "metadata",         # 20
    "dc-metadata",
    "dc:Subject",
    "dc:Description",
    "dc:Publisher",
    "dc:Contributor",
    "dc:Date",
    "dc:Type",
    "dc:Format",
    "dc:Identifier",
    "dc:Source",        # 30
    "dc:Language",
    "dc:Relation",
    "dc:Coverage",
    "dc:Rights",
    "x-metadata",
    "meta",
    "tours",
    "tour",
    "site",
    "guide",            # 40
    "reference",
    None,               # 42
]
# Attribute names keyed by numeric attribute code.
ATTRS = {
    0x0001: "href",
    0x0002: "%never-used",
    0x0003: "%guid",
    0x0004: "%minimum_level",
    0x0005: "%attr5",
    0x0006: "id",
    0x0007: "href",
    0x0008: "media-type",
    0x0009: "fallback",
    0x000A: "idref",
    0x000B: "xmlns:dc",
    0x000C: "xmlns:oebpackage",
    0x000D: "role",
    0x000E: "file-as",
    0x000F: "event",
    0x0010: "scheme",
    0x0011: "title",
    0x0012: "type",
    0x0013: "unique-identifier",
    0x0014: "name",
    0x0015: "content",
    0x0016: "xml:lang",
}
# One (empty) per-tag attribute map for every entry of TAGS.  The length is
# derived from TAGS so the two tables cannot drift apart when tags are added.
TAGS_ATTRS = [{} for i in range(len(TAGS))]
# (tag names, global attribute names, per-tag attribute maps)
MAP = (TAGS, ATTRS, TAGS_ATTRS)

View File

@ -0,0 +1,481 @@
# Re-modified for use in MS LIT decryption. Un-reversed the bytebit[] array.
# Substituted Microsoft's absurd modified S-boxes. Modified the encrypt/decrypt
# methods to handle more than one block at a time.
#
# And lo, all the previous notices follow:
# Modified DES encryption for VNC password authentication.
# Ported from realvnc's java viewer by <cliechti@gmx.net>
# I chose this package name because it is not compatible with the
# original DES algorithm, e.g. found pycrypto.
#
# (C) 2003 chris <cliechti@gmx.net>
# Released as free software under the Python License.
#
# You're free to use it for commercial and noncommercial
# application, modify and redistribute it as long as the
# copyright notices are intact. There are no warranties, not
# even that it does what it says to do ;-)
#
# Original notice following:
# This DES class has been extracted from package Acme.Crypto for use in VNC.
# The bytebit[] array has been reversed so that the most significant bit
# in each byte of the key is ignored, not the least significant. Also the
# unnecessary odd parity code has been removed.
#
# These changes are:
# Copyright (C) 1999 AT&T Laboratories Cambridge. All Rights Reserved.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# DesCipher - the DES encryption method
#
# The meat of this code is by Dave Zimmerman <dzimm@widget.com>, and is:
#
# Copyright (c) 1996 Widget Workshop, Inc. All Rights Reserved.
#
# Permission to use, copy, modify, and distribute this software
# and its documentation for NON-COMMERCIAL or COMMERCIAL purposes and
# without fee is hereby granted, provided that this copyright notice is kept
# intact.
#
# WIDGET WORKSHOP MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY
# OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
# TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE, OR NON-INFRINGEMENT. WIDGET WORKSHOP SHALL NOT BE LIABLE
# FOR ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
# DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
#
# THIS SOFTWARE IS NOT DESIGNED OR INTENDED FOR USE OR RESALE AS ON-LINE
# CONTROL EQUIPMENT IN HAZARDOUS ENVIRONMENTS REQUIRING FAIL-SAFE
# PERFORMANCE, SUCH AS IN THE OPERATION OF NUCLEAR FACILITIES, AIRCRAFT
# NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL, DIRECT LIFE
# SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH THE FAILURE OF THE
# SOFTWARE COULD LEAD DIRECTLY TO DEATH, PERSONAL INJURY, OR SEVERE
# PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH RISK ACTIVITIES"). WIDGET WORKSHOP
# SPECIFICALLY DISCLAIMS ANY EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR
# HIGH RISK ACTIVITIES.
#
#
# The rest is:
#
# Copyright (C) 1996 by Jef Poskanzer <jef@acme.com>. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# Visit the ACME Labs Java page for up-to-date versions of this and other
# fine Java utilities: http://www.acme.com/java/
#/ The DES encryption method.
# <P>
# This is surprisingly fast, for pure Java. On a SPARC 20, wrapped
# in Acme.Crypto.EncryptedOutputStream or Acme.Crypto.EncryptedInputStream,
# it does around 7000 bytes/second.
# <P>
# Most of this code is by Dave Zimmerman <dzimm@widget.com>, and is
# Copyright (c) 1996 Widget Workshop, Inc. See the source file for details.
# <P>
# <A HREF="/resources/classes/Acme/Crypto/DesCipher.java">Fetch the software.</A><BR>
# <A HREF="/resources/classes/Acme.tar.Z">Fetch the entire Acme package.</A>
# <P>
# @see Des3Cipher
# @see EncryptedOutputStream
# @see EncryptedInputStream
import struct
class DesCipher:
    """DES-style block cipher driven by this module's permutation tables.

    Data is processed in independent 8-byte blocks.  The key schedule for
    both directions is computed once by setKey() and stored in
    ``encryptKeys`` / ``decryptKeys``.
    """

    # Constructor, byte-array key.
    def __init__(self, key):
        self.setKey(key)

    #/ Set the key.
    def setKey(self, key):
        """Derive and store the encryption and decryption key schedules."""
        key_bytes = [ord(x) for x in key]
        self.encryptKeys = self.deskey(key_bytes, 1)
        self.decryptKeys = self.deskey(key_bytes, 0)

    # Turn an 8-byte key into internal keys.
    def deskey(self, keyBlock, encrypting):
        """Return the 32-word key schedule for a key given as byte values.

        When *encrypting* is false the 16 round-key pairs are stored in
        reverse order, which is what makes des() decrypt.
        """
        pc1m = [0]*56
        pcr = [0]*56
        kn = [0]*32
        for j in range(56):
            l = pc1[j]
            m = l & 7
            pc1m[j] = ((keyBlock[l >> 3] & bytebit[m]) != 0)
        for i in range(16):
            if encrypting:
                m = i << 1
            else:
                m = (15-i) << 1
            n = m + 1
            kn[m] = kn[n] = 0
            # Rotate each 28-bit half independently by totrot[i].
            for j in range(28):
                l = j + totrot[i]
                if l < 28:
                    pcr[j] = pc1m[l]
                else:
                    pcr[j] = pc1m[l - 28]
            for j in range(28, 56):
                l = j + totrot[i]
                if l < 56:
                    pcr[j] = pc1m[l]
                else:
                    pcr[j] = pc1m[l - 28]
            for j in range(24):
                if pcr[pc2[j]] != 0:
                    kn[m] |= bigbyte[j]
                if pcr[pc2[j+24]] != 0:
                    kn[n] |= bigbyte[j]
        return self.cookey(kn)

    def cookey(self, raw):
        """Repack 16 raw round-key pairs into the layout des() consumes."""
        KnL = [0]*32
        rawi = 0
        KnLi = 0
        for i in range(16):
            raw0 = raw[rawi]
            rawi += 1
            raw1 = raw[rawi]
            rawi += 1
            KnL[KnLi] = (raw0 & 0x00fc0000) << 6
            KnL[KnLi] |= (raw0 & 0x00000fc0) << 10
            KnL[KnLi] |= (raw1 & 0x00fc0000) >> 10
            KnL[KnLi] |= (raw1 & 0x00000fc0) >> 6
            KnLi += 1
            KnL[KnLi] = (raw0 & 0x0003f000) << 12
            KnL[KnLi] |= (raw0 & 0x0000003f) << 16
            KnL[KnLi] |= (raw1 & 0x0003f000) >> 4
            KnL[KnLi] |= (raw1 & 0x0000003f)
            KnLi += 1
        return KnL

    # Block encryption routines.
    def _crypt(self, data, keys):
        """Run every 8-byte block of *data* through des() with *keys*.

        The input is unpacked in one pass instead of being re-sliced once
        per block, so the cost is linear in the input length.

        Raises TypeError when the length is not a multiple of 8.
        """
        if len(data) % 8 != 0:
            raise TypeError("length must be multiple of block size")
        words = struct.unpack(">%dL" % (len(data) // 4), data)
        out = []
        for k in range(0, len(words), 2):
            out.extend(self.des((words[k], words[k + 1]), keys))
        return struct.pack(">%dL" % len(out), *out)

    #/ Encrypt one or more blocks of eight bytes.
    def encrypt(self, clearText):
        """Return the encryption of *clearText* (length a multiple of 8)."""
        return self._crypt(clearText, self.encryptKeys)

    #/ Decrypt one or more blocks of eight bytes.
    def decrypt(self, cipherText):
        """Return the decryption of *cipherText* (length a multiple of 8)."""
        return self._crypt(cipherText, self.decryptKeys)

    # The DES function.
    def des(self, block, keys):
        """Transform one block given as a pair of 32-bit words.

        block -- (left, right) big-endian 32-bit halves of the 8-byte block.
        keys -- a 32-entry schedule as produced by deskey().

        Returns the transformed pair of 32-bit words.
        """
        leftt, right = block
        keysi = 0
        # Initial permutation, expressed as a series of bit swaps.
        work = ((leftt >> 4) ^ right) & 0x0f0f0f0f
        right ^= work
        leftt ^= (work << 4) & 0xffffffff
        work = ((leftt >> 16) ^ right) & 0x0000ffff
        right ^= work
        leftt ^= (work << 16) & 0xffffffff
        work = ((right >> 2) ^ leftt) & 0x33333333
        leftt ^= work
        right ^= (work << 2) & 0xffffffff
        work = ((right >> 8) ^ leftt) & 0x00ff00ff
        leftt ^= work
        right ^= (work << 8) & 0xffffffff
        right = ((right << 1) | ((right >> 31) & 1)) & 0xffffffff
        work = (leftt ^ right) & 0xaaaaaaaa
        leftt ^= work
        right ^= work
        leftt = ((leftt << 1) | ((leftt >> 31) & 1)) & 0xffffffff
        # Eight iterations, each performing two rounds (16 rounds total).
        for round_no in range(8):
            work = ((right << 28) | (right >> 4)) & 0xffffffff
            work ^= keys[keysi]
            keysi += 1
            fval = SP7[ work & 0x0000003f ]
            fval |= SP5[(work >> 8) & 0x0000003f ]
            fval |= SP3[(work >> 16) & 0x0000003f ]
            fval |= SP1[(work >> 24) & 0x0000003f ]
            work = right ^ keys[keysi]
            keysi += 1
            fval |= SP8[ work & 0x0000003f ]
            fval |= SP6[(work >> 8) & 0x0000003f ]
            fval |= SP4[(work >> 16) & 0x0000003f ]
            fval |= SP2[(work >> 24) & 0x0000003f ]
            leftt ^= fval
            work = ((leftt << 28) | (leftt >> 4)) & 0xffffffff
            work ^= keys[keysi]
            keysi += 1
            fval = SP7[ work & 0x0000003f ]
            fval |= SP5[(work >> 8) & 0x0000003f ]
            fval |= SP3[(work >> 16) & 0x0000003f ]
            fval |= SP1[(work >> 24) & 0x0000003f ]
            work = leftt ^ keys[keysi]
            keysi += 1
            fval |= SP8[ work & 0x0000003f ]
            fval |= SP6[(work >> 8) & 0x0000003f ]
            fval |= SP4[(work >> 16) & 0x0000003f ]
            fval |= SP2[(work >> 24) & 0x0000003f ]
            right ^= fval
        # Final permutation (the bit swaps of the initial one, undone).
        right = ((right << 31) | (right >> 1)) & 0xffffffff
        work = (leftt ^ right) & 0xaaaaaaaa
        leftt ^= work
        right ^= work
        leftt = ((leftt << 31) | (leftt >> 1)) & 0xffffffff
        work = ((leftt >> 8) ^ right) & 0x00ff00ff
        right ^= work
        leftt ^= (work << 8) & 0xffffffff
        work = ((leftt >> 2) ^ right) & 0x33333333
        right ^= work
        leftt ^= (work << 2) & 0xffffffff
        work = ((right >> 16) ^ leftt) & 0x0000ffff
        leftt ^= work
        right ^= (work << 16) & 0xffffffff
        work = ((right >> 4) ^ leftt) & 0x0f0f0f0f
        leftt ^= work
        right ^= (work << 4) & 0xffffffff
        return right, leftt
# Tables, permutations, S-boxes, etc.
# Single-bit masks for one key byte, most significant bit first (per the
# file header, this array is "un-reversed" relative to the VNC variant).
bytebit = [0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01]
# Single-bit masks for the 24-bit halves of each raw round key.
bigbyte = [
0x800000, 0x400000, 0x200000, 0x100000,
0x080000, 0x040000, 0x020000, 0x010000,
0x008000, 0x004000, 0x002000, 0x001000,
0x000800, 0x000400, 0x000200, 0x000100,
0x000080, 0x000040, 0x000020, 0x000010,
0x000008, 0x000004, 0x000002, 0x000001
]
# Key-bit indices selected by deskey() to form the 56-bit working key.
pc1 = [
56, 48, 40, 32, 24, 16, 8,
0, 57, 49, 41, 33, 25, 17,
9, 1, 58, 50, 42, 34, 26,
18, 10, 2, 59, 51, 43, 35,
62, 54, 46, 38, 30, 22, 14,
6, 61, 53, 45, 37, 29, 21,
13, 5, 60, 52, 44, 36, 28,
20, 12, 4, 27, 19, 11, 3
]
# Cumulative rotation applied to each key half, one entry per round.
totrot = [
1, 2, 4, 6, 8, 10, 12, 14, 15, 17, 19, 21, 23, 25, 27, 28
]
# Indices into the rotated working key selecting the 48 round-key bits.
pc2 = [
13, 16, 10, 23, 0, 4,
2, 27, 14, 5, 20, 9,
22, 18, 11, 3 , 25, 7,
15, 6, 26, 19, 12, 1,
40, 51, 30, 36, 46, 54,
29, 39, 50, 44, 32, 47,
43, 48, 38, 55, 33, 52,
45, 41, 49, 35, 28, 31,
]
# SP1..SP8: combined substitution/permutation tables, each indexed by a
# 6-bit value in des().  Per the file header these are Microsoft's modified
# S-boxes, so they differ from the standard DES tables.
SP1 = [
0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L,
0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L,
0x00080802L, 0x02080800L, 0x02080000L, 0x00000802L,
0x02000802L, 0x02000000L, 0x00000000L, 0x00080002L,
0x00080000L, 0x00000002L, 0x02000800L, 0x00080800L,
0x02080802L, 0x02080000L, 0x00000802L, 0x02000800L,
0x00000002L, 0x00000800L, 0x00080800L, 0x02080002L,
0x00000800L, 0x02000802L, 0x02080002L, 0x00000000L,
0x00000000L, 0x02080802L, 0x02000800L, 0x00080002L,
0x02080800L, 0x00080000L, 0x00000802L, 0x02000800L,
0x02080002L, 0x00000800L, 0x00080800L, 0x02000002L,
0x00080802L, 0x00000002L, 0x02000002L, 0x02080000L,
0x02080802L, 0x00080800L, 0x02080000L, 0x02000802L,
0x02000000L, 0x00000802L, 0x00080002L, 0x00000000L,
0x00080000L, 0x02000000L, 0x02000802L, 0x02080800L,
0x00000002L, 0x02080002L, 0x00000800L, 0x00080802L
]
SP2 = [
0x40108010L, 0x00000000L, 0x00108000L, 0x40100000L,
0x40000010L, 0x00008010L, 0x40008000L, 0x00108000L,
0x00008000L, 0x40100010L, 0x00000010L, 0x40008000L,
0x00100010L, 0x40108000L, 0x40100000L, 0x00000010L,
0x00100000L, 0x40008010L, 0x40100010L, 0x00008000L,
0x00108010L, 0x40000000L, 0x00000000L, 0x00100010L,
0x40008010L, 0x00108010L, 0x40108000L, 0x40000010L,
0x40000000L, 0x00100000L, 0x00008010L, 0x40108010L,
0x00100010L, 0x40108000L, 0x40008000L, 0x00108010L,
0x40108010L, 0x00100010L, 0x40000010L, 0x00000000L,
0x40000000L, 0x00008010L, 0x00100000L, 0x40100010L,
0x00008000L, 0x40000000L, 0x00108010L, 0x40008010L,
0x40108000L, 0x00008000L, 0x00000000L, 0x40000010L,
0x00000010L, 0x40108010L, 0x00108000L, 0x40100000L,
0x40100010L, 0x00100000L, 0x00008010L, 0x40008000L,
0x40008010L, 0x00000010L, 0x40100000L, 0x00108000L
]
SP3 = [
0x04000001L, 0x04040100L, 0x00000100L, 0x04000101L,
0x00040001L, 0x04000000L, 0x04000101L, 0x00040100L,
0x04000100L, 0x00040000L, 0x04040000L, 0x00000001L,
0x04040101L, 0x00000101L, 0x00000001L, 0x04040001L,
0x00000000L, 0x00040001L, 0x04040100L, 0x00000100L,
0x00000101L, 0x04040101L, 0x00040000L, 0x04000001L,
0x04040001L, 0x04000100L, 0x00040101L, 0x04040000L,
0x00040100L, 0x00000000L, 0x04000000L, 0x00040101L,
0x04040100L, 0x00000100L, 0x00000001L, 0x00040000L,
0x00000101L, 0x00040001L, 0x04040000L, 0x04000101L,
0x00000000L, 0x04040100L, 0x00040100L, 0x04040001L,
0x00040001L, 0x04000000L, 0x04040101L, 0x00000001L,
0x00040101L, 0x04000001L, 0x04000000L, 0x04040101L,
0x00040000L, 0x04000100L, 0x04000101L, 0x00040100L,
0x04000100L, 0x00000000L, 0x04040001L, 0x00000101L,
0x04000001L, 0x00040101L, 0x00000100L, 0x04040000L
]
SP4 = [
0x00401008L, 0x10001000L, 0x00000008L, 0x10401008L,
0x00000000L, 0x10400000L, 0x10001008L, 0x00400008L,
0x10401000L, 0x10000008L, 0x10000000L, 0x00001008L,
0x10000008L, 0x00401008L, 0x00400000L, 0x10000000L,
0x10400008L, 0x00401000L, 0x00001000L, 0x00000008L,
0x00401000L, 0x10001008L, 0x10400000L, 0x00001000L,
0x00001008L, 0x00000000L, 0x00400008L, 0x10401000L,
0x10001000L, 0x10400008L, 0x10401008L, 0x00400000L,
0x10400008L, 0x00001008L, 0x00400000L, 0x10000008L,
0x00401000L, 0x10001000L, 0x00000008L, 0x10400000L,
0x10001008L, 0x00000000L, 0x00001000L, 0x00400008L,
0x00000000L, 0x10400008L, 0x10401000L, 0x00001000L,
0x10000000L, 0x10401008L, 0x00401008L, 0x00400000L,
0x10401008L, 0x00000008L, 0x10001000L, 0x00401008L,
0x00400008L, 0x00401000L, 0x10400000L, 0x10001008L,
0x00001008L, 0x10000000L, 0x10000008L, 0x10401000L
]
SP5 = [
0x08000000L, 0x00010000L, 0x00000400L, 0x08010420L,
0x08010020L, 0x08000400L, 0x00010420L, 0x08010000L,
0x00010000L, 0x00000020L, 0x08000020L, 0x00010400L,
0x08000420L, 0x08010020L, 0x08010400L, 0x00000000L,
0x00010400L, 0x08000000L, 0x00010020L, 0x00000420L,
0x08000400L, 0x00010420L, 0x00000000L, 0x08000020L,
0x00000020L, 0x08000420L, 0x08010420L, 0x00010020L,
0x08010000L, 0x00000400L, 0x00000420L, 0x08010400L,
0x08010400L, 0x08000420L, 0x00010020L, 0x08010000L,
0x00010000L, 0x00000020L, 0x08000020L, 0x08000400L,
0x08000000L, 0x00010400L, 0x08010420L, 0x00000000L,
0x00010420L, 0x08000000L, 0x00000400L, 0x00010020L,
0x08000420L, 0x00000400L, 0x00000000L, 0x08010420L,
0x08010020L, 0x08010400L, 0x00000420L, 0x00010000L,
0x00010400L, 0x08010020L, 0x08000400L, 0x00000420L,
0x00000020L, 0x00010420L, 0x08010000L, 0x08000020L
]
SP6 = [
0x80000040L, 0x00200040L, 0x00000000L, 0x80202000L,
0x00200040L, 0x00002000L, 0x80002040L, 0x00200000L,
0x00002040L, 0x80202040L, 0x00202000L, 0x80000000L,
0x80002000L, 0x80000040L, 0x80200000L, 0x00202040L,
0x00200000L, 0x80002040L, 0x80200040L, 0x00000000L,
0x00002000L, 0x00000040L, 0x80202000L, 0x80200040L,
0x80202040L, 0x80200000L, 0x80000000L, 0x00002040L,
0x00000040L, 0x00202000L, 0x00202040L, 0x80002000L,
0x00002040L, 0x80000000L, 0x80002000L, 0x00202040L,
0x80202000L, 0x00200040L, 0x00000000L, 0x80002000L,
0x80000000L, 0x00002000L, 0x80200040L, 0x00200000L,
0x00200040L, 0x80202040L, 0x00202000L, 0x00000040L,
0x80202040L, 0x00202000L, 0x00200000L, 0x80002040L,
0x80000040L, 0x80200000L, 0x00202040L, 0x00000000L,
0x00002000L, 0x80000040L, 0x80002040L, 0x80202000L,
0x80200000L, 0x00002040L, 0x00000040L, 0x80200040L,
]
SP7 = [
0x00004000L, 0x00000200L, 0x01000200L, 0x01000004L,
0x01004204L, 0x00004004L, 0x00004200L, 0x00000000L,
0x01000000L, 0x01000204L, 0x00000204L, 0x01004000L,
0x00000004L, 0x01004200L, 0x01004000L, 0x00000204L,
0x01000204L, 0x00004000L, 0x00004004L, 0x01004204L,
0x00000000L, 0x01000200L, 0x01000004L, 0x00004200L,
0x01004004L, 0x00004204L, 0x01004200L, 0x00000004L,
0x00004204L, 0x01004004L, 0x00000200L, 0x01000000L,
0x00004204L, 0x01004000L, 0x01004004L, 0x00000204L,
0x00004000L, 0x00000200L, 0x01000000L, 0x01004004L,
0x01000204L, 0x00004204L, 0x00004200L, 0x00000000L,
0x00000200L, 0x01000004L, 0x00000004L, 0x01000200L,
0x00000000L, 0x01000204L, 0x01000200L, 0x00004200L,
0x00000204L, 0x00004000L, 0x01004204L, 0x01000000L,
0x01004200L, 0x00000004L, 0x00004004L, 0x01004204L,
0x01000004L, 0x01004200L, 0x01004000L, 0x00004004L,
]
SP8 = [
0x20800080L, 0x20820000L, 0x00020080L, 0x00000000L,
0x20020000L, 0x00800080L, 0x20800000L, 0x20820080L,
0x00000080L, 0x20000000L, 0x00820000L, 0x00020080L,
0x00820080L, 0x20020080L, 0x20000080L, 0x20800000L,
0x00020000L, 0x00820080L, 0x00800080L, 0x20020000L,
0x20820080L, 0x20000080L, 0x00000000L, 0x00820000L,
0x20000000L, 0x00800000L, 0x20020080L, 0x20800080L,
0x00800000L, 0x00020000L, 0x20820000L, 0x00000080L,
0x00800000L, 0x00020000L, 0x20000080L, 0x20820080L,
0x00020080L, 0x20000000L, 0x00000000L, 0x00820000L,
0x20800080L, 0x20020080L, 0x20020000L, 0x00800080L,
0x20820000L, 0x00000080L, 0x00800080L, 0x20020000L,
0x20820080L, 0x00800000L, 0x20800000L, 0x20000080L,
0x00820000L, 0x00020080L, 0x20020080L, 0x20800000L,
0x00000080L, 0x20820000L, 0x00820080L, 0x00000000L,
0x20000000L, 0x20800080L, 0x00020000L, 0x00820080L,
]
def new(key):
    """Module-level factory: return a DesCipher initialised with *key*."""
    cipher = DesCipher(key)
    return cipher
# Cipher block length and key length, both in bytes.
block_size = key_size = 8
# Smoke test: encrypt one sample block and print the ciphertext as hex.
if __name__ == '__main__':
    des = DesCipher("\x01\x23\x45\x67\x89\xab\xcd\xef")
    hex_pairs = ["%02x" % ord(ch) for ch in des.encrypt("Now is t")]
    print(''.join(hex_pairs))

View File

@ -0,0 +1,343 @@
#!/usr/bin/env python
# -*- coding: iso-8859-1
"""A sample implementation of SHA-1 in pure Python.
Framework adapted from Dinu Gherman's MD5 implementation by
J. Hallén and L. Creighton. SHA-1 implementation based directly on
the text of the NIST standard FIPS PUB 180-1.
"""
__date__ = '2004-11-17'
__version__ = 0.91 # Modernised by J. Hallén and L. Creighton for Pypy
import struct, copy
# ======================================================================
# Bit-Manipulation helpers
#
# _long2bytes() was contributed by Barry Warsaw
# and is reused here with tiny modifications.
# ======================================================================
def _long2bytesBigEndian(n, blocksize=0):
    """Convert a non-negative integer to a big-endian byte string.

    The result carries no leading zero bytes; zero itself (or any value
    that is not positive) yields a single zero byte.  If blocksize is
    greater than zero the front of the string is padded with zero bytes so
    that the length becomes a multiple of blocksize.
    """
    pack = struct.pack
    # Taken from struct so the helper is agnostic about the string type
    # that struct.pack produces.
    nul = pack('>B', 0)
    # Collect 32-bit groups least significant first, then flip the order.
    groups = []
    while n > 0:
        groups.append(pack('>I', n & 0xffffffff))
        n >>= 32
    groups.reverse()
    # Strip leading zeros, keeping one zero byte when nothing is left.
    s = nul[:0].join(groups).lstrip(nul) or nul
    # Add back pad bytes up to the requested block size.
    if blocksize > 0:
        excess = len(s) % blocksize
        if excess:
            s = (blocksize - excess) * nul + s
    return s
def _bytelist2longBigEndian(list):
    "Transform a list of characters into a list of 32-bit big-endian words."
    # Trailing characters that do not fill a whole 4-byte group are ignored.
    nwords = len(list) // 4
    words = []
    for base in range(0, nwords * 4, 4):
        words.append(
            (ord(list[base]) << 24)
            | (ord(list[base + 1]) << 16)
            | (ord(list[base + 2]) << 8)
            | ord(list[base + 3]))
    return words
def _rotateLeft(x, n):
    """Rotate x (32 bit) left n bits circularly.

    The result is not masked; callers reduce it to 32 bits themselves.
    """
    shifted_up = x << n
    wrapped = x >> (32 - n)
    return shifted_up | wrapped
# ======================================================================
# The SHA transformation functions
#
# ======================================================================
def f0_19(B, C, D):
    """Bitwise select: take C where B has a 1 bit, otherwise D."""
    differing = C ^ D
    return (B & differing) ^ D
def f20_39(B, C, D):
    """Bitwise parity (XOR) of the three words."""
    parity = B ^ C
    parity ^= D
    return parity
def f40_59(B, C, D):
    """Bitwise majority: each bit is set when at least two inputs have it."""
    either_with_d = (B | C) & D
    both = B & C
    return either_with_d | both
def f60_79(B, C, D):
    """Bitwise parity (XOR) of the three words; same formula as f20_39."""
    parity = B ^ C
    parity ^= D
    return parity
def f6_42(B, C, D):
    """Return ``(B + C) ^ C``; D is accepted but unused.

    The sum is not masked here, so it may exceed 32 bits.
    """
    total = B + C
    return total ^ C
# Round-function schedule: one function per round t in 0..79.  It starts
# from four runs of 20 identical functions, then individual rounds are
# replaced by the overrides listed below.
f = [f0_19]*20 + [f20_39]*20 + [f40_59]*20 + [f60_79]*20
for _t, _fn in (
    (3, f20_39),
    (6, f6_42),
    (10, f20_39),
    (15, f20_39),
    (26, f0_19),
    (31, f40_59),
    (42, f6_42),
    (51, f20_39),
    (68, f0_19),
):
    f[_t] = _fn
# Do not leave the loop helpers behind as module attributes.
del _t, _fn
# Constants to be used
K = [
    0x5A827999, # ( 0 <= t <= 19)
    0x6ED9EBA1, # (20 <= t <= 39)
    0x8F1BBCDC, # (40 <= t <= 59)
    0xCA62C1D6, # (60 <= t <= 79)
]
class mssha1(object):
    """Pure-Python hash object for the modified SHA-1 used by this package.

    The structure is that of SHA-1 (five 32-bit state words, 64-byte
    blocks, 80 rounds, 20-byte digest), but the initial state set in
    init() and the per-round function table ``f`` are customised, so the
    digests are not interchangeable with those of a stock SHA-1.
    """

    def __init__(self):
        "Initialisation."
        # Initial message length in bits(!).
        self.length = 0
        self.count = [0, 0]
        # Initial empty message as a sequence of bytes (8 bit characters).
        self.input = []
        # Call a separate init function, that can be used repeatedly
        # to start from scratch on the same object.
        self.init()

    def init(self):
        "Initialize the message-digest and set all fields to zero."
        self.length = 0
        # The bit counter must be cleared too; otherwise reusing the object
        # via init() would carry the previous message length into the
        # padding of the next digest.
        self.count = [0, 0]
        self.input = []
        # Initial 160 bit message digest (5 times 32 bit).
        self.H0 = 0x32107654
        self.H1 = 0x23016745
        self.H2 = 0xC4E680A2
        self.H3 = 0xDC679823
        self.H4 = 0xD0857A34

    def _transform(self, W):
        """Fold one block into the state.

        W -- list of sixteen 32-bit words; it is extended in place to the
        full 80-word message schedule.
        """
        for t in range(16, 80):
            W.append(_rotateLeft(
                W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1) & 0xffffffff)
        A = self.H0
        B = self.H1
        C = self.H2
        D = self.H3
        E = self.H4
        for t in range(0, 80):
            TEMP = _rotateLeft(A, 5) + f[t](B, C, D) + E + W[t] + K[t // 20]
            E = D
            D = C
            C = _rotateLeft(B, 30) & 0xffffffff
            B = A
            A = TEMP & 0xffffffff
        self.H0 = (self.H0 + A) & 0xffffffff
        self.H1 = (self.H1 + B) & 0xffffffff
        self.H2 = (self.H2 + C) & 0xffffffff
        self.H3 = (self.H3 + D) & 0xffffffff
        self.H4 = (self.H4 + E) & 0xffffffff

    # Down from here all methods follow the Python Standard Library
    # API of the sha module.
    def update(self, inBuf):
        """Add to the current message.

        Update the mssha1 object with the string arg. Repeated calls
        are equivalent to a single call with the concatenation of all
        the arguments, i.e. s.update(a); s.update(b) is equivalent
        to s.update(a+b).

        The hash is immediately calculated for all full blocks. The final
        calculation is made in digest(). It will calculate 1-2 blocks,
        depending on how much padding we have to add. This allows us to
        keep an intermediate value for the hash, so that we only need to
        make minimal recalculation if we call update() to add more data
        to the hashed string.
        """
        leninBuf = len(inBuf)
        # Compute number of bytes mod 64.
        index = (self.count[1] >> 3) & 0x3F
        # Update number of bits.
        self.count[1] = self.count[1] + (leninBuf << 3)
        if self.count[1] < (leninBuf << 3):
            self.count[0] = self.count[0] + 1
        self.count[0] = self.count[0] + (leninBuf >> 29)
        partLen = 64 - index
        if leninBuf >= partLen:
            # Complete the pending block, then consume whole blocks
            # directly from inBuf and keep the tail for later.
            self.input[index:] = list(inBuf[:partLen])
            self._transform(_bytelist2longBigEndian(self.input))
            i = partLen
            while i + 63 < leninBuf:
                self._transform(_bytelist2longBigEndian(list(inBuf[i:i+64])))
                i = i + 64
            else:
                self.input = list(inBuf[i:leninBuf])
        else:
            self.input = self.input + list(inBuf)

    def digest(self):
        """Terminate the message-digest computation and return digest.

        Return the digest of the strings passed to the update()
        method so far. This is a 20-byte string which may contain
        non-ASCII characters, including null bytes.

        The object's state is restored afterwards, so more data can still
        be added with update().
        """
        H0 = self.H0
        H1 = self.H1
        H2 = self.H2
        H3 = self.H3
        H4 = self.H4
        input = [] + self.input
        count = [] + self.count
        index = (self.count[1] >> 3) & 0x3f
        if index < 56:
            padLen = 56 - index
        else:
            padLen = 120 - index
        padding = ['\200'] + ['\000'] * 63
        self.update(padding[:padLen])
        # Append length (before padding).
        bits = _bytelist2longBigEndian(self.input[:56]) + count
        self._transform(bits)
        # Store state in digest.
        digest = _long2bytesBigEndian(self.H0, 4) + \
            _long2bytesBigEndian(self.H1, 4) + \
            _long2bytesBigEndian(self.H2, 4) + \
            _long2bytesBigEndian(self.H3, 4) + \
            _long2bytesBigEndian(self.H4, 4)
        self.H0 = H0
        self.H1 = H1
        self.H2 = H2
        self.H3 = H3
        self.H4 = H4
        self.input = input
        self.count = count
        return digest

    def hexdigest(self):
        """Terminate and return digest in HEX form.

        Like digest() except the digest is returned as a string of
        length 40, containing only hexadecimal digits. This may be
        used to exchange the value safely in email or other non-
        binary environments.
        """
        return ''.join(['%02x' % ord(c) for c in self.digest()])

    def copy(self):
        """Return a clone object.

        Return a copy ('clone') of the mssha1 object. This can be used
        to efficiently compute the digests of strings that share
        a common initial substring.
        """
        return copy.deepcopy(self)
# ======================================================================
# Mimic Python top-level functions from standard library API
# for consistency with the md5 module of the standard library.
# ======================================================================
# These are mandatory variables in the module. They have constant values
# in the SHA standard.
# Digest length in bytes, exposed under both spellings.
digestsize = 20
digest_size = digestsize
blocksize = 1
def new(arg=None):
    """Return a new mssha1 crypto object.

    If arg is present (and non-empty), it is fed to update() first.
    """
    hasher = mssha1()
    if not arg:
        return hasher
    hasher.update(arg)
    return hasher
if __name__ == '__main__':
    def main():
        """Print the digest of FILE (or of stdin) as five 8-digit groups."""
        import sys
        if len(sys.argv) > 2:
            print("usage: %s [FILE]" % sys.argv[0])
            return
        if len(sys.argv) < 2:
            stream = sys.stdin
        else:
            stream = open(sys.argv[1], 'rb')
        context = new()
        # Close the input even when reading or hashing fails part-way.
        try:
            data = stream.read(16384)
            while data:
                context.update(data)
                data = stream.read(16384)
        finally:
            stream.close()
        digest = context.hexdigest().upper()
        print(' '.join([digest[i:i+8] for i in range(0, 40, 8)]))
    main()

View File

@ -0,0 +1,725 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Support for reading the metadata from a lit file.
'''
import sys, struct, cStringIO, os
import functools
from itertools import repeat
from calibre import relpath
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf import OPFReader
from calibre.ebooks.lit import LitError
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
import calibre.ebooks.lit.mssha1 as mssha1
import calibre.ebooks.lit.msdes as msdes
# XML prologue for generated OPF package documents.  The literal used to
# open with four quote characters, which put a stray '"' in front of the
# XML declaration.
OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE package
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
"http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
"""
# XML prologue for generated OEB (X)HTML documents.
XHTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE html PUBLIC
"+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN"
"http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
"""
# GUID strings in the format produced by msguid(); going by their names
# they identify the DES encryption and LZX compression transforms.
DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}"
LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}"
def u32(bytes):
    """Decode the first four bytes as an unsigned little-endian integer."""
    (value,) = struct.unpack('<L', bytes[:4])
    return value
def u16(bytes):
    """Decode the first two bytes as an unsigned little-endian integer."""
    (value,) = struct.unpack('<H', bytes[:2])
    return value
def int32(bytes):
    """Decode the first four bytes as a signed little-endian integer."""
    (value,) = struct.unpack('<l', bytes[:4])
    return value
def encint(bytes, remaining):
    """Decode a variable-length integer from the front of *bytes*.

    Each byte contributes its low 7 bits, most significant group first; a
    clear high bit marks the last byte.  At most *remaining* bytes are
    consumed.

    Returns (value, rest of bytes, remaining budget).
    """
    value = 0
    consumed = 0
    while remaining > 0:
        octet = ord(bytes[consumed])
        consumed += 1
        remaining -= 1
        value = (value << 7) | (octet & 0x7f)
        if not (octet & 0x80):
            break
    return value, bytes[consumed:], remaining
def msguid(bytes):
    """Format the first 16 bytes as a braced, upper-case GUID string.

    The first three fields are read little-endian; the remaining eight
    bytes are emitted in order.
    """
    fields = struct.unpack("<LHHBBBBBBBB", bytes[:16])
    template = "{%08lX-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X}"
    return template % fields
def read_utf8_char(bytes, pos):
    """Decode one UTF-8 encoded character starting at bytes[pos].

    Returns (unicode character, position just past it).  Raises LitError
    for an invalid lead byte, a sequence that runs past the end of the
    data, or a malformed continuation byte.
    """
    lead = ord(bytes[pos])
    if not (lead & 0x80):
        # Single-byte (ASCII) character.
        return unichr(lead), pos + 1
    # Count the leading 1 bits of the lead byte: that is the sequence size.
    mask = 0x80
    elsize = 0
    while lead & mask:
        mask >>= 1
        elsize += 1
    # mask == 0x40 means a bare continuation byte; mask <= 1 means seven or
    # eight leading 1 bits.  Neither can start a character.
    if mask <= 1 or mask == 0x40:
        raise LitError('Invalid UTF8 character: %s' % repr(bytes[pos]))
    if elsize + pos > len(bytes):
        raise LitError('Invalid UTF8 character: %s' % repr(bytes[pos]))
    # Payload bits of the lead byte, then six bits per continuation byte.
    c = lead & (mask - 1)
    for i in range(1, elsize):
        b = ord(bytes[pos+i])
        if (b & 0xC0) != 0x80:
            raise LitError(
                'Invalid UTF8 character: %s' % repr(bytes[pos:pos+i]))
        c = (c << 6) | (b & 0x3F)
    return unichr(c), pos + elsize
# Bit flags read from the binary stream for each tag.
FLAG_OPENING = 1 << 0
FLAG_CLOSING = 1 << 1
FLAG_BLOCK = 1 << 2
FLAG_HEAD = 1 << 3
FLAG_ATOM = 1 << 4
# Entity references that must not have their ampersand escaped again.
XML_ENTITIES = [
    '&amp;',
    '&apos;',
    '&lt;',
    '&gt;',
    '&quot;',
]
class UnBinary(object):
def __init__(self, bin, manifest, map=OPF_MAP):
self.manifest = manifest
self.tag_map, self.attr_map, self.tag_to_attr_map = map
self.opf = map is OPF_MAP
self.bin = bin
self.buf = cStringIO.StringIO()
self.ampersands = []
self.binary_to_text()
self.raw = self.buf.getvalue().lstrip().decode('utf-8')
self.escape_ampersands()
def escape_ampersands(self):
offset = 0
for pos in self.ampersands:
test = self.raw[pos+offset:pos+offset+6]
if test.startswith('&#') and ';' in test:
continue
escape = True
for ent in XML_ENTITIES:
if test.startswith(ent):
escape = False
break
if not escape:
continue
self.raw = self.raw[:pos+offset] + '&amp;' + self.raw[pos+offset+1:]
offset += 4
def item_path(self, internal_id):
for i in self.manifest:
if i == internal_id:
return i.path
raise LitError('Could not find item %s'%(internal_id,))
def __unicode__(self):
return self.raw
def binary_to_text(self, base=0, depth=0):
tag_name = current_map = None
dynamic_tag = errors = 0
in_censorship = is_goingdown = False
state = 'text'
index = base
flags = 0
while index < len(self.bin):
c, index = read_utf8_char(self.bin, index)
oc = ord(c)
if state == 'text':
if oc == 0:
state = 'get flags'
continue
elif c == '\v':
c = '\n'
elif c == '&':
self.ampersands.append(self.buf.tell()-1)
self.buf.write(c.encode('utf-8'))
elif state == 'get flags':
if oc == 0:
state = 'text'
continue
flags = oc
state = 'get tag'
elif state == 'get tag':
state = 'text' if oc == 0 else 'get attr'
if flags & FLAG_OPENING:
tag = oc
self.buf.write('<')
if not (flags & FLAG_CLOSING):
is_goingdown = True
if tag == 0x8000:
state = 'get custom length'
continue
if flags & FLAG_ATOM:
raise LitError('TODO: Atoms not yet implemented')
elif tag < len(self.tag_map):
tag_name = self.tag_map[tag]
current_map = self.tag_to_attr_map[tag]
else:
dynamic_tag += 1
errors += 1
tag_name = '?'+unichr(tag)+'?'
current_map = self.tag_to_attr_map[tag]
print 'WARNING: tag %s unknown' % unichr(tag)
self.buf.write(unicode(tag_name).encode('utf-8'))
elif flags & FLAG_CLOSING:
if depth == 0:
raise LitError('Extra closing tag')
return index
elif state == 'get attr':
in_censorship = False
if oc == 0:
if not is_goingdown:
tag_name = None
dynamic_tag = 0
self.buf.write(' />')
else:
self.buf.write('>')
index = self.binary_to_text(base=index, depth=depth+1)
is_goingdown = False
if not tag_name:
raise LitError('Tag ends before it begins.')
self.buf.write('</'+tag_name+'>')
dynamic_tag = 0
tag_name = None
state = 'text'
else:
if oc == 0x8000:
state = 'get attr length'
continue
attr = None
if oc in current_map and current_map[oc]:
attr = current_map[oc]
elif oc in self.attr_map:
attr = self.attr_map[oc]
if not attr or not isinstance(attr, basestring):
raise LitError(
'Unknown attribute %d in tag %s' % (oc, tag_name))
if attr.startswith('%'):
in_censorship = True
state = 'get value length'
continue
self.buf.write(' ' + unicode(attr).encode('utf-8') + '=')
if attr in ['href', 'src']:
state = 'get href length'
else:
state = 'get value length'
elif state == 'get value length':
if not in_censorship:
self.buf.write('"')
count = oc - 1
if count == 0:
if not in_censorship:
self.buf.write('"')
in_censorship = False
state = 'get attr'
continue
state = 'get value'
if oc == 0xffff:
continue
if count < 0 or count > (len(self.bin) - index):
raise LitError('Invalid character count %d' % count)
elif state == 'get value':
if count == 0xfffe:
if not in_censorship:
self.buf.write('%s"' % (oc - 1))
in_censorship = False
state = 'get attr'
elif count > 0:
if not in_censorship:
self.buf.write(c)
count -= 1
if count == 0:
if not in_censorship:
self.buf.write('"')
in_censorship = False
state = 'get attr'
elif state == 'get custom length':
count = oc - 1
if count <= 0 or count > len(self.bin)-index:
raise LitError('Invalid character count %d' % count)
dynamic_tag += 1
state = 'get custom'
tag_name = ''
elif state == 'get custom':
tag_name += c
count -= 1
if count == 0:
self.buf.write(tag_name)
state = 'get attr'
elif state == 'get attr length':
count = oc - 1
if count <= 0 or count > (len(self.bin) - index):
raise LitError('Invalid character count %d' % count)
self.buf.write(' ')
state = 'get custom attr'
elif state == 'get custom attr':
self.buf.write(c)
count -= 1
if count == 0:
self.buf.write('=')
state = 'get value length'
elif state == 'get href length':
count = oc - 1
if count <= 0 or count > (len(self.bin) - index):
raise LitError('Invalid character count %d' % count)
href = ''
state = 'get href'
elif state == 'get href':
href += c
count -= 1
if count == 0:
doc, m, frag = href[1:].partition('#')
path = self.item_path(doc)
if m and frag:
path += m + frag
self.buf.write((u'"%s"' % path).encode('utf-8'))
state = 'get attr'
return index
class DirectoryEntry(object):
    """A single directory record: a name plus the section, offset and size
    that say where its data lives."""

    def __init__(self, name, section, offset, size):
        self.name, self.section = name, section
        self.offset, self.size = offset, size

    def __repr__(self):
        template = "DirectoryEntry(name=%s, section=%d, offset=%d, size=%d)"
        values = (repr(self.name), self.section, self.offset, self.size)
        return template % values

    def __str__(self):
        # The string form is deliberately the same as the repr.
        return repr(self)
class ManifestItem(object):
    """One file listed in the LIT manifest.

    ``path`` is the item's original name, placed under an ``images`` or
    ``css`` sub-directory when *state* is one of those two values.

    Items compare equal to other objects with the same ``internal``
    attribute, and also to a bare internal id.
    """

    def __init__(self, original, internal, mime_type, offset, root, state):
        self.original = original
        self.internal = internal
        self.mime_type = mime_type
        self.offset = offset
        self.root = root
        self.state = state
        # Only images and stylesheets get a directory prefix.
        self.prefix = state + os.sep if state in ('images', 'css') else ''
        self.path = self.prefix + self.original

    def __eq__(self, other):
        if hasattr(other, 'internal'):
            return self.internal == other.internal
        return self.internal == other

    def __ne__(self, other):
        # Python 2 does not derive != from ==; without this, two equal items
        # would still compare as "not equal".
        return not self.__eq__(other)

    def __repr__(self):
        # %r already quotes the values; no literal quotes in the template.
        return "ManifestItem(internal=%r, path=%r)" \
            % (self.internal, self.path)
def preserve(function):
    """Decorate a method so the position of ``self._stream`` is unchanged by it.

    The position is recorded before the call and restored afterwards, whether
    the wrapped method returns or raises.
    """
    @functools.wraps(function)
    def wrapper(self, *args, **kwargs):
        saved = self._stream.tell()
        try:
            result = function(self, *args, **kwargs)
        finally:
            self._stream.seek(saved)
        return result
    return wrapper
class LitFile(object):
    """Parser for the container layer of a Microsoft Reader (.lit) file.

    Construction checks the magic string and version, then reads the
    secondary header and the header pieces.  Reading the directory piece
    fills ``entries`` (name -> DirectoryEntry) and, as the matching entries
    are met, ``section_names``/``section_data``, ``manifest`` and ``meta``;
    it finishes by determining the DRM level.

    No method leaves the position of the underlying stream changed.
    """

    PIECE_SIZE = 16  # bytes per header piece descriptor

    # -- fixed-position fields of the primary header ----------------------
    # Every field goes through _read_raw, which restores the stream position.

    @property
    def magic(self):
        """The 8 bytes at the start of the file."""
        return self._read_raw(0, 8)

    @property
    def version(self):
        """Unsigned 32-bit value at offset 8."""
        return u32(self._read_raw(8, 4))

    @property
    def hdr_len(self):
        """Signed 32-bit value at offset 12."""
        return int32(self._read_raw(12, 4))

    @property
    def num_pieces(self):
        """Signed 32-bit value at offset 16."""
        return int32(self._read_raw(16, 4))

    @property
    def sec_hdr_len(self):
        """Signed 32-bit value at offset 20."""
        return int32(self._read_raw(20, 4))

    @property
    def guid(self):
        """The 16 raw bytes at offset 24."""
        return self._read_raw(24, 16)

    @property
    def header(self):
        """Primary header, piece descriptors and secondary header as one
        byte string read from the start of the file."""
        size = self.hdr_len \
            + (self.num_pieces * self.PIECE_SIZE) \
            + self.sec_hdr_len
        return self._read_raw(0, size)

    def __init__(self, stream):
        """Parse *stream*, a seekable binary file object.

        Raises LitError if the magic or version is wrong, or if any of the
        structures read during parsing is invalid.
        """
        self._stream = stream
        if self.magic != 'ITOLITLS':
            raise LitError('Not a valid LIT file')
        if self.version != 1:
            raise LitError('Unknown LIT version %d'%(self.version,))
        self.read_secondary_header()
        self.read_header_pieces()

    @preserve
    def __len__(self):
        """Total size of the stream in bytes."""
        self._stream.seek(0, 2)
        return self._stream.tell()

    @preserve
    def _read_raw(self, offset, size):
        """Read *size* bytes at absolute *offset*."""
        self._stream.seek(offset)
        return self._stream.read(size)

    def _read_content(self, offset, size):
        """Read *size* bytes at *offset* relative to the content area."""
        return self._read_raw(self.content_offset + offset, size)

    def read_secondary_header(self):
        """Parse the CAOL and ITSF blocks of the secondary header.

        Sets ``creator_id``, ``entry_chunklen``, ``count_chunklen``,
        ``entry_unknown`` and ``count_unknown`` (CAOL) and
        ``content_offset``, ``timestamp`` and ``language_id`` (ITSF).

        Raises LitError for an unexpected block version, an unknown block
        type, a 64-bit content offset, or a missing content offset.
        """
        start = self.hdr_len + self.num_pieces*self.PIECE_SIZE
        data = self._read_raw(start, self.sec_hdr_len)
        offset = int32(data[4:])
        while offset < len(data):
            blocktype = data[offset:offset+4]
            blockver = u32(data[offset+4:])
            if blocktype == 'CAOL':
                if blockver != 2:
                    raise LitError(
                        'Unknown CAOL block format %d' % blockver)
                self.creator_id = u32(data[offset+12:])
                self.entry_chunklen = u32(data[offset+20:])
                self.count_chunklen = u32(data[offset+24:])
                self.entry_unknown = u32(data[offset+28:])
                self.count_unknown = u32(data[offset+32:])
                offset += 48
            elif blocktype == 'ITSF':
                if blockver != 4:
                    raise LitError(
                        'Unknown ITSF block format %d' % blockver)
                if u32(data[offset+4+16:]):
                    raise LitError('This file has a 64bit content offset')
                self.content_offset = u32(data[offset+16:])
                self.timestamp = u32(data[offset+24:])
                self.language_id = u32(data[offset+28:])
                offset += 48
            else:
                # Without this branch the offset never advances and the
                # loop would spin forever on an unrecognised block.
                raise LitError(
                    'Unknown secondary header block type %s'
                    % repr(blocktype))
        if not hasattr(self, 'content_offset'):
            raise LitError('Could not figure out the content offset')

    def read_header_pieces(self):
        """Walk the header piece descriptors and process each piece.

        Piece 1 is the main directory (see read_directory); pieces 1 and 2
        are checked against the values from the secondary header; pieces 3
        and 4 are stored as ``piece3_guid`` and ``piece4_guid``.
        """
        piece_size = self.PIECE_SIZE
        src = self.header[self.hdr_len:]
        for i in range(self.num_pieces):
            piece = src[i*piece_size:(i+1)*piece_size]
            # The upper halves of the 64-bit offset and size must be zero.
            if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
                raise LitError('Piece %s has 64bit value' % repr(piece))
            offset, size = u32(piece), int32(piece[8:])
            piece = self._read_raw(offset, size)
            if i == 0:
                continue # Dont need this piece
            elif i == 1:
                if u32(piece[8:]) != self.entry_chunklen or \
                   u32(piece[12:]) != self.entry_unknown:
                    raise LitError('Secondary header does not match piece')
                self.read_directory(piece)
            elif i == 2:
                if u32(piece[8:]) != self.count_chunklen or \
                   u32(piece[12:]) != self.count_unknown:
                    raise LitError('Secondary header does not match piece')
                continue # No data needed from this piece
            elif i == 3:
                self.piece3_guid = piece
            elif i == 4:
                self.piece4_guid = piece

    def read_directory(self, piece):
        """Parse the IFCM directory *piece* into ``self.entries``.

        Each AOLL chunk holds a run of entries: an encoded name length, the
        name, then encoded section, offset and size.  The NameList, manifest
        and meta entries are processed as soon as they are met.

        Raises LitError on structural errors, or when the directory has no
        NameList or no manifest.
        """
        self.entries = {}
        if not piece.startswith('IFCM'):
            raise LitError('Header piece #1 is not main directory.')
        chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28])
        if (32 + chunk_size * num_chunks) != len(piece):
            raise LitError('IFCM HEADER has incorrect length')
        for chunk_index in range(num_chunks):
            start = 32 + chunk_index * chunk_size
            # Work on a per-chunk slice so that consuming entries never
            # disturbs the offsets of the chunks that follow.
            chunk = piece[start:start+chunk_size]
            if chunk[:4] != 'AOLL':
                continue
            remaining = chunk_size - int32(chunk[4:8]) - 48
            if remaining < 0:
                raise LitError('AOLL remaining count is negative')
            entries = u16(chunk[-2:])
            if entries <= 0:
                # Hopefully everything will work even without a correct entries
                # count
                entries = (2 ** 16) - 1
            data = chunk[48:]
            for entry_index in xrange(entries):
                if remaining <= 0:
                    break
                namelen, data, remaining = encint(data, remaining)
                if namelen != (namelen & 0x7fffffff):
                    raise LitError('Directory entry had 64bit name length.')
                if namelen > remaining - 3:
                    raise LitError('Read past end of directory chunk')
                name = data[:namelen]
                data = data[namelen:]
                # The name bytes count against the chunk's entry data too.
                remaining -= namelen
                section, data, remaining = encint(data, remaining)
                offset, data, remaining = encint(data, remaining)
                size, data, remaining = encint(data, remaining)
                entry = DirectoryEntry(name, section, offset, size)
                if name == '::DataSpace/NameList':
                    self.read_section_names(entry)
                elif name == '/manifest':
                    self.read_manifest(entry)
                elif name == '/meta':
                    self.read_meta(entry)
                self.entries[name] = entry
        if not hasattr(self, 'section_names'):
            raise LitError('Lit file does not have a valid NameList')
        if not hasattr(self, 'manifest'):
            raise LitError('Lit file does not have a valid manifest')
        self.read_drm()

    def read_section_names(self, entry):
        """Read the section name list described by *entry*.

        Sets ``num_sections``, ``section_names`` (decoded from UTF-16-LE,
        trailing NULs removed) and ``section_data`` (one ``None`` slot per
        section, filled lazily by get_section).
        """
        raw = self._read_content(entry.offset, entry.size)
        if len(raw) < 4:
            raise LitError('Invalid Namelist section')
        pos = 4
        self.num_sections = u16(raw[2:pos])
        self.section_names = [""]*self.num_sections
        self.section_data = [None]*self.num_sections
        for section in range(self.num_sections):
            size = u16(raw[pos:pos+2])
            pos += 2
            # Stored length is in 16-bit units and excludes the terminator.
            size = size*2 + 2
            if pos + size > len(raw):
                raise LitError('Invalid Namelist section')
            self.section_names[section] = \
                raw[pos:pos+size].decode('utf-16-le').rstrip('\000')
            pos += size

    def read_manifest(self, entry):
        """Read the manifest described by *entry* into ``self.manifest``.

        The data is a sequence of groups; each group has a length-prefixed
        root string followed by four file lists, one per state ('spine',
        'not spine', 'css', 'images').  Every file record holds a 32-bit
        offset and three length-prefixed UTF-8 strings (internal id,
        original name, MIME type) followed by one byte that is skipped.
        """
        self.manifest = []
        raw = self._read_content(entry.offset, entry.size)
        pos = 0
        while pos < len(raw):
            size = ord(raw[pos])
            if size == 0:
                break
            pos += 1
            root = raw[pos:pos+size].decode('utf8')
            pos += size
            if pos >= len(raw):
                raise LitError('Truncated manifest.')
            for state in ['spine', 'not spine', 'css', 'images']:
                num_files = int32(raw[pos:pos+4])
                pos += 4
                for file_index in xrange(num_files):
                    if pos+5 >= len(raw):
                        raise LitError('Truncated manifest.')
                    offset = u32(raw[pos:pos+4])
                    pos += 4
                    fields = []
                    for field_index in range(3):
                        slen = ord(raw[pos])
                        pos += 1
                        fields.append(raw[pos:pos+slen].decode('utf8'))
                        pos += slen
                    internal, original, mime_type = fields
                    pos += 1  # one byte after the MIME type is skipped
                    self.manifest.append(
                        ManifestItem(original, internal, mime_type,
                                     offset, root, state))

    def read_meta(self, entry):
        """Decode the binary metadata described by *entry* into ``self.meta``
        (a unicode XML document prefixed with OPF_DECL)."""
        raw = self._read_content(entry.offset, entry.size)
        xml = OPF_DECL + unicode(UnBinary(raw, self.manifest))
        self.meta = xml

    def read_drm(self):
        """Determine ``self.drmlevel`` and, for protected files, the book key.

        The level is 5, 3 or 1 depending on which DRMStorage entry exists
        (checked in that order) and 0 when none does.  For a non-zero level
        the sealed key is decrypted into ``self.bookkey``.

        Raises LitError if the decrypted key does not start with a NUL byte.
        """
        # A directory lookup is enough to test for existence; there is no
        # need to read (and possibly decode) the file contents.
        self.drmlevel = 0
        if '/DRMStorage/Licenses/EUL' in self.entries:
            self.drmlevel = 5
        elif '/DRMStorage/DRMBookplate' in self.entries:
            self.drmlevel = 3
        elif '/DRMStorage/DRMSealed' in self.entries:
            self.drmlevel = 1
        else:
            return
        des = msdes.new(self.calculate_deskey())
        bookkey = des.decrypt(self.get_file('/DRMStorage/DRMSealed'))
        if bookkey[0] != '\000':
            raise LitError('Unable to decrypt title key!')
        self.bookkey = bookkey[1:9]

    def calculate_deskey(self):
        """Derive the 8-byte DES key used to unseal the book key.

        ``/meta`` and ``/DRMStorage/DRMSource`` (plus the bookplate at DRM
        level 3) are hashed with mssha1: the first file is prefixed with two
        NUL bytes and every file is NUL-padded to a multiple of 64 bytes.
        The digest is then folded into 8 bytes by XOR.
        """
        hashfiles = ['/meta', '/DRMStorage/DRMSource']
        if self.drmlevel == 3:
            hashfiles.append('/DRMStorage/DRMBookplate')
        prepad = 2
        hasher = mssha1.new()
        for name in hashfiles:
            data = self.get_file(name)
            if prepad > 0:
                data = ("\000" * prepad) + data
                prepad = 0
            postpad = 64 - (len(data) % 64)
            if postpad < 64:
                data = data + ("\000" * postpad)
            hasher.update(data)
        digest = hasher.digest()
        key = [0] * 8
        for i, char in enumerate(digest):
            key[i % 8] ^= ord(char)
        return ''.join(chr(x) for x in key)

    def get_file(self, name):
        """Return the contents of the directory entry *name*.

        Entries in section 0 are read straight from the content area; other
        entries are sliced out of their transformed section.  Raises KeyError
        if *name* is not in the directory.
        """
        entry = self.entries[name]
        if entry.section == 0:
            return self._read_content(entry.offset, entry.size)
        section = self.get_section(entry.section)
        return section[entry.offset:entry.offset+entry.size]

    def get_section(self, section):
        """Return the decoded data of *section*, caching it after first use."""
        data = self.section_data[section]
        if data is None:
            data = self._get_section(section)
            self.section_data[section] = data
        return data

    def _get_section(self, section):
        """Load *section* and apply its list of transforms.

        The transform list is a run of 16-byte GUIDs.  Only DES decryption is
        handled; LZX compression and unknown transforms raise LitError.
        """
        name = self.section_names[section]
        path = '::DataSpace/Storage/' + name
        transform = self.get_file(path + '/Transform/List')
        content = self.get_file(path + '/Content')
        control = self.get_file(path + '/ControlData')
        idx_transform = idx_control = 0
        while (len(transform) - idx_transform) >= 16:
            ndwords = int32(control[idx_control:]) + 1
            if (idx_control + (ndwords * 4)) > len(control) or ndwords <= 0:
                raise LitError("ControlData is too short")
            guid = msguid(transform[idx_transform:])
            if guid == DESENCRYPT_GUID:
                content = self._decrypt(content)
                idx_control += ndwords * 4
            elif guid == LZXCOMPRESS_GUID:
                raise LitError("LZX decompression not implemented")
            else:
                raise LitError("Unrecognized transform: %s." % repr(guid))
            idx_transform += 16
        return content

    def _decrypt(self, content):
        """DES-decrypt *content* with the book key.

        Raises LitError for DRM level 5.
        """
        if self.drmlevel == 5:
            raise LitError('Cannot extract content from a DRM protected ebook')
        return msdes.new(self.bookkey).decrypt(content)
def get_metadata(stream):
    """Return the metadata of the LIT file open on *stream*.

    The OPF metadata stored in the file is parsed with OPFReader.  If it
    names a cover that matches a manifest item, that item's data is attached
    as ``cover_data = (extension, bytes)``.

    This is best-effort: if anything goes wrong, a MetaInformation with the
    stream's name (or 'Unknown') as title and 'Unknown' as author is returned
    instead.
    """
    try:
        litfile = LitFile(stream)
        src = litfile.meta.encode('utf-8')
        mi = OPFReader(cStringIO.StringIO(src), dir=os.getcwd())
        cover_url, cover_item = mi.cover, None
        if cover_url:
            cover_url = relpath(cover_url, os.getcwd())
            for item in litfile.manifest:
                if item.path == cover_url:
                    cover_item = item.internal
        if cover_item is not None:
            # splitext gives '' for a name without an extension, so the
            # 'jpg' fallback actually applies in that case.
            ext = os.path.splitext(cover_url)[1][1:].lower() or 'jpg'
            cd = litfile.get_file(cover_item)
            mi.cover_data = (ext, cd) if cd else (None, None)
    except Exception:
        # Deliberately broad, but unlike a bare except this lets
        # KeyboardInterrupt and SystemExit through.
        title = getattr(stream, 'name', None) or 'Unknown'
        mi = MetaInformation(title, ['Unknown'])
    return mi
def main(args=sys.argv):
if len(args) != 2:
print >>sys.stderr, _('Usage: %s file.lit')%(args[0],)
return 1
mi = get_metadata(open(args[1], 'rb'))
print unicode(mi)
if mi.cover_data[1]:
cover = os.path.abspath(os.path.splitext(os.path.basename(args[1]))[0] + '.' + mi.cover_data[0])
open(cover, 'wb').write(mi.cover_data[1])
print _('Cover saved to'), cover
return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())