Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

Commit 76b3759947: Re-integrate changes from home
@@ -1,5 +1,2 @@
-import calibre.ebooks.maps.opf as opf
-import calibre.ebooks.maps.html as html
-
-OPF_MAP = opf.MAP
-HTML_MAP = html.MAP
+from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP
+from calibre.ebooks.lit.maps.html import MAP as HTML_MAP
(File diff suppressed because it is too large.)
@@ -1,28 +1,3 @@
-ATTRS = {
-    0x0001 => "href",
-    0x0002 => "%never-used",
-    0x0003 => "%guid",
-    0x0004 => "%minimum_level",
-    0x0005 => "%attr5",
-    0x0006 => "id",
-    0x0007 => "href",
-    0x0008 => "media-type",
-    0x0009 => "fallback",
-    0x000A => "idref",
-    0x000B => "xmlns:dc",
-    0x000C => "xmlns:oebpackage",
-    0x000D => "role",
-    0x000E => "file-as",
-    0x000F => "event",
-    0x0010 => "scheme",
-    0x0011 => "title",
-    0x0012 => "type",
-    0x0013 => "unique-identifier",
-    0x0014 => "name",
-    0x0015 => "content",
-    0x0016 => "xml:lang",
-}
-
 TAGS = [
     None,
     "package",
@@ -69,6 +44,31 @@ TAGS = [
     None,
     ]
 
-TAGS_ATTR = [{} for i in xrange(43)]
-
-MAP = (TAGS, TAGS_ATTRS, ATTRS0)
+ATTRS = {
+    0x0001: "href",
+    0x0002: "%never-used",
+    0x0003: "%guid",
+    0x0004: "%minimum_level",
+    0x0005: "%attr5",
+    0x0006: "id",
+    0x0007: "href",
+    0x0008: "media-type",
+    0x0009: "fallback",
+    0x000A: "idref",
+    0x000B: "xmlns:dc",
+    0x000C: "xmlns:oebpackage",
+    0x000D: "role",
+    0x000E: "file-as",
+    0x000F: "event",
+    0x0010: "scheme",
+    0x0011: "title",
+    0x0012: "type",
+    0x0013: "unique-identifier",
+    0x0014: "name",
+    0x0015: "content",
+    0x0016: "xml:lang",
+    }
+
+TAGS_ATTRS = [{} for i in xrange(43)]
+
+MAP = (TAGS, ATTRS, TAGS_ATTRS)
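The rewritten map module ends by packing the three lookup tables into a single tuple, MAP = (TAGS, ATTRS, TAGS_ATTRS), which the LIT reader later unpacks as tag map, attribute map and per-tag attribute map (see the UnBinary hunk further down). A minimal sketch, with made-up table contents, of how such a tuple is consumed:

# Illustrative only: tiny stand-in tables, not the real calibre maps.
TAGS = [None, "package", "metadata"]
ATTRS = {0x0001: "href", 0x0016: "xml:lang"}
TAGS_ATTRS = [{} for i in range(len(TAGS))]
MAP = (TAGS, ATTRS, TAGS_ATTRS)

def decode_token(map, tag_code, attr_code):
    # Same unpacking order that UnBinary.__init__ uses after this commit.
    tag_map, attr_map, tag_to_attr_map = map
    tag = tag_map[tag_code]
    # A per-tag attribute table takes precedence over the global one.
    attr = tag_to_attr_map[tag_code].get(attr_code, attr_map.get(attr_code))
    return tag, attr

assert decode_token(MAP, 1, 0x0001) == ("package", "href")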
src/calibre/ebooks/lit/mssha1.py (new file, 343 lines)
@@ -0,0 +1,343 @@
#!/usr/bin/env python
# -*- coding: iso-8859-1

"""A sample implementation of SHA-1 in pure Python.

Framework adapted from Dinu Gherman's MD5 implementation by
J. Hallén and L. Creighton. SHA-1 implementation based directly on
the text of the NIST standard FIPS PUB 180-1.
"""

__date__ = '2004-11-17'
__version__ = 0.91 # Modernised by J. Hallén and L. Creighton for Pypy

import struct, copy

# ======================================================================
# Bit-Manipulation helpers
#
#   _long2bytes() was contributed by Barry Warsaw
#   and is reused here with tiny modifications.
# ======================================================================

def _long2bytesBigEndian(n, blocksize=0):
    """Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front
    of the byte string with binary zeros so that the length is a multiple
    of blocksize.
    """

    # After much testing, this algorithm was deemed to be the fastest.
    s = ''
    pack = struct.pack
    while n > 0:
        s = pack('>I', n & 0xffffffffL) + s
        n = n >> 32

    # Strip off leading zeros.
    for i in range(len(s)):
        if s[i] != '\000':
            break
    else:
        # Only happens when n == 0.
        s = '\000'
        i = 0

    s = s[i:]

    # Add back some pad bytes. This could be done more efficiently
    # w.r.t. the de-padding being done above, but sigh...
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * '\000' + s

    return s


def _bytelist2longBigEndian(list):
    "Transform a list of characters into a list of longs."

    imax = len(list)/4
    hl = [0L] * imax

    j = 0
    i = 0
    while i < imax:
        b0 = long(ord(list[j])) << 24
        b1 = long(ord(list[j+1])) << 16
        b2 = long(ord(list[j+2])) << 8
        b3 = long(ord(list[j+3]))
        hl[i] = b0 | b1 | b2 | b3
        i = i+1
        j = j+4

    return hl


def _rotateLeft(x, n):
    "Rotate x (32 bit) left n bits circularly."

    return (x << n) | (x >> (32-n))


# ======================================================================
# The SHA transformation functions
#
# ======================================================================

def f0_19(B, C, D):
    return (B & (C ^ D)) ^ D

def f20_39(B, C, D):
    return B ^ C ^ D

def f40_59(B, C, D):
    return ((B | C) & D) | (B & C)

def f60_79(B, C, D):
    return B ^ C ^ D

def f6_42(B, C, D):
    return (B + C) ^ C

f = [f0_19]*20 + [f20_39]*20 + [f40_59]*20 + [f60_79]*20
f[3] = f20_39
f[6] = f6_42
f[10] = f20_39
f[15] = f20_39
f[26] = f0_19
f[31] = f40_59
f[42] = f6_42
f[51] = f20_39
f[68] = f0_19


# Constants to be used
K = [
    0x5A827999L, # ( 0 <= t <= 19)
    0x6ED9EBA1L, # (20 <= t <= 39)
    0x8F1BBCDCL, # (40 <= t <= 59)
    0xCA62C1D6L  # (60 <= t <= 79)
    ]

class sha:
    "An implementation of the MD5 hash function in pure Python."

    def __init__(self):
        "Initialisation."

        # Initial message length in bits(!).
        self.length = 0L
        self.count = [0, 0]

        # Initial empty message as a sequence of bytes (8 bit characters).
        self.input = []

        # Call a separate init function, that can be used repeatedly
        # to start from scratch on the same object.
        self.init()

    def init(self):
        "Initialize the message-digest and set all fields to zero."

        self.length = 0L
        self.input = []

        # Initial 160 bit message digest (5 times 32 bit).
        self.H0 = 0x32107654L
        self.H1 = 0x23016745L
        self.H2 = 0xC4E680A2L
        self.H3 = 0xDC679823L
        self.H4 = 0xD0857A34L

    def _transform(self, W):
        for t in range(16, 80):
            W.append(_rotateLeft(
                W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1) & 0xffffffffL)

        A = self.H0
        B = self.H1
        C = self.H2
        D = self.H3
        E = self.H4

        for t in xrange(0, 80):
            TEMP = _rotateLeft(A, 5) + f[t](B, C, D) + E + W[t] + K[t/20]
            E = D
            D = C
            C = _rotateLeft(B, 30) & 0xffffffffL
            B = A
            A = TEMP & 0xffffffffL

        self.H0 = (self.H0 + A) & 0xffffffffL
        self.H1 = (self.H1 + B) & 0xffffffffL
        self.H2 = (self.H2 + C) & 0xffffffffL
        self.H3 = (self.H3 + D) & 0xffffffffL
        self.H4 = (self.H4 + E) & 0xffffffffL


    # Down from here all methods follow the Python Standard Library
    # API of the sha module.

    def update(self, inBuf):
        """Add to the current message.

        Update the sha object with the string arg. Repeated calls
        are equivalent to a single call with the concatenation of all
        the arguments, i.e. s.update(a); s.update(b) is equivalent
        to s.update(a+b).

        The hash is immediately calculated for all full blocks. The final
        calculation is made in digest(). It will calculate 1-2 blocks,
        depending on how much padding we have to add. This allows us to
        keep an intermediate value for the hash, so that we only need to
        make minimal recalculation if we call update() to add more data
        to the hashed string.
        """

        leninBuf = long(len(inBuf))

        # Compute number of bytes mod 64.
        index = (self.count[1] >> 3) & 0x3FL

        # Update number of bits.
        self.count[1] = self.count[1] + (leninBuf << 3)
        if self.count[1] < (leninBuf << 3):
            self.count[0] = self.count[0] + 1
        self.count[0] = self.count[0] + (leninBuf >> 29)

        partLen = 64 - index

        if leninBuf >= partLen:
            self.input[index:] = list(inBuf[:partLen])
            self._transform(_bytelist2longBigEndian(self.input))
            i = partLen
            while i + 63 < leninBuf:
                self._transform(_bytelist2longBigEndian(list(inBuf[i:i+64])))
                i = i + 64
            else:
                self.input = list(inBuf[i:leninBuf])
        else:
            i = 0
            self.input = self.input + list(inBuf)


    def digest(self):
        """Terminate the message-digest computation and return digest.

        Return the digest of the strings passed to the update()
        method so far. This is a 16-byte string which may contain
        non-ASCII characters, including null bytes.
        """

        H0 = self.H0
        H1 = self.H1
        H2 = self.H2
        H3 = self.H3
        H4 = self.H4
        input = [] + self.input
        count = [] + self.count

        index = (self.count[1] >> 3) & 0x3fL

        if index < 56:
            padLen = 56 - index
        else:
            padLen = 120 - index

        padding = ['\200'] + ['\000'] * 63
        self.update(padding[:padLen])

        # Append length (before padding).
        bits = _bytelist2longBigEndian(self.input[:56]) + count

        self._transform(bits)

        # Store state in digest.
        digest = _long2bytesBigEndian(self.H0, 4) + \
                 _long2bytesBigEndian(self.H1, 4) + \
                 _long2bytesBigEndian(self.H2, 4) + \
                 _long2bytesBigEndian(self.H3, 4) + \
                 _long2bytesBigEndian(self.H4, 4)

        self.H0 = H0
        self.H1 = H1
        self.H2 = H2
        self.H3 = H3
        self.H4 = H4
        self.input = input
        self.count = count

        return digest


    def hexdigest(self):
        """Terminate and return digest in HEX form.

        Like digest() except the digest is returned as a string of
        length 32, containing only hexadecimal digits. This may be
        used to exchange the value safely in email or other non-
        binary environments.
        """
        return ''.join(['%02x' % ord(c) for c in self.digest()])

    def copy(self):
        """Return a clone object.

        Return a copy ('clone') of the md5 object. This can be used
        to efficiently compute the digests of strings that share
        a common initial substring.
        """

        return copy.deepcopy(self)


# ======================================================================
# Mimic Python top-level functions from standard library API
# for consistency with the md5 module of the standard library.
# ======================================================================

# These are mandatory variables in the module. They have constant values
# in the SHA standard.

digest_size = digestsize = 20
blocksize = 1

def new(arg=None):
    """Return a new sha crypto object.

    If arg is present, the method call update(arg) is made.
    """

    crypto = sha()
    if arg:
        crypto.update(arg)

    return crypto


if __name__ == '__main__':
    def main():
        import sys
        file = None
        if len(sys.argv) > 2:
            print "usage: %s [FILE]" % sys.argv[0]
            return
        elif len(sys.argv) < 2:
            file = sys.stdin
        else:
            file = open(sys.argv[1], 'rb')
        context = new()
        data = file.read(16384)
        while data:
            context.update(data)
            data = file.read(16384)
        file.close()
        digest = context.hexdigest().upper()
        for i in xrange(0, 40, 8):
            print digest[i:i+8],
        print
    main()
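mssha1 deliberately mirrors the old sha/md5 module interface (new, update, digest, hexdigest, copy), so callers can treat it like a standard hashing object; the scrambled initial state and the patched round-function table are what distinguish it from a stock SHA-1. A short usage sketch (Python 2, matching the module; the input strings are arbitrary):

from calibre.ebooks.lit import mssha1

h = mssha1.new('hello ')      # optional initial data, as with sha.new()
h.update('world')             # repeated update() calls concatenate
print h.hexdigest()           # 40 hex digits of the modified SHA-1

h2 = h.copy()                 # clone the running state
h2.update('!')                # diverges without affecting h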
@@ -5,6 +5,7 @@ Support for reading the metadata from a lit file.
 '''
 
 import sys, struct, cStringIO, os
+import functools
 from itertools import repeat
 
 from calibre import relpath
@@ -13,6 +14,31 @@ from calibre.ebooks.metadata.opf import OPFReader
 from calibre.ebooks.lit import LitError
 from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
 
+OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE package
+  PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
+  "http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
+"""
+XHTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE html PUBLIC
+  "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN"
+  "http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
+"""
+
+class DirectoryEntry(object):
+    def __init__(self, name, section, offset, size):
+        self.name = name
+        self.section = section
+        self.offset = offset
+        self.size = size
+
+    def __repr__(self):
+        return "<DirectoryEntry name='%s' section='%d' offset='%d' size='%d'>" \
+            % (self.name, self.section, self.offset, self.size)
+
+    def __str__(self):
+        return repr(self)
+
 def u32(bytes):
     return struct.unpack('<L', bytes[:4])[0]
 
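Everything below reads fixed-width little-endian fields through tiny helpers in the style of u32 above; the later hunks also call u16 and int32, whose definitions fall outside the hunks shown here, so the versions below are illustrative stand-ins only:

import struct

def u32(bytes):
    return struct.unpack('<L', bytes[:4])[0]   # as in the hunk above

def u16(bytes):                                # assumed shape, not shown in the diff
    return struct.unpack('<H', bytes[:2])[0]

def int32(bytes):                              # assumed shape, not shown in the diff
    return struct.unpack('<l', bytes[:4])[0]

raw = struct.pack('<LHl', 0xDEADBEEF, 42, -7)
assert u32(raw) == 0xDEADBEEF
assert u16(raw[4:]) == 42
assert int32(raw[6:]) == -7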
@@ -67,7 +93,7 @@ XML_ENTITIES = ['&amp;', '&apos;', '&lt;', '&gt;', '&quot;']
 class UnBinary(object):
     def __init__(self, bin, manifest, map=OPF_MAP):
         self.manifest = manifest
-        self.attr_map, self.tag_map, self.tag_to_attr_map = map
+        self.tag_map, self.attr_map, self.tag_to_attr_map = map
         self.opf = map is OPF_MAP
         self.bin = bin
         self.buf = cStringIO.StringIO()
@@ -104,7 +130,7 @@ class UnBinary(object):
     def binary_to_text(self, base=0, depth=0):
         tag_name = current_map = None
         dynamic_tag = errors = 0
-        in_censorship = False
+        in_censorship = is_goingdown = False
         state = 'text'
         index = base
         flags = 0
@@ -136,7 +162,7 @@ class UnBinary(object):
                     tag = oc
                     self.buf.write('<')
                     if not (flags & FLAG_CLOSING):
-                        is_goingdown = 1
+                        is_goingdown = True
                     if tag == 0x8000:
                         state = 'get custom length'
                         continue
@@ -167,7 +193,7 @@ class UnBinary(object):
                 else:
                     self.buf.write('>')
                     index = self.binary_to_text(base=index, depth=depth+1)
-                    is_goingdown = 0
+                    is_goingdown = False
                 if not tag_name:
                     raise LitError('Tag ends before it begins.')
                 self.buf.write('</'+tag_name+'>')
@@ -222,7 +248,7 @@ class UnBinary(object):
                 if not in_censorship:
                     self.buf.write(c)
                 count -= 1
-            elif count == 0:
+            if count == 0:
                 if not in_censorship:
                     self.buf.write('"')
                 in_censorship = False
@@ -268,7 +294,7 @@ class UnBinary(object):
                 href += c
                 count -= 1
                 if count == 0:
-                    doc, m, frag = href.partition('#')
+                    doc, m, frag = href[1:].partition('#')
                     path = self.item_path(doc)
                     if m and frag:
                         path += m + frag
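The only behavioural change in the last hunk is that the first character of the collected href is now dropped before the fragment is split off; str.partition keeps the separator, so the fragment can be reattached verbatim when both it and the separator are present. A small illustration with a hypothetical collected value:

# Hypothetical href as collected by the state machine, with a leading
# delimiter character that the new code strips via href[1:].
href = '"chapter1.html#section-2'

doc, m, frag = href[1:].partition('#')
path = doc                     # stands in for self.item_path(doc) in the real code
if m and frag:
    path += m + frag
assert path == 'chapter1.html#section-2'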
@@ -297,100 +323,74 @@ class ManifestItem(object):
     def __repr__(self):
         return self.internal + u'->' + self.path
 
+def preserve(function):
+    def wrapper(self, *args, **kwargs):
+        opos = self._stream.tell()
+        try:
+            return function(self, *args, **kwargs)
+        finally:
+            self._stream.seek(opos)
+    functools.update_wrapper(wrapper, function)
+    return wrapper
+
 class LitFile(object):
     PIECE_SIZE = 16
 
     def magic():
+        @preserve
         def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(0)
-                val = self._stream.read(8)
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(0)
+            return self._stream.read(8)
         return property(fget=fget)
     magic = magic()
 
     def version():
         def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(8)
-                val = u32(self._stream.read(4))
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(8)
+            return u32(self._stream.read(4))
         return property(fget=fget)
     version = version()
 
     def hdr_len():
+        @preserve
         def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(12)
-                val = int32(self._stream.read(4))
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(12)
+            return int32(self._stream.read(4))
         return property(fget=fget)
     hdr_len = hdr_len()
 
     def num_pieces():
+        @preserve
        def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(16)
-                val = int32(self._stream.read(4))
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(16)
+            return int32(self._stream.read(4))
         return property(fget=fget)
     num_pieces = num_pieces()
 
     def sec_hdr_len():
+        @preserve
         def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(20)
-                val = int32(self._stream.read(4))
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(20)
+            return int32(self._stream.read(4))
         return property(fget=fget)
     sec_hdr_len = sec_hdr_len()
 
     def guid():
+        @preserve
         def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(24)
-                val = self._stream.read(16)
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(24)
+            return self._stream.read(16)
         return property(fget=fget)
     guid = guid()
 
     def header():
+        @preserve
         def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                size = self.hdr_len \
-                    + (self.num_pieces * self.PIECE_SIZE) \
-                    + self.sec_hdr_len
-                self._stream.seek(0)
-                val = self._stream.read(size)
-            finally:
-                self._stream.seek(opos)
-            return val
+            size = self.hdr_len \
+                + (self.num_pieces * self.PIECE_SIZE) \
+                + self.sec_hdr_len
+            self._stream.seek(0)
+            return self._stream.read(size)
         return property(fget=fget)
     header = header()
 
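All of the removed tell/try/finally/seek boilerplate is what the new preserve decorator factors out: it records the stream position before the wrapped method runs and restores it afterwards, no matter how the method exits. A standalone sketch of the same pattern; the Reader class and its data are invented for illustration:

import functools
from io import BytesIO

def preserve(function):
    def wrapper(self, *args, **kwargs):
        opos = self._stream.tell()
        try:
            return function(self, *args, **kwargs)
        finally:
            self._stream.seek(opos)          # restored even on exceptions
    functools.update_wrapper(wrapper, function)
    return wrapper

class Reader(object):                        # illustrative, not calibre code
    def __init__(self, data):
        self._stream = BytesIO(data)

    @preserve
    def magic(self):
        self._stream.seek(0)
        return self._stream.read(8)

r = Reader(b'LITMAGICrest-of-file')          # placeholder bytes
r._stream.seek(5)                            # pretend we were mid-read
assert r.magic() == b'LITMAGIC'
assert r._stream.tell() == 5                 # position untouched by the getter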
@@ -402,70 +402,64 @@ class LitFile(object):
             raise LitError('Unknown LIT version %d'%(self.version,))
         self.read_secondary_header()
         self.read_header_pieces()
 
-    def read_secondary_header(self):
-        opos = self._stream.tell()
-        try:
-            self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE)
-            bytes = self._stream.read(self.sec_hdr_len)
-            offset = int32(bytes[4:])
-            while offset < len(bytes):
-                blocktype = bytes[offset:offset+4]
-                blockver = u32(bytes[offset+4:])
-                if blocktype == 'CAOL':
-                    if blockver != 2:
-                        raise LitError(
-                            'Unknown CAOL block format %d' % blockver)
-                    self.creator_id = u32(bytes[offset+12:])
-                    self.entry_chunklen = u32(bytes[offset+20:])
-                    self.count_chunklen = u32(bytes[offset+24:])
-                    self.entry_unknown = u32(bytes[offset+28:])
-                    self.count_unknown = u32(bytes[offset+32:])
-                    offset += 48
-                elif blocktype == 'ITSF':
-                    if blockver != 4:
-                        raise LitError(
-                            'Unknown ITSF block format %d' % blockver)
-                    if u32(bytes[offset+4+16:]):
-                        raise LitError('This file has a 64bit content offset')
-                    self.content_offset = u32(bytes[offset+16:])
-                    self.timestamp = u32(bytes[offset+24:])
-                    self.language_id = u32(bytes[offset+28:])
-                    offset += 48
-            if not hasattr(self, 'content_offset'):
-                raise LitError('Could not figure out the content offset')
-        finally:
-            self._stream.seek(opos)
-
+    @preserve
+    def read_secondary_header(self):
+        self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE)
+        bytes = self._stream.read(self.sec_hdr_len)
+        offset = int32(bytes[4:])
+        while offset < len(bytes):
+            blocktype = bytes[offset:offset+4]
+            blockver = u32(bytes[offset+4:])
+            if blocktype == 'CAOL':
+                if blockver != 2:
+                    raise LitError(
+                        'Unknown CAOL block format %d' % blockver)
+                self.creator_id = u32(bytes[offset+12:])
+                self.entry_chunklen = u32(bytes[offset+20:])
+                self.count_chunklen = u32(bytes[offset+24:])
+                self.entry_unknown = u32(bytes[offset+28:])
+                self.count_unknown = u32(bytes[offset+32:])
+                offset += 48
+            elif blocktype == 'ITSF':
+                if blockver != 4:
+                    raise LitError(
+                        'Unknown ITSF block format %d' % blockver)
+                if u32(bytes[offset+4+16:]):
+                    raise LitError('This file has a 64bit content offset')
+                self.content_offset = u32(bytes[offset+16:])
+                self.timestamp = u32(bytes[offset+24:])
+                self.language_id = u32(bytes[offset+28:])
+                offset += 48
+        if not hasattr(self, 'content_offset'):
+            raise LitError('Could not figure out the content offset')
+
+    @preserve
     def read_header_pieces(self):
-        opos = self._stream.tell()
-        try:
-            src = self.header[self.hdr_len:]
-            for i in range(self.num_pieces):
-                piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE]
-                if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
-                    raise LitError('Piece %s has 64bit value' % repr(piece))
-                offset, size = u32(piece), int32(piece[8:])
-                self._stream.seek(offset)
-                piece = self._stream.read(size)
-                if i == 0:
-                    continue # Dont need this piece
-                elif i == 1:
-                    if u32(piece[8:]) != self.entry_chunklen or \
-                       u32(piece[12:]) != self.entry_unknown:
-                        raise LitError('Secondary header does not match piece')
-                    self.read_directory(piece)
-                elif i == 2:
-                    if u32(piece[8:]) != self.count_chunklen or \
-                       u32(piece[12:]) != self.count_unknown:
-                        raise LitError('Secondary header does not match piece')
-                    continue # No data needed from this piece
-                elif i == 3:
-                    self.piece3_guid = piece
-                elif i == 4:
-                    self.piece4_guid = piece
-        finally:
-            self._stream.seek(opos)
+        src = self.header[self.hdr_len:]
+        for i in range(self.num_pieces):
+            piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE]
+            if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
+                raise LitError('Piece %s has 64bit value' % repr(piece))
+            offset, size = u32(piece), int32(piece[8:])
+            self._stream.seek(offset)
+            piece = self._stream.read(size)
+            if i == 0:
+                continue # Dont need this piece
+            elif i == 1:
+                if u32(piece[8:]) != self.entry_chunklen or \
+                   u32(piece[12:]) != self.entry_unknown:
+                    raise LitError('Secondary header does not match piece')
+                self.read_directory(piece)
+            elif i == 2:
+                if u32(piece[8:]) != self.count_chunklen or \
+                   u32(piece[12:]) != self.count_unknown:
+                    raise LitError('Secondary header does not match piece')
+                continue # No data needed from this piece
+            elif i == 3:
+                self.piece3_guid = piece
+            elif i == 4:
+                self.piece4_guid = piece
 
     def read_directory(self, piece):
         self.entries = []
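Both record types handled by read_secondary_header are fixed 48-byte blocks: a 4-byte type tag, a 4-byte version, then fields at small fixed offsets. A toy scan over a synthetic buffer, using the same u32-style access; the field values are made up and only the offsets mirror the code above:

import struct

def u32(data, off=0):
    return struct.unpack_from('<L', data, off)[0]

block = bytearray(48)                     # synthetic ITSF-like block
block[0:4] = b'ITSF'
struct.pack_into('<L', block, 4, 4)       # block version
struct.pack_into('<L', block, 16, 0x58)   # content offset (made up)
struct.pack_into('<L', block, 28, 0x409)  # language id (made up)

buf = bytes(block)
offset = 0
while offset < len(buf):
    blocktype = buf[offset:offset+4]
    blockver = u32(buf, offset+4)
    if blocktype == b'ITSF' and blockver == 4:
        content_offset = u32(buf, offset+16)
        language_id = u32(buf, offset+28)
    offset += 48

assert content_offset == 0x58 and language_id == 0x409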
@@ -521,108 +515,88 @@ class LitFile(object):
 
         if not hasattr(self, 'manifest'):
             raise LitError('Lit file does not have a valid manifest')
 
+    @preserve
     def read_section_names(self, entry):
-        opos = self._stream.tell()
-        try:
-            self._stream.seek(self.content_offset + entry.offset)
-            raw = self._stream.read(entry.size)
-            if len(raw) < 4:
-                raise LitError('Invalid Namelist section')
-            pos = 4
-            self.num_sections = u16(raw[2:pos])
-
-            self.sections = {}
-            for section in range(self.num_sections):
-                size = u16(raw[pos:pos+2])
-                pos += 2
-                size = size*2 + 2
-                if pos + size > len(raw):
-                    raise LitError('Invalid Namelist section')
-                self.sections[section] = raw[pos:pos+size].decode('utf-16-le')
-                pos += size
-        finally:
-            self._stream.seek(opos)
+        self._stream.seek(self.content_offset + entry.offset)
+        raw = self._stream.read(entry.size)
+        if len(raw) < 4:
+            raise LitError('Invalid Namelist section')
+        pos = 4
+        self.num_sections = u16(raw[2:pos])
+
+        self.sections = {}
+        for section in range(self.num_sections):
+            size = u16(raw[pos:pos+2])
+            pos += 2
+            size = size*2 + 2
+            if pos + size > len(raw):
+                raise LitError('Invalid Namelist section')
+            self.sections[section] = raw[pos:pos+size].decode('utf-16-le')
+            pos += size
 
+    @preserve
     def read_manifest(self, entry):
-        opos = self._stream.tell()
-        try:
-            self.manifest = []
-            self._stream.seek(self.content_offset + entry.offset)
-            raw = self._stream.read(entry.size)
-            pos = 0
-            while pos < len(raw):
-                size = ord(raw[pos])
-                if size == 0: break
-                pos += 1
-                root = raw[pos:pos+size].decode('utf8')
-                pos += size
-                if pos >= len(raw):
-                    raise LitError('Truncated manifest.')
-                for state in ['spine', 'not spine', 'css', 'images']:
-                    num_files = int32(raw[pos:pos+4])
-                    pos += 4
-                    if num_files == 0: continue
-
-                    i = 0
-                    while i < num_files:
-                        if pos+5 >= len(raw):
-                            raise LitError('Truncated manifest.')
-                        offset = u32(raw[pos:pos+4])
-                        pos += 4
-
-                        slen = ord(raw[pos])
-                        pos += 1
-                        internal = raw[pos:pos+slen].decode('utf8')
-                        pos += slen
-
-                        slen = ord(raw[pos])
-                        pos += 1
-                        original = raw[pos:pos+slen].decode('utf8')
-                        pos += slen
-
-                        slen = ord(raw[pos])
-                        pos += 1
-                        mime_type = raw[pos:pos+slen].decode('utf8')
-                        pos += slen + 1
-
-                        self.manifest.append(
-                            ManifestItem(original, internal, mime_type,
-                                         offset, root, state))
-                        i += 1
-        finally:
-            self._stream.seek(opos)
+        self.manifest = []
+        self._stream.seek(self.content_offset + entry.offset)
+        raw = self._stream.read(entry.size)
+        pos = 0
+        while pos < len(raw):
+            size = ord(raw[pos])
+            if size == 0: break
+            pos += 1
+            root = raw[pos:pos+size].decode('utf8')
+            pos += size
+            if pos >= len(raw):
+                raise LitError('Truncated manifest.')
+            for state in ['spine', 'not spine', 'css', 'images']:
+                num_files = int32(raw[pos:pos+4])
+                pos += 4
+                if num_files == 0: continue
+
+                i = 0
+                while i < num_files:
+                    if pos+5 >= len(raw):
+                        raise LitError('Truncated manifest.')
+                    offset = u32(raw[pos:pos+4])
+                    pos += 4
+
+                    slen = ord(raw[pos])
+                    pos += 1
+                    internal = raw[pos:pos+slen].decode('utf8')
+                    pos += slen
+
+                    slen = ord(raw[pos])
+                    pos += 1
+                    original = raw[pos:pos+slen].decode('utf8')
+                    pos += slen
+
+                    slen = ord(raw[pos])
+                    pos += 1
+                    mime_type = raw[pos:pos+slen].decode('utf8')
+                    pos += slen + 1
+
+                    self.manifest.append(
+                        ManifestItem(original, internal, mime_type,
+                                     offset, root, state))
+                    i += 1
 
+    @preserve
     def read_meta(self, entry):
-        opos = self._stream.tell()
-        try:
-            self._stream.seek(self.content_offset + entry.offset)
-            raw = self._stream.read(entry.size)
-
-            xml = \
-'''\
-<?xml version="1.0" encoding="UTF-8" ?>
-<!DOCTYPE package
-  PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
-  "http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
-'''+\
-            unicode(UnBinary(raw, self.manifest))
-            self.meta = xml
-        finally:
-            self._stream.seek(opos)
+        self._stream.seek(self.content_offset + entry.offset)
+        raw = self._stream.read(entry.size)
+        xml = OPF_DECL + unicode(UnBinary(raw, self.manifest))
+        self.meta = xml
 
+    @preserve
     def read_image(self, internal_name):
         cover_entry = None
         for entry in self.entries:
             if internal_name in entry.name:
                 cover_entry = entry
                 break
-        opos = self._stream.tell()
-        try:
-            self._stream.seek(self.content_offset + cover_entry.offset)
-            return self._stream.read(cover_entry.size)
-        finally:
-            self._stream.seek(opos)
+        self._stream.seek(self.content_offset + cover_entry.offset)
+        return self._stream.read(cover_entry.size)
 
 def get_metadata(stream):
     try:
@@ -237,7 +237,7 @@ class OPF(MetaInformation):
 
     def get_title(self):
         title = self.soup.package.metadata.find('dc:title')
-        if title and title.string:
+        if title:
             return self.ENTITY_PATTERN.sub(entity_to_unicode, title.string).strip()
         return self.default_title.strip()
 