mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Re-integrate changes from home
This commit is contained in:
commit
76b3759947
@ -1,5 +1,2 @@
|
||||
import calibre.ebooks.maps.opf as opf
|
||||
import calibre.ebooks.maps.html as html
|
||||
|
||||
OPF_MAP = opf.MAP
|
||||
HTML_MAP = html.MAP
|
||||
from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP
|
||||
from calibre.ebooks.lit.maps.html import MAP as HTML_MAP
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,28 +1,3 @@
|
||||
ATTRS = {
|
||||
0x0001 => "href",
|
||||
0x0002 => "%never-used",
|
||||
0x0003 => "%guid",
|
||||
0x0004 => "%minimum_level",
|
||||
0x0005 => "%attr5",
|
||||
0x0006 => "id",
|
||||
0x0007 => "href",
|
||||
0x0008 => "media-type",
|
||||
0x0009 => "fallback",
|
||||
0x000A => "idref",
|
||||
0x000B => "xmlns:dc",
|
||||
0x000C => "xmlns:oebpackage",
|
||||
0x000D => "role",
|
||||
0x000E => "file-as",
|
||||
0x000F => "event",
|
||||
0x0010 => "scheme",
|
||||
0x0011 => "title",
|
||||
0x0012 => "type",
|
||||
0x0013 => "unique-identifier",
|
||||
0x0014 => "name",
|
||||
0x0015 => "content",
|
||||
0x0016 => "xml:lang",
|
||||
}
|
||||
|
||||
TAGS = [
|
||||
None,
|
||||
"package",
|
||||
@ -69,6 +44,31 @@ TAGS = [
|
||||
None,
|
||||
]
|
||||
|
||||
TAGS_ATTR = [{} for i in xrange(43)]
|
||||
ATTRS = {
|
||||
0x0001: "href",
|
||||
0x0002: "%never-used",
|
||||
0x0003: "%guid",
|
||||
0x0004: "%minimum_level",
|
||||
0x0005: "%attr5",
|
||||
0x0006: "id",
|
||||
0x0007: "href",
|
||||
0x0008: "media-type",
|
||||
0x0009: "fallback",
|
||||
0x000A: "idref",
|
||||
0x000B: "xmlns:dc",
|
||||
0x000C: "xmlns:oebpackage",
|
||||
0x000D: "role",
|
||||
0x000E: "file-as",
|
||||
0x000F: "event",
|
||||
0x0010: "scheme",
|
||||
0x0011: "title",
|
||||
0x0012: "type",
|
||||
0x0013: "unique-identifier",
|
||||
0x0014: "name",
|
||||
0x0015: "content",
|
||||
0x0016: "xml:lang",
|
||||
}
|
||||
|
||||
MAP = (TAGS, TAGS_ATTRS, ATTRS0)
|
||||
TAGS_ATTRS = [{} for i in xrange(43)]
|
||||
|
||||
MAP = (TAGS, ATTRS, TAGS_ATTRS)
|
||||
|
343
src/calibre/ebooks/lit/mssha1.py
Normal file
343
src/calibre/ebooks/lit/mssha1.py
Normal file
@ -0,0 +1,343 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: iso-8859-1
|
||||
|
||||
"""A sample implementation of SHA-1 in pure Python.
|
||||
|
||||
Framework adapted from Dinu Gherman's MD5 implementation by
|
||||
J. Hallén and L. Creighton. SHA-1 implementation based directly on
|
||||
the text of the NIST standard FIPS PUB 180-1.
|
||||
"""
|
||||
|
||||
|
||||
__date__ = '2004-11-17'
|
||||
__version__ = 0.91 # Modernised by J. Hallén and L. Creighton for Pypy
|
||||
|
||||
|
||||
import struct, copy
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# Bit-Manipulation helpers
|
||||
#
|
||||
# _long2bytes() was contributed by Barry Warsaw
|
||||
# and is reused here with tiny modifications.
|
||||
# ======================================================================
|
||||
|
||||
def _long2bytesBigEndian(n, blocksize=0):
|
||||
"""Convert a long integer to a byte string.
|
||||
|
||||
If optional blocksize is given and greater than zero, pad the front
|
||||
of the byte string with binary zeros so that the length is a multiple
|
||||
of blocksize.
|
||||
"""
|
||||
|
||||
# After much testing, this algorithm was deemed to be the fastest.
|
||||
s = ''
|
||||
pack = struct.pack
|
||||
while n > 0:
|
||||
s = pack('>I', n & 0xffffffffL) + s
|
||||
n = n >> 32
|
||||
|
||||
# Strip off leading zeros.
|
||||
for i in range(len(s)):
|
||||
if s[i] != '\000':
|
||||
break
|
||||
else:
|
||||
# Only happens when n == 0.
|
||||
s = '\000'
|
||||
i = 0
|
||||
|
||||
s = s[i:]
|
||||
|
||||
# Add back some pad bytes. This could be done more efficiently
|
||||
# w.r.t. the de-padding being done above, but sigh...
|
||||
if blocksize > 0 and len(s) % blocksize:
|
||||
s = (blocksize - len(s) % blocksize) * '\000' + s
|
||||
|
||||
return s
|
||||
|
||||
|
||||
def _bytelist2longBigEndian(list):
|
||||
"Transform a list of characters into a list of longs."
|
||||
|
||||
imax = len(list)/4
|
||||
hl = [0L] * imax
|
||||
|
||||
j = 0
|
||||
i = 0
|
||||
while i < imax:
|
||||
b0 = long(ord(list[j])) << 24
|
||||
b1 = long(ord(list[j+1])) << 16
|
||||
b2 = long(ord(list[j+2])) << 8
|
||||
b3 = long(ord(list[j+3]))
|
||||
hl[i] = b0 | b1 | b2 | b3
|
||||
i = i+1
|
||||
j = j+4
|
||||
|
||||
return hl
|
||||
|
||||
|
||||
def _rotateLeft(x, n):
|
||||
"Rotate x (32 bit) left n bits circularly."
|
||||
|
||||
return (x << n) | (x >> (32-n))
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# The SHA transformation functions
|
||||
#
|
||||
# ======================================================================
|
||||
|
||||
def f0_19(B, C, D):
    """Bitwise choice: take C where B has a 1 bit, D where it has a 0 bit."""
    return D ^ (B & (C ^ D))


def f20_39(B, C, D):
    """Bitwise parity of the three words."""
    return B ^ C ^ D


def f40_59(B, C, D):
    """Bitwise majority of the three words."""
    return (B & C) | (D & (B | C))


def f60_79(B, C, D):
    """Bitwise parity of the three words (same formula as f20_39)."""
    return B ^ C ^ D


def f6_42(B, C, D):
    """Extra mixing function used by two rounds of the table; D is unused."""
    return C ^ (B + C)


# Per-round function table, indexed by round number 0..79.  It starts as
# four runs of twenty rounds each and then individual rounds are replaced.
f = [f0_19] * 20 + [f20_39] * 20 + [f40_59] * 20 + [f60_79] * 20
for _round, _func in (
        (3, f20_39),
        (6, f6_42),
        (10, f20_39),
        (15, f20_39),
        (26, f0_19),
        (31, f40_59),
        (42, f6_42),
        (51, f20_39),
        (68, f0_19),
        ):
    f[_round] = _func
del _round, _func
|
||||
|
||||
|
||||
# Constants to be used
|
||||
# One additive constant per run of twenty rounds; indexed by round // 20.
K = [
    0x5A827999,  # ( 0 <= t <= 19)
    0x6ED9EBA1,  # (20 <= t <= 39)
    0x8F1BBCDC,  # (40 <= t <= 59)
    0xCA62C1D6,  # (60 <= t <= 79)
]
|
||||
|
||||
class sha:
    """A SHA-1 style hash object in pure Python.

    The public methods (update, digest, hexdigest, copy) follow the API of
    the Python Standard Library sha module.

    NOTE(review): the initial chaining values set in init() and the
    module-level round table ``f`` look like they differ from plain SHA-1,
    so digests are presumably not interchangeable with the standard
    algorithm -- confirm before relying on that either way.
    """

    def __init__(self):
        """Create a hash object holding an empty message."""

        # init() establishes every field, so a fresh object and a
        # re-initialised one are guaranteed to be in the same state.
        self.init()


    def init(self):
        """Initialize the message-digest and set all fields to zero.

        Can be called repeatedly to start from scratch on the same object.
        """

        # Not read by any method of this class; kept for compatibility.
        self.length = 0

        # Message length in bits as two 32-bit words: [high, low].  It has
        # to be reset here as well, otherwise a re-initialised object
        # would carry over the length of the previous message.
        self.count = [0, 0]

        # Bytes received but not yet hashed (less than one 64-byte block).
        self.input = []

        # Initial 160 bit message digest (5 times 32 bit).
        self.H0 = 0x32107654
        self.H1 = 0x23016745
        self.H2 = 0xC4E680A2
        self.H3 = 0xDC679823
        self.H4 = 0xD0857A34

    def _transform(self, W):
        """Fold one block of sixteen 32-bit words into the digest state.

        ``W`` is extended in place to eighty words.
        """
        for t in range(16, 80):
            W.append(_rotateLeft(
                W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1) & 0xffffffff)

        A = self.H0
        B = self.H1
        C = self.H2
        D = self.H3
        E = self.H4

        for t in range(0, 80):
            # Floor division picks the constant for this run of 20 rounds.
            TEMP = _rotateLeft(A, 5) + f[t](B, C, D) + E + W[t] + K[t // 20]
            E = D
            D = C
            C = _rotateLeft(B, 30) & 0xffffffff
            B = A
            A = TEMP & 0xffffffff

        self.H0 = (self.H0 + A) & 0xffffffff
        self.H1 = (self.H1 + B) & 0xffffffff
        self.H2 = (self.H2 + C) & 0xffffffff
        self.H3 = (self.H3 + D) & 0xffffffff
        self.H4 = (self.H4 + E) & 0xffffffff


    # Down from here all methods follow the Python Standard Library
    # API of the sha module.

    def update(self, inBuf):
        """Add to the current message.

        Update the sha object with the string arg. Repeated calls
        are equivalent to a single call with the concatenation of all
        the arguments, i.e. s.update(a); s.update(b) is equivalent
        to s.update(a+b).

        The hash is immediately calculated for all full blocks. The final
        calculation is made in digest(). It will calculate 1-2 blocks,
        depending on how much padding we have to add. This allows us to
        keep an intermediate value for the hash, so that we only need to
        make minimal recalculation if we call update() to add more data
        to the hashed string.
        """

        leninBuf = len(inBuf)

        # Compute number of bytes mod 64.
        index = (self.count[1] >> 3) & 0x3F

        # Update number of bits.  Python integers never overflow, so the
        # low word is wrapped to 32 bits explicitly and its carry moved
        # into the high word; (leninBuf >> 29) is the part of the bit
        # length that lies above the low word.
        low = self.count[1] + ((leninBuf << 3) & 0xffffffff)
        self.count[1] = low & 0xffffffff
        self.count[0] = (self.count[0] + (leninBuf >> 29)
                         + (low >> 32)) & 0xffffffff

        partLen = 64 - index

        if leninBuf >= partLen:
            # Complete the buffered block, hash it, then hash every
            # further full block straight from inBuf.
            self.input[index:] = list(inBuf[:partLen])
            self._transform(_bytelist2longBigEndian(self.input))
            i = partLen
            while i + 63 < leninBuf:
                self._transform(_bytelist2longBigEndian(list(inBuf[i:i+64])))
                i = i + 64
            # Keep the unprocessed tail for the next call.
            self.input = list(inBuf[i:leninBuf])
        else:
            self.input = self.input + list(inBuf)


    def digest(self):
        """Terminate the message-digest computation and return digest.

        Return the digest of the strings passed to the update()
        method so far. This is a 20-byte string which may contain
        non-ASCII characters, including null bytes.

        The state of the object is restored afterwards, so more data
        can be added with update().
        """

        # Save the running state; padding below goes through update().
        H0 = self.H0
        H1 = self.H1
        H2 = self.H2
        H3 = self.H3
        H4 = self.H4
        input = [] + self.input
        count = [] + self.count

        index = (self.count[1] >> 3) & 0x3f

        # Pad to 56 bytes mod 64, leaving room for the two length words.
        if index < 56:
            padLen = 56 - index
        else:
            padLen = 120 - index

        padding = ['\200'] + ['\000'] * 63
        self.update(padding[:padLen])

        # Append length (before padding).
        bits = _bytelist2longBigEndian(self.input[:56]) + count

        self._transform(bits)

        # Store state in digest.
        digest = _long2bytesBigEndian(self.H0, 4) + \
                 _long2bytesBigEndian(self.H1, 4) + \
                 _long2bytesBigEndian(self.H2, 4) + \
                 _long2bytesBigEndian(self.H3, 4) + \
                 _long2bytesBigEndian(self.H4, 4)

        self.H0 = H0
        self.H1 = H1
        self.H2 = H2
        self.H3 = H3
        self.H4 = H4
        self.input = input
        self.count = count

        return digest


    def hexdigest(self):
        """Terminate and return digest in HEX form.

        Like digest() except the digest is returned as a string of
        length 40, containing only hexadecimal digits. This may be
        used to exchange the value safely in email or other non-
        binary environments.
        """
        return ''.join(['%02x' % ord(c) for c in self.digest()])

    def copy(self):
        """Return a clone object.

        Return a copy ('clone') of the sha object. This can be used
        to efficiently compute the digests of strings that share
        a common initial substring.
        """

        return copy.deepcopy(self)
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# Mimic Python top-level functions from standard library API
|
||||
# for consistency with the sha module of the standard library.
|
||||
# ======================================================================
|
||||
|
||||
# These are mandatory variables in the module. They have constant values
|
||||
# in the SHA standard.
|
||||
|
||||
digest_size = digestsize = 20
|
||||
blocksize = 1
|
||||
|
||||
def new(arg=None):
    """Return a new sha crypto object.

    If arg is present (and not empty), it is fed to the object through
    update() before the object is returned.
    """
    hasher = sha()
    if not arg:
        return hasher

    hasher.update(arg)
    return hasher
|
||||
|
||||
if __name__ == '__main__':
    def main():
        """Hash FILE (or standard input) and print the digest.

        The digest is printed in upper-case hex as five groups of eight
        digits separated by single spaces.
        """
        import sys
        if len(sys.argv) > 2:
            sys.stdout.write("usage: %s [FILE]\n" % sys.argv[0])
            return
        elif len(sys.argv) < 2:
            stream = sys.stdin
        else:
            stream = open(sys.argv[1], 'rb')
        context = new()
        try:
            data = stream.read(16384)
            while data:
                context.update(data)
                data = stream.read(16384)
        finally:
            # Release the handle even when reading or hashing fails.
            stream.close()
        digest = context.hexdigest().upper()
        groups = [digest[i:i+8] for i in range(0, 40, 8)]
        sys.stdout.write(' '.join(groups) + '\n')
    main()
|
@ -5,6 +5,7 @@ Support for reading the metadata from a lit file.
|
||||
'''
|
||||
|
||||
import sys, struct, cStringIO, os
|
||||
import functools
|
||||
from itertools import repeat
|
||||
|
||||
from calibre import relpath
|
||||
@ -13,6 +14,31 @@ from calibre.ebooks.metadata.opf import OPFReader
|
||||
from calibre.ebooks.lit import LitError
|
||||
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
|
||||
|
||||
# XML declaration and DOCTYPE that are prepended to the decoded OPF
# metadata.  The literal must open with exactly three quotes: a fourth one
# becomes part of the string and puts a stray '"' in front of '<?xml'.
OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE package
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
"http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
"""
|
||||
XHTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!DOCTYPE html PUBLIC
|
||||
"+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN"
|
||||
"http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
|
||||
"""
|
||||
|
||||
class DirectoryEntry(object):
    """Plain record with the name, section, offset and size of an entry."""

    def __init__(self, name, section, offset, size):
        self.name, self.section = name, section
        self.offset, self.size = offset, size

    def __repr__(self):
        # section, offset and size are formatted with %d, so they must be
        # numbers.
        template = "<DirectoryEntry name='%s' section='%d' offset='%d' size='%d'>"
        fields = (self.name, self.section, self.offset, self.size)
        return template % fields

    def __str__(self):
        # The string form is the same as the repr.
        text = repr(self)
        return text
|
||||
|
||||
def u32(bytes):
    """Decode the first four bytes as a little-endian unsigned 32-bit integer.

    Anything after the fourth byte is ignored.
    """
    head = bytes[:4]
    (value,) = struct.unpack('<L', head)
    return value
|
||||
|
||||
@ -67,7 +93,7 @@ XML_ENTITIES = ['&', ''', '<', '>', '"']
|
||||
class UnBinary(object):
|
||||
def __init__(self, bin, manifest, map=OPF_MAP):
|
||||
self.manifest = manifest
|
||||
self.attr_map, self.tag_map, self.tag_to_attr_map = map
|
||||
self.tag_map, self.attr_map, self.tag_to_attr_map = map
|
||||
self.opf = map is OPF_MAP
|
||||
self.bin = bin
|
||||
self.buf = cStringIO.StringIO()
|
||||
@ -104,7 +130,7 @@ class UnBinary(object):
|
||||
def binary_to_text(self, base=0, depth=0):
|
||||
tag_name = current_map = None
|
||||
dynamic_tag = errors = 0
|
||||
in_censorship = False
|
||||
in_censorship = is_goingdown = False
|
||||
state = 'text'
|
||||
index = base
|
||||
flags = 0
|
||||
@ -136,7 +162,7 @@ class UnBinary(object):
|
||||
tag = oc
|
||||
self.buf.write('<')
|
||||
if not (flags & FLAG_CLOSING):
|
||||
is_goingdown = 1
|
||||
is_goingdown = True
|
||||
if tag == 0x8000:
|
||||
state = 'get custom length'
|
||||
continue
|
||||
@ -167,7 +193,7 @@ class UnBinary(object):
|
||||
else:
|
||||
self.buf.write('>')
|
||||
index = self.binary_to_text(base=index, depth=depth+1)
|
||||
is_goingdown = 0
|
||||
is_goingdown = False
|
||||
if not tag_name:
|
||||
raise LitError('Tag ends before it begins.')
|
||||
self.buf.write('</'+tag_name+'>')
|
||||
@ -222,7 +248,7 @@ class UnBinary(object):
|
||||
if not in_censorship:
|
||||
self.buf.write(c)
|
||||
count -= 1
|
||||
elif count == 0:
|
||||
if count == 0:
|
||||
if not in_censorship:
|
||||
self.buf.write('"')
|
||||
in_censorship = False
|
||||
@ -268,7 +294,7 @@ class UnBinary(object):
|
||||
href += c
|
||||
count -= 1
|
||||
if count == 0:
|
||||
doc, m, frag = href.partition('#')
|
||||
doc, m, frag = href[1:].partition('#')
|
||||
path = self.item_path(doc)
|
||||
if m and frag:
|
||||
path += m + frag
|
||||
@ -297,100 +323,74 @@ class ManifestItem(object):
|
||||
def __repr__(self):
|
||||
return self.internal + u'->' + self.path
|
||||
|
||||
def preserve(function):
    """Decorate a method so it leaves ``self._stream`` where it found it.

    The position of ``self._stream`` is recorded before the wrapped method
    runs and restored afterwards, whether the method returns or raises.
    """
    @functools.wraps(function)
    def wrapper(self, *args, **kwargs):
        saved_position = self._stream.tell()
        try:
            result = function(self, *args, **kwargs)
        finally:
            self._stream.seek(saved_position)
        return result
    return wrapper
|
||||
|
||||
class LitFile(object):
|
||||
PIECE_SIZE = 16
|
||||
|
||||
def magic():
|
||||
@preserve
|
||||
def fget(self):
|
||||
val = None
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
self._stream.seek(0)
|
||||
val = self._stream.read(8)
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
return val
|
||||
self._stream.seek(0)
|
||||
return self._stream.read(8)
|
||||
return property(fget=fget)
|
||||
magic = magic()
|
||||
|
||||
def version():
    # Decorated with @preserve like the other header properties, so that
    # reading the property does not move the stream position.
    @preserve
    def fget(self):
        # Read four bytes at offset 8 and decode them with u32().
        self._stream.seek(8)
        return u32(self._stream.read(4))
    return property(fget=fget)
version = version()
|
||||
|
||||
def hdr_len():
|
||||
@preserve
|
||||
def fget(self):
|
||||
val = None
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
self._stream.seek(12)
|
||||
val = int32(self._stream.read(4))
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
return val
|
||||
self._stream.seek(12)
|
||||
return int32(self._stream.read(4))
|
||||
return property(fget=fget)
|
||||
hdr_len = hdr_len()
|
||||
|
||||
def num_pieces():
|
||||
@preserve
|
||||
def fget(self):
|
||||
val = None
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
self._stream.seek(16)
|
||||
val = int32(self._stream.read(4))
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
return val
|
||||
self._stream.seek(16)
|
||||
return int32(self._stream.read(4))
|
||||
return property(fget=fget)
|
||||
num_pieces = num_pieces()
|
||||
|
||||
def sec_hdr_len():
|
||||
@preserve
|
||||
def fget(self):
|
||||
val = None
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
self._stream.seek(20)
|
||||
val = int32(self._stream.read(4))
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
return val
|
||||
self._stream.seek(20)
|
||||
return int32(self._stream.read(4))
|
||||
return property(fget=fget)
|
||||
sec_hdr_len = sec_hdr_len()
|
||||
|
||||
def guid():
|
||||
@preserve
|
||||
def fget(self):
|
||||
val = None
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
self._stream.seek(24)
|
||||
val = self._stream.read(16)
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
return val
|
||||
self._stream.seek(24)
|
||||
return self._stream.read(16)
|
||||
return property(fget=fget)
|
||||
guid = guid()
|
||||
|
||||
def header():
|
||||
@preserve
|
||||
def fget(self):
|
||||
val = None
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
size = self.hdr_len \
|
||||
+ (self.num_pieces * self.PIECE_SIZE) \
|
||||
+ self.sec_hdr_len
|
||||
self._stream.seek(0)
|
||||
val = self._stream.read(size)
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
return val
|
||||
size = self.hdr_len \
|
||||
+ (self.num_pieces * self.PIECE_SIZE) \
|
||||
+ self.sec_hdr_len
|
||||
self._stream.seek(0)
|
||||
return self._stream.read(size)
|
||||
return property(fget=fget)
|
||||
header = header()
|
||||
|
||||
@ -403,69 +403,63 @@ class LitFile(object):
|
||||
self.read_secondary_header()
|
||||
self.read_header_pieces()
|
||||
|
||||
@preserve
|
||||
def read_secondary_header(self):
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE)
|
||||
bytes = self._stream.read(self.sec_hdr_len)
|
||||
offset = int32(bytes[4:])
|
||||
while offset < len(bytes):
|
||||
blocktype = bytes[offset:offset+4]
|
||||
blockver = u32(bytes[offset+4:])
|
||||
if blocktype == 'CAOL':
|
||||
if blockver != 2:
|
||||
raise LitError(
|
||||
'Unknown CAOL block format %d' % blockver)
|
||||
self.creator_id = u32(bytes[offset+12:])
|
||||
self.entry_chunklen = u32(bytes[offset+20:])
|
||||
self.count_chunklen = u32(bytes[offset+24:])
|
||||
self.entry_unknown = u32(bytes[offset+28:])
|
||||
self.count_unknown = u32(bytes[offset+32:])
|
||||
offset += 48
|
||||
elif blocktype == 'ITSF':
|
||||
if blockver != 4:
|
||||
raise LitError(
|
||||
'Unknown ITSF block format %d' % blockver)
|
||||
if u32(bytes[offset+4+16:]):
|
||||
raise LitError('This file has a 64bit content offset')
|
||||
self.content_offset = u32(bytes[offset+16:])
|
||||
self.timestamp = u32(bytes[offset+24:])
|
||||
self.language_id = u32(bytes[offset+28:])
|
||||
offset += 48
|
||||
if not hasattr(self, 'content_offset'):
|
||||
raise LitError('Could not figure out the content offset')
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE)
|
||||
bytes = self._stream.read(self.sec_hdr_len)
|
||||
offset = int32(bytes[4:])
|
||||
while offset < len(bytes):
|
||||
blocktype = bytes[offset:offset+4]
|
||||
blockver = u32(bytes[offset+4:])
|
||||
if blocktype == 'CAOL':
|
||||
if blockver != 2:
|
||||
raise LitError(
|
||||
'Unknown CAOL block format %d' % blockver)
|
||||
self.creator_id = u32(bytes[offset+12:])
|
||||
self.entry_chunklen = u32(bytes[offset+20:])
|
||||
self.count_chunklen = u32(bytes[offset+24:])
|
||||
self.entry_unknown = u32(bytes[offset+28:])
|
||||
self.count_unknown = u32(bytes[offset+32:])
|
||||
offset += 48
|
||||
elif blocktype == 'ITSF':
|
||||
if blockver != 4:
|
||||
raise LitError(
|
||||
'Unknown ITSF block format %d' % blockver)
|
||||
if u32(bytes[offset+4+16:]):
|
||||
raise LitError('This file has a 64bit content offset')
|
||||
self.content_offset = u32(bytes[offset+16:])
|
||||
self.timestamp = u32(bytes[offset+24:])
|
||||
self.language_id = u32(bytes[offset+28:])
|
||||
offset += 48
|
||||
if not hasattr(self, 'content_offset'):
|
||||
raise LitError('Could not figure out the content offset')
|
||||
|
||||
@preserve
|
||||
def read_header_pieces(self):
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
src = self.header[self.hdr_len:]
|
||||
for i in range(self.num_pieces):
|
||||
piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE]
|
||||
if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
|
||||
raise LitError('Piece %s has 64bit value' % repr(piece))
|
||||
offset, size = u32(piece), int32(piece[8:])
|
||||
self._stream.seek(offset)
|
||||
piece = self._stream.read(size)
|
||||
if i == 0:
|
||||
continue # Dont need this piece
|
||||
elif i == 1:
|
||||
if u32(piece[8:]) != self.entry_chunklen or \
|
||||
u32(piece[12:]) != self.entry_unknown:
|
||||
raise LitError('Secondary header does not match piece')
|
||||
self.read_directory(piece)
|
||||
elif i == 2:
|
||||
if u32(piece[8:]) != self.count_chunklen or \
|
||||
u32(piece[12:]) != self.count_unknown:
|
||||
raise LitError('Secondary header does not match piece')
|
||||
continue # No data needed from this piece
|
||||
elif i == 3:
|
||||
self.piece3_guid = piece
|
||||
elif i == 4:
|
||||
self.piece4_guid = piece
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
src = self.header[self.hdr_len:]
|
||||
for i in range(self.num_pieces):
|
||||
piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE]
|
||||
if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
|
||||
raise LitError('Piece %s has 64bit value' % repr(piece))
|
||||
offset, size = u32(piece), int32(piece[8:])
|
||||
self._stream.seek(offset)
|
||||
piece = self._stream.read(size)
|
||||
if i == 0:
|
||||
continue # Dont need this piece
|
||||
elif i == 1:
|
||||
if u32(piece[8:]) != self.entry_chunklen or \
|
||||
u32(piece[12:]) != self.entry_unknown:
|
||||
raise LitError('Secondary header does not match piece')
|
||||
self.read_directory(piece)
|
||||
elif i == 2:
|
||||
if u32(piece[8:]) != self.count_chunklen or \
|
||||
u32(piece[12:]) != self.count_unknown:
|
||||
raise LitError('Secondary header does not match piece')
|
||||
continue # No data needed from this piece
|
||||
elif i == 3:
|
||||
self.piece3_guid = piece
|
||||
elif i == 4:
|
||||
self.piece4_guid = piece
|
||||
|
||||
def read_directory(self, piece):
|
||||
self.entries = []
|
||||
@ -522,107 +516,87 @@ class LitFile(object):
|
||||
if not hasattr(self, 'manifest'):
|
||||
raise LitError('Lit file does not have a valid manifest')
|
||||
|
||||
@preserve
|
||||
def read_section_names(self, entry):
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
self._stream.seek(self.content_offset + entry.offset)
|
||||
raw = self._stream.read(entry.size)
|
||||
if len(raw) < 4:
|
||||
self._stream.seek(self.content_offset + entry.offset)
|
||||
raw = self._stream.read(entry.size)
|
||||
if len(raw) < 4:
|
||||
raise LitError('Invalid Namelist section')
|
||||
pos = 4
|
||||
self.num_sections = u16(raw[2:pos])
|
||||
|
||||
self.sections = {}
|
||||
for section in range(self.num_sections):
|
||||
size = u16(raw[pos:pos+2])
|
||||
pos += 2
|
||||
size = size*2 + 2
|
||||
if pos + size > len(raw):
|
||||
raise LitError('Invalid Namelist section')
|
||||
pos = 4
|
||||
self.num_sections = u16(raw[2:pos])
|
||||
|
||||
self.sections = {}
|
||||
for section in range(self.num_sections):
|
||||
size = u16(raw[pos:pos+2])
|
||||
pos += 2
|
||||
size = size*2 + 2
|
||||
if pos + size > len(raw):
|
||||
raise LitError('Invalid Namelist section')
|
||||
self.sections[section] = raw[pos:pos+size].decode('utf-16-le')
|
||||
pos += size
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
self.sections[section] = raw[pos:pos+size].decode('utf-16-le')
|
||||
pos += size
|
||||
|
||||
@preserve
|
||||
def read_manifest(self, entry):
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
self.manifest = []
|
||||
self._stream.seek(self.content_offset + entry.offset)
|
||||
raw = self._stream.read(entry.size)
|
||||
pos = 0
|
||||
while pos < len(raw):
|
||||
size = ord(raw[pos])
|
||||
if size == 0: break
|
||||
pos += 1
|
||||
root = raw[pos:pos+size].decode('utf8')
|
||||
pos += size
|
||||
if pos >= len(raw):
|
||||
raise LitError('Truncated manifest.')
|
||||
for state in ['spine', 'not spine', 'css', 'images']:
|
||||
num_files = int32(raw[pos:pos+4])
|
||||
self.manifest = []
|
||||
self._stream.seek(self.content_offset + entry.offset)
|
||||
raw = self._stream.read(entry.size)
|
||||
pos = 0
|
||||
while pos < len(raw):
|
||||
size = ord(raw[pos])
|
||||
if size == 0: break
|
||||
pos += 1
|
||||
root = raw[pos:pos+size].decode('utf8')
|
||||
pos += size
|
||||
if pos >= len(raw):
|
||||
raise LitError('Truncated manifest.')
|
||||
for state in ['spine', 'not spine', 'css', 'images']:
|
||||
num_files = int32(raw[pos:pos+4])
|
||||
pos += 4
|
||||
if num_files == 0: continue
|
||||
|
||||
i = 0
|
||||
while i < num_files:
|
||||
if pos+5 >= len(raw):
|
||||
raise LitError('Truncated manifest.')
|
||||
offset = u32(raw[pos:pos+4])
|
||||
pos += 4
|
||||
if num_files == 0: continue
|
||||
|
||||
i = 0
|
||||
while i < num_files:
|
||||
if pos+5 >= len(raw):
|
||||
raise LitError('Truncated manifest.')
|
||||
offset = u32(raw[pos:pos+4])
|
||||
pos += 4
|
||||
slen = ord(raw[pos])
|
||||
pos += 1
|
||||
internal = raw[pos:pos+slen].decode('utf8')
|
||||
pos += slen
|
||||
|
||||
slen = ord(raw[pos])
|
||||
pos += 1
|
||||
internal = raw[pos:pos+slen].decode('utf8')
|
||||
pos += slen
|
||||
slen = ord(raw[pos])
|
||||
pos += 1
|
||||
original = raw[pos:pos+slen].decode('utf8')
|
||||
pos += slen
|
||||
|
||||
slen = ord(raw[pos])
|
||||
pos += 1
|
||||
original = raw[pos:pos+slen].decode('utf8')
|
||||
pos += slen
|
||||
slen = ord(raw[pos])
|
||||
pos += 1
|
||||
mime_type = raw[pos:pos+slen].decode('utf8')
|
||||
pos += slen + 1
|
||||
|
||||
slen = ord(raw[pos])
|
||||
pos += 1
|
||||
mime_type = raw[pos:pos+slen].decode('utf8')
|
||||
pos += slen + 1
|
||||
|
||||
self.manifest.append(
|
||||
ManifestItem(original, internal, mime_type,
|
||||
offset, root, state))
|
||||
i += 1
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
self.manifest.append(
|
||||
ManifestItem(original, internal, mime_type,
|
||||
offset, root, state))
|
||||
i += 1
|
||||
|
||||
@preserve
|
||||
def read_meta(self, entry):
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
self._stream.seek(self.content_offset + entry.offset)
|
||||
raw = self._stream.read(entry.size)
|
||||
|
||||
xml = \
|
||||
'''\
|
||||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!DOCTYPE package
|
||||
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
|
||||
"http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
|
||||
'''+\
|
||||
unicode(UnBinary(raw, self.manifest))
|
||||
self.meta = xml
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
self._stream.seek(self.content_offset + entry.offset)
|
||||
raw = self._stream.read(entry.size)
|
||||
xml = OPF_DECL + unicode(UnBinary(raw, self.manifest))
|
||||
self.meta = xml
|
||||
|
||||
@preserve
|
||||
def read_image(self, internal_name):
|
||||
cover_entry = None
|
||||
for entry in self.entries:
|
||||
if internal_name in entry.name:
|
||||
cover_entry = entry
|
||||
break
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
self._stream.seek(self.content_offset + cover_entry.offset)
|
||||
return self._stream.read(cover_entry.size)
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
self._stream.seek(self.content_offset + cover_entry.offset)
|
||||
return self._stream.read(cover_entry.size)
|
||||
|
||||
def get_metadata(stream):
|
||||
try:
|
||||
|
@ -237,7 +237,7 @@ class OPF(MetaInformation):
|
||||
|
||||
def get_title(self):
    """Return the stripped title from the package metadata.

    Falls back to the stripped default title when there is no dc:title
    element or the element has no string content.
    """
    title = self.soup.package.metadata.find('dc:title')
    # title.string is checked as well: without the check, an element that
    # has no string content would hand None to the substitution below.
    if title and title.string:
        return self.ENTITY_PATTERN.sub(entity_to_unicode, title.string).strip()
    return self.default_title.strip()
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user