Fixed a bug in directory processing and renamed methods to reflect their public/private status.

This commit is contained in:
Marshall T. Vandegrift 2008-07-19 16:50:14 -04:00
parent 6b18c8b745
commit 006182e5f4

View File

@ -19,7 +19,7 @@ import calibre.ebooks.lit.mssha1 as mssha1
import calibre.ebooks.lit.msdes as msdes import calibre.ebooks.lit.msdes as msdes
import calibre.utils.lzx as lzx import calibre.utils.lzx as lzx
OPF_DECL = """"<?xml version="1.0" encoding="UTF-8" ?> OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE package <!DOCTYPE package
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN" PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
"http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd"> "http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
@ -421,8 +421,13 @@ class LitReader(object):
raise LitError('Not a valid LIT file') raise LitError('Not a valid LIT file')
if self.version != 1: if self.version != 1:
raise LitError('Unknown LIT version %d'%(self.version,)) raise LitError('Unknown LIT version %d'%(self.version,))
self.read_secondary_header() self.entries = {}
self.read_header_pieces() self._read_secondary_header()
self._read_header_pieces()
self._read_section_names()
self._read_manifest()
self._read_meta()
self._read_drm()
@preserve @preserve
def __len__(self): def __len__(self):
@ -437,10 +442,9 @@ class LitReader(object):
def _read_content(self, offset, size): def _read_content(self, offset, size):
return self._read_raw(self.content_offset + offset, size) return self._read_raw(self.content_offset + offset, size)
@preserve def _read_secondary_header(self):
def read_secondary_header(self): offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE)
self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE) bytes = self._read_raw(offset, self.sec_hdr_len)
bytes = self._stream.read(self.sec_hdr_len)
offset = int32(bytes[4:]) offset = int32(bytes[4:])
while offset < len(bytes): while offset < len(bytes):
blocktype = bytes[offset:offset+4] blocktype = bytes[offset:offset+4]
@ -468,23 +472,21 @@ class LitReader(object):
if not hasattr(self, 'content_offset'): if not hasattr(self, 'content_offset'):
raise LitError('Could not figure out the content offset') raise LitError('Could not figure out the content offset')
@preserve def _read_header_pieces(self):
def read_header_pieces(self):
src = self.header[self.hdr_len:] src = self.header[self.hdr_len:]
for i in range(self.num_pieces): for i in range(self.num_pieces):
piece = src[i * self.PIECE_SIZE:(i + 1) * self.PIECE_SIZE] piece = src[i * self.PIECE_SIZE:(i + 1) * self.PIECE_SIZE]
if u32(piece[4:]) != 0 or u32(piece[12:]) != 0: if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
raise LitError('Piece %s has 64bit value' % repr(piece)) raise LitError('Piece %s has 64bit value' % repr(piece))
offset, size = u32(piece), int32(piece[8:]) offset, size = u32(piece), int32(piece[8:])
self._stream.seek(offset) piece = self._read_raw(offset, size)
piece = self._stream.read(size)
if i == 0: if i == 0:
continue # Dont need this piece continue # Dont need this piece
elif i == 1: elif i == 1:
if u32(piece[8:]) != self.entry_chunklen or \ if u32(piece[8:]) != self.entry_chunklen or \
u32(piece[12:]) != self.entry_unknown: u32(piece[12:]) != self.entry_unknown:
raise LitError('Secondary header does not match piece') raise LitError('Secondary header does not match piece')
self.read_directory(piece) self._read_directory(piece)
elif i == 2: elif i == 2:
if u32(piece[8:]) != self.count_chunklen or \ if u32(piece[8:]) != self.count_chunklen or \
u32(piece[12:]) != self.count_unknown: u32(piece[12:]) != self.count_unknown:
@ -495,58 +497,44 @@ class LitReader(object):
elif i == 4: elif i == 4:
self.piece4_guid = piece self.piece4_guid = piece
def read_directory(self, piece): def _read_directory(self, piece):
self.entries = {}
if not piece.startswith('IFCM'): if not piece.startswith('IFCM'):
raise LitError('Header piece #1 is not main directory.') raise LitError('Header piece #1 is not main directory.')
chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28]) chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28])
if (32 + chunk_size * num_chunks) != len(piece): if (32 + (num_chunks * chunk_size)) != len(piece):
raise LitError('IFCM HEADER has incorrect length') raise LitError('IFCM HEADER has incorrect length')
for chunk in range(num_chunks): for i in xrange(num_chunks):
p = 32 + chunk * chunk_size offset = 32 + (i * chunk_size)
if piece[p:p+4] != 'AOLL': chunk = piece[offset:offset + chunk_size]
continue tag, chunk = chunk[:4], chunk[4:]
remaining = chunk_size - int32(piece[p+4:p+8]) - 48 if tag != 'AOLL': continue
if remaining < 0: remaining, chunk = int32(chunk[:4]), chunk[4:]
if remaining >= chunk_size:
raise LitError('AOLL remaining count is negative') raise LitError('AOLL remaining count is negative')
entries = u16(piece[p+chunk_size-2:]) remaining = chunk_size - (remaining + 48)
if entries <= 0: entries = u16(chunk[-2:])
# Hopefully everything will work even without a correct entries if entries == 0:
# count # Hopefully will work even without a correct entries count
entries = (2 ** 16) - 1 entries = (2 ** 16) - 1
piece = piece[p+48:] chunk = chunk[40:]
i = 0 for j in xrange(entries):
while i < entries:
if remaining <= 0: break if remaining <= 0: break
namelen, piece, remaining = encint(piece, remaining) namelen, chunk, remaining = encint(chunk, remaining)
if namelen != (namelen & 0x7fffffff): if namelen != (namelen & 0x7fffffff):
raise LitError('Directory entry had 64bit name length.') raise LitError('Directory entry had 64bit name length.')
if namelen > remaining - 3: if namelen > remaining - 3:
raise LitError('Read past end of directory chunk') raise LitError('Read past end of directory chunk')
name = piece[:namelen] name, chunk = chunk[:namelen], chunk[namelen:]
piece = piece[namelen:] section, chunk, remaining = encint(chunk, remaining)
section, piece, remaining = encint(piece, remaining) offset, chunk, remaining = encint(chunk, remaining)
offset, piece, remaining = encint(piece, remaining) size, chunk, remaining = encint(chunk, remaining)
size, piece, remaining = encint(piece, remaining)
entry = DirectoryEntry(name, section, offset, size) entry = DirectoryEntry(name, section, offset, size)
if name == '::DataSpace/NameList':
self.read_section_names(entry)
elif name == '/manifest':
self.read_manifest(entry)
elif name == '/meta':
self.read_meta(entry)
self.entries[name] = entry self.entries[name] = entry
i += 1
if not hasattr(self, 'section_names'):
raise LitError('Lit file does not have a valid NameList')
if not hasattr(self, 'manifest'):
raise LitError('Lit file does not have a valid manifest')
self.read_drm()
def read_section_names(self, entry): def _read_section_names(self):
raw = self._read_content(entry.offset, entry.size) if '::DataSpace/NameList' not in self.entries:
raise LitError('Lit file does not have a valid NameList')
raw = self.get_file('::DataSpace/NameList')
if len(raw) < 4: if len(raw) < 4:
raise LitError('Invalid Namelist section') raise LitError('Invalid Namelist section')
pos = 4 pos = 4
@ -563,9 +551,11 @@ class LitReader(object):
raw[pos:pos+size].decode('utf-16-le').rstrip('\000') raw[pos:pos+size].decode('utf-16-le').rstrip('\000')
pos += size pos += size
def read_manifest(self, entry): def _read_manifest(self):
if '/manifest' not in self.entries:
raise LitError('Lit file does not have a valid manifest')
raw = self.get_file('/manifest')
self.manifest = {} self.manifest = {}
raw = self._read_content(entry.offset, entry.size)
while raw: while raw:
slen, raw = ord(raw[0]), raw[1:] slen, raw = ord(raw[0]), raw[1:]
if slen == 0: break if slen == 0: break
@ -600,12 +590,12 @@ class LitReader(object):
for item in mlist: for item in mlist:
item.path = item.path[slen:] item.path = item.path[slen:]
def read_meta(self, entry): def _read_meta(self):
raw = self._read_content(entry.offset, entry.size) raw = self.get_file('/meta')
xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP)) xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP))
self.meta = xml self.meta = xml
def read_drm(self): def _read_drm(self):
self.drmlevel = 0 self.drmlevel = 0
if '/DRMStorage/Licenses/EUL' in self.entries: if '/DRMStorage/Licenses/EUL' in self.entries:
self.drmlevel = 5 self.drmlevel = 5
@ -615,13 +605,13 @@ class LitReader(object):
self.drmlevel = 1 self.drmlevel = 1
else: else:
return return
des = msdes.new(self.calculate_deskey()) des = msdes.new(self._calculate_deskey())
bookkey = des.decrypt(self.get_file('/DRMStorage/DRMSealed')) bookkey = des.decrypt(self.get_file('/DRMStorage/DRMSealed'))
if bookkey[0] != '\000': if bookkey[0] != '\000':
raise LitError('Unable to decrypt title key!') raise LitError('Unable to decrypt title key!')
self.bookkey = bookkey[1:9] self.bookkey = bookkey[1:9]
def calculate_deskey(self): def _calculate_deskey(self):
hashfiles = ['/meta', '/DRMStorage/DRMSource'] hashfiles = ['/meta', '/DRMStorage/DRMSource']
if self.drmlevel == 3: if self.drmlevel == 3:
hashfiles.append('/DRMStorage/DRMBookplate') hashfiles.append('/DRMStorage/DRMBookplate')
@ -726,19 +716,18 @@ class LitReader(object):
u = int32(reset_table[ofs_entry + 4:]) u = int32(reset_table[ofs_entry + 4:])
if u != 0: if u != 0:
raise LitError("Reset table entry greater than 32 bits") raise LitError("Reset table entry greater than 32 bits")
if size >= (len(content) + base): if size >= len(content):
raise("Reset table entry out of bounds") raise("Reset table entry out of bounds")
if bytes_remaining >= window_bytes: if bytes_remaining >= window_bytes:
lzx.reset() lzx.reset()
result.append(lzx.decompress(content, window_bytes)) result.append(lzx.decompress(content[base:size], window_bytes))
bytes_remaining -= window_bytes bytes_remaining -= window_bytes
content = content[size - base:]
base = size base = size
accum += int32(reset_table[RESET_INTERVAL:]) accum += int32(reset_table[RESET_INTERVAL:])
ofs_entry += 8 ofs_entry += 8
if bytes_remaining < window_bytes and bytes_remaining > 0: if bytes_remaining < window_bytes and bytes_remaining > 0:
lzx.reset() lzx.reset()
result.append(lzx.decompress(content, bytes_remaining)) result.append(lzx.decompress(content[base:], bytes_remaining))
bytes_remaining = 0 bytes_remaining = 0
if bytes_remaining > 0: if bytes_remaining > 0:
raise LitError("Failed to completely decompress section") raise LitError("Failed to completely decompress section")