Remove incorrect trailing bytes check from mobi 6 dump code and put a correct version in the mobi 8 dump code

This commit is contained in:
Kovid Goyal 2014-03-29 14:38:21 +05:30
parent f679e5cf57
commit 624b39a32e
2 changed files with 10 additions and 5 deletions

View File

@ -31,7 +31,7 @@ FIELD_NAMES = {'len':'Header length', 'type':'Unknown', 'gen':'Index Type (0 - n
'total':'Total number of Index Entries in all records', 'ordt': 'ORDT Offset', 'ligt':'LIGT Offset', 'nligt':'Number of LIGT', 'total':'Total number of Index Entries in all records', 'ordt': 'ORDT Offset', 'ligt':'LIGT Offset', 'nligt':'Number of LIGT',
'ncncx':'Number of CNCX records', 'last_index':'Geometry of index records'} 'ncncx':'Number of CNCX records', 'last_index':'Geometry of index records'}
def read_last_index(data, header): def read_variable_len_data(data, header):
offset = header['tagx'] offset = header['tagx']
indices = [] indices = []
if offset > 0: if offset > 0:
@ -48,6 +48,11 @@ def read_last_index(data, header):
else: else:
header['tagx_block'] = b'' header['tagx_block'] = b''
header['tagx_block_size'] = 0 header['tagx_block_size'] = 0
idxt_offset = header['start']
idxt_size = 4 + header['count'] * 2
trailing_bytes = data[idxt_offset+idxt_size:]
if trailing_bytes.rstrip(b'\0'):
raise ValueError('Traling bytes after last IDXT entry: %r' % trailing_bytes.rstrip(b'\0'))
header['last_index'] = indices header['last_index'] = indices
def read_index(sections, idx, codec): def read_index(sections, idx, codec):
@ -66,7 +71,7 @@ def read_index(sections, idx, codec):
tag_section_start = indx_header['tagx'] tag_section_start = indx_header['tagx']
control_byte_count, tags = parse_tagx_section(data[tag_section_start:]) control_byte_count, tags = parse_tagx_section(data[tag_section_start:])
read_last_index(data, indx_header) read_variable_len_data(data, indx_header)
index_headers = [] index_headers = []
for i in xrange(idx + 1, idx + 1 + indx_count): for i in xrange(idx + 1, idx + 1 + indx_count):
@ -74,7 +79,7 @@ def read_index(sections, idx, codec):
data = sections[i].raw data = sections[i].raw
index_headers.append(parse_index_record(table, data, control_byte_count, tags, codec, index_headers.append(parse_index_record(table, data, control_byte_count, tags, codec,
indx_header['ordt_map'], strict=True)) indx_header['ordt_map'], strict=True))
read_last_index(data, index_headers[-1]) read_variable_len_data(data, index_headers[-1])
return table, cncx, indx_header, index_headers return table, cncx, indx_header, index_headers
class Index(object): class Index(object):

View File

@ -192,8 +192,8 @@ class IndexHeader(object): # {{{
length_check, = struct.unpack(b'>H', idxt[4:6]) length_check, = struct.unpack(b'>H', idxt[4:6])
if length_check != self.header_length + self.tagx_header_length: if length_check != self.header_length + self.tagx_header_length:
raise ValueError('Length check failed') raise ValueError('Length check failed')
if idxt[6:].replace(b'\0', b''): # if idxt[6:].replace(b'\0', b''):
raise ValueError('Non null trailing bytes after IDXT') # raise ValueError('Non null trailing bytes after IDXT')
def __str__(self): def __str__(self):
ans = ['*'*20 + ' Index Header (%d bytes)'%len(self.record.raw)+ '*'*20] ans = ['*'*20 + ' Index Header (%d bytes)'%len(self.record.raw)+ '*'*20]