diff --git a/src/calibre/ebooks/mobi/debug/index.py b/src/calibre/ebooks/mobi/debug/index.py index 861ba16bee..3b03431628 100644 --- a/src/calibre/ebooks/mobi/debug/index.py +++ b/src/calibre/ebooks/mobi/debug/index.py @@ -25,35 +25,36 @@ Elem = namedtuple('Chunk', GuideRef = namedtuple('GuideRef', 'type title pos_fid') -INDEX_HEADER_FIELDS = INDEX_HEADER_FIELDS + ('last_index', 'tagx_block_size', 'tagx_block') +INDEX_HEADER_FIELDS = INDEX_HEADER_FIELDS + ('indices', 'tagx_block_size', 'tagx_block') FIELD_NAMES = {'len':'Header length', 'type':'Unknown', 'gen':'Index Type (0 - normal, 2 - inflection)', - 'start':'IDXT Offset', 'count':'Number of Index entries or records', 'code': 'character encoding', 'lng':'Unknown', - 'total':'Total number of Index Entries in all records', 'ordt': 'ORDT Offset', 'ligt':'LIGT Offset', 'nligt':'Number of LIGT', - 'ncncx':'Number of CNCX records', 'last_index':'Geometry of index records'} + 'start':'IDXT Offset', 'count':'Number of entries in this record', 'code': 'character encoding', 'lng':'Unknown', + 'total':'Total number of actual Index Entries in all records', 'ordt': 'ORDT Offset', 'ligt':'LIGT Offset', 'nligt':'Number of LIGT', + 'ncncx':'Number of CNCX records', 'indices':'Geometry of index records'} def read_variable_len_data(data, header): offset = header['tagx'] indices = [] + idxt_offset = header['start'] + idxt_size = 4 + header['count'] * 2 if offset > 0: tagx_block_size = header['tagx_block_size'] = struct.unpack_from(b'>I', data, offset + 4)[0] header['tagx_block'] = data[offset:offset+tagx_block_size] - offset += tagx_block_size + offset = idxt_offset + 4 for i in xrange(header['count']): - strlen = bytearray(data[offset:offset+1])[0] - text = data[offset+1:offset+1+strlen].decode('ascii') - offset += 1 + strlen - num = struct.unpack_from(b'>H', data, offset)[0] + p = struct.unpack_from(b'>H', data, offset)[0] offset += 2 + strlen = bytearray(data[p])[0] + text = data[p+1:p+1+strlen] + p += 1 + strlen + num = struct.unpack_from(b'>H', data, p)[0] indices.append((text, num)) else: header['tagx_block'] = b'' header['tagx_block_size'] = 0 - idxt_offset = header['start'] - idxt_size = 4 + header['count'] * 2 trailing_bytes = data[idxt_offset+idxt_size:] if trailing_bytes.rstrip(b'\0'): raise ValueError('Traling bytes after last IDXT entry: %r' % trailing_bytes.rstrip(b'\0')) - header['last_index'] = indices + header['indices'] = indices def read_index(sections, idx, codec): table, cncx = OrderedDict(), CNCX([], codec)