mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Dump the index record geometry by using the IDXT table
This commit is contained in:
parent
624b39a32e
commit
df0f4106a5
@@ -25,35 +25,36 @@ Elem = namedtuple('Chunk',
|
||||
|
||||
GuideRef = namedtuple('GuideRef', 'type title pos_fid')
|
||||
|
||||
INDEX_HEADER_FIELDS = INDEX_HEADER_FIELDS + ('last_index', 'tagx_block_size', 'tagx_block')
|
||||
INDEX_HEADER_FIELDS = INDEX_HEADER_FIELDS + ('indices', 'tagx_block_size', 'tagx_block')
|
||||
FIELD_NAMES = {'len':'Header length', 'type':'Unknown', 'gen':'Index Type (0 - normal, 2 - inflection)',
|
||||
'start':'IDXT Offset', 'count':'Number of Index entries or records', 'code': 'character encoding', 'lng':'Unknown',
|
||||
'total':'Total number of Index Entries in all records', 'ordt': 'ORDT Offset', 'ligt':'LIGT Offset', 'nligt':'Number of LIGT',
|
||||
'ncncx':'Number of CNCX records', 'last_index':'Geometry of index records'}
|
||||
'start':'IDXT Offset', 'count':'Number of entries in this record', 'code': 'character encoding', 'lng':'Unknown',
|
||||
'total':'Total number of actual Index Entries in all records', 'ordt': 'ORDT Offset', 'ligt':'LIGT Offset', 'nligt':'Number of LIGT',
|
||||
'ncncx':'Number of CNCX records', 'indices':'Geometry of index records'}
|
||||
|
||||
def read_variable_len_data(data, header):
|
||||
offset = header['tagx']
|
||||
indices = []
|
||||
idxt_offset = header['start']
|
||||
idxt_size = 4 + header['count'] * 2
|
||||
if offset > 0:
|
||||
tagx_block_size = header['tagx_block_size'] = struct.unpack_from(b'>I', data, offset + 4)[0]
|
||||
header['tagx_block'] = data[offset:offset+tagx_block_size]
|
||||
offset += tagx_block_size
|
||||
offset = idxt_offset + 4
|
||||
for i in xrange(header['count']):
|
||||
strlen = bytearray(data[offset:offset+1])[0]
|
||||
text = data[offset+1:offset+1+strlen].decode('ascii')
|
||||
offset += 1 + strlen
|
||||
num = struct.unpack_from(b'>H', data, offset)[0]
|
||||
p = struct.unpack_from(b'>H', data, offset)[0]
|
||||
offset += 2
|
||||
strlen = bytearray(data[p])[0]
|
||||
text = data[p+1:p+1+strlen]
|
||||
p += 1 + strlen
|
||||
num = struct.unpack_from(b'>H', data, p)[0]
|
||||
indices.append((text, num))
|
||||
else:
|
||||
header['tagx_block'] = b''
|
||||
header['tagx_block_size'] = 0
|
||||
idxt_offset = header['start']
|
||||
idxt_size = 4 + header['count'] * 2
|
||||
trailing_bytes = data[idxt_offset+idxt_size:]
|
||||
if trailing_bytes.rstrip(b'\0'):
|
||||
raise ValueError('Traling bytes after last IDXT entry: %r' % trailing_bytes.rstrip(b'\0'))
|
||||
header['last_index'] = indices
|
||||
header['indices'] = indices
|
||||
|
||||
def read_index(sections, idx, codec):
|
||||
table, cncx = OrderedDict(), CNCX([], codec)
|
||||
|
Loading…
x
Reference in New Issue
Block a user