mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Mobi debug: Dump KF8 SKEL and SECT indices
This commit is contained in:
parent
5fd415ea2d
commit
94ff0c64d5
126
src/calibre/ebooks/mobi/debug/index.py
Normal file
126
src/calibre/ebooks/mobi/debug/index.py
Normal file
@ -0,0 +1,126 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from collections import OrderedDict, namedtuple
|
||||
|
||||
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
|
||||
from calibre.ebooks.mobi.reader.index import (CNCX, parse_indx_header,
|
||||
parse_tagx_section, parse_index_record, INDEX_HEADER_FIELDS)
|
||||
|
||||
# One parsed SKEL index entry; SKELIndex builds one of these per table key.
File = namedtuple('File', (
    'file_number',
    'name',
    'divtbl_count',
    'start_position',
    'length',
))
|
||||
|
||||
# One parsed SECT index entry; SECTIndex builds one of these per table key.
Elem = namedtuple('Elem', (
    'insert_pos',
    'toc_text',
    'file_number',
    'sequence_number',
    'start_pos',
    'length',
))
|
||||
|
||||
def read_index(sections, idx, codec):
    """Parse the index whose header record is ``sections[idx]``.

    The header record supplies the INDX header and the TAGX section. The
    ``count`` records that follow it are parsed as index records, and the
    ``ncncx`` records after those (if any) are loaded into a CNCX.

    :param sections: Sequence of record objects; each exposes its bytes
        as ``.raw``
    :param idx: Position of the index header record in ``sections``
    :param codec: Codec passed on to ``CNCX`` and ``parse_index_record``
    :return: ``(table, cncx, indx_header)`` where ``table`` is the
        OrderedDict filled in by ``parse_index_record``
    """
    table, cncx = OrderedDict(), CNCX([], codec)

    header_raw = sections[idx].raw
    indx_header = parse_indx_header(header_raw)
    num_records = indx_header['count']

    if indx_header['ncncx'] > 0:
        # The CNCX records sit directly after the last index record
        cncx_start = idx + num_records + 1
        cncx_end = cncx_start + indx_header['ncncx']
        cncx = CNCX([rec.raw for rec in sections[cncx_start:cncx_end]], codec)

    control_byte_count, tags = parse_tagx_section(
            header_raw[indx_header['tagx']:])

    first_record = idx + 1
    for record_num in xrange(first_record, first_record + num_records):
        parse_index_record(table, sections[record_num].raw,
                control_byte_count, tags, codec, indx_header['ordt_map'],
                strict=True)

    return table, cncx, indx_header
|
||||
|
||||
class Index(object):
    """Holds one parsed index and renders it as text for debugging.

    ``table``, ``cncx`` and ``header`` come from ``read_index`` and stay
    ``None`` when ``idx`` is ``NULL_INDEX``. ``records`` starts out as
    ``None``; subclasses fill it with their parsed entries.
    """

    def __init__(self, idx, records, codec):
        self.table = None
        self.cncx = None
        self.header = None
        self.records = None
        if idx == NULL_INDEX:
            return
        self.table, self.cncx, self.header = read_index(records, idx, codec)

    def render(self):
        """Return the dump as a list of lines (no trailing newlines)."""
        stars = '*' * 10
        lines = [stars + ' Index Header ' + stars]

        if self.header is not None:
            lines.extend('%-12s: %r' % (name, self.header[name])
                         for name in INDEX_HEADER_FIELDS)
        lines += ['', '']

        if self.cncx:
            lines.append(stars + ' CNCX ' + stars)
            lines.extend('%10s: %s' % (offset, val)
                         for offset, val in self.cncx.iteritems())
            lines += ['', '']

        if self.table is not None:
            lines.append(stars + ' %d Index Entries ' % len(self.table) + stars)
            lines.extend('%s: %r' % (key, value)
                         for key, value in self.table.iteritems())

        if self.records:
            lines += ['', '', stars + ' Parsed Entries ' + stars]
            lines.extend(repr(entry) for entry in self.records)

        lines.append('')
        return lines

    def __str__(self):
        return '\n'.join(self.render())
|
||||
|
||||
class SKELIndex(Index):
    """Index whose table entries are parsed into ``File`` records.

    Every entry must carry exactly tags 1 and 6: tag 1 supplies
    ``divtbl_count`` and tag 6 supplies ``(start_position, length)``.

    :raises ValueError: if an entry's tag set is not exactly ``{1, 6}``
    """

    def __init__(self, skelidx, records, codec):
        super(SKELIndex, self).__init__(skelidx, records, codec)
        self.records = []
        if self.table is None:
            return

        expected_tags = {1, 6}
        for file_number, (name, tag_map) in enumerate(self.table.iteritems()):
            found_tags = set(tag_map.iterkeys())
            if found_tags != expected_tags:
                raise ValueError('SKEL Index has unknown tags: %s'%
                        (found_tags - expected_tags))
            divtbl_count = tag_map[1][0]
            start_position, length = tag_map[6][0], tag_map[6][1]
            self.records.append(File(
                file_number=file_number,
                name=name,
                divtbl_count=divtbl_count,
                start_position=start_position,
                length=length,
            ))
|
||||
|
||||
class SECTIndex(Index):
    """Index whose table entries are parsed into ``Elem`` records.

    Every entry must carry exactly tags 2, 3, 4 and 6. The table key is
    converted with ``int()`` to give ``insert_pos``; tag 2 is looked up in
    the CNCX to give ``toc_text``.

    :raises ValueError: if an entry's tag set is not exactly
        ``{2, 3, 4, 6}``
    """

    def __init__(self, sectidx, records, codec):
        super(SECTIndex, self).__init__(sectidx, records, codec)
        self.records = []
        if self.table is None:
            return

        expected_tags = {2, 3, 4, 6}
        for key, tag_map in self.table.iteritems():
            found_tags = set(tag_map.iterkeys())
            if found_tags != expected_tags:
                raise ValueError('SECT Index has unknown tags: %s'%
                        (found_tags - expected_tags))

            toc_text = self.cncx[tag_map[2][0]]
            self.records.append(Elem(
                insert_pos=int(key),
                toc_text=toc_text,
                file_number=tag_map[3][0],
                sequence_number=tag_map[4][0],
                start_pos=tag_map[6][0],
                length=tag_map[6][1],
            ))
|
||||
|
||||
|
@ -11,6 +11,7 @@ import sys, os, imghdr, struct
|
||||
from itertools import izip
|
||||
|
||||
from calibre.ebooks.mobi.debug.headers import TextRecord
|
||||
from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex)
|
||||
from calibre.ebooks.mobi.utils import read_font_record
|
||||
from calibre.ebooks.mobi.debug import format_bytes
|
||||
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
|
||||
@ -65,6 +66,7 @@ class MOBIFile(object):
|
||||
self.header = self.mf.mobi8_header
|
||||
self.extract_resources()
|
||||
self.read_fdst()
|
||||
self.read_indices()
|
||||
|
||||
def print_header(self, f=sys.stdout):
|
||||
print (str(self.mf.palmdb).encode('utf-8'), file=f)
|
||||
@ -85,6 +87,12 @@ class MOBIFile(object):
|
||||
if self.fdst.num_sections != self.header.fdst_count:
|
||||
raise ValueError('KF8 Header contains invalid FDST count')
|
||||
|
||||
def read_indices(self):
    """Build the SKEL and SECT index objects from ``self.header``.

    The results are stored as ``self.skel_index`` and ``self.sect_index``.
    """
    header, records = self.header, self.mf.records
    self.skel_index = SKELIndex(header.skel_idx, records, header.encoding)
    self.sect_index = SECTIndex(header.sect_idx, records, header.encoding)
|
||||
|
||||
def extract_resources(self):
|
||||
self.resource_map = []
|
||||
known_types = {b'FLIS', b'FCIS', b'SRCS',
|
||||
@ -145,3 +153,9 @@ def inspect_mobi(mobi_file, ddir):
|
||||
with open(os.path.join(ddir, 'fdst.record'), 'wb') as fo:
|
||||
fo.write(str(f.fdst).encode('utf-8'))
|
||||
|
||||
with open(os.path.join(ddir, 'skel.record'), 'wb') as fo:
|
||||
fo.write(str(f.skel_index).encode('utf-8'))
|
||||
|
||||
with open(os.path.join(ddir, 'sect.record'), 'wb') as fo:
|
||||
fo.write(str(f.sect_index).encode('utf-8'))
|
||||
|
||||
|
@ -111,6 +111,12 @@ class CNCX(object): # {{{
|
||||
|
||||
def get(self, offset, default=None):
    """Look up ``offset`` via ``self.records.get``, falling back to ``default``."""
    records = self.records
    return records.get(offset, default)
|
||||
|
||||
def __bool__(self):
    """Truth value of the CNCX: true only when ``self.records`` is non-empty."""
    return bool(self.records)

# Python 2 (which this code targets: iteritems/xrange) looks up __nonzero__,
# not __bool__, when truth-testing an object. Without this alias __bool__ is
# never consulted there.
# NOTE(review): the rest of the class is not visible here; if it defines
# __len__ the alias is redundant but harmless.
__nonzero__ = __bool__
|
||||
|
||||
def iteritems(self):
    """Return whatever ``self.records.iteritems()`` returns."""
    records = self.records
    return records.iteritems()
|
||||
# }}}
|
||||
|
||||
def parse_tagx_section(data):
|
||||
|
Loading…
x
Reference in New Issue
Block a user