Dump NCX index when debugging KF8

This commit is contained in:
Kovid Goyal 2012-04-12 13:15:53 +05:30
parent b8a2716c35
commit 50e419efd2
2 changed files with 38 additions and 1 deletion

View File

@ -12,6 +12,7 @@ from collections import OrderedDict, namedtuple
from calibre.ebooks.mobi.reader.headers import NULL_INDEX from calibre.ebooks.mobi.reader.headers import NULL_INDEX
from calibre.ebooks.mobi.reader.index import (CNCX, parse_indx_header, from calibre.ebooks.mobi.reader.index import (CNCX, parse_indx_header,
parse_tagx_section, parse_index_record, INDEX_HEADER_FIELDS) parse_tagx_section, parse_index_record, INDEX_HEADER_FIELDS)
from calibre.ebooks.mobi.reader.ncx import (tag_fieldname_map, default_entry)
File = namedtuple('File', File = namedtuple('File',
'file_number name divtbl_count start_position length') 'file_number name divtbl_count start_position length')
@ -123,4 +124,35 @@ class SECTIndex(Index):
) )
) )
class NCXIndex(Index):
    """Debug view of the NCX index: one dict per row of the index table.

    After construction ``self.records`` is a list of entries. Each entry is
    a copy of ``default_entry`` with ``'name'`` and ``'num'`` set, plus one
    field for every tag in ``tag_fieldname_map`` that the row carries.
    """

    # Tags whose value is a key into the CNCX string table, mapped to the
    # entry field that receives the looked-up string.
    _CNCX_FIELDS = {
        3: 'text',
        5: 'kind',
        70: 'description',
        71: 'author',
        72: 'image_caption',
        73: 'image_attribution',
    }

    def __init__(self, ncxidx, records, codec):
        """Build ``self.records`` from the index table.

        All three arguments are forwarded unchanged to the base ``Index``
        constructor, which presumably populates ``self.table`` and
        ``self.cncx`` (not visible here -- confirm against ``Index``).
        """
        super(NCXIndex, self).__init__(ncxidx, records, codec)
        self.records = []

        if self.table is None:
            return

        for num, (text, tag_map) in enumerate(self.table.iteritems()):
            entry = default_entry.copy()
            entry['name'] = text
            entry['num'] = num

            for tag, (fieldname, i) in tag_fieldname_map.iteritems():
                if tag not in tag_map:
                    continue

                if tag == 6:
                    # Appears to be an idx into the KF8 elems table with an
                    # offset, so keep every value rather than just one.
                    fieldvalue = tuple(tag_map[tag])
                else:
                    fieldvalue = tag_map[tag][i]
                entry[fieldname] = fieldvalue

                # For string-valued tags, also store the CNCX text, falling
                # back to the default when the key is absent from the CNCX.
                cncx_field = self._CNCX_FIELDS.get(tag)
                if cncx_field is not None:
                    entry[cncx_field] = self.cncx.get(
                        fieldvalue, default_entry[cncx_field])

            self.records.append(entry)

View File

@ -11,7 +11,7 @@ import sys, os, imghdr, struct
from itertools import izip from itertools import izip
from calibre.ebooks.mobi.debug.headers import TextRecord from calibre.ebooks.mobi.debug.headers import TextRecord
from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex) from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex)
from calibre.ebooks.mobi.utils import read_font_record from calibre.ebooks.mobi.utils import read_font_record
from calibre.ebooks.mobi.debug import format_bytes from calibre.ebooks.mobi.debug import format_bytes
from calibre.ebooks.mobi.reader.headers import NULL_INDEX from calibre.ebooks.mobi.reader.headers import NULL_INDEX
@ -92,6 +92,8 @@ class MOBIFile(object):
self.header.encoding) self.header.encoding)
self.sect_index = SECTIndex(self.header.sect_idx, self.mf.records, self.sect_index = SECTIndex(self.header.sect_idx, self.mf.records,
self.header.encoding) self.header.encoding)
self.ncx_index = NCXIndex(self.header.primary_index_record,
self.mf.records, self.header.encoding)
def extract_resources(self): def extract_resources(self):
self.resource_map = [] self.resource_map = []
@ -159,3 +161,6 @@ def inspect_mobi(mobi_file, ddir):
with open(os.path.join(ddir, 'sect.record'), 'wb') as fo: with open(os.path.join(ddir, 'sect.record'), 'wb') as fo:
fo.write(str(f.sect_index).encode('utf-8')) fo.write(str(f.sect_index).encode('utf-8'))
with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo:
fo.write(str(f.ncx_index).encode('utf-8'))