Dump NCX index when debugging KF8

This commit is contained in:
Kovid Goyal 2012-04-12 13:15:53 +05:30
parent b8a2716c35
commit 50e419efd2
2 changed files with 38 additions and 1 deletions

View File

@ -12,6 +12,7 @@ from collections import OrderedDict, namedtuple
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
from calibre.ebooks.mobi.reader.index import (CNCX, parse_indx_header,
parse_tagx_section, parse_index_record, INDEX_HEADER_FIELDS)
from calibre.ebooks.mobi.reader.ncx import (tag_fieldname_map, default_entry)
File = namedtuple('File',
'file_number name divtbl_count start_position length')
@ -123,4 +124,35 @@ class SECTIndex(Index):
)
)
class NCXIndex(Index):
    """Decoded entries of the NCX index, built for the KF8 debug dump.

    After construction ``self.records`` holds one dict per row of
    ``self.table``, in table order. It stays empty when ``self.table`` is
    ``None``.
    """

    def __init__(self, ncxidx, records, codec):
        """Parse the index and decode every row into ``self.records``.

        ``ncxidx``, ``records`` and ``codec`` are passed unchanged to the
        ``Index`` constructor, which is expected to provide ``self.table``
        and ``self.cncx``.

        Each record starts as a copy of ``default_entry`` with ``'name'``
        set to the row's text and ``'num'`` to its position in the table.
        Every tag listed in ``tag_fieldname_map`` that is present in the row
        is then stored under its field name.
        """
        super(NCXIndex, self).__init__(ncxidx, records, codec)
        self.records = []

        if self.table is None:
            return

        # Tags whose value is additionally looked up in the CNCX; the
        # result is stored under the mapped field name. Built once here
        # instead of once per tag of every entry.
        cncx_fields = {3:'text', 5:'kind', 70:'description', 71:'author',
                72:'image_caption', 73:'image_attribution'}

        for num, (text, tag_map) in enumerate(self.table.iteritems()):
            entry = default_entry.copy()
            entry['name'] = text
            entry['num'] = num

            for tag, (fieldname, i) in tag_fieldname_map.iteritems():
                if tag not in tag_map:
                    continue
                fieldvalue = tag_map[tag][i]
                if tag == 6:
                    # Appears to be an idx into the KF8 elems table with an
                    # offset, so keep every value rather than only the i-th
                    fieldvalue = tuple(tag_map[tag])
                entry[fieldname] = fieldvalue
                if tag in cncx_fields:
                    name = cncx_fields[tag]
                    # Fall back to the default when the CNCX has no entry
                    # for this value
                    entry[name] = self.cncx.get(fieldvalue,
                            default_entry[name])

            self.records.append(entry)

View File

@ -11,7 +11,7 @@ import sys, os, imghdr, struct
from itertools import izip
from calibre.ebooks.mobi.debug.headers import TextRecord
from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex)
from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex)
from calibre.ebooks.mobi.utils import read_font_record
from calibre.ebooks.mobi.debug import format_bytes
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
@ -92,6 +92,8 @@ class MOBIFile(object):
self.header.encoding)
self.sect_index = SECTIndex(self.header.sect_idx, self.mf.records,
self.header.encoding)
self.ncx_index = NCXIndex(self.header.primary_index_record,
self.mf.records, self.header.encoding)
def extract_resources(self):
self.resource_map = []
@ -159,3 +161,6 @@ def inspect_mobi(mobi_file, ddir):
with open(os.path.join(ddir, 'sect.record'), 'wb') as fo:
fo.write(str(f.sect_index).encode('utf-8'))
with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo:
fo.write(str(f.ncx_index).encode('utf-8'))