Dump NCX index when debugging KF8

2025-07-09 03:04:10 -04:00 · 2012-04-12 13:15:53 +05:30 · 2012-04-12 13:15:53 +05:30 · 50e419efd2
commit 50e419efd2
parent b8a2716c35
2 changed files with 38 additions and 1 deletions
--- a/src/calibre/ebooks/mobi/debug/index.py
+++ b/src/calibre/ebooks/mobi/debug/index.py
@@ -12,6 +12,7 @@ from collections import OrderedDict, namedtuple
 from calibre.ebooks.mobi.reader.headers import NULL_INDEX
 from calibre.ebooks.mobi.reader.index import (CNCX, parse_indx_header,
        parse_tagx_section, parse_index_record, INDEX_HEADER_FIELDS)
+from calibre.ebooks.mobi.reader.ncx import (tag_fieldname_map, default_entry)

 File = namedtuple('File',
    'file_number name divtbl_count start_position length')
@@ -123,4 +124,35 @@ class SECTIndex(Index):
                    )
                )

+class NCXIndex(Index):
+    # Debug view of the NCX index: builds one entry per row of self.table.
+    def __init__(self, ncxidx, records, codec):
+        super(NCXIndex, self).__init__(ncxidx, records, codec)
+        self.records = []  # filled below with one entry per table row
+        # NOTE(review): self.table and self.cncx are presumably set by Index.__init__ (not visible here) -- confirm
+        if self.table is not None:
+            for num, x in enumerate(self.table.iteritems()):
+                text, tag_map = x  # table key and its per-tag values
+                entry = default_entry.copy()  # start from the defaults; fields found below override them
+                entry['name'] = text
+                entry['num'] = num  # position of this row in the table's iteration order
+                # Copy each known tag that this row carries into its named field.
+                for tag in tag_fieldname_map.iterkeys():
+                    fieldname, i = tag_fieldname_map[tag]  # field name, and which of the tag's values to use
+                    if tag in tag_map:
+                        fieldvalue = tag_map[tag][i]
+                        if tag == 6:
+                            # Appears to be an idx into the KF8 elems table with an
+                            # offset
+                            fieldvalue = tuple(tag_map[tag])  # keep every value for tag 6, not only index i
+                        entry[fieldname] = fieldvalue
+                        for which, name in {3:'text', 5:'kind', 70:'description',
+                                71:'author', 72:'image_caption',
+                                73:'image_attribution'}.iteritems():
+                            if tag == which:  # for these tags the value is also looked up in self.cncx
+                                entry[name] = self.cncx.get(fieldvalue,
+                                        default_entry[name])  # default_entry[name] is passed as get()'s fallback
+                self.records.append(entry)
+
+

--- a/src/calibre/ebooks/mobi/debug/mobi8.py
+++ b/src/calibre/ebooks/mobi/debug/mobi8.py
@@ -11,7 +11,7 @@ import sys, os, imghdr, struct
 from itertools import izip

 from calibre.ebooks.mobi.debug.headers import TextRecord
-from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex)
+from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex)
 from calibre.ebooks.mobi.utils import read_font_record
 from calibre.ebooks.mobi.debug import format_bytes
 from calibre.ebooks.mobi.reader.headers import NULL_INDEX
@@ -92,6 +92,8 @@ class MOBIFile(object):
                self.header.encoding)
        self.sect_index = SECTIndex(self.header.sect_idx, self.mf.records,
                self.header.encoding)
+        self.ncx_index = NCXIndex(self.header.primary_index_record,
+                self.mf.records, self.header.encoding)

    def extract_resources(self):
        self.resource_map = []
@@ -159,3 +161,6 @@ def inspect_mobi(mobi_file, ddir):
    with open(os.path.join(ddir, 'sect.record'), 'wb') as fo:
        fo.write(str(f.sect_index).encode('utf-8'))

+    with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo:
+        fo.write(str(f.ncx_index).encode('utf-8'))
+