KF8 debug: Dump skeleton structure

2025-08-30 23:00:21 -04:00 · 2012-04-18 17:02:59 +05:30 · 2012-04-18 17:02:59 +05:30 · 02920d2e7d
commit 02920d2e7d
parent 27a855b477
1 changed files with 44 additions and 1 deletions
--- a/src/calibre/ebooks/mobi/debug/mobi8.py
+++ b/src/calibre/ebooks/mobi/debug/mobi8.py
@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
 import sys, os, imghdr, struct
 from itertools import izip
 from calibre import CurrentDir
 from calibre.ebooks.mobi.debug.headers import TextRecord
 from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex)
 from calibre.ebooks.mobi.utils import read_font_record
@ -43,6 +44,24 @@ class FDST(object):
        return '\n'.join(ans)
 class File(object):
    def __init__(self, skel, skeleton, text, first_aid, sections):
        self.name = 'part%04d'%skel.file_number
        self.skeleton, self.text, self.first_aid = skeleton, text, first_aid
        self.sections = sections
    def dump(self, ddir):
        with open(os.path.join(ddir, self.name + '.html'), 'wb') as f:
            f.write(self.text)
        base = os.path.join(ddir, self.name + '-parts')
        os.mkdir(base)
        with CurrentDir(base):
            with open('skeleton.html', 'wb') as f:
                f.write(self.skeleton)
            for i, text in enumerate(self.sections):
                with open('sect-%04d.html'%i, 'wb') as f:
                    f.write(text)
 class MOBIFile(object):
@ -67,6 +86,7 @@ class MOBIFile(object):
        self.extract_resources()
        self.read_fdst()
        self.read_indices()
        self.build_files()
    def print_header(self, f=sys.stdout):
        print (str(self.mf.palmdb).encode('utf-8'), file=f)
@ -95,6 +115,26 @@ class MOBIFile(object):
        self.ncx_index = NCXIndex(self.header.primary_index_record,
                self.mf.records, self.header.encoding)
    def build_files(self):
        text = self.raw_text
        self.files = []
        for skel in self.skel_index.records:
            sects = [x for x in self.sect_index.records if x.file_number
                    == skel.file_number]
            skeleton = text[skel.start_position:skel.start_position+skel.length]
            ftext = skeleton
            first_aid = sects[0].toc_text
            sections = []
            for sect in sects:
                start_pos = skel.start_position + skel.length + sect.start_pos
                sect_text = text[start_pos:start_pos+sect.length]
                insert_pos = sect.insert_pos - skel.start_position
                ftext = ftext[:insert_pos] + sect_text + ftext[insert_pos:]
                sections.append(sect_text)
            self.files.append(File(skel, skeleton, ftext, first_aid, sections))
    def extract_resources(self):
        self.resource_map = []
        known_types = {b'FLIS', b'FCIS', b'SRCS',
@ -141,7 +181,7 @@ def inspect_mobi(mobi_file, ddir):
    with open(alltext, 'wb') as of:
        of.write(f.raw_text)
-    for x in ('text_records', 'images', 'fonts', 'binary'):
+    for x in ('text_records', 'images', 'fonts', 'binary', 'files'):
        os.mkdir(os.path.join(ddir, x))
    for rec in f.text_records:
@ -164,3 +204,6 @@ def inspect_mobi(mobi_file, ddir):
    with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo:
        fo.write(str(f.ncx_index).encode('utf-8'))
    for part in f.files:
        part.dump(os.path.join(ddir, 'files'))