KF8 debug: Dump skeleton structure

This commit is contained in:
Kovid Goyal 2012-04-18 17:02:59 +05:30
parent 27a855b477
commit 02920d2e7d

View File

@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
import sys, os, imghdr, struct import sys, os, imghdr, struct
from itertools import izip from itertools import izip
from calibre import CurrentDir
from calibre.ebooks.mobi.debug.headers import TextRecord from calibre.ebooks.mobi.debug.headers import TextRecord
from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex) from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex)
from calibre.ebooks.mobi.utils import read_font_record from calibre.ebooks.mobi.utils import read_font_record
@ -43,6 +44,24 @@ class FDST(object):
return '\n'.join(ans) return '\n'.join(ans)
class File(object):
def __init__(self, skel, skeleton, text, first_aid, sections):
self.name = 'part%04d'%skel.file_number
self.skeleton, self.text, self.first_aid = skeleton, text, first_aid
self.sections = sections
def dump(self, ddir):
with open(os.path.join(ddir, self.name + '.html'), 'wb') as f:
f.write(self.text)
base = os.path.join(ddir, self.name + '-parts')
os.mkdir(base)
with CurrentDir(base):
with open('skeleton.html', 'wb') as f:
f.write(self.skeleton)
for i, text in enumerate(self.sections):
with open('sect-%04d.html'%i, 'wb') as f:
f.write(text)
class MOBIFile(object): class MOBIFile(object):
@ -67,6 +86,7 @@ class MOBIFile(object):
self.extract_resources() self.extract_resources()
self.read_fdst() self.read_fdst()
self.read_indices() self.read_indices()
self.build_files()
def print_header(self, f=sys.stdout): def print_header(self, f=sys.stdout):
print (str(self.mf.palmdb).encode('utf-8'), file=f) print (str(self.mf.palmdb).encode('utf-8'), file=f)
@ -95,6 +115,26 @@ class MOBIFile(object):
self.ncx_index = NCXIndex(self.header.primary_index_record, self.ncx_index = NCXIndex(self.header.primary_index_record,
self.mf.records, self.header.encoding) self.mf.records, self.header.encoding)
def build_files(self):
text = self.raw_text
self.files = []
for skel in self.skel_index.records:
sects = [x for x in self.sect_index.records if x.file_number
== skel.file_number]
skeleton = text[skel.start_position:skel.start_position+skel.length]
ftext = skeleton
first_aid = sects[0].toc_text
sections = []
for sect in sects:
start_pos = skel.start_position + skel.length + sect.start_pos
sect_text = text[start_pos:start_pos+sect.length]
insert_pos = sect.insert_pos - skel.start_position
ftext = ftext[:insert_pos] + sect_text + ftext[insert_pos:]
sections.append(sect_text)
self.files.append(File(skel, skeleton, ftext, first_aid, sections))
def extract_resources(self): def extract_resources(self):
self.resource_map = [] self.resource_map = []
known_types = {b'FLIS', b'FCIS', b'SRCS', known_types = {b'FLIS', b'FCIS', b'SRCS',
@ -141,7 +181,7 @@ def inspect_mobi(mobi_file, ddir):
with open(alltext, 'wb') as of: with open(alltext, 'wb') as of:
of.write(f.raw_text) of.write(f.raw_text)
for x in ('text_records', 'images', 'fonts', 'binary'): for x in ('text_records', 'images', 'fonts', 'binary', 'files'):
os.mkdir(os.path.join(ddir, x)) os.mkdir(os.path.join(ddir, x))
for rec in f.text_records: for rec in f.text_records:
@ -164,3 +204,6 @@ def inspect_mobi(mobi_file, ddir):
with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo: with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo:
fo.write(str(f.ncx_index).encode('utf-8')) fo.write(str(f.ncx_index).encode('utf-8'))
for part in f.files:
part.dump(os.path.join(ddir, 'files'))