mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
KF8 debug: Dump skeleton structure
This commit is contained in:
parent
27a855b477
commit
02920d2e7d
@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import sys, os, imghdr, struct
|
import sys, os, imghdr, struct
|
||||||
from itertools import izip
|
from itertools import izip
|
||||||
|
|
||||||
|
from calibre import CurrentDir
|
||||||
from calibre.ebooks.mobi.debug.headers import TextRecord
|
from calibre.ebooks.mobi.debug.headers import TextRecord
|
||||||
from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex)
|
from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex)
|
||||||
from calibre.ebooks.mobi.utils import read_font_record
|
from calibre.ebooks.mobi.utils import read_font_record
|
||||||
@ -43,6 +44,24 @@ class FDST(object):
|
|||||||
|
|
||||||
return '\n'.join(ans)
|
return '\n'.join(ans)
|
||||||
|
|
||||||
|
class File(object):

    '''
    One reassembled part of the book, together with the pieces it was built
    from: the bare skeleton, the individual section texts and the fully
    assembled text.
    '''

    def __init__(self, skel, skeleton, text, first_aid, sections):
        '''
        :param skel: Skeleton index record; only its ``file_number`` is read
                     here, to derive the output name.
        :param skeleton: The skeleton markup on its own.
        :param text: The skeleton with all section texts inserted.
        :param first_aid: Stored as-is on the instance.
        :param sections: Iterable of section texts, written out one per file
                         by :meth:`dump`.
        '''
        self.name = 'part%04d'%skel.file_number
        self.skeleton = skeleton
        self.text = text
        self.first_aid = first_aid
        self.sections = sections

    def dump(self, ddir):
        '''
        Write this part into the directory ``ddir``.

        Creates ``<name>.html`` holding the assembled text and a
        ``<name>-parts`` sub-directory holding ``skeleton.html`` plus one
        ``sect-NNNN.html`` file per section. ``os.mkdir`` raises if the
        sub-directory already exists.
        '''
        html_path = os.path.join(ddir, self.name + '.html')
        with open(html_path, 'wb') as out:
            out.write(self.text)

        parts_dir = os.path.join(ddir, self.name + '-parts')
        os.mkdir(parts_dir)
        # The names below are relative, so work from inside the parts
        # directory while writing them.
        with CurrentDir(parts_dir):
            with open('skeleton.html', 'wb') as out:
                out.write(self.skeleton)
            for idx, chunk in enumerate(self.sections):
                with open('sect-%04d.html'%idx, 'wb') as out:
                    out.write(chunk)
|
||||||
|
|
||||||
class MOBIFile(object):
|
class MOBIFile(object):
|
||||||
|
|
||||||
@ -67,6 +86,7 @@ class MOBIFile(object):
|
|||||||
self.extract_resources()
|
self.extract_resources()
|
||||||
self.read_fdst()
|
self.read_fdst()
|
||||||
self.read_indices()
|
self.read_indices()
|
||||||
|
self.build_files()
|
||||||
|
|
||||||
def print_header(self, f=sys.stdout):
|
def print_header(self, f=sys.stdout):
|
||||||
print (str(self.mf.palmdb).encode('utf-8'), file=f)
|
print (str(self.mf.palmdb).encode('utf-8'), file=f)
|
||||||
@ -95,6 +115,26 @@ class MOBIFile(object):
|
|||||||
self.ncx_index = NCXIndex(self.header.primary_index_record,
|
self.ncx_index = NCXIndex(self.header.primary_index_record,
|
||||||
self.mf.records, self.header.encoding)
|
self.mf.records, self.header.encoding)
|
||||||
|
|
||||||
|
def build_files(self):
    '''
    Reassemble every part of the book from the skeleton and section
    indices and store the results, as :class:`File` objects, in
    ``self.files``.

    For each record in ``self.skel_index.records`` the skeleton is sliced
    out of ``self.raw_text``. Every record in ``self.sect_index.records``
    with the same ``file_number`` is then sliced out of the raw text and
    spliced into the skeleton, in index order.

    A skeleton with no matching section records is kept as-is and gets
    ``None`` as its ``first_aid`` value, instead of aborting the whole
    dump with an ``IndexError``.
    '''
    raw = self.raw_text
    self.files = []
    for skel in self.skel_index.records:
        sects = [x for x in self.sect_index.records
                if x.file_number == skel.file_number]
        skel_end = skel.start_position + skel.length
        skeleton = raw[skel.start_position:skel_end]
        ftext = skeleton
        # A skeleton may have no sections at all; do not index into an
        # empty list in that case.
        first_aid = sects[0].toc_text if sects else None
        sections = []

        for sect in sects:
            # Section start offsets are applied relative to the end of
            # the owning skeleton in the raw text.
            start_pos = skel_end + sect.start_pos
            sect_text = raw[start_pos:start_pos+sect.length]
            # The stored insert position is rebased onto the start of
            # this skeleton before splicing.
            # NOTE(review): presumably insert_pos already accounts for
            # previously inserted sections -- confirm against SECTIndex.
            insert_pos = sect.insert_pos - skel.start_position
            ftext = ftext[:insert_pos] + sect_text + ftext[insert_pos:]
            sections.append(sect_text)

        self.files.append(File(skel, skeleton, ftext, first_aid, sections))
|
||||||
|
|
||||||
def extract_resources(self):
|
def extract_resources(self):
|
||||||
self.resource_map = []
|
self.resource_map = []
|
||||||
known_types = {b'FLIS', b'FCIS', b'SRCS',
|
known_types = {b'FLIS', b'FCIS', b'SRCS',
|
||||||
@ -141,7 +181,7 @@ def inspect_mobi(mobi_file, ddir):
|
|||||||
with open(alltext, 'wb') as of:
|
with open(alltext, 'wb') as of:
|
||||||
of.write(f.raw_text)
|
of.write(f.raw_text)
|
||||||
|
|
||||||
for x in ('text_records', 'images', 'fonts', 'binary'):
|
for x in ('text_records', 'images', 'fonts', 'binary', 'files'):
|
||||||
os.mkdir(os.path.join(ddir, x))
|
os.mkdir(os.path.join(ddir, x))
|
||||||
|
|
||||||
for rec in f.text_records:
|
for rec in f.text_records:
|
||||||
@ -164,3 +204,6 @@ def inspect_mobi(mobi_file, ddir):
|
|||||||
with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo:
|
with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo:
|
||||||
fo.write(str(f.ncx_index).encode('utf-8'))
|
fo.write(str(f.ncx_index).encode('utf-8'))
|
||||||
|
|
||||||
|
for part in f.files:
|
||||||
|
part.dump(os.path.join(ddir, 'files'))
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user