mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
KF8 debug: Dump skeleton structure
This commit is contained in:
parent
27a855b477
commit
02920d2e7d
@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
|
||||
import sys, os, imghdr, struct
|
||||
from itertools import izip
|
||||
|
||||
from calibre import CurrentDir
|
||||
from calibre.ebooks.mobi.debug.headers import TextRecord
|
||||
from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex)
|
||||
from calibre.ebooks.mobi.utils import read_font_record
|
||||
@ -43,6 +44,24 @@ class FDST(object):
|
||||
|
||||
return '\n'.join(ans)
|
||||
|
||||
class File(object):

    '''
    One reconstructed file from the book: a skeleton plus the sections that
    were spliced into it.

    :param skel: Skeleton index record; only its ``file_number`` attribute is
        read here, to derive :attr:`name` (``partNNNN``).
    :param skeleton: The raw skeleton markup, before any section is inserted.
    :param text: The fully assembled markup (skeleton with all sections
        inserted).
    :param first_aid: Stored unchanged on the instance; not used by this
        class.
    :param sections: Iterable of the individual section texts, in the order
        they should be numbered when dumped.
    '''

    def __init__(self, skel, skeleton, text, first_aid, sections):
        self.name = 'part%04d'%skel.file_number
        self.skeleton, self.text, self.first_aid = skeleton, text, first_aid
        self.sections = sections

    def dump(self, ddir):
        '''
        Write this file to the directory `ddir`.

        Creates ``<name>.html`` containing the assembled text and a
        ``<name>-parts`` sub-directory containing ``skeleton.html`` plus one
        ``sect-NNNN.html`` per section. All data is written in binary mode.

        Raises whatever :func:`os.mkdir` raises if the ``-parts`` directory
        cannot be created (for example because it already exists).
        '''
        with open(os.path.join(ddir, self.name + '.html'), 'wb') as f:
            f.write(self.text)

        base = os.path.join(ddir, self.name + '-parts')
        os.mkdir(base)

        # Write through explicit paths rather than changing the working
        # directory: the output is the same, but the process-wide cwd is
        # never touched.
        with open(os.path.join(base, 'skeleton.html'), 'wb') as f:
            f.write(self.skeleton)

        for i, text in enumerate(self.sections):
            with open(os.path.join(base, 'sect-%04d.html'%i), 'wb') as f:
                f.write(text)
|
||||
|
||||
class MOBIFile(object):
|
||||
|
||||
@ -67,6 +86,7 @@ class MOBIFile(object):
|
||||
self.extract_resources()
|
||||
self.read_fdst()
|
||||
self.read_indices()
|
||||
self.build_files()
|
||||
|
||||
def print_header(self, f=sys.stdout):
|
||||
print (str(self.mf.palmdb).encode('utf-8'), file=f)
|
||||
@ -95,6 +115,26 @@ class MOBIFile(object):
|
||||
self.ncx_index = NCXIndex(self.header.primary_index_record,
|
||||
self.mf.records, self.header.encoding)
|
||||
|
||||
def build_files(self):
    '''
    Populate ``self.files`` with one :class:`File` per skeleton record.

    For every record in ``self.skel_index.records`` the skeleton is sliced
    out of ``self.raw_text``, and each section record with the same
    ``file_number`` is sliced out and inserted into the text assembled so
    far. Raises IndexError if a skeleton has no matching section records.
    '''
    raw = self.raw_text
    self.files = []

    for skel in self.skel_index.records:
        members = [rec for rec in self.sect_index.records
                if rec.file_number == skel.file_number]

        skel_end = skel.start_position + skel.length
        skeleton = raw[skel.start_position:skel_end]
        first_aid = members[0].toc_text

        assembled = skeleton
        sections = []
        for sect in members:
            # Section start offsets are counted from the end of the skeleton
            begin = skel_end + sect.start_pos
            chunk = raw[begin:begin + sect.length]
            sections.append(chunk)

            # Rebase the insert position onto the start of this skeleton
            offset = sect.insert_pos - skel.start_position
            head, tail = assembled[:offset], assembled[offset:]
            assembled = head + chunk + tail

        self.files.append(
                File(skel, skeleton, assembled, first_aid, sections))
|
||||
|
||||
def extract_resources(self):
|
||||
self.resource_map = []
|
||||
known_types = {b'FLIS', b'FCIS', b'SRCS',
|
||||
@ -141,7 +181,7 @@ def inspect_mobi(mobi_file, ddir):
|
||||
with open(alltext, 'wb') as of:
|
||||
of.write(f.raw_text)
|
||||
|
||||
for x in ('text_records', 'images', 'fonts', 'binary'):
|
||||
for x in ('text_records', 'images', 'fonts', 'binary', 'files'):
|
||||
os.mkdir(os.path.join(ddir, x))
|
||||
|
||||
for rec in f.text_records:
|
||||
@ -164,3 +204,6 @@ def inspect_mobi(mobi_file, ddir):
|
||||
with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo:
|
||||
fo.write(str(f.ncx_index).encode('utf-8'))
|
||||
|
||||
for part in f.files:
|
||||
part.dump(os.path.join(ddir, 'files'))
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user