mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
When dumping joint MOBI files, properly extract all image/font resources and only put resources up to the KF8 boundary in the mobi6 part
This commit is contained in:
parent
843624f6c2
commit
f0a890e8c1
@ -383,6 +383,13 @@ class MOBIHeader(object): # {{{
|
||||
if hasattr(self, x) and getattr(self, x) != NULL_INDEX:
|
||||
setattr(self, x, self.header_offset+getattr(self, x))
|
||||
|
||||
# Try to find the first non-text record
|
||||
self.first_resource_record = offset + 1 + self.number_of_text_records # Default to first record after all text records
|
||||
pointer = min(getattr(self, 'first_non_book_record', NULL_INDEX), getattr(self, 'first_image_index', NULL_INDEX))
|
||||
if pointer != NULL_INDEX:
|
||||
self.first_resource_record = max(pointer, self.first_resource_record)
|
||||
self.last_resource_record = NULL_INDEX
|
||||
|
||||
if self.has_exth:
|
||||
self.exth_offset = 16 + self.length
|
||||
|
||||
@ -391,6 +398,10 @@ class MOBIHeader(object): # {{{
|
||||
self.end_of_exth = self.exth_offset + self.exth.length
|
||||
self.bytes_after_exth = self.raw[self.end_of_exth:self.fullname_offset]
|
||||
|
||||
if self.exth.kf8_header_index is not None and offset == 0:
|
||||
# MOBI 6 header in a joint file, adjust self.last_resource_record
|
||||
self.last_resource_record = self.exth.kf8_header_index - 2
|
||||
|
||||
def __str__(self):
|
||||
ans = ['*'*20 + ' MOBI %d Header '%self.file_version+ '*'*20]
|
||||
|
||||
|
@ -41,7 +41,7 @@ class SecondaryIndexHeader(object): # {{{
|
||||
def __init__(self, record):
|
||||
self.record = record
|
||||
raw = self.record.raw
|
||||
#open('/t/index_header.bin', 'wb').write(raw)
|
||||
# open('/t/index_header.bin', 'wb').write(raw)
|
||||
if raw[:4] != b'INDX':
|
||||
raise ValueError('Invalid Secondary Index Record')
|
||||
self.header_length, = struct.unpack('>I', raw[4:8])
|
||||
@ -136,7 +136,7 @@ class IndexHeader(object): # {{{
|
||||
def __init__(self, record):
|
||||
self.record = record
|
||||
raw = self.record.raw
|
||||
#open('/t/index_header.bin', 'wb').write(raw)
|
||||
# open('/t/index_header.bin', 'wb').write(raw)
|
||||
if raw[:4] != b'INDX':
|
||||
raise ValueError('Invalid Primary Index Record')
|
||||
|
||||
@ -492,7 +492,7 @@ class BinaryRecord(object): # {{{
|
||||
sig = self.raw[:4]
|
||||
name = '%06d'%idx
|
||||
if sig in {b'FCIS', b'FLIS', b'SRCS', b'DATP', b'RESC', b'BOUN',
|
||||
b'FDST', b'AUDI', b'VIDE',}:
|
||||
b'FDST', b'AUDI', b'VIDE', b'CRES', b'CONT', b'CMET'}:
|
||||
name += '-' + sig.decode('ascii')
|
||||
elif sig == b'\xe9\x8e\r\n':
|
||||
name += '-' + 'EOF'
|
||||
@ -743,17 +743,14 @@ class MOBIFile(object): # {{{
|
||||
self.indexing_record_nums |= set(xrange(sir+1, sir+1+numi))
|
||||
|
||||
ntr = self.mobi_header.number_of_text_records
|
||||
fntbr = self.mobi_header.first_non_book_record
|
||||
fii = self.mobi_header.first_image_index
|
||||
if fntbr == NULL_INDEX:
|
||||
fntbr = len(self.records)
|
||||
self.text_records = [TextRecord(r, self.records[r],
|
||||
self.mobi_header.extra_data_flags, mf.decompress6) for r in xrange(1,
|
||||
min(len(self.records), ntr+1))]
|
||||
self.image_records, self.binary_records = [], []
|
||||
self.font_records = []
|
||||
image_index = 0
|
||||
for i in xrange(fntbr, len(self.records)):
|
||||
for i in xrange(self.mobi_header.first_resource_record, min(self.mobi_header.last_resource_record, len(self.records))):
|
||||
if i in self.indexing_record_nums or i in self.huffman_record_nums:
|
||||
continue
|
||||
image_index += 1
|
||||
@ -761,7 +758,7 @@ class MOBIFile(object): # {{{
|
||||
fmt = None
|
||||
if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS',
|
||||
b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
|
||||
b'AUDI', b'VIDE', b'FONT'}:
|
||||
b'AUDI', b'VIDE', b'FONT', b'CRES', b'CONT', b'CMET'}:
|
||||
try:
|
||||
fmt = what(None, r.raw)
|
||||
except:
|
||||
@ -832,7 +829,6 @@ def inspect_mobi(mobi_file, ddir):
|
||||
rec.dump(tdir)
|
||||
|
||||
|
||||
|
||||
# }}}
|
||||
|
||||
|
||||
|
@ -73,12 +73,11 @@ class MOBIFile(object):
|
||||
h, h8 = mf.mobi_header, mf.mobi8_header
|
||||
first_text_record = 1
|
||||
offset = 0
|
||||
res_end = len(mf.records)
|
||||
self.resource_ranges = [(h8.first_resource_record, h8.last_resource_record, h8.first_image_index)]
|
||||
if mf.kf8_type == 'joint':
|
||||
offset = h.exth.kf8_header_index
|
||||
res_end = offset - 1
|
||||
self.resource_ranges.insert(0, (h.first_resource_record, h.last_resource_record, h.first_image_index))
|
||||
|
||||
self.resource_records = mf.records[h.first_non_book_record:res_end]
|
||||
self.text_records = [TextRecord(i, r, h8.extra_data_flags,
|
||||
mf.decompress8) for i, r in
|
||||
enumerate(mf.records[first_text_record+offset:
|
||||
@ -86,7 +85,7 @@ class MOBIFile(object):
|
||||
|
||||
self.raw_text = b''.join(r.raw for r in self.text_records)
|
||||
self.header = self.mf.mobi8_header
|
||||
self.extract_resources()
|
||||
self.extract_resources(mf.records)
|
||||
self.read_fdst()
|
||||
self.read_indices()
|
||||
self.build_files()
|
||||
@ -151,13 +150,21 @@ class MOBIFile(object):
|
||||
with open(os.path.join(ddir, 'flow%04d.txt'%i), 'wb') as f:
|
||||
f.write(raw)
|
||||
|
||||
def extract_resources(self):
|
||||
def extract_resources(self, records):
|
||||
self.resource_map = []
|
||||
known_types = {b'FLIS', b'FCIS', b'SRCS',
|
||||
b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
|
||||
b'AUDI', b'VIDE'}
|
||||
b'AUDI', b'VIDE', b'CRES', b'CONT', b'CMET'}
|
||||
|
||||
for i, rec in enumerate(self.resource_records):
|
||||
for i, rec in enumerate(records):
|
||||
for (l, r, offset) in self.resource_ranges:
|
||||
if l <= i <= r:
|
||||
resource_index = i + 1
|
||||
if offset is not None and resource_index >= offset:
|
||||
resource_index -= offset
|
||||
break
|
||||
else:
|
||||
continue
|
||||
sig = rec.raw[:4]
|
||||
payload = rec.raw
|
||||
ext = 'dat'
|
||||
@ -185,7 +192,7 @@ class MOBIFile(object):
|
||||
elif sig in known_types:
|
||||
suffix = '-' + sig.decode('ascii')
|
||||
|
||||
self.resource_map.append(('%s/%06d%s.%s'%(prefix, i, suffix, ext),
|
||||
self.resource_map.append(('%s/%06d%s.%s'%(prefix, resource_index, suffix, ext),
|
||||
payload))
|
||||
|
||||
def read_tbs(self):
|
||||
|
Loading…
x
Reference in New Issue
Block a user