mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
When dumping joint MOBI files, properly extract all image/font resources and only put resources up to the KF8 boundary in the mobi6 part
This commit is contained in:
parent
843624f6c2
commit
f0a890e8c1
@ -383,6 +383,13 @@ class MOBIHeader(object): # {{{
|
|||||||
if hasattr(self, x) and getattr(self, x) != NULL_INDEX:
|
if hasattr(self, x) and getattr(self, x) != NULL_INDEX:
|
||||||
setattr(self, x, self.header_offset+getattr(self, x))
|
setattr(self, x, self.header_offset+getattr(self, x))
|
||||||
|
|
||||||
|
# Try to find the first non-text record
|
||||||
|
self.first_resource_record = offset + 1 + self.number_of_text_records # Default to first record after all text records
|
||||||
|
pointer = min(getattr(self, 'first_non_book_record', NULL_INDEX), getattr(self, 'first_image_index', NULL_INDEX))
|
||||||
|
if pointer != NULL_INDEX:
|
||||||
|
self.first_resource_record = max(pointer, self.first_resource_record)
|
||||||
|
self.last_resource_record = NULL_INDEX
|
||||||
|
|
||||||
if self.has_exth:
|
if self.has_exth:
|
||||||
self.exth_offset = 16 + self.length
|
self.exth_offset = 16 + self.length
|
||||||
|
|
||||||
@ -391,6 +398,10 @@ class MOBIHeader(object): # {{{
|
|||||||
self.end_of_exth = self.exth_offset + self.exth.length
|
self.end_of_exth = self.exth_offset + self.exth.length
|
||||||
self.bytes_after_exth = self.raw[self.end_of_exth:self.fullname_offset]
|
self.bytes_after_exth = self.raw[self.end_of_exth:self.fullname_offset]
|
||||||
|
|
||||||
|
if self.exth.kf8_header_index is not None and offset == 0:
|
||||||
|
# MOBI 6 header in a joint file, adjust self.last_resource_record
|
||||||
|
self.last_resource_record = self.exth.kf8_header_index - 2
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
ans = ['*'*20 + ' MOBI %d Header '%self.file_version+ '*'*20]
|
ans = ['*'*20 + ' MOBI %d Header '%self.file_version+ '*'*20]
|
||||||
|
|
||||||
|
@ -492,7 +492,7 @@ class BinaryRecord(object): # {{{
|
|||||||
sig = self.raw[:4]
|
sig = self.raw[:4]
|
||||||
name = '%06d'%idx
|
name = '%06d'%idx
|
||||||
if sig in {b'FCIS', b'FLIS', b'SRCS', b'DATP', b'RESC', b'BOUN',
|
if sig in {b'FCIS', b'FLIS', b'SRCS', b'DATP', b'RESC', b'BOUN',
|
||||||
b'FDST', b'AUDI', b'VIDE',}:
|
b'FDST', b'AUDI', b'VIDE', b'CRES', b'CONT', b'CMET'}:
|
||||||
name += '-' + sig.decode('ascii')
|
name += '-' + sig.decode('ascii')
|
||||||
elif sig == b'\xe9\x8e\r\n':
|
elif sig == b'\xe9\x8e\r\n':
|
||||||
name += '-' + 'EOF'
|
name += '-' + 'EOF'
|
||||||
@ -743,17 +743,14 @@ class MOBIFile(object): # {{{
|
|||||||
self.indexing_record_nums |= set(xrange(sir+1, sir+1+numi))
|
self.indexing_record_nums |= set(xrange(sir+1, sir+1+numi))
|
||||||
|
|
||||||
ntr = self.mobi_header.number_of_text_records
|
ntr = self.mobi_header.number_of_text_records
|
||||||
fntbr = self.mobi_header.first_non_book_record
|
|
||||||
fii = self.mobi_header.first_image_index
|
fii = self.mobi_header.first_image_index
|
||||||
if fntbr == NULL_INDEX:
|
|
||||||
fntbr = len(self.records)
|
|
||||||
self.text_records = [TextRecord(r, self.records[r],
|
self.text_records = [TextRecord(r, self.records[r],
|
||||||
self.mobi_header.extra_data_flags, mf.decompress6) for r in xrange(1,
|
self.mobi_header.extra_data_flags, mf.decompress6) for r in xrange(1,
|
||||||
min(len(self.records), ntr+1))]
|
min(len(self.records), ntr+1))]
|
||||||
self.image_records, self.binary_records = [], []
|
self.image_records, self.binary_records = [], []
|
||||||
self.font_records = []
|
self.font_records = []
|
||||||
image_index = 0
|
image_index = 0
|
||||||
for i in xrange(fntbr, len(self.records)):
|
for i in xrange(self.mobi_header.first_resource_record, min(self.mobi_header.last_resource_record, len(self.records))):
|
||||||
if i in self.indexing_record_nums or i in self.huffman_record_nums:
|
if i in self.indexing_record_nums or i in self.huffman_record_nums:
|
||||||
continue
|
continue
|
||||||
image_index += 1
|
image_index += 1
|
||||||
@ -761,7 +758,7 @@ class MOBIFile(object): # {{{
|
|||||||
fmt = None
|
fmt = None
|
||||||
if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS',
|
if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS',
|
||||||
b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
|
b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
|
||||||
b'AUDI', b'VIDE', b'FONT'}:
|
b'AUDI', b'VIDE', b'FONT', b'CRES', b'CONT', b'CMET'}:
|
||||||
try:
|
try:
|
||||||
fmt = what(None, r.raw)
|
fmt = what(None, r.raw)
|
||||||
except:
|
except:
|
||||||
@ -832,7 +829,6 @@ def inspect_mobi(mobi_file, ddir):
|
|||||||
rec.dump(tdir)
|
rec.dump(tdir)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
|
@ -73,12 +73,11 @@ class MOBIFile(object):
|
|||||||
h, h8 = mf.mobi_header, mf.mobi8_header
|
h, h8 = mf.mobi_header, mf.mobi8_header
|
||||||
first_text_record = 1
|
first_text_record = 1
|
||||||
offset = 0
|
offset = 0
|
||||||
res_end = len(mf.records)
|
self.resource_ranges = [(h8.first_resource_record, h8.last_resource_record, h8.first_image_index)]
|
||||||
if mf.kf8_type == 'joint':
|
if mf.kf8_type == 'joint':
|
||||||
offset = h.exth.kf8_header_index
|
offset = h.exth.kf8_header_index
|
||||||
res_end = offset - 1
|
self.resource_ranges.insert(0, (h.first_resource_record, h.last_resource_record, h.first_image_index))
|
||||||
|
|
||||||
self.resource_records = mf.records[h.first_non_book_record:res_end]
|
|
||||||
self.text_records = [TextRecord(i, r, h8.extra_data_flags,
|
self.text_records = [TextRecord(i, r, h8.extra_data_flags,
|
||||||
mf.decompress8) for i, r in
|
mf.decompress8) for i, r in
|
||||||
enumerate(mf.records[first_text_record+offset:
|
enumerate(mf.records[first_text_record+offset:
|
||||||
@ -86,7 +85,7 @@ class MOBIFile(object):
|
|||||||
|
|
||||||
self.raw_text = b''.join(r.raw for r in self.text_records)
|
self.raw_text = b''.join(r.raw for r in self.text_records)
|
||||||
self.header = self.mf.mobi8_header
|
self.header = self.mf.mobi8_header
|
||||||
self.extract_resources()
|
self.extract_resources(mf.records)
|
||||||
self.read_fdst()
|
self.read_fdst()
|
||||||
self.read_indices()
|
self.read_indices()
|
||||||
self.build_files()
|
self.build_files()
|
||||||
@ -151,13 +150,21 @@ class MOBIFile(object):
|
|||||||
with open(os.path.join(ddir, 'flow%04d.txt'%i), 'wb') as f:
|
with open(os.path.join(ddir, 'flow%04d.txt'%i), 'wb') as f:
|
||||||
f.write(raw)
|
f.write(raw)
|
||||||
|
|
||||||
def extract_resources(self):
|
def extract_resources(self, records):
|
||||||
self.resource_map = []
|
self.resource_map = []
|
||||||
known_types = {b'FLIS', b'FCIS', b'SRCS',
|
known_types = {b'FLIS', b'FCIS', b'SRCS',
|
||||||
b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
|
b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
|
||||||
b'AUDI', b'VIDE'}
|
b'AUDI', b'VIDE', b'CRES', b'CONT', b'CMET'}
|
||||||
|
|
||||||
for i, rec in enumerate(self.resource_records):
|
for i, rec in enumerate(records):
|
||||||
|
for (l, r, offset) in self.resource_ranges:
|
||||||
|
if l <= i <= r:
|
||||||
|
resource_index = i + 1
|
||||||
|
if offset is not None and resource_index >= offset:
|
||||||
|
resource_index -= offset
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
continue
|
||||||
sig = rec.raw[:4]
|
sig = rec.raw[:4]
|
||||||
payload = rec.raw
|
payload = rec.raw
|
||||||
ext = 'dat'
|
ext = 'dat'
|
||||||
@ -185,7 +192,7 @@ class MOBIFile(object):
|
|||||||
elif sig in known_types:
|
elif sig in known_types:
|
||||||
suffix = '-' + sig.decode('ascii')
|
suffix = '-' + sig.decode('ascii')
|
||||||
|
|
||||||
self.resource_map.append(('%s/%06d%s.%s'%(prefix, i, suffix, ext),
|
self.resource_map.append(('%s/%06d%s.%s'%(prefix, resource_index, suffix, ext),
|
||||||
payload))
|
payload))
|
||||||
|
|
||||||
def read_tbs(self):
|
def read_tbs(self):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user