diff --git a/src/calibre/ebooks/mobi/debug/headers.py b/src/calibre/ebooks/mobi/debug/headers.py index c672fd0141..c941c42024 100644 --- a/src/calibre/ebooks/mobi/debug/headers.py +++ b/src/calibre/ebooks/mobi/debug/headers.py @@ -383,6 +383,13 @@ class MOBIHeader(object): # {{{ if hasattr(self, x) and getattr(self, x) != NULL_INDEX: setattr(self, x, self.header_offset+getattr(self, x)) + # Try to find the first non-text record + self.first_resource_record = offset + 1 + self.number_of_text_records # Default to first record after all text records + pointer = min(getattr(self, 'first_non_book_record', NULL_INDEX), getattr(self, 'first_image_index', NULL_INDEX)) + if pointer != NULL_INDEX: + self.first_resource_record = max(pointer, self.first_resource_record) + self.last_resource_record = NULL_INDEX + if self.has_exth: self.exth_offset = 16 + self.length @@ -391,6 +398,10 @@ class MOBIHeader(object): # {{{ self.end_of_exth = self.exth_offset + self.exth.length self.bytes_after_exth = self.raw[self.end_of_exth:self.fullname_offset] + if self.exth.kf8_header_index is not None and offset == 0: + # MOBI 6 header in a joint file, adjust self.last_resource_record + self.last_resource_record = self.exth.kf8_header_index - 2 + def __str__(self): ans = ['*'*20 + ' MOBI %d Header '%self.file_version+ '*'*20] diff --git a/src/calibre/ebooks/mobi/debug/mobi6.py b/src/calibre/ebooks/mobi/debug/mobi6.py index 4bc0238a58..7b81d06118 100644 --- a/src/calibre/ebooks/mobi/debug/mobi6.py +++ b/src/calibre/ebooks/mobi/debug/mobi6.py @@ -41,7 +41,7 @@ class SecondaryIndexHeader(object): # {{{ def __init__(self, record): self.record = record raw = self.record.raw - #open('/t/index_header.bin', 'wb').write(raw) + # open('/t/index_header.bin', 'wb').write(raw) if raw[:4] != b'INDX': raise ValueError('Invalid Secondary Index Record') self.header_length, = struct.unpack('>I', raw[4:8]) @@ -136,7 +136,7 @@ class IndexHeader(object): # {{{ def __init__(self, record): self.record = record raw = self.record.raw - #open('/t/index_header.bin', 'wb').write(raw) + # open('/t/index_header.bin', 'wb').write(raw) if raw[:4] != b'INDX': raise ValueError('Invalid Primary Index Record') @@ -492,7 +492,7 @@ class BinaryRecord(object): # {{{ sig = self.raw[:4] name = '%06d'%idx if sig in {b'FCIS', b'FLIS', b'SRCS', b'DATP', b'RESC', b'BOUN', - b'FDST', b'AUDI', b'VIDE',}: + b'FDST', b'AUDI', b'VIDE', b'CRES', b'CONT', b'CMET'}: name += '-' + sig.decode('ascii') elif sig == b'\xe9\x8e\r\n': name += '-' + 'EOF' @@ -743,17 +743,14 @@ class MOBIFile(object): # {{{ self.indexing_record_nums |= set(xrange(sir+1, sir+1+numi)) ntr = self.mobi_header.number_of_text_records - fntbr = self.mobi_header.first_non_book_record fii = self.mobi_header.first_image_index - if fntbr == NULL_INDEX: - fntbr = len(self.records) self.text_records = [TextRecord(r, self.records[r], self.mobi_header.extra_data_flags, mf.decompress6) for r in xrange(1, min(len(self.records), ntr+1))] self.image_records, self.binary_records = [], [] self.font_records = [] image_index = 0 - for i in xrange(fntbr, len(self.records)): + for i in xrange(self.mobi_header.first_resource_record, min(self.mobi_header.last_resource_record, len(self.records))): if i in self.indexing_record_nums or i in self.huffman_record_nums: continue image_index += 1 @@ -761,7 +758,7 @@ class MOBIFile(object): # {{{ fmt = None if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP', - b'AUDI', b'VIDE', b'FONT'}: + b'AUDI', b'VIDE', b'FONT', b'CRES', b'CONT', b'CMET'}: try: fmt = what(None, r.raw) except: @@ -832,7 +829,6 @@ def inspect_mobi(mobi_file, ddir): rec.dump(tdir) - # }}} diff --git a/src/calibre/ebooks/mobi/debug/mobi8.py b/src/calibre/ebooks/mobi/debug/mobi8.py index a180b11ad0..3a50a1dcf9 100644 --- a/src/calibre/ebooks/mobi/debug/mobi8.py +++ b/src/calibre/ebooks/mobi/debug/mobi8.py @@ -73,12 +73,11 @@ class MOBIFile(object): h, h8 = mf.mobi_header, mf.mobi8_header first_text_record = 1 offset = 0 - res_end = len(mf.records) + self.resource_ranges = [(h8.first_resource_record, h8.last_resource_record, h8.first_image_index)] if mf.kf8_type == 'joint': offset = h.exth.kf8_header_index - res_end = offset - 1 + self.resource_ranges.insert(0, (h.first_resource_record, h.last_resource_record, h.first_image_index)) - self.resource_records = mf.records[h.first_non_book_record:res_end] self.text_records = [TextRecord(i, r, h8.extra_data_flags, mf.decompress8) for i, r in enumerate(mf.records[first_text_record+offset: @@ -86,7 +85,7 @@ class MOBIFile(object): self.raw_text = b''.join(r.raw for r in self.text_records) self.header = self.mf.mobi8_header - self.extract_resources() + self.extract_resources(mf.records) self.read_fdst() self.read_indices() self.build_files() @@ -151,13 +150,21 @@ class MOBIFile(object): with open(os.path.join(ddir, 'flow%04d.txt'%i), 'wb') as f: f.write(raw) - def extract_resources(self): + def extract_resources(self, records): self.resource_map = [] known_types = {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP', - b'AUDI', b'VIDE'} + b'AUDI', b'VIDE', b'CRES', b'CONT', b'CMET'} - for i, rec in enumerate(self.resource_records): + for i, rec in enumerate(records): + for (l, r, offset) in self.resource_ranges: + if l <= i <= r: + resource_index = i + 1 + if offset is not None and resource_index >= offset: + resource_index -= offset + break + else: + continue sig = rec.raw[:4] payload = rec.raw ext = 'dat' @@ -185,7 +192,7 @@ class MOBIFile(object): elif sig in known_types: suffix = '-' + sig.decode('ascii') - self.resource_map.append(('%s/%06d%s.%s'%(prefix, i, suffix, ext), + self.resource_map.append(('%s/%06d%s.%s'%(prefix, resource_index, suffix, ext), payload)) def read_tbs(self):