diff --git a/src/calibre/ebooks/mobi/debug/mobi8.py b/src/calibre/ebooks/mobi/debug/mobi8.py index 788ca3ed0a..803d5e5667 100644 --- a/src/calibre/ebooks/mobi/debug/mobi8.py +++ b/src/calibre/ebooks/mobi/debug/mobi8.py @@ -189,7 +189,7 @@ class MOBIFile(object): def read_tbs(self): from calibre.ebooks.mobi.writer8.tbs import (Entry, DOC, collect_indexing_data, encode_strands_as_sequences, - sequences_to_bytes) + sequences_to_bytes, calculate_all_tbs, NegativeStrandIndex) entry_map = [] for index in self.ncx_index: vals = list(index)[:-1] + [None, None, None, None] @@ -206,6 +206,14 @@ class MOBIFile(object): the start of the text record. ''')] + + tbs_type = 8 + try: + calculate_all_tbs(indexing_data) + except NegativeStrandIndex: + calculate_all_tbs(indexing_data, tbs_type=5) + tbs_type = 5 + for i, strands in enumerate(indexing_data): rec = self.text_records[i] tbs_bytes = rec.trailing_data.get('indexing', b'') @@ -236,8 +244,12 @@ class MOBIFile(object): desc.append('Sequence #%d: %r %r'%(j, seq[0], seq[1])) if tbs_bytes: desc.append('Remaining bytes: %s'%format_bytes(tbs_bytes)) - calculated_sequences = encode_strands_as_sequences(strands) - calculated_bytes = sequences_to_bytes(calculated_sequences) + calculated_sequences = encode_strands_as_sequences(strands, + tbs_type=tbs_type) + try: + calculated_bytes = sequences_to_bytes(calculated_sequences) + except: + calculated_bytes = b'failed to calculate tbs bytes' if calculated_bytes != otbs: print ('WARNING: TBS mismatch for record %d'%i) desc.append('WARNING: TBS mismatch!') diff --git a/src/calibre/ebooks/mobi/writer8/tbs.py b/src/calibre/ebooks/mobi/writer8/tbs.py index 377343629e..0a522c4301 100644 --- a/src/calibre/ebooks/mobi/writer8/tbs.py +++ b/src/calibre/ebooks/mobi/writer8/tbs.py @@ -106,6 +106,9 @@ def collect_indexing_data(entries, text_record_lengths): return data +class NegativeStrandIndex(Exception): + pass + def encode_strands_as_sequences(strands, tbs_type=8): ''' Encode the list of strands for a single text record into a list of sequences, ready to be converted into TBS bytes. ''' @@ -144,10 +147,16 @@ def encode_strands_as_sequences(strands, tbs_type=8): index = entries[0].index - (entries[0].parent or 0) if ans and not strand_seqs: # We are in the second or later strands, so we need to use a - # special flag and index value. The index value if the entry + # special flag and index value. The index value is the entry # index - the index of the last entry in the previous strand. - extra[0b1000] = True index = last_index - entries[0].index + if index < 0: + if tbs_type == 5: + index = -index + else: + raise NegativeStrandIndex() + else: + extra[0b1000] = True last_index = entries[-1].index strand_seqs.append((index, extra)) @@ -167,20 +176,31 @@ def sequences_to_bytes(sequences): flag_size = 3 for val, extra in sequences: ans.append(encode_tbs(val, extra, flag_size)) - flag_size = 4 # only the first seuqence has flag size 3 as all + flag_size = 4 # only the first sequence has flag size 3 as all # subsequent sequences could need the 0b1000 flag return b''.join(ans) +def calculate_all_tbs(indexing_data, tbs_type=8): + rmap = {} + for i, strands in enumerate(indexing_data): + sequences = encode_strands_as_sequences(strands, tbs_type=tbs_type) + tbs_bytes = sequences_to_bytes(sequences) + rmap[i+1] = tbs_bytes + return rmap + def apply_trailing_byte_sequences(index_table, records, text_record_lengths): entries = tuple(Entry(r['index'], r['offset'], r['length'], r['depth'], r.get('parent', None), r.get('first_child', None), r.get('last_child', None), r['label'], None, None, None, None) for r in index_table) indexing_data = collect_indexing_data(entries, text_record_lengths) - for i, strands in enumerate(indexing_data): - sequences = encode_strands_as_sequences(strands) - tbs_bytes = sequences_to_bytes(sequences) - records[i+1] += encode_trailing_data(tbs_bytes) + try: + rmap = calculate_all_tbs(indexing_data) + except NegativeStrandIndex: + rmap = calculate_all_tbs(indexing_data, tbs_type=5) + + for i, tbs_bytes in rmap.iteritems(): + records[i] += encode_trailing_data(tbs_bytes) return True