KF8 Output: Handle the case of many very small entries in the TOC. Fixes #1000493 (ePub > KF8 crash during conversion)

This commit is contained in:
Kovid Goyal 2012-05-17 12:01:19 +05:30
parent 37d3895dc4
commit 34263c3ac0
2 changed files with 42 additions and 10 deletions

View File

@ -189,7 +189,7 @@ class MOBIFile(object):
def read_tbs(self): def read_tbs(self):
from calibre.ebooks.mobi.writer8.tbs import (Entry, DOC, from calibre.ebooks.mobi.writer8.tbs import (Entry, DOC,
collect_indexing_data, encode_strands_as_sequences, collect_indexing_data, encode_strands_as_sequences,
sequences_to_bytes) sequences_to_bytes, calculate_all_tbs, NegativeStrandIndex)
entry_map = [] entry_map = []
for index in self.ncx_index: for index in self.ncx_index:
vals = list(index)[:-1] + [None, None, None, None] vals = list(index)[:-1] + [None, None, None, None]
@ -206,6 +206,14 @@ class MOBIFile(object):
the start of the text record. the start of the text record.
''')] ''')]
tbs_type = 8
try:
calculate_all_tbs(indexing_data)
except NegativeStrandIndex:
calculate_all_tbs(indexing_data, tbs_type=5)
tbs_type = 5
for i, strands in enumerate(indexing_data): for i, strands in enumerate(indexing_data):
rec = self.text_records[i] rec = self.text_records[i]
tbs_bytes = rec.trailing_data.get('indexing', b'') tbs_bytes = rec.trailing_data.get('indexing', b'')
@ -236,8 +244,12 @@ class MOBIFile(object):
desc.append('Sequence #%d: %r %r'%(j, seq[0], seq[1])) desc.append('Sequence #%d: %r %r'%(j, seq[0], seq[1]))
if tbs_bytes: if tbs_bytes:
desc.append('Remaining bytes: %s'%format_bytes(tbs_bytes)) desc.append('Remaining bytes: %s'%format_bytes(tbs_bytes))
calculated_sequences = encode_strands_as_sequences(strands) calculated_sequences = encode_strands_as_sequences(strands,
tbs_type=tbs_type)
try:
calculated_bytes = sequences_to_bytes(calculated_sequences) calculated_bytes = sequences_to_bytes(calculated_sequences)
except:
calculated_bytes = b'failed to calculate tbs bytes'
if calculated_bytes != otbs: if calculated_bytes != otbs:
print ('WARNING: TBS mismatch for record %d'%i) print ('WARNING: TBS mismatch for record %d'%i)
desc.append('WARNING: TBS mismatch!') desc.append('WARNING: TBS mismatch!')

View File

@ -106,6 +106,9 @@ def collect_indexing_data(entries, text_record_lengths):
return data return data
class NegativeStrandIndex(Exception):
    ''' Raised by encode_strands_as_sequences() when the index computed for
    a second or later strand is negative and the TBS type is not 5.
    Callers catch it and redo the calculation with tbs_type=5. '''
def encode_strands_as_sequences(strands, tbs_type=8): def encode_strands_as_sequences(strands, tbs_type=8):
''' Encode the list of strands for a single text record into a list of ''' Encode the list of strands for a single text record into a list of
sequences, ready to be converted into TBS bytes. ''' sequences, ready to be converted into TBS bytes. '''
@ -144,10 +147,16 @@ def encode_strands_as_sequences(strands, tbs_type=8):
index = entries[0].index - (entries[0].parent or 0) index = entries[0].index - (entries[0].parent or 0)
if ans and not strand_seqs: if ans and not strand_seqs:
# We are in the second or later strands, so we need to use a # We are in the second or later strands, so we need to use a
# special flag and index value. The index value if the entry # special flag and index value. The index value is the entry
# index - the index of the last entry in the previous strand. # index - the index of the last entry in the previous strand.
extra[0b1000] = True
index = last_index - entries[0].index index = last_index - entries[0].index
if index < 0:
if tbs_type == 5:
index = -index
else:
raise NegativeStrandIndex()
else:
extra[0b1000] = True
last_index = entries[-1].index last_index = entries[-1].index
strand_seqs.append((index, extra)) strand_seqs.append((index, extra))
@ -167,20 +176,31 @@ def sequences_to_bytes(sequences):
flag_size = 3 flag_size = 3
for val, extra in sequences: for val, extra in sequences:
ans.append(encode_tbs(val, extra, flag_size)) ans.append(encode_tbs(val, extra, flag_size))
flag_size = 4 # only the first seuqence has flag size 3 as all flag_size = 4 # only the first sequence has flag size 3 as all
# subsequent sequences could need the 0b1000 flag # subsequent sequences could need the 0b1000 flag
return b''.join(ans) return b''.join(ans)
def calculate_all_tbs(indexing_data, tbs_type=8):
    ''' Compute the TBS bytes for every item in indexing_data.

    Each item (the strands for one text record) is encoded with
    encode_strands_as_sequences() using the given tbs_type and then
    serialized with sequences_to_bytes().

    Returns a dict mapping (position in indexing_data + 1) to the
    calculated bytes. Nothing is caught here, so an exception raised while
    encoding (for example NegativeStrandIndex) propagates to the caller
    and no partial result is returned. '''
    return {
        rec_num: sequences_to_bytes(
            encode_strands_as_sequences(strands, tbs_type=tbs_type))
        for rec_num, strands in enumerate(indexing_data, 1)
    }
def apply_trailing_byte_sequences(index_table, records, text_record_lengths):
    ''' Calculate the TBS bytes for all text records and append them, as
    encoded trailing data, to the corresponding items of records.

    index_table is an iterable of mappings with the keys 'index',
    'offset', 'length', 'depth' and 'label', and optionally 'parent',
    'first_child' and 'last_child'. records is modified in place, at the
    positions given by the keys of the map returned by
    calculate_all_tbs(). Always returns True. '''

    def make_entry(row):
        # The optional relationship fields default to None, the final four
        # Entry fields are always passed as None here.
        return Entry(
            row['index'], row['offset'], row['length'], row['depth'],
            row.get('parent', None), row.get('first_child', None),
            row.get('last_child', None), row['label'],
            None, None, None, None)

    entries = tuple(make_entry(row) for row in index_table)
    indexing_data = collect_indexing_data(entries, text_record_lengths)

    try:
        tbs_map = calculate_all_tbs(indexing_data)
    except NegativeStrandIndex:
        # The default TBS type cannot represent this index layout, so
        # recalculate everything using type 5 instead.
        tbs_map = calculate_all_tbs(indexing_data, tbs_type=5)

    for rec_num, tbs in tbs_map.iteritems():
        records[rec_num] += encode_trailing_data(tbs)
    return True