diff --git a/src/calibre/ebooks/mobi/debug/index.py b/src/calibre/ebooks/mobi/debug/index.py
index d8ab38f3ac..b412bcb380 100644
--- a/src/calibre/ebooks/mobi/debug/index.py
+++ b/src/calibre/ebooks/mobi/debug/index.py
@@ -7,13 +7,14 @@ __copyright__ = '2012, Kovid Goyal '
 __docformat__ = 'restructuredtext en'
 
 import struct
-
 from collections import OrderedDict, namedtuple
 
 from calibre.ebooks.mobi.reader.headers import NULL_INDEX
-from calibre.ebooks.mobi.reader.index import (CNCX, parse_indx_header,
-        parse_tagx_section, parse_index_record, INDEX_HEADER_FIELDS)
-from calibre.ebooks.mobi.reader.ncx import (tag_fieldname_map, default_entry)
+from calibre.ebooks.mobi.reader.index import (
+    CNCX, INDEX_HEADER_FIELDS, get_tag_section_start, parse_index_record,
+    parse_indx_header, parse_tagx_section
+)
+from calibre.ebooks.mobi.reader.ncx import default_entry, tag_fieldname_map
 from polyglot.builtins import iteritems
 
 File = namedtuple('File',
@@ -71,7 +72,7 @@ def read_index(sections, idx, codec):
         cncx_records = [x.raw for x in sections[off:off+indx_header['ncncx']]]
         cncx = CNCX(cncx_records, codec)
 
-    tag_section_start = indx_header['tagx']
+    tag_section_start = get_tag_section_start(data, indx_header)
     control_byte_count, tags = parse_tagx_section(data[tag_section_start:])
 
     read_variable_len_data(data, indx_header)
diff --git a/src/calibre/ebooks/mobi/reader/index.py b/src/calibre/ebooks/mobi/reader/index.py
index 7913542c99..8e31211014 100644
--- a/src/calibre/ebooks/mobi/reader/index.py
+++ b/src/calibre/ebooks/mobi/reader/index.py
@@ -51,6 +51,7 @@ def parse_indx_header(data):
     num = len(words)
     values = struct.unpack('>%dL' % num, data[4:4*(num+1)])
     ans = dict(zip(words, values))
+    ans['idx_header_end_pos'] = 4 * (num+1)
     ordt1, ordt2 = ans['ordt1'], ans['ordt2']
     ans['ordt1_raw'], ans['ordt2_raw'] = [], []
     ans['ordt_map'] = ''
@@ -253,6 +254,15 @@ def parse_index_record(table, data, control_byte_count, tags, codec,
     return header
 
 
+def get_tag_section_start(data, indx_header):
+    tag_section_start = indx_header['tagx']
+    if data[tag_section_start:tag_section_start + 4] != b'TAGX':
+        tpos = data.find(b'TAGX', indx_header['idx_header_end_pos'])
+        if tpos > -1:
+            tag_section_start = tpos
+    return tag_section_start
+
+
 def read_index(sections, idx, codec):
     table, cncx = OrderedDict(), CNCX([], codec)
 
@@ -266,7 +276,7 @@ def read_index(sections, idx, codec):
         cncx_records = [x[0] for x in sections[off:off+indx_header['ncncx']]]
         cncx = CNCX(cncx_records, codec)
 
-    tag_section_start = indx_header['tagx']
+    tag_section_start = get_tag_section_start(data, indx_header)
     control_byte_count, tags = parse_tagx_section(data[tag_section_start:])
 
     for i in range(idx + 1, idx + 1 + indx_count):