mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
AZW3 Input: Handle AZW3 files with incorrect TAGX Offset INDX header fields. Fixes #1955308 [MOBI reader loads TAGX from wrong offset](https://bugs.launchpad.net/calibre/+bug/1955308)
This commit is contained in:
parent
172ee5d531
commit
aec2c1a551
@ -7,13 +7,14 @@ __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import struct
|
||||
|
||||
from collections import OrderedDict, namedtuple
|
||||
|
||||
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
|
||||
from calibre.ebooks.mobi.reader.index import (CNCX, parse_indx_header,
|
||||
parse_tagx_section, parse_index_record, INDEX_HEADER_FIELDS)
|
||||
from calibre.ebooks.mobi.reader.ncx import (tag_fieldname_map, default_entry)
|
||||
from calibre.ebooks.mobi.reader.index import (
|
||||
CNCX, INDEX_HEADER_FIELDS, get_tag_section_start, parse_index_record,
|
||||
parse_indx_header, parse_tagx_section
|
||||
)
|
||||
from calibre.ebooks.mobi.reader.ncx import default_entry, tag_fieldname_map
|
||||
from polyglot.builtins import iteritems
|
||||
|
||||
File = namedtuple('File',
|
||||
@ -71,7 +72,7 @@ def read_index(sections, idx, codec):
|
||||
cncx_records = [x.raw for x in sections[off:off+indx_header['ncncx']]]
|
||||
cncx = CNCX(cncx_records, codec)
|
||||
|
||||
tag_section_start = indx_header['tagx']
|
||||
tag_section_start = get_tag_section_start(data, indx_header)
|
||||
control_byte_count, tags = parse_tagx_section(data[tag_section_start:])
|
||||
|
||||
read_variable_len_data(data, indx_header)
|
||||
|
@ -51,6 +51,7 @@ def parse_indx_header(data):
|
||||
num = len(words)
|
||||
values = struct.unpack('>%dL' % num, data[4:4*(num+1)])
|
||||
ans = dict(zip(words, values))
|
||||
ans['idx_header_end_pos'] = 4 * (num+1)
|
||||
ordt1, ordt2 = ans['ordt1'], ans['ordt2']
|
||||
ans['ordt1_raw'], ans['ordt2_raw'] = [], []
|
||||
ans['ordt_map'] = ''
|
||||
@ -253,6 +254,15 @@ def parse_index_record(table, data, control_byte_count, tags, codec,
|
||||
return header
|
||||
|
||||
|
||||
def get_tag_section_start(data, indx_header):
    """Return the offset in ``data`` at which the TAGX section starts.

    The offset stored under ``indx_header['tagx']`` is used when the four
    bytes found there are ``b'TAGX'``. Otherwise ``data`` is searched for
    the first ``b'TAGX'`` at or after ``indx_header['idx_header_end_pos']``
    and that position is returned. If the marker cannot be found at all, the
    offset from the header is returned unchanged.
    """
    declared = indx_header['tagx']
    if data[declared:declared + 4] == b'TAGX':
        # The header field points at a real TAGX marker: trust it.
        return declared
    # Header offset is wrong: look for the marker after the INDX header.
    found = data.find(b'TAGX', indx_header['idx_header_end_pos'])
    return declared if found < 0 else found
|
||||
|
||||
|
||||
def read_index(sections, idx, codec):
|
||||
table, cncx = OrderedDict(), CNCX([], codec)
|
||||
|
||||
@ -266,7 +276,7 @@ def read_index(sections, idx, codec):
|
||||
cncx_records = [x[0] for x in sections[off:off+indx_header['ncncx']]]
|
||||
cncx = CNCX(cncx_records, codec)
|
||||
|
||||
tag_section_start = indx_header['tagx']
|
||||
tag_section_start = get_tag_section_start(data, indx_header)
|
||||
control_byte_count, tags = parse_tagx_section(data[tag_section_start:])
|
||||
|
||||
for i in range(idx + 1, idx + 1 + indx_count):
|
||||
|
Loading…
x
Reference in New Issue
Block a user