mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
AZW3 Input: Handle AZW3 files with incorrect TAGX Offset INDX header fields. Fixes #1955308 [MOBI reader loads TAGX from wrong offset](https://bugs.launchpad.net/calibre/+bug/1955308)
This commit is contained in:
parent
172ee5d531
commit
aec2c1a551
@ -7,13 +7,14 @@ __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import struct
|
import struct
|
||||||
|
|
||||||
from collections import OrderedDict, namedtuple
|
from collections import OrderedDict, namedtuple
|
||||||
|
|
||||||
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
|
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
|
||||||
from calibre.ebooks.mobi.reader.index import (CNCX, parse_indx_header,
|
from calibre.ebooks.mobi.reader.index import (
|
||||||
parse_tagx_section, parse_index_record, INDEX_HEADER_FIELDS)
|
CNCX, INDEX_HEADER_FIELDS, get_tag_section_start, parse_index_record,
|
||||||
from calibre.ebooks.mobi.reader.ncx import (tag_fieldname_map, default_entry)
|
parse_indx_header, parse_tagx_section
|
||||||
|
)
|
||||||
|
from calibre.ebooks.mobi.reader.ncx import default_entry, tag_fieldname_map
|
||||||
from polyglot.builtins import iteritems
|
from polyglot.builtins import iteritems
|
||||||
|
|
||||||
File = namedtuple('File',
|
File = namedtuple('File',
|
||||||
@ -71,7 +72,7 @@ def read_index(sections, idx, codec):
|
|||||||
cncx_records = [x.raw for x in sections[off:off+indx_header['ncncx']]]
|
cncx_records = [x.raw for x in sections[off:off+indx_header['ncncx']]]
|
||||||
cncx = CNCX(cncx_records, codec)
|
cncx = CNCX(cncx_records, codec)
|
||||||
|
|
||||||
tag_section_start = indx_header['tagx']
|
tag_section_start = get_tag_section_start(data, indx_header)
|
||||||
control_byte_count, tags = parse_tagx_section(data[tag_section_start:])
|
control_byte_count, tags = parse_tagx_section(data[tag_section_start:])
|
||||||
|
|
||||||
read_variable_len_data(data, indx_header)
|
read_variable_len_data(data, indx_header)
|
||||||
|
@ -51,6 +51,7 @@ def parse_indx_header(data):
|
|||||||
num = len(words)
|
num = len(words)
|
||||||
values = struct.unpack('>%dL' % num, data[4:4*(num+1)])
|
values = struct.unpack('>%dL' % num, data[4:4*(num+1)])
|
||||||
ans = dict(zip(words, values))
|
ans = dict(zip(words, values))
|
||||||
|
ans['idx_header_end_pos'] = 4 * (num+1)
|
||||||
ordt1, ordt2 = ans['ordt1'], ans['ordt2']
|
ordt1, ordt2 = ans['ordt1'], ans['ordt2']
|
||||||
ans['ordt1_raw'], ans['ordt2_raw'] = [], []
|
ans['ordt1_raw'], ans['ordt2_raw'] = [], []
|
||||||
ans['ordt_map'] = ''
|
ans['ordt_map'] = ''
|
||||||
@ -253,6 +254,15 @@ def parse_index_record(table, data, control_byte_count, tags, codec,
|
|||||||
return header
|
return header
|
||||||
|
|
||||||
|
|
||||||
|
def get_tag_section_start(data, indx_header):
    '''
    Return the offset in ``data`` at which the TAGX section starts.

    The ``tagx`` field of the INDX header is used when the bytes at that
    offset really are the ``TAGX`` magic. Some files carry an incorrect value
    in that field, so otherwise ``data`` is searched for the magic, starting
    at ``indx_header['idx_header_end_pos']``. If the magic cannot be found
    either, the declared (unverified) offset is returned unchanged.

    :param data: The raw bytes of the INDX record
    :param indx_header: The parsed INDX header, a mapping providing ``tagx``
        and, when the declared offset is wrong, ``idx_header_end_pos``
    :return: Integer offset into ``data``
    '''
    magic = b'TAGX'
    declared = indx_header['tagx']
    if data[declared:declared + 4] == magic:
        # Header field is trustworthy, nothing to repair
        return declared
    # Declared offset does not point at a TAGX block, look for the real one
    # after the fixed header fields
    located = data.find(magic, indx_header['idx_header_end_pos'])
    if located < 0:
        return declared
    return located
|
||||||
|
|
||||||
|
|
||||||
def read_index(sections, idx, codec):
|
def read_index(sections, idx, codec):
|
||||||
table, cncx = OrderedDict(), CNCX([], codec)
|
table, cncx = OrderedDict(), CNCX([], codec)
|
||||||
|
|
||||||
@ -266,7 +276,7 @@ def read_index(sections, idx, codec):
|
|||||||
cncx_records = [x[0] for x in sections[off:off+indx_header['ncncx']]]
|
cncx_records = [x[0] for x in sections[off:off+indx_header['ncncx']]]
|
||||||
cncx = CNCX(cncx_records, codec)
|
cncx = CNCX(cncx_records, codec)
|
||||||
|
|
||||||
tag_section_start = indx_header['tagx']
|
tag_section_start = get_tag_section_start(data, indx_header)
|
||||||
control_byte_count, tags = parse_tagx_section(data[tag_section_start:])
|
control_byte_count, tags = parse_tagx_section(data[tag_section_start:])
|
||||||
|
|
||||||
for i in range(idx + 1, idx + 1 + indx_count):
|
for i in range(idx + 1, idx + 1 + indx_count):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user