py3: Misc AZW3 input fixes

This commit is contained in:
Kovid Goyal 2019-04-11 10:29:37 +05:30
parent 2c3c25da02
commit 11537ddd46
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 12 additions and 9 deletions

View File

@@ -1593,7 +1593,7 @@ class OPFCreator(Metadata):
 root.set('version', '2.0')
 raw = etree.tostring(root, pretty_print=True, xml_declaration=True,
 encoding=encoding)
-raw = raw.replace(DNS, OPF2_NS)
+raw = raw.replace(DNS.encode('utf-8'), OPF2_NS.encode('utf-8'))
 opf_stream.write(raw)
 opf_stream.flush()
 if toc is not None and ncx_stream is not None:

View File

@@ -50,7 +50,7 @@ def parse_indx_header(data):
 check_signature(data, b'INDX')
 words = INDEX_HEADER_FIELDS
 num = len(words)
-values = struct.unpack(bytes('>%dL' % num), data[4:4*(num+1)])
+values = struct.unpack('>%dL' % num, data[4:4*(num+1)])
 ans = dict(zip(words, values))
 ordt1, ordt2 = ans['ordt1'], ans['ordt2']
 ans['ordt1_raw'], ans['ordt2_raw'] = [], []

View File

@@ -165,8 +165,9 @@ class Mobi8Reader(object):
 fileno = tag_map[3][0]
 if 6 in list(tag_map.keys()):
 fileno = tag_map[6]
-self.guide.append(Item(ref_type.decode(self.header.codec),
-title, fileno))
+if isinstance(ref_type, bytes):
+ref_type = ref_type.decode(self.header.codec)
+self.guide.append(Item(ref_type, title, fileno))
 def build_parts(self):
 raw_ml = self.mobi6_reader.mobi_html
@@ -354,7 +355,9 @@ class Mobi8Reader(object):
 continue # thumbnailstandard record, ignore it
 linktgt, idtext = self.get_id_tag_by_pos_fid(*pos_fid)
 if idtext:
-linktgt += b'#' + idtext
+if isinstance(idtext, bytes):
+idtext = idtext.decode(self.header.codec)
+linktgt += '#' + idtext
 g = Guide.Reference(linktgt, os.getcwdu())
 g.title, g.type = ref_title, ref_type
 if g.title == 'start' or g.type == 'text':

View File

@@ -21,12 +21,12 @@ IMAGE_MAX_SIZE = 10 * 1024 * 1024
 RECORD_SIZE = 0x1000 # 4096 (Text record size (uncompressed))
-def decode_string(raw, codec='utf-8', ordt_map=''):
-length, = struct.unpack(b'>B', raw[0])
+def decode_string(raw, codec='utf-8', ordt_map=None):
+length, = struct.unpack(b'>B', raw[0:1])
 raw = raw[1:1+length]
 consumed = length+1
 if ordt_map:
-return ''.join(ordt_map[ord(x)] for x in raw), consumed
+return ''.join(ordt_map[x] for x in bytearray(raw)), consumed
 return raw.decode(codec), consumed
@@ -60,7 +60,7 @@ def encode_number_as_hex(num):
 The bytes that follow are simply the hexadecimal representation of the
 number.
 '''
-num = bytes(hex(num)[2:].upper())
+num = hex(num)[2:].upper().encode('ascii')
 nlen = len(num)
 if nlen % 2 != 0:
 num = b'0'+num