py3: Misc AZW3 input fixes

This commit is contained in:
Kovid Goyal 2019-04-11 10:29:37 +05:30
parent 2c3c25da02
commit 11537ddd46
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 12 additions and 9 deletions

View File

@@ -1593,7 +1593,7 @@ class OPFCreator(Metadata):
root.set('version', '2.0')
raw = etree.tostring(root, pretty_print=True, xml_declaration=True,
encoding=encoding)
raw = raw.replace(DNS, OPF2_NS)
raw = raw.replace(DNS.encode('utf-8'), OPF2_NS.encode('utf-8'))
opf_stream.write(raw)
opf_stream.flush()
if toc is not None and ncx_stream is not None:

View File

@@ -50,7 +50,7 @@ def parse_indx_header(data):
check_signature(data, b'INDX')
words = INDEX_HEADER_FIELDS
num = len(words)
values = struct.unpack(bytes('>%dL' % num), data[4:4*(num+1)])
values = struct.unpack('>%dL' % num, data[4:4*(num+1)])
ans = dict(zip(words, values))
ordt1, ordt2 = ans['ordt1'], ans['ordt2']
ans['ordt1_raw'], ans['ordt2_raw'] = [], []

View File

@@ -165,8 +165,9 @@ class Mobi8Reader(object):
fileno = tag_map[3][0]
if 6 in list(tag_map.keys()):
fileno = tag_map[6]
self.guide.append(Item(ref_type.decode(self.header.codec),
title, fileno))
if isinstance(ref_type, bytes):
ref_type = ref_type.decode(self.header.codec)
self.guide.append(Item(ref_type, title, fileno))
def build_parts(self):
raw_ml = self.mobi6_reader.mobi_html
@@ -354,7 +355,9 @@ class Mobi8Reader(object):
continue # thumbnailstandard record, ignore it
linktgt, idtext = self.get_id_tag_by_pos_fid(*pos_fid)
if idtext:
linktgt += b'#' + idtext
if isinstance(idtext, bytes):
idtext = idtext.decode(self.header.codec)
linktgt += '#' + idtext
g = Guide.Reference(linktgt, os.getcwdu())
g.title, g.type = ref_title, ref_type
if g.title == 'start' or g.type == 'text':

View File

@@ -21,12 +21,12 @@ IMAGE_MAX_SIZE = 10 * 1024 * 1024
RECORD_SIZE = 0x1000 # 4096 (Text record size (uncompressed))
def decode_string(raw, codec='utf-8', ordt_map=''):
length, = struct.unpack(b'>B', raw[0])
def decode_string(raw, codec='utf-8', ordt_map=None):
length, = struct.unpack(b'>B', raw[0:1])
raw = raw[1:1+length]
consumed = length+1
if ordt_map:
return ''.join(ordt_map[ord(x)] for x in raw), consumed
return ''.join(ordt_map[x] for x in bytearray(raw)), consumed
return raw.decode(codec), consumed
@@ -60,7 +60,7 @@ def encode_number_as_hex(num):
The bytes that follow are simply the hexadecimal representation of the
number.
'''
num = bytes(hex(num)[2:].upper())
num = hex(num)[2:].upper().encode('ascii')
nlen = len(num)
if nlen % 2 != 0:
num = b'0'+num