From 11537ddd46801080851f8c5703cdcfb5a7ebf494 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 11 Apr 2019 10:29:37 +0530 Subject: [PATCH] py3: Misc AZW3 input fixes --- src/calibre/ebooks/metadata/opf2.py | 2 +- src/calibre/ebooks/mobi/reader/index.py | 2 +- src/calibre/ebooks/mobi/reader/mobi8.py | 9 ++++++--- src/calibre/ebooks/mobi/utils.py | 8 ++++---- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index 3bbe43c6bc..15b9aa8812 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -1593,7 +1593,7 @@ class OPFCreator(Metadata): root.set('version', '2.0') raw = etree.tostring(root, pretty_print=True, xml_declaration=True, encoding=encoding) - raw = raw.replace(DNS, OPF2_NS) + raw = raw.replace(DNS.encode('utf-8'), OPF2_NS.encode('utf-8')) opf_stream.write(raw) opf_stream.flush() if toc is not None and ncx_stream is not None: diff --git a/src/calibre/ebooks/mobi/reader/index.py b/src/calibre/ebooks/mobi/reader/index.py index eb2ab5e31f..b489dd2e6e 100644 --- a/src/calibre/ebooks/mobi/reader/index.py +++ b/src/calibre/ebooks/mobi/reader/index.py @@ -50,7 +50,7 @@ def parse_indx_header(data): check_signature(data, b'INDX') words = INDEX_HEADER_FIELDS num = len(words) - values = struct.unpack(bytes('>%dL' % num), data[4:4*(num+1)]) + values = struct.unpack('>%dL' % num, data[4:4*(num+1)]) ans = dict(zip(words, values)) ordt1, ordt2 = ans['ordt1'], ans['ordt2'] ans['ordt1_raw'], ans['ordt2_raw'] = [], [] diff --git a/src/calibre/ebooks/mobi/reader/mobi8.py b/src/calibre/ebooks/mobi/reader/mobi8.py index 52ba91c1cd..be4cb554fe 100644 --- a/src/calibre/ebooks/mobi/reader/mobi8.py +++ b/src/calibre/ebooks/mobi/reader/mobi8.py @@ -165,8 +165,9 @@ class Mobi8Reader(object): fileno = tag_map[3][0] if 6 in list(tag_map.keys()): fileno = tag_map[6] - self.guide.append(Item(ref_type.decode(self.header.codec), - title, fileno)) + if isinstance(ref_type, bytes): + ref_type = ref_type.decode(self.header.codec) + self.guide.append(Item(ref_type, title, fileno)) def build_parts(self): raw_ml = self.mobi6_reader.mobi_html @@ -354,7 +355,9 @@ class Mobi8Reader(object): continue # thumbnailstandard record, ignore it linktgt, idtext = self.get_id_tag_by_pos_fid(*pos_fid) if idtext: - linktgt += b'#' + idtext + if isinstance(idtext, bytes): + idtext = idtext.decode(self.header.codec) + linktgt += '#' + idtext g = Guide.Reference(linktgt, os.getcwdu()) g.title, g.type = ref_title, ref_type if g.title == 'start' or g.type == 'text': diff --git a/src/calibre/ebooks/mobi/utils.py b/src/calibre/ebooks/mobi/utils.py index 4ca8eec878..11ffd66906 100644 --- a/src/calibre/ebooks/mobi/utils.py +++ b/src/calibre/ebooks/mobi/utils.py @@ -21,12 +21,12 @@ IMAGE_MAX_SIZE = 10 * 1024 * 1024 RECORD_SIZE = 0x1000 # 4096 (Text record size (uncompressed)) -def decode_string(raw, codec='utf-8', ordt_map=''): - length, = struct.unpack(b'>B', raw[0]) +def decode_string(raw, codec='utf-8', ordt_map=None): + length, = struct.unpack(b'>B', raw[0:1]) raw = raw[1:1+length] consumed = length+1 if ordt_map: - return ''.join(ordt_map[ord(x)] for x in raw), consumed + return ''.join(ordt_map[x] for x in bytearray(raw)), consumed return raw.decode(codec), consumed @@ -60,7 +60,7 @@ def encode_number_as_hex(num): The bytes that follow are simply the hexadecimal representation of the number. ''' - num = bytes(hex(num)[2:].upper()) + num = hex(num)[2:].upper().encode('ascii') nlen = len(num) if nlen % 2 != 0: num = b'0'+num