py3: Misc AZW3 input fixes

2025-07-09 03:04:10 -04:00 · 2019-04-11 10:29:37 +05:30 · 2019-04-11 10:29:37 +05:30 · 11537ddd46
commit 11537ddd46
parent 2c3c25da02
4 changed files with 12 additions and 9 deletions
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@@ -1593,7 +1593,7 @@ class OPFCreator(Metadata):
        root.set('version', '2.0')
        raw = etree.tostring(root, pretty_print=True, xml_declaration=True,
                encoding=encoding)
-        raw = raw.replace(DNS, OPF2_NS)
+        raw = raw.replace(DNS.encode('utf-8'), OPF2_NS.encode('utf-8'))
        opf_stream.write(raw)
        opf_stream.flush()
        if toc is not None and ncx_stream is not None:
--- a/src/calibre/ebooks/mobi/reader/index.py
+++ b/src/calibre/ebooks/mobi/reader/index.py
@@ -50,7 +50,7 @@ def parse_indx_header(data):
    check_signature(data, b'INDX')
    words = INDEX_HEADER_FIELDS
    num = len(words)
-    values = struct.unpack(bytes('>%dL' % num), data[4:4*(num+1)])
+    values = struct.unpack('>%dL' % num, data[4:4*(num+1)])
    ans = dict(zip(words, values))
    ordt1, ordt2 = ans['ordt1'], ans['ordt2']
    ans['ordt1_raw'], ans['ordt2_raw'] = [], []
--- a/src/calibre/ebooks/mobi/reader/mobi8.py
+++ b/src/calibre/ebooks/mobi/reader/mobi8.py
@@ -165,8 +165,9 @@ class Mobi8Reader(object):
                    fileno  = tag_map[3][0]
                if 6 in list(tag_map.keys()):
                    fileno = tag_map[6]
-                self.guide.append(Item(ref_type.decode(self.header.codec),
-                    title, fileno))
+                if isinstance(ref_type, bytes):
+                    ref_type = ref_type.decode(self.header.codec)
+                self.guide.append(Item(ref_type, title, fileno))

    def build_parts(self):
        raw_ml = self.mobi6_reader.mobi_html
@@ -354,7 +355,9 @@ class Mobi8Reader(object):
                continue  # thumbnailstandard record, ignore it
            linktgt, idtext = self.get_id_tag_by_pos_fid(*pos_fid)
            if idtext:
-                linktgt += b'#' + idtext
+                if isinstance(idtext, bytes):
+                    idtext = idtext.decode(self.header.codec)
+                linktgt += '#' + idtext
            g = Guide.Reference(linktgt, os.getcwdu())
            g.title, g.type = ref_title, ref_type
            if g.title == 'start' or g.type == 'text':
--- a/src/calibre/ebooks/mobi/utils.py
+++ b/src/calibre/ebooks/mobi/utils.py
@@ -21,12 +21,12 @@ IMAGE_MAX_SIZE = 10 * 1024 * 1024
 RECORD_SIZE = 0x1000  # 4096 (Text record size (uncompressed))


-def decode_string(raw, codec='utf-8', ordt_map=''):
-    length, = struct.unpack(b'>B', raw[0])
+def decode_string(raw, codec='utf-8', ordt_map=None):
+    length, = struct.unpack(b'>B', raw[0:1])
    raw = raw[1:1+length]
    consumed = length+1
    if ordt_map:
-        return ''.join(ordt_map[ord(x)] for x in raw), consumed
+        return ''.join(ordt_map[x] for x in bytearray(raw)), consumed
    return raw.decode(codec), consumed


@@ -60,7 +60,7 @@ def encode_number_as_hex(num):
    The bytes that follow are simply the hexadecimal representation of the
    number.
    '''
-    num = bytes(hex(num)[2:].upper())
+    num = hex(num)[2:].upper().encode('ascii')
    nlen = len(num)
    if nlen % 2 != 0:
        num = b'0'+num