py3: fix bytes handling by using bytes as needed

2025-07-09 03:04:10 -04:00 · 2019-06-17 17:27:58 -04:00 · 2019-06-17 17:27:58 -04:00 · 7993757099
commit 7993757099
parent 2da800aa6e
6 changed files with 30 additions and 25 deletions
--- a/src/calibre/ebooks/metadata/archive.py
+++ b/src/calibre/ebooks/metadata/archive.py
@ -30,7 +30,7 @@ def archive_type(stream):
    ans = None
    if id_ == stringFileHeader:
        ans = 'zip'
-    elif id_.startswith('Rar'):
+    elif id_.startswith(b'Rar'):
        ans = 'rar'
    try:
        stream.seek(pos)
--- a/src/calibre/ebooks/metadata/ereader.py
+++ b/src/calibre/ebooks/metadata/ereader.py
@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, division, print_function, unicode_literals
 '''
 Read meta information from eReader pdb files.
@ -24,7 +25,7 @@ def get_cover(pheader, eheader):
    for i in range(eheader.image_count):
        raw = pheader.section_data(eheader.image_data_offset + i)
-        if raw[4:4 + 32].strip('\x00') == 'cover.png':
+        if raw[4:4 + 32].strip(b'\x00') == b'cover.png':
            cover_data = raw[62:]
            break
@ -48,7 +49,7 @@ def get_metadata(stream, extract_cover=True):
            try:
                mdata = pheader.section_data(hr.metadata_offset)
-                mdata = mdata.split('\x00')
+                mdata = mdata.decode('utf-8').split('\x00')
                mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0])
                mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])]
                mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3])
@ -80,7 +81,7 @@ def set_metadata(stream, mi):
    # Create a metadata record for the file if one does not alreay exist
    if not hr.has_metadata:
-        sections += ['', 'MeTaInFo\x00']
+        sections += [b'', b'MeTaInFo\x00']
        last_data = len(sections) - 1
        for i in range(0, 132, 2):
@ -95,8 +96,8 @@ def set_metadata(stream, mi):
    # Merge the metadata into the file
    file_mi = get_metadata(stream, False)
    file_mi.smart_update(mi)
-    sections[hr.metadata_offset] = '%s\x00%s\x00%s\x00%s\x00%s\x00' % \
+    sections[hr.metadata_offset] = ('%s\x00%s\x00%s\x00%s\x00%s\x00' % \
-        (file_mi.title, authors_to_string(file_mi.authors), '', file_mi.publisher, file_mi.isbn)
+        (file_mi.title, authors_to_string(file_mi.authors), '', file_mi.publisher, file_mi.isbn)).encode('utf-8')
    # Rebuild the PDB wrapper because the offsets have changed due to the
    # new metadata.
--- a/src/calibre/ebooks/metadata/haodoo.py
+++ b/src/calibre/ebooks/metadata/haodoo.py
@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, division, print_function, unicode_literals
 '''
 Read meta information from Haodoo.net pdb files.
--- a/src/calibre/ebooks/metadata/imp.py
+++ b/src/calibre/ebooks/metadata/imp.py
@ -1,4 +1,5 @@
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function, unicode_literals
 __license__   = 'GPL v3'
 __copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
 '''Read meta information from IMP files'''
@ -8,7 +9,7 @@ import sys
 from calibre.ebooks.metadata import MetaInformation, string_to_authors
 from polyglot.builtins import unicode_type
-MAGIC = ['\x00\x01BOOKDOUG', '\x00\x02BOOKDOUG']
+MAGIC = [b'\x00\x01BOOKDOUG', b'\x00\x02BOOKDOUG']
 def get_metadata(stream):
@ -18,18 +19,18 @@ def get_metadata(stream):
    stream.seek(0)
    try:
        if stream.read(10) not in MAGIC:
-            print(u'Couldn\'t read IMP header from file', file=sys.stderr)
+            print('Couldn\'t read IMP header from file', file=sys.stderr)
            return mi
        def cString(skip=0):
-            result = ''
+            result = b''
            while 1:
                data = stream.read(1)
-                if data == '\x00':
+                if data == b'\x00':
                    if not skip:
-                        return result
+                        return result.decode('utf-8')
                    skip -= 1
-                    result, data = '', ''
+                    result, data = b'', b''
                result += data
        stream.read(38)  # skip past some uninteresting headers
@ -44,6 +45,6 @@ def get_metadata(stream):
        if category:
            mi.category = category
    except Exception as err:
-        msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode_type(err))
+        msg = 'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode_type(err))
        print(msg.encode('utf8'), file=sys.stderr)
    return mi
--- a/src/calibre/ebooks/metadata/lrx.py
+++ b/src/calibre/ebooks/metadata/lrx.py
@ -1,4 +1,6 @@
 #!/usr/bin/env  python2
 from __future__ import absolute_import, division, print_function, unicode_literals
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
@ -39,7 +41,7 @@ def get_metadata(f):
    read = lambda at, amount: _read(f, at, amount)
    f.seek(0)
    buf = f.read(12)
-    if buf[4:] == 'ftypLRX2':
+    if buf[4:] == b'ftypLRX2':
        offset = 0
        while True:
            offset += word_be(buf[:4])
@ -47,7 +49,7 @@ def get_metadata(f):
                buf = read(offset, 8)
            except:
                raise ValueError('Not a valid LRX file')
-            if buf[4:] == 'bbeb':
+            if buf[4:] == b'bbeb':
                break
        offset += 8
        buf = read(offset, 16)
@ -80,8 +82,7 @@ def get_metadata(f):
        mi.language = root.find('DocInfo').find('Language').text
        return mi
-    elif buf[4:8] == 'LRX':
+    elif buf[4:8] == b'LRX':
        raise ValueError('Librie LRX format not supported')
    else:
        raise ValueError('Not a LRX file')
--- a/src/calibre/ebooks/metadata/pml.py
+++ b/src/calibre/ebooks/metadata/pml.py
@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, division, print_function, unicode_literals
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
@ -24,7 +25,7 @@ def get_metadata(stream, extract_cover=True):
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)
-    pml = ''
+    pml = b''
    if stream.name.endswith('.pmlz'):
        with TemporaryDirectory('_unpmlz') as tdir:
            zf = ZipFile(stream)
@ -41,22 +42,22 @@ def get_metadata(stream, extract_cover=True):
        if extract_cover:
            mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], os.path.abspath(os.path.dirname(stream.name)))
-    for comment in re.findall(r'(?mus)\\v.*?\\v', pml):
+    for comment in re.findall(br'(?ms)\\v.*?\\v', pml):
-        m = re.search(r'TITLE="(.*?)"', comment)
+        m = re.search(br'TITLE="(.*?)"', comment)
        if m:
            mi.title = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
-        m = re.search(r'AUTHOR="(.*?)"', comment)
+        m = re.search(br'AUTHOR="(.*?)"', comment)
        if m:
            if mi.authors == [_('Unknown')]:
                mi.authors = []
            mi.authors.append(re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))))
-        m = re.search(r'PUBLISHER="(.*?)"', comment)
+        m = re.search(br'PUBLISHER="(.*?)"', comment)
        if m:
            mi.publisher = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
-        m = re.search(r'COPYRIGHT="(.*?)"', comment)
+        m = re.search(br'COPYRIGHT="(.*?)"', comment)
        if m:
            mi.rights = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
-        m = re.search(r'ISBN="(.*?)"', comment)
+        m = re.search(br'ISBN="(.*?)"', comment)
        if m:
            mi.isbn = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))