py3: various bytestring fixups while porting unicode_literals

- use bytestrings to write raw xml data
- compare bytestream to bytestring when checking formats
- use bytestrings to search inside mobi_html (rebased on master). Since it
  starts life as a bytestring it does not need to be reconverted.
2026-01-06 20:20:30 -05:00 · 2019-05-26 15:15:44 -04:00 · 2019-05-26 15:15:44 -04:00 · 4d547f40e7
commit 4d547f40e7
parent 45ca4c7934
5 changed files with 11 additions and 11 deletions
--- a/src/calibre/devices/kindle/apnx.py
+++ b/src/calibre/devices/kindle/apnx.py
@@ -237,7 +237,7 @@ class APNXBuilder(object):
        # not modifying the text. In this case the case
        # doesn't matter just the absolute character and
        # the position within the stream.
-        data = bytearray(as_bytes(mr.mobi_html.lower()))
+        data = mr.mobi_html.lower()
        slash, p, lt, gt = map(ord, '/p<>')
        for c in data:
            pos += 1
--- a/src/calibre/devices/prs505/driver.py
+++ b/src/calibre/devices/prs505/driver.py
@@ -135,10 +135,10 @@ class PRS505(USBMS):
                            time.sleep(5)
                            os.makedirs(dname, mode=0o777)
                    with lopen(cachep, 'wb') as f:
-                        f.write(u'''<?xml version="1.0" encoding="UTF-8"?>
+                        f.write(b'''<?xml version="1.0" encoding="UTF-8"?>
                            <cache xmlns="http://www.kinoma.com/FskCache/1">
                            </cache>
-                            '''.encode('utf8'))
+                            ''')
                        fsync(f)
                return True
            except:
--- a/src/calibre/devices/prs505/sony_cache.py
+++ b/src/calibre/devices/prs505/sony_cache.py
@@ -35,13 +35,13 @@ Periodical identifier sample from a PRS-650:
 '''

 # Utility functions {{{
-EMPTY_CARD_CACHE = '''\
+EMPTY_CARD_CACHE = b'''\
 <?xml version="1.0" encoding="UTF-8"?>
 <cache xmlns="http://www.kinoma.com/FskCache/1">
 </cache>
 '''

-EMPTY_EXT_CACHE = '''\
+EMPTY_EXT_CACHE = b'''\
 <?xml version="1.0" encoding="UTF-8"?>
 <cacheExt xmlns="http://www.sony.com/xmlns/product/prs/device/1">
 </cacheExt>
@@ -726,8 +726,8 @@ class XMLCache(object):
            self.cleanup_whitespace(i)
            raw = etree.tostring(self.roots[i], encoding='UTF-8',
                    xml_declaration=True)
-            raw = raw.replace("<?xml version='1.0' encoding='UTF-8'?>",
-                    '<?xml version="1.0" encoding="UTF-8"?>')
+            raw = raw.replace(b"<?xml version='1.0' encoding='UTF-8'?>",
+                    b'<?xml version="1.0" encoding="UTF-8"?>')
            with lopen(path, 'wb') as f:
                f.write(raw)
                fsync(f)
@@ -738,8 +738,8 @@ class XMLCache(object):
                    xml_declaration=True)
            except:
                continue
-            raw = raw.replace("<?xml version='1.0' encoding='UTF-8'?>",
-                    '<?xml version="1.0" encoding="UTF-8"?>')
+            raw = raw.replace(b"<?xml version='1.0' encoding='UTF-8'?>",
+                    b'<?xml version="1.0" encoding="UTF-8"?>')
            with lopen(path, 'wb') as f:
                f.write(raw)
                fsync(f)
--- a/src/calibre/ebooks/__init__.py
+++ b/src/calibre/ebooks/__init__.py
@@ -204,7 +204,7 @@ def check_ebook_format(stream, current_guess):
    ans = current_guess
    if current_guess.lower() in ('prc', 'mobi', 'azw', 'azw1', 'azw3'):
        stream.seek(0)
-        if stream.read(3) == 'TPZ':
+        if stream.read(3) == b'TPZ':
            ans = 'tpz'
        stream.seek(0)
    return ans
--- a/src/calibre/ebooks/mobi/reader/mobi6.py
+++ b/src/calibre/ebooks/mobi/reader/mobi6.py
@@ -346,7 +346,7 @@ class MobiReader(object):
    def cleanup_html(self):
        self.log.debug('Cleaning up HTML...')
        self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html)
-        if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
+        if self.book_header.ancient and b'<html' not in self.mobi_html[:300].lower():
            self.processed_html = '<html><p>' + self.processed_html.replace('\n\n', '<p>') + '</html>'
        self.processed_html = self.processed_html.replace('\r\n', '\n')
        self.processed_html = self.processed_html.replace('> <', '>\n<')