py3: various bytestring fixups while porting unicode_literals

- use bytestrings to write raw xml data
- compare bytestream to bytestring when checking formats
- use bytestrings to search inside mobi_html (rebased on master). Since
  it starts life as a bytestring, it does not need to be reconverted.
This commit is contained in:
Eli Schwartz 2019-05-26 15:15:44 -04:00
parent 45ca4c7934
commit 4d547f40e7
No known key found for this signature in database
GPG Key ID: CEB167EFB5722BD6
5 changed files with 11 additions and 11 deletions

View File

@ -237,7 +237,7 @@ class APNXBuilder(object):
# not modifying the text. In this case the case
# doesn't matter just the absolute character and
# the position within the stream.
data = bytearray(as_bytes(mr.mobi_html.lower()))
data = mr.mobi_html.lower()
slash, p, lt, gt = map(ord, '/p<>')
for c in data:
pos += 1

View File

@ -135,10 +135,10 @@ class PRS505(USBMS):
time.sleep(5)
os.makedirs(dname, mode=0o777)
with lopen(cachep, 'wb') as f:
f.write(u'''<?xml version="1.0" encoding="UTF-8"?>
f.write(b'''<?xml version="1.0" encoding="UTF-8"?>
<cache xmlns="http://www.kinoma.com/FskCache/1">
</cache>
'''.encode('utf8'))
''')
fsync(f)
return True
except:

View File

@ -35,13 +35,13 @@ Periodical identifier sample from a PRS-650:
'''
# Utility functions {{{
EMPTY_CARD_CACHE = '''\
EMPTY_CARD_CACHE = b'''\
<?xml version="1.0" encoding="UTF-8"?>
<cache xmlns="http://www.kinoma.com/FskCache/1">
</cache>
'''
EMPTY_EXT_CACHE = '''\
EMPTY_EXT_CACHE = b'''\
<?xml version="1.0" encoding="UTF-8"?>
<cacheExt xmlns="http://www.sony.com/xmlns/product/prs/device/1">
</cacheExt>
@ -726,8 +726,8 @@ class XMLCache(object):
self.cleanup_whitespace(i)
raw = etree.tostring(self.roots[i], encoding='UTF-8',
xml_declaration=True)
raw = raw.replace("<?xml version='1.0' encoding='UTF-8'?>",
'<?xml version="1.0" encoding="UTF-8"?>')
raw = raw.replace(b"<?xml version='1.0' encoding='UTF-8'?>",
b'<?xml version="1.0" encoding="UTF-8"?>')
with lopen(path, 'wb') as f:
f.write(raw)
fsync(f)
@ -738,8 +738,8 @@ class XMLCache(object):
xml_declaration=True)
except:
continue
raw = raw.replace("<?xml version='1.0' encoding='UTF-8'?>",
'<?xml version="1.0" encoding="UTF-8"?>')
raw = raw.replace(b"<?xml version='1.0' encoding='UTF-8'?>",
b'<?xml version="1.0" encoding="UTF-8"?>')
with lopen(path, 'wb') as f:
f.write(raw)
fsync(f)

View File

@ -204,7 +204,7 @@ def check_ebook_format(stream, current_guess):
ans = current_guess
if current_guess.lower() in ('prc', 'mobi', 'azw', 'azw1', 'azw3'):
stream.seek(0)
if stream.read(3) == 'TPZ':
if stream.read(3) == b'TPZ':
ans = 'tpz'
stream.seek(0)
return ans

View File

@ -346,7 +346,7 @@ class MobiReader(object):
def cleanup_html(self):
self.log.debug('Cleaning up HTML...')
self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html)
if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
if self.book_header.ancient and b'<html' not in self.mobi_html[:300].lower():
self.processed_html = '<html><p>' + self.processed_html.replace('\n\n', '<p>') + '</html>'
self.processed_html = self.processed_html.replace('\r\n', '\n')
self.processed_html = self.processed_html.replace('> <', '>\n<')