py3: various bytestring fixups while porting unicode_literals

- use bytestrings to write raw xml data
- compare the bytes read from the stream against a bytestring when checking formats
- use bytestrings to search inside mobi_html (rebased on master). Since
  it starts life as a bytestring, it does not need to be converted again.
This commit is contained in:
Eli Schwartz 2019-05-26 15:15:44 -04:00
parent 45ca4c7934
commit 4d547f40e7
No known key found for this signature in database
GPG Key ID: CEB167EFB5722BD6
5 changed files with 11 additions and 11 deletions

View File

@ -237,7 +237,7 @@ class APNXBuilder(object):
# not modifying the text. In this case the case # not modifying the text. In this case the case
# doesn't matter just the absolute character and # doesn't matter just the absolute character and
# the position within the stream. # the position within the stream.
data = bytearray(as_bytes(mr.mobi_html.lower())) data = mr.mobi_html.lower()
slash, p, lt, gt = map(ord, '/p<>') slash, p, lt, gt = map(ord, '/p<>')
for c in data: for c in data:
pos += 1 pos += 1

View File

@ -135,10 +135,10 @@ class PRS505(USBMS):
time.sleep(5) time.sleep(5)
os.makedirs(dname, mode=0o777) os.makedirs(dname, mode=0o777)
with lopen(cachep, 'wb') as f: with lopen(cachep, 'wb') as f:
f.write(u'''<?xml version="1.0" encoding="UTF-8"?> f.write(b'''<?xml version="1.0" encoding="UTF-8"?>
<cache xmlns="http://www.kinoma.com/FskCache/1"> <cache xmlns="http://www.kinoma.com/FskCache/1">
</cache> </cache>
'''.encode('utf8')) ''')
fsync(f) fsync(f)
return True return True
except: except:

View File

@ -35,13 +35,13 @@ Periodical identifier sample from a PRS-650:
''' '''
# Utility functions {{{ # Utility functions {{{
EMPTY_CARD_CACHE = '''\ EMPTY_CARD_CACHE = b'''\
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<cache xmlns="http://www.kinoma.com/FskCache/1"> <cache xmlns="http://www.kinoma.com/FskCache/1">
</cache> </cache>
''' '''
EMPTY_EXT_CACHE = '''\ EMPTY_EXT_CACHE = b'''\
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<cacheExt xmlns="http://www.sony.com/xmlns/product/prs/device/1"> <cacheExt xmlns="http://www.sony.com/xmlns/product/prs/device/1">
</cacheExt> </cacheExt>
@ -726,8 +726,8 @@ class XMLCache(object):
self.cleanup_whitespace(i) self.cleanup_whitespace(i)
raw = etree.tostring(self.roots[i], encoding='UTF-8', raw = etree.tostring(self.roots[i], encoding='UTF-8',
xml_declaration=True) xml_declaration=True)
raw = raw.replace("<?xml version='1.0' encoding='UTF-8'?>", raw = raw.replace(b"<?xml version='1.0' encoding='UTF-8'?>",
'<?xml version="1.0" encoding="UTF-8"?>') b'<?xml version="1.0" encoding="UTF-8"?>')
with lopen(path, 'wb') as f: with lopen(path, 'wb') as f:
f.write(raw) f.write(raw)
fsync(f) fsync(f)
@ -738,8 +738,8 @@ class XMLCache(object):
xml_declaration=True) xml_declaration=True)
except: except:
continue continue
raw = raw.replace("<?xml version='1.0' encoding='UTF-8'?>", raw = raw.replace(b"<?xml version='1.0' encoding='UTF-8'?>",
'<?xml version="1.0" encoding="UTF-8"?>') b'<?xml version="1.0" encoding="UTF-8"?>')
with lopen(path, 'wb') as f: with lopen(path, 'wb') as f:
f.write(raw) f.write(raw)
fsync(f) fsync(f)

View File

@ -204,7 +204,7 @@ def check_ebook_format(stream, current_guess):
ans = current_guess ans = current_guess
if current_guess.lower() in ('prc', 'mobi', 'azw', 'azw1', 'azw3'): if current_guess.lower() in ('prc', 'mobi', 'azw', 'azw1', 'azw3'):
stream.seek(0) stream.seek(0)
if stream.read(3) == 'TPZ': if stream.read(3) == b'TPZ':
ans = 'tpz' ans = 'tpz'
stream.seek(0) stream.seek(0)
return ans return ans

View File

@ -346,7 +346,7 @@ class MobiReader(object):
def cleanup_html(self): def cleanup_html(self):
self.log.debug('Cleaning up HTML...') self.log.debug('Cleaning up HTML...')
self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html) self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html)
if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower(): if self.book_header.ancient and b'<html' not in self.mobi_html[:300].lower():
self.processed_html = '<html><p>' + self.processed_html.replace('\n\n', '<p>') + '</html>' self.processed_html = '<html><p>' + self.processed_html.replace('\n\n', '<p>') + '</html>'
self.processed_html = self.processed_html.replace('\r\n', '\n') self.processed_html = self.processed_html.replace('\r\n', '\n')
self.processed_html = self.processed_html.replace('> <', '>\n<') self.processed_html = self.processed_html.replace('> <', '>\n<')