py3: fix bytes handling by using bytes as needed

This commit is contained in:
Eli Schwartz 2019-06-17 17:27:58 -04:00
parent 2da800aa6e
commit 7993757099
No known key found for this signature in database
GPG Key ID: CEB167EFB5722BD6
6 changed files with 30 additions and 25 deletions

View File

@ -30,7 +30,7 @@ def archive_type(stream):
ans = None
if id_ == stringFileHeader:
ans = 'zip'
elif id_.startswith('Rar'):
elif id_.startswith(b'Rar'):
ans = 'rar'
try:
stream.seek(pos)

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Read meta information from eReader pdb files.
@ -24,7 +25,7 @@ def get_cover(pheader, eheader):
for i in range(eheader.image_count):
raw = pheader.section_data(eheader.image_data_offset + i)
if raw[4:4 + 32].strip('\x00') == 'cover.png':
if raw[4:4 + 32].strip(b'\x00') == b'cover.png':
cover_data = raw[62:]
break
@ -48,7 +49,7 @@ def get_metadata(stream, extract_cover=True):
try:
mdata = pheader.section_data(hr.metadata_offset)
mdata = mdata.split('\x00')
mdata = mdata.decode('utf-8').split('\x00')
mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0])
mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])]
mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3])
@ -80,7 +81,7 @@ def set_metadata(stream, mi):
# Create a metadata record for the file if one does not already exist
if not hr.has_metadata:
sections += ['', 'MeTaInFo\x00']
sections += [b'', b'MeTaInFo\x00']
last_data = len(sections) - 1
for i in range(0, 132, 2):
@ -95,8 +96,8 @@ def set_metadata(stream, mi):
# Merge the metadata into the file
file_mi = get_metadata(stream, False)
file_mi.smart_update(mi)
sections[hr.metadata_offset] = '%s\x00%s\x00%s\x00%s\x00%s\x00' % \
(file_mi.title, authors_to_string(file_mi.authors), '', file_mi.publisher, file_mi.isbn)
sections[hr.metadata_offset] = ('%s\x00%s\x00%s\x00%s\x00%s\x00' % \
(file_mi.title, authors_to_string(file_mi.authors), '', file_mi.publisher, file_mi.isbn)).encode('utf-8')
# Rebuild the PDB wrapper because the offsets have changed due to the
# new metadata.

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Read meta information from Haodoo.net pdb files.

View File

@ -1,4 +1,5 @@
from __future__ import print_function
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
'''Read meta information from IMP files'''
@ -8,7 +9,7 @@ import sys
from calibre.ebooks.metadata import MetaInformation, string_to_authors
from polyglot.builtins import unicode_type
MAGIC = ['\x00\x01BOOKDOUG', '\x00\x02BOOKDOUG']
MAGIC = [b'\x00\x01BOOKDOUG', b'\x00\x02BOOKDOUG']
def get_metadata(stream):
@ -18,18 +19,18 @@ def get_metadata(stream):
stream.seek(0)
try:
if stream.read(10) not in MAGIC:
print(u'Couldn\'t read IMP header from file', file=sys.stderr)
print('Couldn\'t read IMP header from file', file=sys.stderr)
return mi
def cString(skip=0):
result = ''
result = b''
while 1:
data = stream.read(1)
if data == '\x00':
if data == b'\x00':
if not skip:
return result
return result.decode('utf-8')
skip -= 1
result, data = '', ''
result, data = b'', b''
result += data
stream.read(38) # skip past some uninteresting headers
@ -44,6 +45,6 @@ def get_metadata(stream):
if category:
mi.category = category
except Exception as err:
msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode_type(err))
msg = 'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode_type(err))
print(msg.encode('utf8'), file=sys.stderr)
return mi

View File

@ -1,4 +1,6 @@
#!/usr/bin/env python2
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
@ -39,7 +41,7 @@ def get_metadata(f):
read = lambda at, amount: _read(f, at, amount)
f.seek(0)
buf = f.read(12)
if buf[4:] == 'ftypLRX2':
if buf[4:] == b'ftypLRX2':
offset = 0
while True:
offset += word_be(buf[:4])
@ -47,7 +49,7 @@ def get_metadata(f):
buf = read(offset, 8)
except:
raise ValueError('Not a valid LRX file')
if buf[4:] == 'bbeb':
if buf[4:] == b'bbeb':
break
offset += 8
buf = read(offset, 16)
@ -80,8 +82,7 @@ def get_metadata(f):
mi.language = root.find('DocInfo').find('Language').text
return mi
elif buf[4:8] == 'LRX':
elif buf[4:8] == b'LRX':
raise ValueError('Librie LRX format not supported')
else:
raise ValueError('Not a LRX file')

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
@ -24,7 +25,7 @@ def get_metadata(stream, extract_cover=True):
mi = MetaInformation(_('Unknown'), [_('Unknown')])
stream.seek(0)
pml = ''
pml = b''
if stream.name.endswith('.pmlz'):
with TemporaryDirectory('_unpmlz') as tdir:
zf = ZipFile(stream)
@ -41,22 +42,22 @@ def get_metadata(stream, extract_cover=True):
if extract_cover:
mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], os.path.abspath(os.path.dirname(stream.name)))
for comment in re.findall(r'(?mus)\\v.*?\\v', pml):
m = re.search(r'TITLE="(.*?)"', comment)
for comment in re.findall(br'(?ms)\\v.*?\\v', pml):
m = re.search(br'TITLE="(.*?)"', comment)
if m:
mi.title = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
m = re.search(r'AUTHOR="(.*?)"', comment)
m = re.search(br'AUTHOR="(.*?)"', comment)
if m:
if mi.authors == [_('Unknown')]:
mi.authors = []
mi.authors.append(re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))))
m = re.search(r'PUBLISHER="(.*?)"', comment)
m = re.search(br'PUBLISHER="(.*?)"', comment)
if m:
mi.publisher = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
m = re.search(r'COPYRIGHT="(.*?)"', comment)
m = re.search(br'COPYRIGHT="(.*?)"', comment)
if m:
mi.rights = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
m = re.search(r'ISBN="(.*?)"', comment)
m = re.search(br'ISBN="(.*?)"', comment)
if m:
mi.isbn = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))