diff --git a/src/calibre/ebooks/metadata/archive.py b/src/calibre/ebooks/metadata/archive.py index fb24ae205d..56414ba0a3 100644 --- a/src/calibre/ebooks/metadata/archive.py +++ b/src/calibre/ebooks/metadata/archive.py @@ -30,7 +30,7 @@ def archive_type(stream): ans = None if id_ == stringFileHeader: ans = 'zip' - elif id_.startswith('Rar'): + elif id_.startswith(b'Rar'): ans = 'rar' try: stream.seek(pos) diff --git a/src/calibre/ebooks/metadata/ereader.py b/src/calibre/ebooks/metadata/ereader.py index 79119e759e..6a2d54832e 100644 --- a/src/calibre/ebooks/metadata/ereader.py +++ b/src/calibre/ebooks/metadata/ereader.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals ''' Read meta information from eReader pdb files. @@ -24,7 +25,7 @@ def get_cover(pheader, eheader): for i in range(eheader.image_count): raw = pheader.section_data(eheader.image_data_offset + i) - if raw[4:4 + 32].strip('\x00') == 'cover.png': + if raw[4:4 + 32].strip(b'\x00') == b'cover.png': cover_data = raw[62:] break @@ -48,7 +49,7 @@ def get_metadata(stream, extract_cover=True): try: mdata = pheader.section_data(hr.metadata_offset) - mdata = mdata.split('\x00') + mdata = mdata.decode('utf-8').split('\x00') mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0]) mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])] mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3]) @@ -80,7 +81,7 @@ def set_metadata(stream, mi): # Create a metadata record for the file if one does not alreay exist if not hr.has_metadata: - sections += ['', 'MeTaInFo\x00'] + sections += [b'', b'MeTaInFo\x00'] last_data = len(sections) - 1 for i in range(0, 132, 2): @@ -95,8 +96,8 @@ def set_metadata(stream, mi): # Merge the metadata into the file file_mi = get_metadata(stream, False) file_mi.smart_update(mi) - sections[hr.metadata_offset] = '%s\x00%s\x00%s\x00%s\x00%s\x00' % \ - (file_mi.title, authors_to_string(file_mi.authors), '', file_mi.publisher, file_mi.isbn) + sections[hr.metadata_offset] = ('%s\x00%s\x00%s\x00%s\x00%s\x00' % \ + (file_mi.title, authors_to_string(file_mi.authors), '', file_mi.publisher, file_mi.isbn)).encode('utf-8') # Rebuild the PDB wrapper because the offsets have changed due to the # new metadata. diff --git a/src/calibre/ebooks/metadata/haodoo.py b/src/calibre/ebooks/metadata/haodoo.py index 1d17980cd1..a5be356682 100644 --- a/src/calibre/ebooks/metadata/haodoo.py +++ b/src/calibre/ebooks/metadata/haodoo.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals ''' Read meta information from Haodoo.net pdb files. diff --git a/src/calibre/ebooks/metadata/imp.py b/src/calibre/ebooks/metadata/imp.py index d8991c8ea0..cfd0bed7b2 100644 --- a/src/calibre/ebooks/metadata/imp.py +++ b/src/calibre/ebooks/metadata/imp.py @@ -1,4 +1,5 @@ -from __future__ import print_function +from __future__ import absolute_import, division, print_function, unicode_literals + __license__ = 'GPL v3' __copyright__ = '2008, Ashish Kulkarni ' '''Read meta information from IMP files''' @@ -8,7 +9,7 @@ import sys from calibre.ebooks.metadata import MetaInformation, string_to_authors from polyglot.builtins import unicode_type -MAGIC = ['\x00\x01BOOKDOUG', '\x00\x02BOOKDOUG'] +MAGIC = [b'\x00\x01BOOKDOUG', b'\x00\x02BOOKDOUG'] def get_metadata(stream): @@ -18,18 +19,18 @@ def get_metadata(stream): stream.seek(0) try: if stream.read(10) not in MAGIC: - print(u'Couldn\'t read IMP header from file', file=sys.stderr) + print('Couldn\'t read IMP header from file', file=sys.stderr) return mi def cString(skip=0): - result = '' + result = b'' while 1: data = stream.read(1) - if data == '\x00': + if data == b'\x00': if not skip: - return result + return result.decode('utf-8') skip -= 1 - result, data = '', '' + result, data = b'', b'' result += data stream.read(38) # skip past some uninteresting headers @@ -44,6 +45,6 @@ def get_metadata(stream): if category: mi.category = category except Exception as err: - msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode_type(err)) + msg = 'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode_type(err)) print(msg.encode('utf8'), file=sys.stderr) return mi diff --git a/src/calibre/ebooks/metadata/lrx.py b/src/calibre/ebooks/metadata/lrx.py index bfcb2a4c81..678d05f294 100644 --- a/src/calibre/ebooks/metadata/lrx.py +++ b/src/calibre/ebooks/metadata/lrx.py @@ -1,4 +1,6 @@ #!/usr/bin/env python2 +from __future__ import absolute_import, division, print_function, unicode_literals + __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' @@ -39,7 +41,7 @@ def get_metadata(f): read = lambda at, amount: _read(f, at, amount) f.seek(0) buf = f.read(12) - if buf[4:] == 'ftypLRX2': + if buf[4:] == b'ftypLRX2': offset = 0 while True: offset += word_be(buf[:4]) @@ -47,7 +49,7 @@ def get_metadata(f): buf = read(offset, 8) except: raise ValueError('Not a valid LRX file') - if buf[4:] == 'bbeb': + if buf[4:] == b'bbeb': break offset += 8 buf = read(offset, 16) @@ -80,8 +82,7 @@ def get_metadata(f): mi.language = root.find('DocInfo').find('Language').text return mi - elif buf[4:8] == 'LRX': + elif buf[4:8] == b'LRX': raise ValueError('Librie LRX format not supported') else: raise ValueError('Not a LRX file') - diff --git a/src/calibre/ebooks/metadata/pml.py b/src/calibre/ebooks/metadata/pml.py index fbacae4b17..8f579074d9 100644 --- a/src/calibre/ebooks/metadata/pml.py +++ b/src/calibre/ebooks/metadata/pml.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' @@ -24,7 +25,7 @@ def get_metadata(stream, extract_cover=True): mi = MetaInformation(_('Unknown'), [_('Unknown')]) stream.seek(0) - pml = '' + pml = b'' if stream.name.endswith('.pmlz'): with TemporaryDirectory('_unpmlz') as tdir: zf = ZipFile(stream) @@ -41,22 +42,22 @@ def get_metadata(stream, extract_cover=True): if extract_cover: mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], os.path.abspath(os.path.dirname(stream.name))) - for comment in re.findall(r'(?mus)\\v.*?\\v', pml): - m = re.search(r'TITLE="(.*?)"', comment) + for comment in re.findall(br'(?ms)\\v.*?\\v', pml): + m = re.search(br'TITLE="(.*?)"', comment) if m: mi.title = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) - m = re.search(r'AUTHOR="(.*?)"', comment) + m = re.search(br'AUTHOR="(.*?)"', comment) if m: if mi.authors == [_('Unknown')]: mi.authors = [] mi.authors.append(re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))) - m = re.search(r'PUBLISHER="(.*?)"', comment) + m = re.search(br'PUBLISHER="(.*?)"', comment) if m: mi.publisher = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) - m = re.search(r'COPYRIGHT="(.*?)"', comment) + m = re.search(br'COPYRIGHT="(.*?)"', comment) if m: mi.rights = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) - m = re.search(r'ISBN="(.*?)"', comment) + m = re.search(br'ISBN="(.*?)"', comment) if m: mi.isbn = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))