mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
py3: fix bytes handling by using bytes as needed
This commit is contained in:
parent
2da800aa6e
commit
7993757099
@ -30,7 +30,7 @@ def archive_type(stream):
|
|||||||
ans = None
|
ans = None
|
||||||
if id_ == stringFileHeader:
|
if id_ == stringFileHeader:
|
||||||
ans = 'zip'
|
ans = 'zip'
|
||||||
elif id_.startswith('Rar'):
|
elif id_.startswith(b'Rar'):
|
||||||
ans = 'rar'
|
ans = 'rar'
|
||||||
try:
|
try:
|
||||||
stream.seek(pos)
|
stream.seek(pos)
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Read meta information from eReader pdb files.
|
Read meta information from eReader pdb files.
|
||||||
@ -24,7 +25,7 @@ def get_cover(pheader, eheader):
|
|||||||
for i in range(eheader.image_count):
|
for i in range(eheader.image_count):
|
||||||
raw = pheader.section_data(eheader.image_data_offset + i)
|
raw = pheader.section_data(eheader.image_data_offset + i)
|
||||||
|
|
||||||
if raw[4:4 + 32].strip('\x00') == 'cover.png':
|
if raw[4:4 + 32].strip(b'\x00') == b'cover.png':
|
||||||
cover_data = raw[62:]
|
cover_data = raw[62:]
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -48,7 +49,7 @@ def get_metadata(stream, extract_cover=True):
|
|||||||
try:
|
try:
|
||||||
mdata = pheader.section_data(hr.metadata_offset)
|
mdata = pheader.section_data(hr.metadata_offset)
|
||||||
|
|
||||||
mdata = mdata.split('\x00')
|
mdata = mdata.decode('utf-8').split('\x00')
|
||||||
mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0])
|
mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0])
|
||||||
mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])]
|
mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])]
|
||||||
mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3])
|
mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3])
|
||||||
@ -80,7 +81,7 @@ def set_metadata(stream, mi):
|
|||||||
|
|
||||||
# Create a metadata record for the file if one does not already exist
|
# Create a metadata record for the file if one does not already exist
|
||||||
if not hr.has_metadata:
|
if not hr.has_metadata:
|
||||||
sections += ['', 'MeTaInFo\x00']
|
sections += [b'', b'MeTaInFo\x00']
|
||||||
last_data = len(sections) - 1
|
last_data = len(sections) - 1
|
||||||
|
|
||||||
for i in range(0, 132, 2):
|
for i in range(0, 132, 2):
|
||||||
@ -95,8 +96,8 @@ def set_metadata(stream, mi):
|
|||||||
# Merge the metadata into the file
|
# Merge the metadata into the file
|
||||||
file_mi = get_metadata(stream, False)
|
file_mi = get_metadata(stream, False)
|
||||||
file_mi.smart_update(mi)
|
file_mi.smart_update(mi)
|
||||||
sections[hr.metadata_offset] = '%s\x00%s\x00%s\x00%s\x00%s\x00' % \
|
sections[hr.metadata_offset] = ('%s\x00%s\x00%s\x00%s\x00%s\x00' % \
|
||||||
(file_mi.title, authors_to_string(file_mi.authors), '', file_mi.publisher, file_mi.isbn)
|
(file_mi.title, authors_to_string(file_mi.authors), '', file_mi.publisher, file_mi.isbn)).encode('utf-8')
|
||||||
|
|
||||||
# Rebuild the PDB wrapper because the offsets have changed due to the
|
# Rebuild the PDB wrapper because the offsets have changed due to the
|
||||||
# new metadata.
|
# new metadata.
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Read meta information from Haodoo.net pdb files.
|
Read meta information from Haodoo.net pdb files.
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from __future__ import print_function
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
|
__copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
|
||||||
'''Read meta information from IMP files'''
|
'''Read meta information from IMP files'''
|
||||||
@ -8,7 +9,7 @@ import sys
|
|||||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||||
from polyglot.builtins import unicode_type
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
MAGIC = ['\x00\x01BOOKDOUG', '\x00\x02BOOKDOUG']
|
MAGIC = [b'\x00\x01BOOKDOUG', b'\x00\x02BOOKDOUG']
|
||||||
|
|
||||||
|
|
||||||
def get_metadata(stream):
|
def get_metadata(stream):
|
||||||
@ -18,18 +19,18 @@ def get_metadata(stream):
|
|||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
try:
|
try:
|
||||||
if stream.read(10) not in MAGIC:
|
if stream.read(10) not in MAGIC:
|
||||||
print(u'Couldn\'t read IMP header from file', file=sys.stderr)
|
print('Couldn\'t read IMP header from file', file=sys.stderr)
|
||||||
return mi
|
return mi
|
||||||
|
|
||||||
def cString(skip=0):
|
def cString(skip=0):
|
||||||
result = ''
|
result = b''
|
||||||
while 1:
|
while 1:
|
||||||
data = stream.read(1)
|
data = stream.read(1)
|
||||||
if data == '\x00':
|
if data == b'\x00':
|
||||||
if not skip:
|
if not skip:
|
||||||
return result
|
return result.decode('utf-8')
|
||||||
skip -= 1
|
skip -= 1
|
||||||
result, data = '', ''
|
result, data = b'', b''
|
||||||
result += data
|
result += data
|
||||||
|
|
||||||
stream.read(38) # skip past some uninteresting headers
|
stream.read(38) # skip past some uninteresting headers
|
||||||
@ -44,6 +45,6 @@ def get_metadata(stream):
|
|||||||
if category:
|
if category:
|
||||||
mi.category = category
|
mi.category = category
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode_type(err))
|
msg = 'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode_type(err))
|
||||||
print(msg.encode('utf8'), file=sys.stderr)
|
print(msg.encode('utf8'), file=sys.stderr)
|
||||||
return mi
|
return mi
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
@ -39,7 +41,7 @@ def get_metadata(f):
|
|||||||
read = lambda at, amount: _read(f, at, amount)
|
read = lambda at, amount: _read(f, at, amount)
|
||||||
f.seek(0)
|
f.seek(0)
|
||||||
buf = f.read(12)
|
buf = f.read(12)
|
||||||
if buf[4:] == 'ftypLRX2':
|
if buf[4:] == b'ftypLRX2':
|
||||||
offset = 0
|
offset = 0
|
||||||
while True:
|
while True:
|
||||||
offset += word_be(buf[:4])
|
offset += word_be(buf[:4])
|
||||||
@ -47,7 +49,7 @@ def get_metadata(f):
|
|||||||
buf = read(offset, 8)
|
buf = read(offset, 8)
|
||||||
except:
|
except:
|
||||||
raise ValueError('Not a valid LRX file')
|
raise ValueError('Not a valid LRX file')
|
||||||
if buf[4:] == 'bbeb':
|
if buf[4:] == b'bbeb':
|
||||||
break
|
break
|
||||||
offset += 8
|
offset += 8
|
||||||
buf = read(offset, 16)
|
buf = read(offset, 16)
|
||||||
@ -80,8 +82,7 @@ def get_metadata(f):
|
|||||||
mi.language = root.find('DocInfo').find('Language').text
|
mi.language = root.find('DocInfo').find('Language').text
|
||||||
return mi
|
return mi
|
||||||
|
|
||||||
elif buf[4:8] == 'LRX':
|
elif buf[4:8] == b'LRX':
|
||||||
raise ValueError('Librie LRX format not supported')
|
raise ValueError('Librie LRX format not supported')
|
||||||
else:
|
else:
|
||||||
raise ValueError('Not a LRX file')
|
raise ValueError('Not a LRX file')
|
||||||
|
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
@ -24,7 +25,7 @@ def get_metadata(stream, extract_cover=True):
|
|||||||
mi = MetaInformation(_('Unknown'), [_('Unknown')])
|
mi = MetaInformation(_('Unknown'), [_('Unknown')])
|
||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
|
|
||||||
pml = ''
|
pml = b''
|
||||||
if stream.name.endswith('.pmlz'):
|
if stream.name.endswith('.pmlz'):
|
||||||
with TemporaryDirectory('_unpmlz') as tdir:
|
with TemporaryDirectory('_unpmlz') as tdir:
|
||||||
zf = ZipFile(stream)
|
zf = ZipFile(stream)
|
||||||
@ -41,22 +42,22 @@ def get_metadata(stream, extract_cover=True):
|
|||||||
if extract_cover:
|
if extract_cover:
|
||||||
mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], os.path.abspath(os.path.dirname(stream.name)))
|
mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], os.path.abspath(os.path.dirname(stream.name)))
|
||||||
|
|
||||||
for comment in re.findall(r'(?mus)\\v.*?\\v', pml):
|
for comment in re.findall(br'(?ms)\\v.*?\\v', pml):
|
||||||
m = re.search(r'TITLE="(.*?)"', comment)
|
m = re.search(br'TITLE="(.*?)"', comment)
|
||||||
if m:
|
if m:
|
||||||
mi.title = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
|
mi.title = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
|
||||||
m = re.search(r'AUTHOR="(.*?)"', comment)
|
m = re.search(br'AUTHOR="(.*?)"', comment)
|
||||||
if m:
|
if m:
|
||||||
if mi.authors == [_('Unknown')]:
|
if mi.authors == [_('Unknown')]:
|
||||||
mi.authors = []
|
mi.authors = []
|
||||||
mi.authors.append(re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))))
|
mi.authors.append(re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))))
|
||||||
m = re.search(r'PUBLISHER="(.*?)"', comment)
|
m = re.search(br'PUBLISHER="(.*?)"', comment)
|
||||||
if m:
|
if m:
|
||||||
mi.publisher = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
|
mi.publisher = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
|
||||||
m = re.search(r'COPYRIGHT="(.*?)"', comment)
|
m = re.search(br'COPYRIGHT="(.*?)"', comment)
|
||||||
if m:
|
if m:
|
||||||
mi.rights = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
|
mi.rights = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
|
||||||
m = re.search(r'ISBN="(.*?)"', comment)
|
m = re.search(br'ISBN="(.*?)"', comment)
|
||||||
if m:
|
if m:
|
||||||
mi.isbn = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
|
mi.isbn = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user