diff --git a/src/calibre/ebooks/metadata/rb.py b/src/calibre/ebooks/metadata/rb.py index a193ae44d9..251faeb00f 100644 --- a/src/calibre/ebooks/metadata/rb.py +++ b/src/calibre/ebooks/metadata/rb.py @@ -5,10 +5,11 @@ __copyright__ = '2008, Ashish Kulkarni ' import sys, struct +from calibre import prints from calibre.ebooks.metadata import MetaInformation, string_to_authors from polyglot.builtins import unicode_type -MAGIC = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00' +MAGIC = b'\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00' def get_metadata(stream): @@ -37,7 +38,7 @@ def get_metadata(stream): return mi stream.seek(offset) - info = stream.read(length).splitlines() + info = stream.read(length).decode('utf-8', 'replace').splitlines() for line in info: if '=' not in line: continue @@ -45,10 +46,9 @@ def get_metadata(stream): if key.strip() == 'TITLE': mi.title = value.strip() elif key.strip() == 'AUTHOR': - mi.author = value mi.authors = string_to_authors(value) except Exception as err: msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode_type(err)) - print(msg.encode('utf8'), file=sys.stderr) + prints(msg, file=sys.stderr) raise return mi diff --git a/src/calibre/ebooks/rb/__init__.py b/src/calibre/ebooks/rb/__init__.py index acf9c04995..f45b966dad 100644 --- a/src/calibre/ebooks/rb/__init__.py +++ b/src/calibre/ebooks/rb/__init__.py @@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en' import os -HEADER = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00' +HEADER = b'\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00' class RocketBookError(Exception): diff --git a/src/calibre/ebooks/rb/reader.py b/src/calibre/ebooks/rb/reader.py index da5330c7f1..0d111107fa 100644 --- a/src/calibre/ebooks/rb/reader.py +++ b/src/calibre/ebooks/rb/reader.py @@ -64,7 +64,7 @@ class Reader(object): toc = RBToc() for i in range(pages): - name = unquote(self.stream.read(32).strip('\x00')) + name = unquote(self.stream.read(32).strip(b'\x00')) size, offset, flags = self.read_i32(), self.read_i32(), self.read_i32() toc.append(RBToc.Item(name=name, size=size, offset=offset, flags=flags)) @@ -90,7 +90,7 @@ class Reader(object): else: output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding, 'replace') - with open(os.path.join(output_dir, toc_item.name), 'wb') as html: + with open(os.path.join(output_dir, toc_item.name.decode('utf-8')), 'wb') as html: html.write(output.replace('', '<TITLE> ').encode('utf-8')) def get_image(self, toc_item, output_dir): @@ -100,7 +100,7 @@ class Reader(object): self.stream.seek(toc_item.offset) data = self.stream.read(toc_item.size) - with open(os.path.join(output_dir, toc_item.name), 'wb') as img: + with open(os.path.join(output_dir, toc_item.name.decode('utf-8')), 'wb') as img: img.write(data) def extract_content(self, output_dir): @@ -109,13 +109,14 @@ class Reader(object): images = [] for item in self.toc: - if item.name.lower().endswith('html'): - self.log.debug('HTML item %s found...' % item.name) - html.append(item.name) + iname = item.name.decode('utf-8') + if iname.lower().endswith('html'): + self.log.debug('HTML item %s found...' % iname) + html.append(iname) self.get_text(item, output_dir) - if item.name.lower().endswith('png'): - self.log.debug('PNG item %s found...' % item.name) - images.append(item.name) + if iname.lower().endswith('png'): + self.log.debug('PNG item %s found...' % iname) + images.append(iname) self.get_image(item, output_dir) opf_path = self.create_opf(output_dir, html, images) diff --git a/src/calibre/ebooks/rb/writer.py b/src/calibre/ebooks/rb/writer.py index dc83476b39..9f4818725f 100644 --- a/src/calibre/ebooks/rb/writer.py +++ b/src/calibre/ebooks/rb/writer.py @@ -73,13 +73,13 @@ class RBWriter(object): out_stream.write(struct.pack('<I', page_count)) offset = out_stream.tell() + (len(toc_items) * 44) for item in toc_items: - out_stream.write(item.name) + out_stream.write(item.name.encode('utf-8')) out_stream.write(struct.pack('<I', item.size)) out_stream.write(struct.pack('<I', offset)) out_stream.write(struct.pack('<I', item.flags)) offset += item.size - out_stream.write(info[0][1]) + out_stream.write(info[0][1].encode('utf-8')) self.log.debug('Writing compressed RB HTHML...') # Compressed text with proper heading @@ -92,7 +92,10 @@ class RBWriter(object): self.log.debug('Writing images...') for item in hidx+images: - out_stream.write(item[1]) + w = item[1] + if not isinstance(w, bytes): + w = w.encode('utf-8') + out_stream.write(w) total_size = out_stream.tell() out_stream.seek(0x1c) @@ -104,7 +107,7 @@ class RBWriter(object): size = len(text) pages = [] - for i in range(0, (len(text) + TEXT_RECORD_SIZE-1) / TEXT_RECORD_SIZE): + for i in range(0, (len(text) + TEXT_RECORD_SIZE-1) // TEXT_RECORD_SIZE): zobj = zlib.compressobj(9, zlib.DEFLATED, 13, 8, 0) pages.append(zobj.compress(text[i * TEXT_RECORD_SIZE : (i * TEXT_RECORD_SIZE) + TEXT_RECORD_SIZE]) + zobj.flush())