This commit is contained in:
Kovid Goyal 2019-04-29 13:10:22 +05:30
commit 1fe67d919b
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 22 additions and 18 deletions

View File

@ -5,10 +5,11 @@ __copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
import sys, struct import sys, struct
from calibre import prints
from calibre.ebooks.metadata import MetaInformation, string_to_authors from calibre.ebooks.metadata import MetaInformation, string_to_authors
from polyglot.builtins import unicode_type from polyglot.builtins import unicode_type
MAGIC = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00' MAGIC = b'\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00'
def get_metadata(stream): def get_metadata(stream):
@ -37,7 +38,7 @@ def get_metadata(stream):
return mi return mi
stream.seek(offset) stream.seek(offset)
info = stream.read(length).splitlines() info = stream.read(length).decode('utf-8', 'replace').splitlines()
for line in info: for line in info:
if '=' not in line: if '=' not in line:
continue continue
@ -45,10 +46,9 @@ def get_metadata(stream):
if key.strip() == 'TITLE': if key.strip() == 'TITLE':
mi.title = value.strip() mi.title = value.strip()
elif key.strip() == 'AUTHOR': elif key.strip() == 'AUTHOR':
mi.author = value
mi.authors = string_to_authors(value) mi.authors = string_to_authors(value)
except Exception as err: except Exception as err:
msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode_type(err)) msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode_type(err))
print(msg.encode('utf8'), file=sys.stderr) prints(msg, file=sys.stderr)
raise raise
return mi return mi

View File

@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
import os import os
HEADER = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00' HEADER = b'\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00'
class RocketBookError(Exception): class RocketBookError(Exception):

View File

@ -64,7 +64,7 @@ class Reader(object):
toc = RBToc() toc = RBToc()
for i in range(pages): for i in range(pages):
name = unquote(self.stream.read(32).strip('\x00')) name = unquote(self.stream.read(32).strip(b'\x00'))
size, offset, flags = self.read_i32(), self.read_i32(), self.read_i32() size, offset, flags = self.read_i32(), self.read_i32(), self.read_i32()
toc.append(RBToc.Item(name=name, size=size, offset=offset, flags=flags)) toc.append(RBToc.Item(name=name, size=size, offset=offset, flags=flags))
@ -90,7 +90,7 @@ class Reader(object):
else: else:
output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding, 'replace') output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
with open(os.path.join(output_dir, toc_item.name), 'wb') as html: with open(os.path.join(output_dir, toc_item.name.decode('utf-8')), 'wb') as html:
html.write(output.replace('<TITLE>', '<TITLE> ').encode('utf-8')) html.write(output.replace('<TITLE>', '<TITLE> ').encode('utf-8'))
def get_image(self, toc_item, output_dir): def get_image(self, toc_item, output_dir):
@ -100,7 +100,7 @@ class Reader(object):
self.stream.seek(toc_item.offset) self.stream.seek(toc_item.offset)
data = self.stream.read(toc_item.size) data = self.stream.read(toc_item.size)
with open(os.path.join(output_dir, toc_item.name), 'wb') as img: with open(os.path.join(output_dir, toc_item.name.decode('utf-8')), 'wb') as img:
img.write(data) img.write(data)
def extract_content(self, output_dir): def extract_content(self, output_dir):
@ -109,13 +109,14 @@ class Reader(object):
images = [] images = []
for item in self.toc: for item in self.toc:
if item.name.lower().endswith('html'): iname = item.name.decode('utf-8')
self.log.debug('HTML item %s found...' % item.name) if iname.lower().endswith('html'):
html.append(item.name) self.log.debug('HTML item %s found...' % iname)
html.append(iname)
self.get_text(item, output_dir) self.get_text(item, output_dir)
if item.name.lower().endswith('png'): if iname.lower().endswith('png'):
self.log.debug('PNG item %s found...' % item.name) self.log.debug('PNG item %s found...' % iname)
images.append(item.name) images.append(iname)
self.get_image(item, output_dir) self.get_image(item, output_dir)
opf_path = self.create_opf(output_dir, html, images) opf_path = self.create_opf(output_dir, html, images)

View File

@ -73,13 +73,13 @@ class RBWriter(object):
out_stream.write(struct.pack('<I', page_count)) out_stream.write(struct.pack('<I', page_count))
offset = out_stream.tell() + (len(toc_items) * 44) offset = out_stream.tell() + (len(toc_items) * 44)
for item in toc_items: for item in toc_items:
out_stream.write(item.name) out_stream.write(item.name.encode('utf-8'))
out_stream.write(struct.pack('<I', item.size)) out_stream.write(struct.pack('<I', item.size))
out_stream.write(struct.pack('<I', offset)) out_stream.write(struct.pack('<I', offset))
out_stream.write(struct.pack('<I', item.flags)) out_stream.write(struct.pack('<I', item.flags))
offset += item.size offset += item.size
out_stream.write(info[0][1]) out_stream.write(info[0][1].encode('utf-8'))
self.log.debug('Writing compressed RB HTHML...') self.log.debug('Writing compressed RB HTHML...')
# Compressed text with proper heading # Compressed text with proper heading
@ -92,7 +92,10 @@ class RBWriter(object):
self.log.debug('Writing images...') self.log.debug('Writing images...')
for item in hidx+images: for item in hidx+images:
out_stream.write(item[1]) w = item[1]
if not isinstance(w, bytes):
w = w.encode('utf-8')
out_stream.write(w)
total_size = out_stream.tell() total_size = out_stream.tell()
out_stream.seek(0x1c) out_stream.seek(0x1c)
@ -104,7 +107,7 @@ class RBWriter(object):
size = len(text) size = len(text)
pages = [] pages = []
for i in range(0, (len(text) + TEXT_RECORD_SIZE-1) / TEXT_RECORD_SIZE): for i in range(0, (len(text) + TEXT_RECORD_SIZE-1) // TEXT_RECORD_SIZE):
zobj = zlib.compressobj(9, zlib.DEFLATED, 13, 8, 0) zobj = zlib.compressobj(9, zlib.DEFLATED, 13, 8, 0)
pages.append(zobj.compress(text[i * TEXT_RECORD_SIZE : (i * TEXT_RECORD_SIZE) + TEXT_RECORD_SIZE]) + zobj.flush()) pages.append(zobj.compress(text[i * TEXT_RECORD_SIZE : (i * TEXT_RECORD_SIZE) + TEXT_RECORD_SIZE]) + zobj.flush())