This commit is contained in:
Kovid Goyal 2019-04-11 08:58:25 +05:30
commit 0e1a89c338
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 18 additions and 18 deletions

View File

@ -79,17 +79,17 @@ class MobiReader(object):
stream = open(filename_or_stream, 'rb') stream = open(filename_or_stream, 'rb')
raw = stream.read() raw = stream.read()
if raw.startswith('TPZ'): if raw.startswith(b'TPZ'):
raise TopazError(_('This is an Amazon Topaz book. It cannot be processed.')) raise TopazError(_('This is an Amazon Topaz book. It cannot be processed.'))
if raw.startswith(b'\xeaDRMION\xee'): if raw.startswith(b'\xeaDRMION\xee'):
raise KFXError() raise KFXError()
self.header = raw[0:72] self.header = raw[0:72]
self.name = self.header[:32].replace('\x00', '') self.name = self.header[:32].replace(b'\x00', b'')
self.num_sections, = struct.unpack('>H', raw[76:78]) self.num_sections, = struct.unpack('>H', raw[76:78])
self.ident = self.header[0x3C:0x3C + 8].upper() self.ident = self.header[0x3C:0x3C + 8].upper()
if self.ident not in ['BOOKMOBI', 'TEXTREAD']: if self.ident not in [b'BOOKMOBI', b'TEXTREAD']:
raise MobiError('Unknown book type: %s' % repr(self.ident)) raise MobiError('Unknown book type: %s' % repr(self.ident))
self.sections = [] self.sections = []
@ -750,7 +750,7 @@ class MobiReader(object):
def sizeof_trailing_entry(ptr, psize): def sizeof_trailing_entry(ptr, psize):
bitpos, result = 0, 0 bitpos, result = 0, 0
while True: while True:
v = ord(ptr[psize-1]) v = ord(ptr[psize-1:psize])
result |= (v & 0x7F) << bitpos result |= (v & 0x7F) << bitpos
bitpos += 7 bitpos += 7
psize -= 1 psize -= 1
@ -769,7 +769,8 @@ class MobiReader(object):
return 0 return 0
flags >>= 1 flags >>= 1
if self.book_header.extra_flags & 1: if self.book_header.extra_flags & 1:
num += (ord(data[size - num - 1]) & 0x3) + 1 off = size - num - 1
num += (ord(data[off:off+1]) & 0x3) + 1
return num return num
def warn_about_trailing_entry_corruption(self): def warn_about_trailing_entry_corruption(self):
@ -791,7 +792,7 @@ class MobiReader(object):
self.mobi_html = b'' self.mobi_html = b''
if self.book_header.compression_type == 'DH': if self.book_header.compression_type == b'DH':
huffs = [self.sections[i][0] for i in huffs = [self.sections[i][0] for i in
range(self.book_header.huff_offset, range(self.book_header.huff_offset,
self.book_header.huff_offset + self.book_header.huff_number)] self.book_header.huff_offset + self.book_header.huff_number)]
@ -800,23 +801,23 @@ class MobiReader(object):
huff = HuffReader(huffs) huff = HuffReader(huffs)
unpack = huff.unpack unpack = huff.unpack
elif self.book_header.compression_type == '\x00\x02': elif self.book_header.compression_type == b'\x00\x02':
unpack = decompress_doc unpack = decompress_doc
elif self.book_header.compression_type == '\x00\x01': elif self.book_header.compression_type == b'\x00\x01':
unpack = lambda x: x unpack = lambda x: x
else: else:
raise MobiError('Unknown compression algorithm: %s' % repr(self.book_header.compression_type)) raise MobiError('Unknown compression algorithm: %r' % self.book_header.compression_type)
self.mobi_html = b''.join(map(unpack, text_sections)) self.mobi_html = b''.join(map(unpack, text_sections))
if self.mobi_html.endswith(b'#'): if self.mobi_html.endswith(b'#'):
self.mobi_html = self.mobi_html[:-1] self.mobi_html = self.mobi_html[:-1]
if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower(): if self.book_header.ancient and b'<html' not in self.mobi_html[:300].lower():
self.mobi_html = self.mobi_html.replace('\r ', '\n\n ') self.mobi_html = self.mobi_html.replace(b'\r ', b'\n\n ')
self.mobi_html = self.mobi_html.replace('\0', '') self.mobi_html = self.mobi_html.replace(b'\0', b'')
if self.book_header.codec == 'cp1252': if self.book_header.codec == 'cp1252':
self.mobi_html = self.mobi_html.replace('\x1e', '') # record separator self.mobi_html = self.mobi_html.replace(b'\x1e', b'') # record separator
self.mobi_html = self.mobi_html.replace('\x02', '') # start of text self.mobi_html = self.mobi_html.replace(b'\x02', b'') # start of text
return processed_records return processed_records
def replace_page_breaks(self): def replace_page_breaks(self):

View File

@ -24,7 +24,7 @@ from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.mobi.utils import read_font_record from calibre.ebooks.mobi.utils import read_font_record
from calibre.ebooks.oeb.parse_utils import parse_html from calibre.ebooks.oeb.parse_utils import parse_html
from calibre.ebooks.oeb.base import XPath, XHTML, xml2text from calibre.ebooks.oeb.base import XPath, XHTML, xml2text
from polyglot.builtins import range, zip from polyglot.builtins import range, zip, unicode_type
from polyglot.urllib import urldefrag from polyglot.urllib import urldefrag
Part = namedtuple('Part', Part = namedtuple('Part',
@ -85,7 +85,7 @@ class Mobi8Reader(object):
def __call__(self): def __call__(self):
self.mobi6_reader.check_for_drm() self.mobi6_reader.check_for_drm()
self.aid_anchor_suffix = bytes(uuid4().hex) self.aid_anchor_suffix = uuid4().hex.encode('utf-8')
bh = self.mobi6_reader.book_header bh = self.mobi6_reader.book_header
if self.mobi6_reader.kf8_type == 'joint': if self.mobi6_reader.kf8_type == 'joint':
offset = self.mobi6_reader.kf8_boundary + 2 offset = self.mobi6_reader.kf8_boundary + 2
@ -225,8 +225,7 @@ class Mobi8Reader(object):
self.parts.append(skeleton) self.parts.append(skeleton)
if divcnt < 1: if divcnt < 1:
# Empty file # Empty file
import uuid aidtext = unicode_type(uuid4())
aidtext = str(uuid.uuid4())
filename = aidtext + '.html' filename = aidtext + '.html'
self.partinfo.append(Part(skelnum, 'text', filename, skelpos, self.partinfo.append(Part(skelnum, 'text', filename, skelpos,
baseptr, aidtext)) baseptr, aidtext))