mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Merge branch 'py3' of https://github.com/eli-schwartz/calibre
This commit is contained in:
commit
0e1a89c338
@ -79,17 +79,17 @@ class MobiReader(object):
|
|||||||
stream = open(filename_or_stream, 'rb')
|
stream = open(filename_or_stream, 'rb')
|
||||||
|
|
||||||
raw = stream.read()
|
raw = stream.read()
|
||||||
if raw.startswith('TPZ'):
|
if raw.startswith(b'TPZ'):
|
||||||
raise TopazError(_('This is an Amazon Topaz book. It cannot be processed.'))
|
raise TopazError(_('This is an Amazon Topaz book. It cannot be processed.'))
|
||||||
if raw.startswith(b'\xeaDRMION\xee'):
|
if raw.startswith(b'\xeaDRMION\xee'):
|
||||||
raise KFXError()
|
raise KFXError()
|
||||||
|
|
||||||
self.header = raw[0:72]
|
self.header = raw[0:72]
|
||||||
self.name = self.header[:32].replace('\x00', '')
|
self.name = self.header[:32].replace(b'\x00', b'')
|
||||||
self.num_sections, = struct.unpack('>H', raw[76:78])
|
self.num_sections, = struct.unpack('>H', raw[76:78])
|
||||||
|
|
||||||
self.ident = self.header[0x3C:0x3C + 8].upper()
|
self.ident = self.header[0x3C:0x3C + 8].upper()
|
||||||
if self.ident not in ['BOOKMOBI', 'TEXTREAD']:
|
if self.ident not in [b'BOOKMOBI', b'TEXTREAD']:
|
||||||
raise MobiError('Unknown book type: %s' % repr(self.ident))
|
raise MobiError('Unknown book type: %s' % repr(self.ident))
|
||||||
|
|
||||||
self.sections = []
|
self.sections = []
|
||||||
@ -750,7 +750,7 @@ class MobiReader(object):
|
|||||||
def sizeof_trailing_entry(ptr, psize):
|
def sizeof_trailing_entry(ptr, psize):
|
||||||
bitpos, result = 0, 0
|
bitpos, result = 0, 0
|
||||||
while True:
|
while True:
|
||||||
v = ord(ptr[psize-1])
|
v = ord(ptr[psize-1:psize])
|
||||||
result |= (v & 0x7F) << bitpos
|
result |= (v & 0x7F) << bitpos
|
||||||
bitpos += 7
|
bitpos += 7
|
||||||
psize -= 1
|
psize -= 1
|
||||||
@ -769,7 +769,8 @@ class MobiReader(object):
|
|||||||
return 0
|
return 0
|
||||||
flags >>= 1
|
flags >>= 1
|
||||||
if self.book_header.extra_flags & 1:
|
if self.book_header.extra_flags & 1:
|
||||||
num += (ord(data[size - num - 1]) & 0x3) + 1
|
off = size - num - 1
|
||||||
|
num += (ord(data[off:off+1]) & 0x3) + 1
|
||||||
return num
|
return num
|
||||||
|
|
||||||
def warn_about_trailing_entry_corruption(self):
|
def warn_about_trailing_entry_corruption(self):
|
||||||
@ -791,7 +792,7 @@ class MobiReader(object):
|
|||||||
|
|
||||||
self.mobi_html = b''
|
self.mobi_html = b''
|
||||||
|
|
||||||
if self.book_header.compression_type == 'DH':
|
if self.book_header.compression_type == b'DH':
|
||||||
huffs = [self.sections[i][0] for i in
|
huffs = [self.sections[i][0] for i in
|
||||||
range(self.book_header.huff_offset,
|
range(self.book_header.huff_offset,
|
||||||
self.book_header.huff_offset + self.book_header.huff_number)]
|
self.book_header.huff_offset + self.book_header.huff_number)]
|
||||||
@ -800,23 +801,23 @@ class MobiReader(object):
|
|||||||
huff = HuffReader(huffs)
|
huff = HuffReader(huffs)
|
||||||
unpack = huff.unpack
|
unpack = huff.unpack
|
||||||
|
|
||||||
elif self.book_header.compression_type == '\x00\x02':
|
elif self.book_header.compression_type == b'\x00\x02':
|
||||||
unpack = decompress_doc
|
unpack = decompress_doc
|
||||||
|
|
||||||
elif self.book_header.compression_type == '\x00\x01':
|
elif self.book_header.compression_type == b'\x00\x01':
|
||||||
unpack = lambda x: x
|
unpack = lambda x: x
|
||||||
else:
|
else:
|
||||||
raise MobiError('Unknown compression algorithm: %s' % repr(self.book_header.compression_type))
|
raise MobiError('Unknown compression algorithm: %r' % self.book_header.compression_type)
|
||||||
self.mobi_html = b''.join(map(unpack, text_sections))
|
self.mobi_html = b''.join(map(unpack, text_sections))
|
||||||
if self.mobi_html.endswith(b'#'):
|
if self.mobi_html.endswith(b'#'):
|
||||||
self.mobi_html = self.mobi_html[:-1]
|
self.mobi_html = self.mobi_html[:-1]
|
||||||
|
|
||||||
if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
|
if self.book_header.ancient and b'<html' not in self.mobi_html[:300].lower():
|
||||||
self.mobi_html = self.mobi_html.replace('\r ', '\n\n ')
|
self.mobi_html = self.mobi_html.replace(b'\r ', b'\n\n ')
|
||||||
self.mobi_html = self.mobi_html.replace('\0', '')
|
self.mobi_html = self.mobi_html.replace(b'\0', b'')
|
||||||
if self.book_header.codec == 'cp1252':
|
if self.book_header.codec == 'cp1252':
|
||||||
self.mobi_html = self.mobi_html.replace('\x1e', '') # record separator
|
self.mobi_html = self.mobi_html.replace(b'\x1e', b'') # record separator
|
||||||
self.mobi_html = self.mobi_html.replace('\x02', '') # start of text
|
self.mobi_html = self.mobi_html.replace(b'\x02', b'') # start of text
|
||||||
return processed_records
|
return processed_records
|
||||||
|
|
||||||
def replace_page_breaks(self):
|
def replace_page_breaks(self):
|
||||||
|
@ -24,7 +24,7 @@ from calibre.ebooks.metadata.toc import TOC
|
|||||||
from calibre.ebooks.mobi.utils import read_font_record
|
from calibre.ebooks.mobi.utils import read_font_record
|
||||||
from calibre.ebooks.oeb.parse_utils import parse_html
|
from calibre.ebooks.oeb.parse_utils import parse_html
|
||||||
from calibre.ebooks.oeb.base import XPath, XHTML, xml2text
|
from calibre.ebooks.oeb.base import XPath, XHTML, xml2text
|
||||||
from polyglot.builtins import range, zip
|
from polyglot.builtins import range, zip, unicode_type
|
||||||
from polyglot.urllib import urldefrag
|
from polyglot.urllib import urldefrag
|
||||||
|
|
||||||
Part = namedtuple('Part',
|
Part = namedtuple('Part',
|
||||||
@ -85,7 +85,7 @@ class Mobi8Reader(object):
|
|||||||
|
|
||||||
def __call__(self):
|
def __call__(self):
|
||||||
self.mobi6_reader.check_for_drm()
|
self.mobi6_reader.check_for_drm()
|
||||||
self.aid_anchor_suffix = bytes(uuid4().hex)
|
self.aid_anchor_suffix = uuid4().hex.encode('utf-8')
|
||||||
bh = self.mobi6_reader.book_header
|
bh = self.mobi6_reader.book_header
|
||||||
if self.mobi6_reader.kf8_type == 'joint':
|
if self.mobi6_reader.kf8_type == 'joint':
|
||||||
offset = self.mobi6_reader.kf8_boundary + 2
|
offset = self.mobi6_reader.kf8_boundary + 2
|
||||||
@ -225,8 +225,7 @@ class Mobi8Reader(object):
|
|||||||
self.parts.append(skeleton)
|
self.parts.append(skeleton)
|
||||||
if divcnt < 1:
|
if divcnt < 1:
|
||||||
# Empty file
|
# Empty file
|
||||||
import uuid
|
aidtext = unicode_type(uuid4())
|
||||||
aidtext = str(uuid.uuid4())
|
|
||||||
filename = aidtext + '.html'
|
filename = aidtext + '.html'
|
||||||
self.partinfo.append(Part(skelnum, 'text', filename, skelpos,
|
self.partinfo.append(Part(skelnum, 'text', filename, skelpos,
|
||||||
baseptr, aidtext))
|
baseptr, aidtext))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user