mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
py3: Port LIT Output
This commit is contained in:
parent
73f58e6868
commit
c569f857bb
@@ -1,9 +1,9 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
"""
|
||||
Modified version of SHA-1 used in Microsoft LIT files.
|
||||
|
||||
Adapted from the PyPy pure-Python SHA-1 implementation.
|
||||
"""
|
||||
from __future__ import print_function
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
@@ -28,44 +28,36 @@ def _long2bytesBigEndian(n, blocksize=0):
|
||||
"""
|
||||
|
||||
# After much testing, this algorithm was deemed to be the fastest.
|
||||
s = ''
|
||||
s = b''
|
||||
pack = struct.pack
|
||||
while n > 0:
|
||||
s = pack('>I', n & 0xffffffff) + s
|
||||
n = n >> 32
|
||||
|
||||
# Strip off leading zeros.
|
||||
for i in range(len(s)):
|
||||
if s[i] != '\000':
|
||||
break
|
||||
else:
|
||||
# Only happens when n == 0.
|
||||
s = '\000'
|
||||
i = 0
|
||||
|
||||
s = s[i:]
|
||||
s = s.lstrip(b'\0')
|
||||
|
||||
# Add back some pad bytes. This could be done more efficiently
|
||||
# w.r.t. the de-padding being done above, but sigh...
|
||||
if blocksize > 0 and len(s) % blocksize:
|
||||
s = (blocksize - len(s) % blocksize) * '\000' + s
|
||||
s = (blocksize - len(s) % blocksize) * b'\000' + s
|
||||
|
||||
return s
|
||||
|
||||
|
||||
def _bytelist2longBigEndian(list):
|
||||
def _bytelist2longBigEndian(blist):
|
||||
"Transform a list of characters into a list of longs."
|
||||
|
||||
imax = len(list)/4
|
||||
imax = len(blist)//4
|
||||
hl = [0] * imax
|
||||
|
||||
j = 0
|
||||
i = 0
|
||||
while i < imax:
|
||||
b0 = long_type(ord(list[j])) << 24
|
||||
b1 = long_type(ord(list[j+1])) << 16
|
||||
b2 = long_type(ord(list[j+2])) << 8
|
||||
b3 = long_type(ord(list[j+3]))
|
||||
b0 = long_type(blist[j]) << 24
|
||||
b1 = long_type(blist[j+1]) << 16
|
||||
b2 = long_type(blist[j+2]) << 8
|
||||
b3 = long_type(blist[j+3])
|
||||
hl[i] = b0 | b1 | b2 | b3
|
||||
i = i+1
|
||||
j = j+4
|
||||
@@ -140,7 +132,7 @@ class mssha1(object):
|
||||
self.count = [0, 0]
|
||||
|
||||
# Initial empty message as a sequence of bytes (8 bit characters).
|
||||
self.input = []
|
||||
self.input = bytearray()
|
||||
|
||||
# Call a separate init function, that can be used repeatedly
|
||||
# to start from scratch on the same object.
|
||||
@@ -172,7 +164,7 @@ class mssha1(object):
|
||||
E = self.H4
|
||||
|
||||
for t in range(0, 80):
|
||||
TEMP = _rotateLeft(A, 5) + f[t](B, C, D) + E + W[t] + K[t/20]
|
||||
TEMP = _rotateLeft(A, 5) + f[t](B, C, D) + E + W[t] + K[t//20]
|
||||
E = D
|
||||
D = C
|
||||
C = _rotateLeft(B, 30) & 0xffffffff
|
||||
@@ -204,6 +196,7 @@ class mssha1(object):
|
||||
to the hashed string.
|
||||
"""
|
||||
|
||||
inBuf = bytearray(inBuf)
|
||||
leninBuf = long_type(len(inBuf))
|
||||
|
||||
# Compute number of bytes mod 64.
|
||||
@@ -218,17 +211,17 @@ class mssha1(object):
|
||||
partLen = 64 - index
|
||||
|
||||
if leninBuf >= partLen:
|
||||
self.input[index:] = list(inBuf[:partLen])
|
||||
self.input[index:] = inBuf[:partLen]
|
||||
self._transform(_bytelist2longBigEndian(self.input))
|
||||
i = partLen
|
||||
while i + 63 < leninBuf:
|
||||
self._transform(_bytelist2longBigEndian(list(inBuf[i:i+64])))
|
||||
self._transform(_bytelist2longBigEndian(inBuf[i:i+64]))
|
||||
i = i + 64
|
||||
else:
|
||||
self.input = list(inBuf[i:leninBuf])
|
||||
self.input = inBuf[i:leninBuf]
|
||||
else:
|
||||
i = 0
|
||||
self.input = self.input + list(inBuf)
|
||||
self.input = self.input + inBuf
|
||||
|
||||
def digest(self):
|
||||
"""Terminate the message-digest computation and return digest.
|
||||
@@ -243,7 +236,7 @@ class mssha1(object):
|
||||
H2 = self.H2
|
||||
H3 = self.H3
|
||||
H4 = self.H4
|
||||
input = [] + self.input
|
||||
inp = bytearray(self.input)
|
||||
count = [] + self.count
|
||||
|
||||
index = (self.count[1] >> 3) & 0x3f
|
||||
@@ -253,7 +246,7 @@ class mssha1(object):
|
||||
else:
|
||||
padLen = 120 - index
|
||||
|
||||
padding = ['\200'] + ['\000'] * 63
|
||||
padding = b'\200' + (b'\000' * 63)
|
||||
self.update(padding[:padLen])
|
||||
|
||||
# Append length (before padding).
|
||||
@@ -273,7 +266,7 @@ class mssha1(object):
|
||||
self.H2 = H2
|
||||
self.H3 = H3
|
||||
self.H4 = H4
|
||||
self.input = input
|
||||
self.input = inp
|
||||
self.count = count
|
||||
|
||||
return digest
|
||||
@@ -286,7 +279,7 @@ class mssha1(object):
|
||||
used to exchange the value safely in email or other non-
|
||||
binary environments.
|
||||
"""
|
||||
return ''.join(['%02x' % ord(c) for c in self.digest()])
|
||||
return ''.join(['%02x' % c for c in bytearray(self.digest())])
|
||||
|
||||
def copy(self):
|
||||
"""Return a clone object.
|
||||
|
@@ -1,14 +1,14 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
'''
|
||||
Basic support for writing LIT files.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
from __future__ import print_function
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
from struct import pack
|
||||
from itertools import count, chain
|
||||
from operator import attrgetter
|
||||
import io
|
||||
import time
|
||||
import random
|
||||
@@ -30,7 +30,7 @@ import calibre
|
||||
from calibre import plugins
|
||||
msdes, msdeserror = plugins['msdes']
|
||||
import calibre.ebooks.lit.mssha1 as mssha1
|
||||
from polyglot.builtins import codepoint_to_chr, unicode_type, string_or_bytes, range, zip
|
||||
from polyglot.builtins import codepoint_to_chr, unicode_type, string_or_bytes, range, zip, native_string_type
|
||||
from polyglot.urllib import urldefrag, unquote
|
||||
|
||||
__all__ = ['LitWriter']
|
||||
@@ -62,7 +62,7 @@ def invert_tag_map(tag_map):
|
||||
OPF_MAP = invert_tag_map(maps.OPF_MAP)
|
||||
HTML_MAP = invert_tag_map(maps.HTML_MAP)
|
||||
|
||||
LIT_MAGIC = 'ITOLITLS'
|
||||
LIT_MAGIC = b'ITOLITLS'
|
||||
|
||||
LITFILE_GUID = "{0A9007C1-4076-11D3-8789-0000F8105754}"
|
||||
PIECE3_GUID = "{0A9007C3-4076-11D3-8789-0000F8105754}"
|
||||
@@ -97,24 +97,24 @@ ROOT_OFFSET = 1284508585713721976
|
||||
ROOT_SIZE = 4165955342166943123
|
||||
|
||||
BLOCK_CAOL = \
|
||||
"\x43\x41\x4f\x4c\x02\x00\x00\x00" \
|
||||
"\x50\x00\x00\x00\x37\x13\x03\x00" \
|
||||
"\x00\x00\x00\x00\x00\x20\x00\x00" \
|
||||
"\x00\x02\x00\x00\x00\x00\x10\x00" \
|
||||
"\x00\x00\x02\x00\x00\x00\x00\x00" \
|
||||
"\x00\x00\x00\x00\x00\x00\x00\x00"
|
||||
b"\x43\x41\x4f\x4c\x02\x00\x00\x00" \
|
||||
b"\x50\x00\x00\x00\x37\x13\x03\x00" \
|
||||
b"\x00\x00\x00\x00\x00\x20\x00\x00" \
|
||||
b"\x00\x02\x00\x00\x00\x00\x10\x00" \
|
||||
b"\x00\x00\x02\x00\x00\x00\x00\x00" \
|
||||
b"\x00\x00\x00\x00\x00\x00\x00\x00"
|
||||
BLOCK_ITSF = \
|
||||
"\x49\x54\x53\x46\x04\x00\x00\x00" \
|
||||
"\x20\x00\x00\x00\x01\x00\x00\x00"
|
||||
b"\x49\x54\x53\x46\x04\x00\x00\x00" \
|
||||
b"\x20\x00\x00\x00\x01\x00\x00\x00"
|
||||
|
||||
MSDES_CONTROL = \
|
||||
"\x03\x00\x00\x00\x29\x17\x00\x00" \
|
||||
"\x01\x00\x00\x00\xa5\xa5\x00\x00"
|
||||
b"\x03\x00\x00\x00\x29\x17\x00\x00" \
|
||||
b"\x01\x00\x00\x00\xa5\xa5\x00\x00"
|
||||
LZXC_CONTROL = \
|
||||
"\x07\x00\x00\x00\x4c\x5a\x58\x43" \
|
||||
"\x03\x00\x00\x00\x04\x00\x00\x00" \
|
||||
"\x04\x00\x00\x00\x02\x00\x00\x00" \
|
||||
"\x00\x00\x00\x00\x00\x00\x00\x00"
|
||||
b"\x07\x00\x00\x00\x4c\x5a\x58\x43" \
|
||||
b"\x03\x00\x00\x00\x04\x00\x00\x00" \
|
||||
b"\x04\x00\x00\x00\x02\x00\x00\x00" \
|
||||
b"\x00\x00\x00\x00\x00\x00\x00\x00"
|
||||
|
||||
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
|
||||
|
||||
@@ -122,16 +122,16 @@ PAGE_BREAKS = {'always', 'left', 'right'}
|
||||
|
||||
|
||||
def decint(value):
|
||||
bytes = []
|
||||
ans = bytearray()
|
||||
while True:
|
||||
b = value & 0x7f
|
||||
value >>= 7
|
||||
if bytes:
|
||||
if len(ans):
|
||||
b |= 0x80
|
||||
bytes.append(chr(b))
|
||||
ans.append(b)
|
||||
if value == 0:
|
||||
break
|
||||
return ''.join(reversed(bytes))
|
||||
return bytes(bytearray(reversed(ans)))
|
||||
|
||||
|
||||
def randbytes(n):
|
||||
@@ -366,7 +366,7 @@ class LitWriter(object):
|
||||
self._write(packguid(LITFILE_GUID))
|
||||
offset = self._tell()
|
||||
pieces = list(range(offset, offset + (PIECE_SIZE * 5), PIECE_SIZE))
|
||||
self._write((5 * PIECE_SIZE) * '\0')
|
||||
self._write((5 * PIECE_SIZE) * b'\0')
|
||||
aoli1 = len(dchunks) if ichunk else ULL_NEG1
|
||||
last = len(dchunks) - 1
|
||||
ddepth = 2 if ichunk else 1
|
||||
@@ -391,7 +391,7 @@ class LitWriter(object):
|
||||
# Piece #1: Directory chunks
|
||||
piece1_offset = self._tell()
|
||||
number = len(dchunks) + ((ichunk and 1) or 0)
|
||||
self._write('IFCM', pack('<IIIQQ',
|
||||
self._write(b'IFCM', pack('<IIIQQ',
|
||||
1, DCHUNK_SIZE, 0x100000, ULL_NEG1, number))
|
||||
for dchunk in dchunks:
|
||||
self._write(dchunk)
|
||||
@@ -402,7 +402,7 @@ class LitWriter(object):
|
||||
|
||||
# Piece #2: Count chunks
|
||||
piece2_offset = self._tell()
|
||||
self._write('IFCM', pack('<IIIQQ',
|
||||
self._write(b'IFCM', pack('<IIIQQ',
|
||||
1, CCHUNK_SIZE, 0x20000, ULL_NEG1, 1))
|
||||
cchunk = io.BytesIO()
|
||||
last = 0
|
||||
@@ -413,9 +413,9 @@ class LitWriter(object):
|
||||
last = dcount
|
||||
cchunk = cchunk.getvalue()
|
||||
rem = CCHUNK_SIZE - (len(cchunk) + 50)
|
||||
self._write('AOLL', pack('<IQQQQQ',
|
||||
self._write(b'AOLL', pack('<IQQQQQ',
|
||||
rem, 0, ULL_NEG1, ULL_NEG1, 0, 1))
|
||||
filler = '\0' * rem
|
||||
filler = b'\0' * rem
|
||||
self._write(cchunk, filler, pack('<H', len(dcounts)))
|
||||
self._writeat(pieces[2], pack('<QQ',
|
||||
piece2_offset, self._tell() - piece2_offset))
|
||||
@@ -491,7 +491,7 @@ class LitWriter(object):
|
||||
elif isinstance(data, unicode_type):
|
||||
data = data.encode('utf-8')
|
||||
elif hasattr(data, 'cssText'):
|
||||
data = str(item)
|
||||
data = item.bytes_representation
|
||||
self._add_file(name, data, secnum)
|
||||
item.size = len(data)
|
||||
|
||||
@@ -507,10 +507,10 @@ class LitWriter(object):
|
||||
elif item.media_type in LIT_IMAGES:
|
||||
manifest['images'].append(item)
|
||||
data = io.BytesIO()
|
||||
data.write(pack('<Bc', 1, '\\'))
|
||||
data.write(pack('<Bc', 1, b'\\'))
|
||||
offset = 0
|
||||
for state in states:
|
||||
items = sorted(manifest[state])
|
||||
items = sorted(manifest[state], key=attrgetter('sort_key'))
|
||||
data.write(pack('<I', len(items)))
|
||||
for item in items:
|
||||
id, media_type = item.id, item.media_type
|
||||
@@ -528,7 +528,7 @@ class LitWriter(object):
|
||||
codepoint_to_chr(len(media_type)), unicode_type(media_type)]
|
||||
for value in entry:
|
||||
data.write(value.encode('utf-8'))
|
||||
data.write('\0')
|
||||
data.write(b'\0')
|
||||
offset += item.size
|
||||
self._add_file('/manifest', data.getvalue())
|
||||
|
||||
@@ -572,7 +572,7 @@ class LitWriter(object):
|
||||
_, meta = self._oeb.to_opf1()[OPF_MIME]
|
||||
meta.attrib['ms--minimum_level'] = '0'
|
||||
meta.attrib['ms--attr5'] = '1'
|
||||
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
|
||||
meta.attrib['ms--guid'] = '{%s}' % native_string_type(uuid.uuid4()).upper()
|
||||
rebin = ReBinary(meta, None, self._oeb, self.opts, map=OPF_MAP)
|
||||
meta = rebin.content
|
||||
self._meta = meta
|
||||
@@ -583,9 +583,9 @@ class LitWriter(object):
|
||||
self._add_file('/DRMStorage/DRMSource', drmsource)
|
||||
tempkey = self._calculate_deskey([self._meta, drmsource])
|
||||
msdes.deskey(tempkey, msdes.EN0)
|
||||
self._add_file('/DRMStorage/DRMSealed', msdes.des("\0" * 16))
|
||||
self._bookkey = '\0' * 8
|
||||
self._add_file('/DRMStorage/ValidationStream', 'MSReader', 3)
|
||||
self._add_file('/DRMStorage/DRMSealed', msdes.des(b"\0" * 16))
|
||||
self._bookkey = b'\0' * 8
|
||||
self._add_file('/DRMStorage/ValidationStream', b'MSReader', 3)
|
||||
|
||||
def _build_version(self):
|
||||
self._add_file('/Version', pack('<HH', 8, 1))
|
||||
@@ -598,7 +598,7 @@ class LitWriter(object):
|
||||
for name in names:
|
||||
data.write(pack('<H', len(name)))
|
||||
data.write(name.encode('utf-16-le'))
|
||||
data.write('\0\0')
|
||||
data.write(b'\0\0')
|
||||
self._add_file('::DataSpace/NameList', data.getvalue())
|
||||
|
||||
def _build_storage(self):
|
||||
@@ -608,7 +608,7 @@ class LitWriter(object):
|
||||
for secnum, name, transforms in mapping:
|
||||
root = '::DataSpace/Storage/' + name
|
||||
data = self._sections[secnum].getvalue()
|
||||
cdata, sdata, tdata, rdata = '', '', '', ''
|
||||
cdata, sdata, tdata, rdata = b'', b'', b'', b''
|
||||
for guid in transforms:
|
||||
tdata = packguid(guid) + tdata
|
||||
sdata = sdata + pack('<Q', len(data))
|
||||
@@ -619,7 +619,7 @@ class LitWriter(object):
|
||||
msdes.deskey(self._bookkey, msdes.EN0)
|
||||
pad = 8 - (len(data) & 0x7)
|
||||
if pad != 8:
|
||||
data = data + ('\0' * pad)
|
||||
data = data + (b'\0' * pad)
|
||||
data = msdes.des(data)
|
||||
elif guid == LZXCOMPRESS_GUID:
|
||||
cdata = LZXC_CONTROL + cdata
|
||||
@@ -655,17 +655,20 @@ class LitWriter(object):
|
||||
hash = mssha1.new()
|
||||
for data in hashdata:
|
||||
if prepad > 0:
|
||||
data = ("\000" * prepad) + data
|
||||
data = (b"\000" * prepad) + data
|
||||
prepad = 0
|
||||
postpad = 64 - (len(data) % 64)
|
||||
if postpad < 64:
|
||||
data = data + ("\000" * postpad)
|
||||
data = data + (b"\000" * postpad)
|
||||
hash.update(data)
|
||||
digest = hash.digest()
|
||||
key = [0] * 8
|
||||
for i in range(0, len(digest)):
|
||||
key[i % 8] ^= ord(digest[i])
|
||||
return ''.join(chr(x) for x in key)
|
||||
if not isinstance(digest, bytes):
|
||||
digest = digest.encode('ascii')
|
||||
digest = bytearray(digest)
|
||||
key = bytearray(8)
|
||||
for i, k in enumerate(digest):
|
||||
key[i % 8] ^= k
|
||||
return bytes(key)
|
||||
|
||||
def _build_dchunks(self):
|
||||
ddata = []
|
||||
@@ -677,11 +680,13 @@ class LitWriter(object):
|
||||
quickref = []
|
||||
name = directory[0].name
|
||||
for entry in directory:
|
||||
en = entry.name.encode('utf-8') if entry.name else entry.name
|
||||
next = ''.join([decint(len(en)), en,
|
||||
en = entry.name
|
||||
if not isinstance(en, bytes):
|
||||
en = en.encode('utf-8')
|
||||
nxt = b''.join([decint(len(en)), en,
|
||||
decint(entry.section), decint(entry.offset),
|
||||
decint(entry.size)])
|
||||
usedlen = dchunk.tell() + len(next) + (len(quickref) * 2) + 52
|
||||
usedlen = dchunk.tell() + len(nxt) + (len(quickref) * 2) + 52
|
||||
if usedlen >= DCHUNK_SIZE:
|
||||
ddata.append((dchunk.getvalue(), quickref, dcount, name))
|
||||
dchunk = io.BytesIO()
|
||||
@@ -690,7 +695,7 @@ class LitWriter(object):
|
||||
name = en
|
||||
if (dcount % qrn) == 0:
|
||||
quickref.append(dchunk.tell())
|
||||
dchunk.write(next)
|
||||
dchunk.write(nxt)
|
||||
dcount = dcount + 1
|
||||
ddata.append((dchunk.getvalue(), quickref, dcount, name))
|
||||
cidmax = len(ddata) - 1
|
||||
@@ -706,10 +711,10 @@ class LitWriter(object):
|
||||
next = cid + 1 if cid < cidmax else ULL_NEG1
|
||||
rem = DCHUNK_SIZE - (len(content) + 50)
|
||||
pad = rem - (len(quickref) * 2)
|
||||
dchunk.write('AOLL')
|
||||
dchunk.write(b'AOLL')
|
||||
dchunk.write(pack('<IQQQQQ', rem, cid, prev, next, rdcount, 1))
|
||||
dchunk.write(content)
|
||||
dchunk.write('\0' * pad)
|
||||
dchunk.write(b'\0' * pad)
|
||||
for ref in reversed(quickref):
|
||||
dchunk.write(pack('<H', ref))
|
||||
dchunk.write(pack('<H', dcount))
|
||||
@@ -723,6 +728,6 @@ class LitWriter(object):
|
||||
if ichunk:
|
||||
rem = DCHUNK_SIZE - (ichunk.tell() + 16)
|
||||
pad = rem - 2
|
||||
ichunk = ''.join(['AOLI', pack('<IQ', rem, len(dchunks)),
|
||||
ichunk.getvalue(), ('\0' * pad), pack('<H', len(dchunks))])
|
||||
ichunk = b''.join([b'AOLI', pack('<IQ', rem, len(dchunks)),
|
||||
ichunk.getvalue(), (b'\0' * pad), pack('<H', len(dchunks))])
|
||||
return dcounts, dchunks, ichunk
|
||||
|
Loading…
x
Reference in New Issue
Block a user