diff --git a/src/calibre/ebooks/lit/mssha1.py b/src/calibre/ebooks/lit/mssha1.py index 6e1e68ffbd..7c82a561de 100644 --- a/src/calibre/ebooks/lit/mssha1.py +++ b/src/calibre/ebooks/lit/mssha1.py @@ -1,9 +1,9 @@ +from __future__ import absolute_import, division, print_function, unicode_literals """ Modified version of SHA-1 used in Microsoft LIT files. Adapted from the PyPy pure-Python SHA-1 implementation. """ -from __future__ import print_function __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. Vandegrift ' @@ -28,44 +28,36 @@ def _long2bytesBigEndian(n, blocksize=0): """ # After much testing, this algorithm was deemed to be the fastest. - s = '' + s = b'' pack = struct.pack while n > 0: s = pack('>I', n & 0xffffffff) + s n = n >> 32 # Strip off leading zeros. - for i in range(len(s)): - if s[i] != '\000': - break - else: - # Only happens when n == 0. - s = '\000' - i = 0 - - s = s[i:] + s = s.lstrip(b'\0') # Add back some pad bytes. This could be done more efficiently # w.r.t. the de-padding being done above, but sigh... if blocksize > 0 and len(s) % blocksize: - s = (blocksize - len(s) % blocksize) * '\000' + s + s = (blocksize - len(s) % blocksize) * b'\000' + s return s -def _bytelist2longBigEndian(list): +def _bytelist2longBigEndian(blist): "Transform a list of characters into a list of longs." - imax = len(list)/4 + imax = len(blist)//4 hl = [0] * imax j = 0 i = 0 while i < imax: - b0 = long_type(ord(list[j])) << 24 - b1 = long_type(ord(list[j+1])) << 16 - b2 = long_type(ord(list[j+2])) << 8 - b3 = long_type(ord(list[j+3])) + b0 = long_type(blist[j]) << 24 + b1 = long_type(blist[j+1]) << 16 + b2 = long_type(blist[j+2]) << 8 + b3 = long_type(blist[j+3]) hl[i] = b0 | b1 | b2 | b3 i = i+1 j = j+4 @@ -140,7 +132,7 @@ class mssha1(object): self.count = [0, 0] # Initial empty message as a sequence of bytes (8 bit characters). - self.input = [] + self.input = bytearray() # Call a separate init function, that can be used repeatedly # to start from scratch on the same object. @@ -172,7 +164,7 @@ class mssha1(object): E = self.H4 for t in range(0, 80): - TEMP = _rotateLeft(A, 5) + f[t](B, C, D) + E + W[t] + K[t/20] + TEMP = _rotateLeft(A, 5) + f[t](B, C, D) + E + W[t] + K[t//20] E = D D = C C = _rotateLeft(B, 30) & 0xffffffff @@ -204,6 +196,7 @@ class mssha1(object): to the hashed string. """ + inBuf = bytearray(inBuf) leninBuf = long_type(len(inBuf)) # Compute number of bytes mod 64. @@ -218,17 +211,17 @@ class mssha1(object): partLen = 64 - index if leninBuf >= partLen: - self.input[index:] = list(inBuf[:partLen]) + self.input[index:] = inBuf[:partLen] self._transform(_bytelist2longBigEndian(self.input)) i = partLen while i + 63 < leninBuf: - self._transform(_bytelist2longBigEndian(list(inBuf[i:i+64]))) + self._transform(_bytelist2longBigEndian(inBuf[i:i+64])) i = i + 64 else: - self.input = list(inBuf[i:leninBuf]) + self.input = inBuf[i:leninBuf] else: i = 0 - self.input = self.input + list(inBuf) + self.input = self.input + inBuf def digest(self): """Terminate the message-digest computation and return digest. @@ -243,7 +236,7 @@ class mssha1(object): H2 = self.H2 H3 = self.H3 H4 = self.H4 - input = [] + self.input + inp = bytearray(self.input) count = [] + self.count index = (self.count[1] >> 3) & 0x3f @@ -253,7 +246,7 @@ class mssha1(object): else: padLen = 120 - index - padding = ['\200'] + ['\000'] * 63 + padding = b'\200' + (b'\000' * 63) self.update(padding[:padLen]) # Append length (before padding). @@ -273,7 +266,7 @@ class mssha1(object): self.H2 = H2 self.H3 = H3 self.H4 = H4 - self.input = input + self.input = inp self.count = count return digest @@ -286,7 +279,7 @@ class mssha1(object): used to exchange the value safely in email or other non- binary environments. """ - return ''.join(['%02x' % ord(c) for c in self.digest()]) + return ''.join(['%02x' % c for c in bytearray(self.digest())]) def copy(self): """Return a clone object. diff --git a/src/calibre/ebooks/lit/writer.py b/src/calibre/ebooks/lit/writer.py index 490c06b42f..68ff33d692 100644 --- a/src/calibre/ebooks/lit/writer.py +++ b/src/calibre/ebooks/lit/writer.py @@ -1,14 +1,14 @@ +from __future__ import absolute_import, division, print_function, unicode_literals ''' Basic support for writing LIT files. ''' -from __future__ import with_statement -from __future__ import print_function __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. Vandegrift ' from struct import pack from itertools import count, chain +from operator import attrgetter import io import time import random @@ -30,7 +30,7 @@ import calibre from calibre import plugins msdes, msdeserror = plugins['msdes'] import calibre.ebooks.lit.mssha1 as mssha1 -from polyglot.builtins import codepoint_to_chr, unicode_type, string_or_bytes, range, zip +from polyglot.builtins import codepoint_to_chr, unicode_type, string_or_bytes, range, zip, native_string_type from polyglot.urllib import urldefrag, unquote __all__ = ['LitWriter'] @@ -62,7 +62,7 @@ def invert_tag_map(tag_map): OPF_MAP = invert_tag_map(maps.OPF_MAP) HTML_MAP = invert_tag_map(maps.HTML_MAP) -LIT_MAGIC = 'ITOLITLS' +LIT_MAGIC = b'ITOLITLS' LITFILE_GUID = "{0A9007C1-4076-11D3-8789-0000F8105754}" PIECE3_GUID = "{0A9007C3-4076-11D3-8789-0000F8105754}" @@ -97,24 +97,24 @@ ROOT_OFFSET = 1284508585713721976 ROOT_SIZE = 4165955342166943123 BLOCK_CAOL = \ - "\x43\x41\x4f\x4c\x02\x00\x00\x00" \ - "\x50\x00\x00\x00\x37\x13\x03\x00" \ - "\x00\x00\x00\x00\x00\x20\x00\x00" \ - "\x00\x02\x00\x00\x00\x00\x10\x00" \ - "\x00\x00\x02\x00\x00\x00\x00\x00" \ - "\x00\x00\x00\x00\x00\x00\x00\x00" + b"\x43\x41\x4f\x4c\x02\x00\x00\x00" \ + b"\x50\x00\x00\x00\x37\x13\x03\x00" \ + b"\x00\x00\x00\x00\x00\x20\x00\x00" \ + b"\x00\x02\x00\x00\x00\x00\x10\x00" \ + b"\x00\x00\x02\x00\x00\x00\x00\x00" \ + b"\x00\x00\x00\x00\x00\x00\x00\x00" BLOCK_ITSF = \ - "\x49\x54\x53\x46\x04\x00\x00\x00" \ - "\x20\x00\x00\x00\x01\x00\x00\x00" + b"\x49\x54\x53\x46\x04\x00\x00\x00" \ + b"\x20\x00\x00\x00\x01\x00\x00\x00" MSDES_CONTROL = \ - "\x03\x00\x00\x00\x29\x17\x00\x00" \ - "\x01\x00\x00\x00\xa5\xa5\x00\x00" + b"\x03\x00\x00\x00\x29\x17\x00\x00" \ + b"\x01\x00\x00\x00\xa5\xa5\x00\x00" LZXC_CONTROL = \ - "\x07\x00\x00\x00\x4c\x5a\x58\x43" \ - "\x03\x00\x00\x00\x04\x00\x00\x00" \ - "\x04\x00\x00\x00\x02\x00\x00\x00" \ - "\x00\x00\x00\x00\x00\x00\x00\x00" + b"\x07\x00\x00\x00\x4c\x5a\x58\x43" \ + b"\x03\x00\x00\x00\x04\x00\x00\x00" \ + b"\x04\x00\x00\x00\x02\x00\x00\x00" \ + b"\x00\x00\x00\x00\x00\x00\x00\x00" COLLAPSE = re.compile(r'[ \t\r\n\v]+') @@ -122,16 +122,16 @@ PAGE_BREAKS = {'always', 'left', 'right'} def decint(value): - bytes = [] + ans = bytearray() while True: b = value & 0x7f value >>= 7 - if bytes: + if len(ans): b |= 0x80 - bytes.append(chr(b)) + ans.append(b) if value == 0: break - return ''.join(reversed(bytes)) + return bytes(bytearray(reversed(ans))) def randbytes(n): @@ -366,7 +366,7 @@ class LitWriter(object): self._write(packguid(LITFILE_GUID)) offset = self._tell() pieces = list(range(offset, offset + (PIECE_SIZE * 5), PIECE_SIZE)) - self._write((5 * PIECE_SIZE) * '\0') + self._write((5 * PIECE_SIZE) * b'\0') aoli1 = len(dchunks) if ichunk else ULL_NEG1 last = len(dchunks) - 1 ddepth = 2 if ichunk else 1 @@ -391,7 +391,7 @@ class LitWriter(object): # Piece #1: Directory chunks piece1_offset = self._tell() number = len(dchunks) + ((ichunk and 1) or 0) - self._write('IFCM', pack(' 0: - data = ("\000" * prepad) + data + data = (b"\000" * prepad) + data prepad = 0 postpad = 64 - (len(data) % 64) if postpad < 64: - data = data + ("\000" * postpad) + data = data + (b"\000" * postpad) hash.update(data) digest = hash.digest() - key = [0] * 8 - for i in range(0, len(digest)): - key[i % 8] ^= ord(digest[i]) - return ''.join(chr(x) for x in key) + if not isinstance(digest, bytes): + digest = digest.encode('ascii') + digest = bytearray(digest) + key = bytearray(8) + for i, k in enumerate(digest): + key[i % 8] ^= k + return bytes(key) def _build_dchunks(self): ddata = [] @@ -677,11 +680,13 @@ class LitWriter(object): quickref = [] name = directory[0].name for entry in directory: - en = entry.name.encode('utf-8') if entry.name else entry.name - next = ''.join([decint(len(en)), en, + en = entry.name + if not isinstance(en, bytes): + en = en.encode('utf-8') + nxt = b''.join([decint(len(en)), en, decint(entry.section), decint(entry.offset), decint(entry.size)]) - usedlen = dchunk.tell() + len(next) + (len(quickref) * 2) + 52 + usedlen = dchunk.tell() + len(nxt) + (len(quickref) * 2) + 52 if usedlen >= DCHUNK_SIZE: ddata.append((dchunk.getvalue(), quickref, dcount, name)) dchunk = io.BytesIO() @@ -690,7 +695,7 @@ class LitWriter(object): name = en if (dcount % qrn) == 0: quickref.append(dchunk.tell()) - dchunk.write(next) + dchunk.write(nxt) dcount = dcount + 1 ddata.append((dchunk.getvalue(), quickref, dcount, name)) cidmax = len(ddata) - 1 @@ -706,10 +711,10 @@ class LitWriter(object): next = cid + 1 if cid < cidmax else ULL_NEG1 rem = DCHUNK_SIZE - (len(content) + 50) pad = rem - (len(quickref) * 2) - dchunk.write('AOLL') + dchunk.write(b'AOLL') dchunk.write(pack('