diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index a2ec0e093b..6e54d8d2bf 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -569,7 +569,7 @@ def entity_to_unicode(match, exceptions=[], encoding='cp1252', if encoding is None or num > 255: return check(my_unichr(num)) try: - return check(chr(num).decode(encoding)) + return check(bytes(bytearray((num,))).decode(encoding)) except UnicodeDecodeError: return check(my_unichr(num)) from calibre.ebooks.html_entities import html5_entities diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 4cef2e9083..b4d0d7c373 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -1,8 +1,7 @@ +from __future__ import absolute_import, division, print_function, unicode_literals ''' Support for reading LIT files. ''' -from __future__ import with_statement -from __future__ import print_function __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' \ @@ -18,8 +17,9 @@ import calibre.ebooks.lit.mssha1 as mssha1 from calibre.ebooks.oeb.base import urlnormalize, xpath from calibre.ebooks.oeb.reader import OEBReader from calibre.ebooks import DRMError +from calibre.constants import ispy3 from calibre import plugins -from polyglot.builtins import codepoint_to_chr, unicode_type, string_or_bytes, range +from polyglot.builtins import codepoint_to_chr, unicode_type, string_or_bytes, range, itervalues from polyglot.urllib import unquote as urlunquote, urldefrag lzx, lxzerror = plugins['lzx'] @@ -69,17 +69,18 @@ def int32(bytes): return struct.unpack(' 0: - b = ord(bytes[pos]) + b = ba[pos] pos += 1 remaining -= 1 val <<= 7 val |= (b & 0x7f) if b & 0x80 == 0: break - return val, bytes[pos:], remaining + return val, byts[pos:], remaining def msguid(bytes): @@ -88,7 +89,7 @@ def msguid(bytes): def read_utf8_char(bytes, pos): - c = ord(bytes[pos]) + c = ord(bytes[pos:pos+1]) mask = 0x80 if (c & mask): elsize = 0 @@ -104,7 +105,7 @@ def read_utf8_char(bytes, pos): raise LitError('Invalid UTF8 character: %s' % repr(bytes[pos])) c &= (mask - 1) for i in range(1, elsize): - b = ord(bytes[pos+i]) + b = ord(bytes[pos+i:pos+i+1]) if (b & 0xC0) != 0x80: raise LitError( 'Invalid UTF8 character: %s' % repr(bytes[pos:pos+i])) @@ -118,7 +119,7 @@ def consume_sized_utf8_string(bytes, zpad=False): for i in range(ord(slen)): char, pos = read_utf8_char(bytes, pos) result.append(char) - if zpad and bytes[pos] == '\000': + if zpad and bytes[pos:pos+1] == b'\0': pos += 1 return u''.join(result), bytes[pos:] @@ -129,10 +130,10 @@ def encode(string): class UnBinary(object): AMPERSAND_RE = re.compile( - r'&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|[a-zA-Z_:][a-zA-Z0-9.-_:]+);)') - OPEN_ANGLE_RE = re.compile(r'<<(?![!]--)') - CLOSE_ANGLE_RE = re.compile(r'(?