Fix #913 (lit2lrf conversion error)

This commit is contained in:
Kovid Goyal 2008-07-25 21:56:46 -07:00
commit 80c665544c
2 changed files with 15 additions and 11 deletions

View File

@ -93,9 +93,15 @@ def read_utf8_char(bytes, pos):
c = (c << 6) | (b & 0x3F) c = (c << 6) | (b & 0x3F)
return unichr(c), pos+elsize return unichr(c), pos+elsize
def consume_sized_utf8_string(bytes, zpad=False):
    """Read a length-prefixed UTF-8 string from the start of `bytes`.

    The first UTF-8 character encodes the length: its code point is the
    number of characters (not bytes) that follow and make up the string.

    :param bytes: Raw byte string positioned at the length prefix.
    :param zpad: If True, also skip one NUL byte directly following the
        string when one is present.
    :return: ``(string, rest)`` -- the decoded unicode string and the
        unconsumed remainder of `bytes`.
    """
    result = []
    slen, pos = read_utf8_char(bytes, 0)
    for _ in xrange(ord(slen)):
        char, pos = read_utf8_char(bytes, pos)
        result.append(char)
    # Compare a one-byte slice rather than indexing: when the string ends
    # exactly at the end of the buffer, bytes[pos] would raise IndexError,
    # while the slice is simply empty and no padding is skipped.
    if zpad and bytes[pos:pos + 1] == '\000':
        pos += 1
    return u''.join(result), bytes[pos:]
class UnBinary(object): class UnBinary(object):
AMPERSAND_RE = re.compile( AMPERSAND_RE = re.compile(
@ -519,7 +525,7 @@ class LitReader(object):
raise LitError('Directory entry had 64bit name length.') raise LitError('Directory entry had 64bit name length.')
if namelen > remaining - 3: if namelen > remaining - 3:
raise LitError('Read past end of directory chunk') raise LitError('Read past end of directory chunk')
name, chunk = chunk[:namelen], chunk[namelen:] name, chunk = chunk[:namelen].decode('utf-8'), chunk[namelen:]
section, chunk, remaining = encint(chunk, remaining) section, chunk, remaining = encint(chunk, remaining)
offset, chunk, remaining = encint(chunk, remaining) offset, chunk, remaining = encint(chunk, remaining)
size, chunk, remaining = encint(chunk, remaining) size, chunk, remaining = encint(chunk, remaining)
@ -564,12 +570,10 @@ class LitReader(object):
if len(raw) < 5: if len(raw) < 5:
raise LitError('Truncated manifest') raise LitError('Truncated manifest')
offset, raw = u32(raw), raw[4:] offset, raw = u32(raw), raw[4:]
slen, raw = consume_utf8_length(raw) internal, raw = consume_sized_utf8_string(raw)
internal, raw = raw[:slen].decode('utf8'), raw[slen:] original, raw = consume_sized_utf8_string(raw)
slen, raw = consume_utf8_length(raw) # Is this last one UTF-8 or ASCIIZ?
original, raw = raw[:slen].decode('utf8'), raw[slen:] mime_type, raw = consume_sized_utf8_string(raw, zpad=True)
slen, raw = consume_utf8_length(raw)
mime_type, raw = raw[:slen].decode('utf8'), raw[slen+1:]
self.manifest[internal] = ManifestItem( self.manifest[internal] = ManifestItem(
original, internal, mime_type, offset, root, state) original, internal, mime_type, offset, root, state)
# Remove any common path elements # Remove any common path elements

View File

@ -65,7 +65,7 @@ class Resource(object):
else: else:
pc = url[2] pc = url[2]
if isinstance(pc, unicode): if isinstance(pc, unicode):
pc.encode('utf-8') pc = pc.encode('utf-8')
pc = unquote(pc).decode('utf-8') pc = unquote(pc).decode('utf-8')
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep))) self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
self.fragment = unquote(url[-1]) self.fragment = unquote(url[-1])