Fix #913 (lit2lrf conversion error)

2025-07-09 03:04:10 -04:00 · 2008-07-25 21:56:46 -07:00 · 2008-07-25 21:56:46 -07:00 · 80c665544c
commit 80c665544c
parent 4f4af05fa2 41e7844b8b
2 changed files with 15 additions and 11 deletions
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@@ -93,9 +93,15 @@ def read_utf8_char(bytes, pos):
            c = (c << 6) | (b & 0x3F)
    return unichr(c), pos+elsize
-def consume_utf8_length(bytes):
+def consume_sized_utf8_string(bytes, zpad=False):
-    char, elsize = read_utf8_char(bytes, 0)
+    result = []
-    return ord(char), bytes[elsize:]
+    slen, pos = read_utf8_char(bytes, 0)
+    for i in xrange(ord(slen)):
+        char, pos = read_utf8_char(bytes, pos)
+        result.append(char)
+    if zpad and bytes[pos] == '\000':
+        pos += 1
+    return u''.join(result), bytes[pos:]
 class UnBinary(object):
    AMPERSAND_RE = re.compile(
@@ -519,7 +525,7 @@ class LitReader(object):
                    raise LitError('Directory entry had 64bit name length.')
                if namelen > remaining - 3:
                    raise LitError('Read past end of directory chunk')
-                name, chunk = chunk[:namelen], chunk[namelen:]
+                name, chunk = chunk[:namelen].decode('utf-8'), chunk[namelen:]
                section, chunk, remaining = encint(chunk, remaining)
                offset, chunk, remaining = encint(chunk, remaining)
                size, chunk, remaining = encint(chunk, remaining)
@@ -564,12 +570,10 @@ class LitReader(object):
                    if len(raw) < 5:
                        raise LitError('Truncated manifest')
                    offset, raw = u32(raw), raw[4:]
-                    slen, raw = consume_utf8_length(raw)
+                    internal, raw = consume_sized_utf8_string(raw)
-                    internal, raw = raw[:slen].decode('utf8'), raw[slen:]
+                    original, raw = consume_sized_utf8_string(raw)
-                    slen, raw = consume_utf8_length(raw)
+                    # Is this last one UTF-8 or ASCIIZ?
-                    original, raw = raw[:slen].decode('utf8'), raw[slen:]
+                    mime_type, raw = consume_sized_utf8_string(raw, zpad=True)
-                    slen, raw = consume_utf8_length(raw)
-                    mime_type, raw = raw[:slen].decode('utf8'), raw[slen+1:]
                    self.manifest[internal] = ManifestItem(
                        original, internal, mime_type, offset, root, state)
        # Remove any common path elements
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -65,7 +65,7 @@ class Resource(object):
            else:
                pc = url[2]
                if isinstance(pc, unicode):
-                    pc.encode('utf-8')
+                    pc = pc.encode('utf-8')
                pc = unquote(pc).decode('utf-8')
                self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
                self.fragment = unquote(url[-1])