Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

Commit 76b3759947: Re-integrate changes from home
@@ -1,5 +1,2 @@
-import calibre.ebooks.maps.opf as opf
-import calibre.ebooks.maps.html as html
-
-OPF_MAP = opf.MAP
-HTML_MAP = html.MAP
+from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP
+from calibre.ebooks.lit.maps.html import MAP as HTML_MAP
(File diff suppressed because it is too large.)
@@ -1,28 +1,3 @@
-ATTRS = {
-    0x0001 => "href",
-    0x0002 => "%never-used",
-    0x0003 => "%guid",
-    0x0004 => "%minimum_level",
-    0x0005 => "%attr5",
-    0x0006 => "id",
-    0x0007 => "href",
-    0x0008 => "media-type",
-    0x0009 => "fallback",
-    0x000A => "idref",
-    0x000B => "xmlns:dc",
-    0x000C => "xmlns:oebpackage",
-    0x000D => "role",
-    0x000E => "file-as",
-    0x000F => "event",
-    0x0010 => "scheme",
-    0x0011 => "title",
-    0x0012 => "type",
-    0x0013 => "unique-identifier",
-    0x0014 => "name",
-    0x0015 => "content",
-    0x0016 => "xml:lang",
-}
-
 TAGS = [
     None,
     "package",
@@ -69,6 +44,31 @@ TAGS = [
     None,
     ]
 
-TAGS_ATTR = [{} for i in xrange(43)]
-
-MAP = (TAGS, TAGS_ATTRS, ATTRS0)
+ATTRS = {
+    0x0001: "href",
+    0x0002: "%never-used",
+    0x0003: "%guid",
+    0x0004: "%minimum_level",
+    0x0005: "%attr5",
+    0x0006: "id",
+    0x0007: "href",
+    0x0008: "media-type",
+    0x0009: "fallback",
+    0x000A: "idref",
+    0x000B: "xmlns:dc",
+    0x000C: "xmlns:oebpackage",
+    0x000D: "role",
+    0x000E: "file-as",
+    0x000F: "event",
+    0x0010: "scheme",
+    0x0011: "title",
+    0x0012: "type",
+    0x0013: "unique-identifier",
+    0x0014: "name",
+    0x0015: "content",
+    0x0016: "xml:lang",
+    }
+
+TAGS_ATTRS = [{} for i in xrange(43)]
+
+MAP = (TAGS, ATTRS, TAGS_ATTRS)
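The rewritten map module ends by packing the three lookup tables into a single tuple, MAP = (TAGS, ATTRS, TAGS_ATTRS), which the LIT reader later unpacks as tag map, attribute map and per-tag attribute map (see the UnBinary hunk further down). A minimal sketch, with made-up table contents, of how such a tuple is consumed:

# Illustrative only: tiny stand-in tables, not the real calibre maps.
TAGS = [None, "package", "metadata"]
ATTRS = {0x0001: "href", 0x0016: "xml:lang"}
TAGS_ATTRS = [{} for i in range(len(TAGS))]
MAP = (TAGS, ATTRS, TAGS_ATTRS)

def decode_token(map, tag_code, attr_code):
    # Same unpacking order that UnBinary.__init__ uses after this commit.
    tag_map, attr_map, tag_to_attr_map = map
    tag = tag_map[tag_code]
    # A per-tag attribute table takes precedence over the global one.
    attr = tag_to_attr_map[tag_code].get(attr_code, attr_map.get(attr_code))
    return tag, attr

assert decode_token(MAP, 1, 0x0001) == ("package", "href")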
src/calibre/ebooks/lit/mssha1.py (new file, 343 lines)
@@ -0,0 +1,343 @@
#!/usr/bin/env python
# -*- coding: iso-8859-1

"""A sample implementation of SHA-1 in pure Python.

Framework adapted from Dinu Gherman's MD5 implementation by
J. Hallén and L. Creighton. SHA-1 implementation based directly on
the text of the NIST standard FIPS PUB 180-1.
"""

__date__ = '2004-11-17'
__version__ = 0.91 # Modernised by J. Hallén and L. Creighton for Pypy

import struct, copy

# ======================================================================
# Bit-Manipulation helpers
#
#   _long2bytes() was contributed by Barry Warsaw
#   and is reused here with tiny modifications.
# ======================================================================

def _long2bytesBigEndian(n, blocksize=0):
    """Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front
    of the byte string with binary zeros so that the length is a multiple
    of blocksize.
    """

    # After much testing, this algorithm was deemed to be the fastest.
    s = ''
    pack = struct.pack
    while n > 0:
        s = pack('>I', n & 0xffffffffL) + s
        n = n >> 32

    # Strip off leading zeros.
    for i in range(len(s)):
        if s[i] != '\000':
            break
    else:
        # Only happens when n == 0.
        s = '\000'
        i = 0

    s = s[i:]

    # Add back some pad bytes. This could be done more efficiently
    # w.r.t. the de-padding being done above, but sigh...
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * '\000' + s

    return s


def _bytelist2longBigEndian(list):
    "Transform a list of characters into a list of longs."

    imax = len(list)/4
    hl = [0L] * imax

    j = 0
    i = 0
    while i < imax:
        b0 = long(ord(list[j])) << 24
        b1 = long(ord(list[j+1])) << 16
        b2 = long(ord(list[j+2])) << 8
        b3 = long(ord(list[j+3]))
        hl[i] = b0 | b1 | b2 | b3
        i = i+1
        j = j+4

    return hl


def _rotateLeft(x, n):
    "Rotate x (32 bit) left n bits circularly."

    return (x << n) | (x >> (32-n))


# ======================================================================
# The SHA transformation functions
#
# ======================================================================

def f0_19(B, C, D):
    return (B & (C ^ D)) ^ D

def f20_39(B, C, D):
    return B ^ C ^ D

def f40_59(B, C, D):
    return ((B | C) & D) | (B & C)

def f60_79(B, C, D):
    return B ^ C ^ D

def f6_42(B, C, D):
    return (B + C) ^ C

f = [f0_19]*20 + [f20_39]*20 + [f40_59]*20 + [f60_79]*20
f[3] = f20_39
f[6] = f6_42
f[10] = f20_39
f[15] = f20_39
f[26] = f0_19
f[31] = f40_59
f[42] = f6_42
f[51] = f20_39
f[68] = f0_19


# Constants to be used
K = [
    0x5A827999L, # ( 0 <= t <= 19)
    0x6ED9EBA1L, # (20 <= t <= 39)
    0x8F1BBCDCL, # (40 <= t <= 59)
    0xCA62C1D6L  # (60 <= t <= 79)
    ]

class sha:
    "An implementation of the MD5 hash function in pure Python."

    def __init__(self):
        "Initialisation."

        # Initial message length in bits(!).
        self.length = 0L
        self.count = [0, 0]

        # Initial empty message as a sequence of bytes (8 bit characters).
        self.input = []

        # Call a separate init function, that can be used repeatedly
        # to start from scratch on the same object.
        self.init()

    def init(self):
        "Initialize the message-digest and set all fields to zero."

        self.length = 0L
        self.input = []

        # Initial 160 bit message digest (5 times 32 bit).
        self.H0 = 0x32107654L
        self.H1 = 0x23016745L
        self.H2 = 0xC4E680A2L
        self.H3 = 0xDC679823L
        self.H4 = 0xD0857A34L

    def _transform(self, W):
        for t in range(16, 80):
            W.append(_rotateLeft(
                W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1) & 0xffffffffL)

        A = self.H0
        B = self.H1
        C = self.H2
        D = self.H3
        E = self.H4

        for t in xrange(0, 80):
            TEMP = _rotateLeft(A, 5) + f[t](B, C, D) + E + W[t] + K[t/20]
            E = D
            D = C
            C = _rotateLeft(B, 30) & 0xffffffffL
            B = A
            A = TEMP & 0xffffffffL

        self.H0 = (self.H0 + A) & 0xffffffffL
        self.H1 = (self.H1 + B) & 0xffffffffL
        self.H2 = (self.H2 + C) & 0xffffffffL
        self.H3 = (self.H3 + D) & 0xffffffffL
        self.H4 = (self.H4 + E) & 0xffffffffL


    # Down from here all methods follow the Python Standard Library
    # API of the sha module.

    def update(self, inBuf):
        """Add to the current message.

        Update the sha object with the string arg. Repeated calls
        are equivalent to a single call with the concatenation of all
        the arguments, i.e. s.update(a); s.update(b) is equivalent
        to s.update(a+b).

        The hash is immediately calculated for all full blocks. The final
        calculation is made in digest(). It will calculate 1-2 blocks,
        depending on how much padding we have to add. This allows us to
        keep an intermediate value for the hash, so that we only need to
        make minimal recalculation if we call update() to add more data
        to the hashed string.
        """

        leninBuf = long(len(inBuf))

        # Compute number of bytes mod 64.
        index = (self.count[1] >> 3) & 0x3FL

        # Update number of bits.
        self.count[1] = self.count[1] + (leninBuf << 3)
        if self.count[1] < (leninBuf << 3):
            self.count[0] = self.count[0] + 1
        self.count[0] = self.count[0] + (leninBuf >> 29)

        partLen = 64 - index

        if leninBuf >= partLen:
            self.input[index:] = list(inBuf[:partLen])
            self._transform(_bytelist2longBigEndian(self.input))
            i = partLen
            while i + 63 < leninBuf:
                self._transform(_bytelist2longBigEndian(list(inBuf[i:i+64])))
                i = i + 64
            else:
                self.input = list(inBuf[i:leninBuf])
        else:
            i = 0
            self.input = self.input + list(inBuf)


    def digest(self):
        """Terminate the message-digest computation and return digest.

        Return the digest of the strings passed to the update()
        method so far. This is a 16-byte string which may contain
        non-ASCII characters, including null bytes.
        """

        H0 = self.H0
        H1 = self.H1
        H2 = self.H2
        H3 = self.H3
        H4 = self.H4
        input = [] + self.input
        count = [] + self.count

        index = (self.count[1] >> 3) & 0x3fL

        if index < 56:
            padLen = 56 - index
        else:
            padLen = 120 - index

        padding = ['\200'] + ['\000'] * 63
        self.update(padding[:padLen])

        # Append length (before padding).
        bits = _bytelist2longBigEndian(self.input[:56]) + count

        self._transform(bits)

        # Store state in digest.
        digest = _long2bytesBigEndian(self.H0, 4) + \
                 _long2bytesBigEndian(self.H1, 4) + \
                 _long2bytesBigEndian(self.H2, 4) + \
                 _long2bytesBigEndian(self.H3, 4) + \
                 _long2bytesBigEndian(self.H4, 4)

        self.H0 = H0
        self.H1 = H1
        self.H2 = H2
        self.H3 = H3
        self.H4 = H4
        self.input = input
        self.count = count

        return digest


    def hexdigest(self):
        """Terminate and return digest in HEX form.

        Like digest() except the digest is returned as a string of
        length 32, containing only hexadecimal digits. This may be
        used to exchange the value safely in email or other non-
        binary environments.
        """
        return ''.join(['%02x' % ord(c) for c in self.digest()])

    def copy(self):
        """Return a clone object.

        Return a copy ('clone') of the md5 object. This can be used
        to efficiently compute the digests of strings that share
        a common initial substring.
        """

        return copy.deepcopy(self)


# ======================================================================
# Mimic Python top-level functions from standard library API
# for consistency with the md5 module of the standard library.
# ======================================================================

# These are mandatory variables in the module. They have constant values
# in the SHA standard.

digest_size = digestsize = 20
blocksize = 1

def new(arg=None):
    """Return a new sha crypto object.

    If arg is present, the method call update(arg) is made.
    """

    crypto = sha()
    if arg:
        crypto.update(arg)

    return crypto


if __name__ == '__main__':
    def main():
        import sys
        file = None
        if len(sys.argv) > 2:
            print "usage: %s [FILE]" % sys.argv[0]
            return
        elif len(sys.argv) < 2:
            file = sys.stdin
        else:
            file = open(sys.argv[1], 'rb')
        context = new()
        data = file.read(16384)
        while data:
            context.update(data)
            data = file.read(16384)
        file.close()
        digest = context.hexdigest().upper()
        for i in xrange(0, 40, 8):
            print digest[i:i+8],
        print
    main()
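mssha1 deliberately mirrors the old sha/md5 module interface (new, update, digest, hexdigest, copy), so callers can treat it like a standard hashing object; the scrambled initial state and the patched round-function table are what distinguish it from a stock SHA-1. A short usage sketch (Python 2, matching the module; the input strings are arbitrary):

from calibre.ebooks.lit import mssha1

h = mssha1.new('hello ')      # optional initial data, as with sha.new()
h.update('world')             # repeated update() calls concatenate
print h.hexdigest()           # 40 hex digits of the modified SHA-1

h2 = h.copy()                 # clone the running state
h2.update('!')                # diverges without affecting h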
@@ -5,6 +5,7 @@ Support for reading the metadata from a lit file.
 '''
 
 import sys, struct, cStringIO, os
+import functools
 from itertools import repeat
 
 from calibre import relpath
@@ -13,6 +14,31 @@ from calibre.ebooks.metadata.opf import OPFReader
 from calibre.ebooks.lit import LitError
 from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
 
+OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE package
+  PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
+  "http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
+"""
+XHTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE html PUBLIC
+  "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN"
+  "http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
+"""
+
+class DirectoryEntry(object):
+    def __init__(self, name, section, offset, size):
+        self.name = name
+        self.section = section
+        self.offset = offset
+        self.size = size
+
+    def __repr__(self):
+        return "<DirectoryEntry name='%s' section='%d' offset='%d' size='%d'>" \
+            % (self.name, self.section, self.offset, self.size)
+
+    def __str__(self):
+        return repr(self)
+
 def u32(bytes):
     return struct.unpack('<L', bytes[:4])[0]
 
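Everything below reads fixed-width little-endian fields through tiny helpers in the style of u32 above; the later hunks also call u16 and int32, whose definitions fall outside the hunks shown here, so the versions below are illustrative stand-ins only:

import struct

def u32(bytes):
    return struct.unpack('<L', bytes[:4])[0]   # as in the hunk above

def u16(bytes):                                # assumed shape, not shown in the diff
    return struct.unpack('<H', bytes[:2])[0]

def int32(bytes):                              # assumed shape, not shown in the diff
    return struct.unpack('<l', bytes[:4])[0]

raw = struct.pack('<LHl', 0xDEADBEEF, 42, -7)
assert u32(raw) == 0xDEADBEEF
assert u16(raw[4:]) == 42
assert int32(raw[6:]) == -7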
@@ -67,7 +93,7 @@ XML_ENTITIES = ['&amp;', '&apos;', '&lt;', '&gt;', '&quot;']
 class UnBinary(object):
     def __init__(self, bin, manifest, map=OPF_MAP):
         self.manifest = manifest
-        self.attr_map, self.tag_map, self.tag_to_attr_map = map
+        self.tag_map, self.attr_map, self.tag_to_attr_map = map
         self.opf = map is OPF_MAP
         self.bin = bin
         self.buf = cStringIO.StringIO()
@@ -104,7 +130,7 @@ class UnBinary(object):
     def binary_to_text(self, base=0, depth=0):
         tag_name = current_map = None
         dynamic_tag = errors = 0
-        in_censorship = False
+        in_censorship = is_goingdown = False
         state = 'text'
         index = base
         flags = 0
@@ -136,7 +162,7 @@ class UnBinary(object):
                     tag = oc
                     self.buf.write('<')
                     if not (flags & FLAG_CLOSING):
-                        is_goingdown = 1
+                        is_goingdown = True
                     if tag == 0x8000:
                         state = 'get custom length'
                         continue
@@ -167,7 +193,7 @@ class UnBinary(object):
                 else:
                     self.buf.write('>')
                     index = self.binary_to_text(base=index, depth=depth+1)
-                    is_goingdown = 0
+                    is_goingdown = False
                 if not tag_name:
                     raise LitError('Tag ends before it begins.')
                 self.buf.write('</'+tag_name+'>')
@@ -222,7 +248,7 @@ class UnBinary(object):
                 if not in_censorship:
                     self.buf.write(c)
                 count -= 1
-            elif count == 0:
+            if count == 0:
                 if not in_censorship:
                     self.buf.write('"')
                 in_censorship = False
@@ -268,7 +294,7 @@ class UnBinary(object):
                 href += c
                 count -= 1
                 if count == 0:
-                    doc, m, frag = href.partition('#')
+                    doc, m, frag = href[1:].partition('#')
                     path = self.item_path(doc)
                     if m and frag:
                         path += m + frag
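The only behavioural change in the last hunk is that the first character of the collected href is now dropped before the fragment is split off; str.partition keeps the separator, so the fragment can be reattached verbatim when both it and the separator are present. A small illustration with a hypothetical collected value:

# Hypothetical href as collected by the state machine, with a leading
# delimiter character that the new code strips via href[1:].
href = '"chapter1.html#section-2'

doc, m, frag = href[1:].partition('#')
path = doc                     # stands in for self.item_path(doc) in the real code
if m and frag:
    path += m + frag
assert path == 'chapter1.html#section-2'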
@@ -297,100 +323,74 @@ class ManifestItem(object):
     def __repr__(self):
         return self.internal + u'->' + self.path
 
+def preserve(function):
+    def wrapper(self, *args, **kwargs):
+        opos = self._stream.tell()
+        try:
+            return function(self, *args, **kwargs)
+        finally:
+            self._stream.seek(opos)
+    functools.update_wrapper(wrapper, function)
+    return wrapper
+
 class LitFile(object):
     PIECE_SIZE = 16
 
     def magic():
+        @preserve
         def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(0)
-                val = self._stream.read(8)
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(0)
+            return self._stream.read(8)
         return property(fget=fget)
     magic = magic()
 
     def version():
         def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(8)
-                val = u32(self._stream.read(4))
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(8)
+            return u32(self._stream.read(4))
         return property(fget=fget)
     version = version()
 
     def hdr_len():
+        @preserve
         def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(12)
-                val = int32(self._stream.read(4))
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(12)
+            return int32(self._stream.read(4))
         return property(fget=fget)
     hdr_len = hdr_len()
 
     def num_pieces():
+        @preserve
        def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(16)
-                val = int32(self._stream.read(4))
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(16)
+            return int32(self._stream.read(4))
         return property(fget=fget)
     num_pieces = num_pieces()
 
     def sec_hdr_len():
+        @preserve
         def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(20)
-                val = int32(self._stream.read(4))
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(20)
+            return int32(self._stream.read(4))
         return property(fget=fget)
     sec_hdr_len = sec_hdr_len()
 
     def guid():
+        @preserve
         def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(24)
-                val = self._stream.read(16)
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(24)
+            return self._stream.read(16)
         return property(fget=fget)
     guid = guid()
 
     def header():
+        @preserve
         def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                size = self.hdr_len \
-                    + (self.num_pieces * self.PIECE_SIZE) \
-                    + self.sec_hdr_len
-                self._stream.seek(0)
-                val = self._stream.read(size)
-            finally:
-                self._stream.seek(opos)
-            return val
+            size = self.hdr_len \
+                + (self.num_pieces * self.PIECE_SIZE) \
+                + self.sec_hdr_len
+            self._stream.seek(0)
+            return self._stream.read(size)
         return property(fget=fget)
     header = header()
 
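All of the removed tell/try/finally/seek boilerplate is what the new preserve decorator factors out: it records the stream position before the wrapped method runs and restores it afterwards, no matter how the method exits. A standalone sketch of the same pattern; the Reader class and its data are invented for illustration:

import functools
from io import BytesIO

def preserve(function):
    def wrapper(self, *args, **kwargs):
        opos = self._stream.tell()
        try:
            return function(self, *args, **kwargs)
        finally:
            self._stream.seek(opos)          # restored even on exceptions
    functools.update_wrapper(wrapper, function)
    return wrapper

class Reader(object):                        # illustrative, not calibre code
    def __init__(self, data):
        self._stream = BytesIO(data)

    @preserve
    def magic(self):
        self._stream.seek(0)
        return self._stream.read(8)

r = Reader(b'LITMAGICrest-of-file')          # placeholder bytes
r._stream.seek(5)                            # pretend we were mid-read
assert r.magic() == b'LITMAGIC'
assert r._stream.tell() == 5                 # position untouched by the getter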
@@ -402,70 +402,64 @@ class LitFile(object):
             raise LitError('Unknown LIT version %d'%(self.version,))
         self.read_secondary_header()
         self.read_header_pieces()
 
-    def read_secondary_header(self):
-        opos = self._stream.tell()
-        try:
-            self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE)
-            bytes = self._stream.read(self.sec_hdr_len)
-            offset = int32(bytes[4:])
-            while offset < len(bytes):
-                blocktype = bytes[offset:offset+4]
-                blockver = u32(bytes[offset+4:])
-                if blocktype == 'CAOL':
-                    if blockver != 2:
-                        raise LitError(
-                            'Unknown CAOL block format %d' % blockver)
-                    self.creator_id = u32(bytes[offset+12:])
-                    self.entry_chunklen = u32(bytes[offset+20:])
-                    self.count_chunklen = u32(bytes[offset+24:])
-                    self.entry_unknown = u32(bytes[offset+28:])
-                    self.count_unknown = u32(bytes[offset+32:])
-                    offset += 48
-                elif blocktype == 'ITSF':
-                    if blockver != 4:
-                        raise LitError(
-                            'Unknown ITSF block format %d' % blockver)
-                    if u32(bytes[offset+4+16:]):
-                        raise LitError('This file has a 64bit content offset')
-                    self.content_offset = u32(bytes[offset+16:])
-                    self.timestamp = u32(bytes[offset+24:])
-                    self.language_id = u32(bytes[offset+28:])
-                    offset += 48
-            if not hasattr(self, 'content_offset'):
-                raise LitError('Could not figure out the content offset')
-        finally:
-            self._stream.seek(opos)
-
+    @preserve
+    def read_secondary_header(self):
+        self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE)
+        bytes = self._stream.read(self.sec_hdr_len)
+        offset = int32(bytes[4:])
+        while offset < len(bytes):
+            blocktype = bytes[offset:offset+4]
+            blockver = u32(bytes[offset+4:])
+            if blocktype == 'CAOL':
+                if blockver != 2:
+                    raise LitError(
+                        'Unknown CAOL block format %d' % blockver)
+                self.creator_id = u32(bytes[offset+12:])
+                self.entry_chunklen = u32(bytes[offset+20:])
+                self.count_chunklen = u32(bytes[offset+24:])
+                self.entry_unknown = u32(bytes[offset+28:])
+                self.count_unknown = u32(bytes[offset+32:])
+                offset += 48
+            elif blocktype == 'ITSF':
+                if blockver != 4:
+                    raise LitError(
+                        'Unknown ITSF block format %d' % blockver)
+                if u32(bytes[offset+4+16:]):
+                    raise LitError('This file has a 64bit content offset')
+                self.content_offset = u32(bytes[offset+16:])
+                self.timestamp = u32(bytes[offset+24:])
+                self.language_id = u32(bytes[offset+28:])
+                offset += 48
+        if not hasattr(self, 'content_offset'):
+            raise LitError('Could not figure out the content offset')
+
+    @preserve
     def read_header_pieces(self):
-        opos = self._stream.tell()
-        try:
-            src = self.header[self.hdr_len:]
-            for i in range(self.num_pieces):
-                piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE]
-                if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
-                    raise LitError('Piece %s has 64bit value' % repr(piece))
-                offset, size = u32(piece), int32(piece[8:])
-                self._stream.seek(offset)
-                piece = self._stream.read(size)
-                if i == 0:
-                    continue # Dont need this piece
-                elif i == 1:
-                    if u32(piece[8:]) != self.entry_chunklen or \
-                       u32(piece[12:]) != self.entry_unknown:
-                        raise LitError('Secondary header does not match piece')
-                    self.read_directory(piece)
-                elif i == 2:
-                    if u32(piece[8:]) != self.count_chunklen or \
-                       u32(piece[12:]) != self.count_unknown:
-                        raise LitError('Secondary header does not match piece')
-                    continue # No data needed from this piece
-                elif i == 3:
-                    self.piece3_guid = piece
-                elif i == 4:
-                    self.piece4_guid = piece
-        finally:
-            self._stream.seek(opos)
+        src = self.header[self.hdr_len:]
+        for i in range(self.num_pieces):
+            piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE]
+            if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
+                raise LitError('Piece %s has 64bit value' % repr(piece))
+            offset, size = u32(piece), int32(piece[8:])
+            self._stream.seek(offset)
+            piece = self._stream.read(size)
+            if i == 0:
+                continue # Dont need this piece
+            elif i == 1:
+                if u32(piece[8:]) != self.entry_chunklen or \
+                   u32(piece[12:]) != self.entry_unknown:
+                    raise LitError('Secondary header does not match piece')
+                self.read_directory(piece)
+            elif i == 2:
+                if u32(piece[8:]) != self.count_chunklen or \
+                   u32(piece[12:]) != self.count_unknown:
+                    raise LitError('Secondary header does not match piece')
+                continue # No data needed from this piece
+            elif i == 3:
+                self.piece3_guid = piece
+            elif i == 4:
+                self.piece4_guid = piece
 
     def read_directory(self, piece):
         self.entries = []
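Both record types handled by read_secondary_header are fixed 48-byte blocks: a 4-byte type tag, a 4-byte version, then fields at small fixed offsets. A toy scan over a synthetic buffer, using the same u32-style access; the field values are made up and only the offsets mirror the code above:

import struct

def u32(data, off=0):
    return struct.unpack_from('<L', data, off)[0]

block = bytearray(48)                     # synthetic ITSF-like block
block[0:4] = b'ITSF'
struct.pack_into('<L', block, 4, 4)       # block version
struct.pack_into('<L', block, 16, 0x58)   # content offset (made up)
struct.pack_into('<L', block, 28, 0x409)  # language id (made up)

buf = bytes(block)
offset = 0
while offset < len(buf):
    blocktype = buf[offset:offset+4]
    blockver = u32(buf, offset+4)
    if blocktype == b'ITSF' and blockver == 4:
        content_offset = u32(buf, offset+16)
        language_id = u32(buf, offset+28)
    offset += 48

assert content_offset == 0x58 and language_id == 0x409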
@@ -521,108 +515,88 @@ class LitFile(object):
 
         if not hasattr(self, 'manifest'):
             raise LitError('Lit file does not have a valid manifest')
 
+    @preserve
     def read_section_names(self, entry):
-        opos = self._stream.tell()
-        try:
-            self._stream.seek(self.content_offset + entry.offset)
-            raw = self._stream.read(entry.size)
-            if len(raw) < 4:
-                raise LitError('Invalid Namelist section')
-            pos = 4
-            self.num_sections = u16(raw[2:pos])
-
-            self.sections = {}
-            for section in range(self.num_sections):
-                size = u16(raw[pos:pos+2])
-                pos += 2
-                size = size*2 + 2
-                if pos + size > len(raw):
-                    raise LitError('Invalid Namelist section')
-                self.sections[section] = raw[pos:pos+size].decode('utf-16-le')
-                pos += size
-        finally:
-            self._stream.seek(opos)
+        self._stream.seek(self.content_offset + entry.offset)
+        raw = self._stream.read(entry.size)
+        if len(raw) < 4:
+            raise LitError('Invalid Namelist section')
+        pos = 4
+        self.num_sections = u16(raw[2:pos])
+
+        self.sections = {}
+        for section in range(self.num_sections):
+            size = u16(raw[pos:pos+2])
+            pos += 2
+            size = size*2 + 2
+            if pos + size > len(raw):
+                raise LitError('Invalid Namelist section')
+            self.sections[section] = raw[pos:pos+size].decode('utf-16-le')
+            pos += size
 
+    @preserve
     def read_manifest(self, entry):
-        opos = self._stream.tell()
-        try:
-            self.manifest = []
-            self._stream.seek(self.content_offset + entry.offset)
-            raw = self._stream.read(entry.size)
-            pos = 0
-            while pos < len(raw):
-                size = ord(raw[pos])
-                if size == 0: break
-                pos += 1
-                root = raw[pos:pos+size].decode('utf8')
-                pos += size
-                if pos >= len(raw):
-                    raise LitError('Truncated manifest.')
-                for state in ['spine', 'not spine', 'css', 'images']:
-                    num_files = int32(raw[pos:pos+4])
-                    pos += 4
-                    if num_files == 0: continue
-
-                    i = 0
-                    while i < num_files:
-                        if pos+5 >= len(raw):
-                            raise LitError('Truncated manifest.')
-                        offset = u32(raw[pos:pos+4])
-                        pos += 4
-
-                        slen = ord(raw[pos])
-                        pos += 1
-                        internal = raw[pos:pos+slen].decode('utf8')
-                        pos += slen
-
-                        slen = ord(raw[pos])
-                        pos += 1
-                        original = raw[pos:pos+slen].decode('utf8')
-                        pos += slen
-
-                        slen = ord(raw[pos])
-                        pos += 1
-                        mime_type = raw[pos:pos+slen].decode('utf8')
-                        pos += slen + 1
-
-                        self.manifest.append(
-                            ManifestItem(original, internal, mime_type,
-                                         offset, root, state))
-                        i += 1
-        finally:
-            self._stream.seek(opos)
+        self.manifest = []
+        self._stream.seek(self.content_offset + entry.offset)
+        raw = self._stream.read(entry.size)
+        pos = 0
+        while pos < len(raw):
+            size = ord(raw[pos])
+            if size == 0: break
+            pos += 1
+            root = raw[pos:pos+size].decode('utf8')
+            pos += size
+            if pos >= len(raw):
+                raise LitError('Truncated manifest.')
+            for state in ['spine', 'not spine', 'css', 'images']:
+                num_files = int32(raw[pos:pos+4])
+                pos += 4
+                if num_files == 0: continue
+
+                i = 0
+                while i < num_files:
+                    if pos+5 >= len(raw):
+                        raise LitError('Truncated manifest.')
+                    offset = u32(raw[pos:pos+4])
+                    pos += 4
+
+                    slen = ord(raw[pos])
+                    pos += 1
+                    internal = raw[pos:pos+slen].decode('utf8')
+                    pos += slen
+
+                    slen = ord(raw[pos])
+                    pos += 1
+                    original = raw[pos:pos+slen].decode('utf8')
+                    pos += slen
+
+                    slen = ord(raw[pos])
+                    pos += 1
+                    mime_type = raw[pos:pos+slen].decode('utf8')
+                    pos += slen + 1
+
+                    self.manifest.append(
+                        ManifestItem(original, internal, mime_type,
+                                     offset, root, state))
+                    i += 1
 
+    @preserve
     def read_meta(self, entry):
-        opos = self._stream.tell()
-        try:
-            self._stream.seek(self.content_offset + entry.offset)
-            raw = self._stream.read(entry.size)
-
-            xml = \
-'''\
-<?xml version="1.0" encoding="UTF-8" ?>
-<!DOCTYPE package
-  PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
-  "http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
-'''+\
-            unicode(UnBinary(raw, self.manifest))
-            self.meta = xml
-        finally:
-            self._stream.seek(opos)
+        self._stream.seek(self.content_offset + entry.offset)
+        raw = self._stream.read(entry.size)
+        xml = OPF_DECL + unicode(UnBinary(raw, self.manifest))
+        self.meta = xml
 
+    @preserve
     def read_image(self, internal_name):
         cover_entry = None
         for entry in self.entries:
             if internal_name in entry.name:
                 cover_entry = entry
                 break
-        opos = self._stream.tell()
-        try:
-            self._stream.seek(self.content_offset + cover_entry.offset)
-            return self._stream.read(cover_entry.size)
-        finally:
-            self._stream.seek(opos)
+        self._stream.seek(self.content_offset + cover_entry.offset)
+        return self._stream.read(cover_entry.size)
 
 def get_metadata(stream):
     try:
@@ -237,7 +237,7 @@ class OPF(MetaInformation):
 
     def get_title(self):
         title = self.soup.package.metadata.find('dc:title')
-        if title and title.string:
+        if title:
             return self.ENTITY_PATTERN.sub(entity_to_unicode, title.string).strip()
         return self.default_title.strip()
 