Re-integrate changes from home

2025-08-30 23:00:21 -04:00 · 2008-07-16 10:46:36 -04:00 · 2008-07-16 10:46:36 -04:00 · 76b3759947
commit 76b3759947
parent acac7bb0d1 615d5ea279
6 changed files with 1353 additions and 1039 deletions
--- a/src/calibre/ebooks/lit/maps/init.py
+++ b/src/calibre/ebooks/lit/maps/init.py
@ -1,5 +1,2 @@
-import calibre.ebooks.maps.opf as opf
-import calibre.ebooks.maps.html as html
-
-OPF_MAP = opf.MAP
-HTML_MAP = html.MAP
+from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP
+from calibre.ebooks.lit.maps.html import MAP as HTML_MAP
--- a/src/calibre/ebooks/lit/maps/html.py
+++ b/src/calibre/ebooks/lit/maps/html.py
--- a/src/calibre/ebooks/lit/maps/opf.py
+++ b/src/calibre/ebooks/lit/maps/opf.py
@ -1,28 +1,3 @@
-ATTRS = {
-    0x0001 => "href",   
-    0x0002 => "%never-used",
-    0x0003 => "%guid",
-    0x0004 => "%minimum_level",
-    0x0005 => "%attr5",
-    0x0006 => "id",
-    0x0007 => "href",
-    0x0008 => "media-type",
-    0x0009 => "fallback",
-    0x000A => "idref",
-    0x000B => "xmlns:dc",
-    0x000C => "xmlns:oebpackage",
-    0x000D => "role",
-    0x000E => "file-as",
-    0x000F => "event",
-    0x0010 => "scheme",
-    0x0011 => "title",
-    0x0012 => "type",
-    0x0013 => "unique-identifier",
-    0x0014 => "name",
-    0x0015 => "content",
-    0x0016 => "xml:lang",
-    }
-
 TAGS = [
    None,
    "package",
@ -69,6 +44,31 @@ TAGS = [
    None,
   ]

-TAGS_ATTR = [{} for i in xrange(43)]
+ATTRS = {
+    0x0001: "href",   
+    0x0002: "%never-used",
+    0x0003: "%guid",
+    0x0004: "%minimum_level",
+    0x0005: "%attr5",
+    0x0006: "id",
+    0x0007: "href",
+    0x0008: "media-type",
+    0x0009: "fallback",
+    0x000A: "idref",
+    0x000B: "xmlns:dc",
+    0x000C: "xmlns:oebpackage",
+    0x000D: "role",
+    0x000E: "file-as",
+    0x000F: "event",
+    0x0010: "scheme",
+    0x0011: "title",
+    0x0012: "type",
+    0x0013: "unique-identifier",
+    0x0014: "name",
+    0x0015: "content",
+    0x0016: "xml:lang",
+    }

-MAP = (TAGS, TAGS_ATTRS, ATTRS0)
+TAGS_ATTRS = [{} for i in xrange(43)]
+
+MAP = (TAGS, ATTRS, TAGS_ATTRS)
--- a/src/calibre/ebooks/lit/mssha1.py
+++ b/src/calibre/ebooks/lit/mssha1.py
@ -0,0 +1,343 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1
+
+"""A sample implementation of SHA-1 in pure Python.
+
+   Framework adapted from Dinu Gherman's MD5 implementation by
+   J. Hallén and L. Creighton. SHA-1 implementation based directly on
+   the text of the NIST standard FIPS PUB 180-1.
+"""
+
+
+__date__    = '2004-11-17'
+__version__ = 0.91 # Modernised by J. Hallén and L. Creighton for Pypy
+
+
+import struct, copy
+
+
+# ======================================================================
+# Bit-Manipulation helpers
+#
+#   _long2bytes() was contributed by Barry Warsaw
+#   and is reused here with tiny modifications.
+# ======================================================================
+
+def _long2bytesBigEndian(n, blocksize=0):
+    """Convert a long integer to a big-endian byte string.
+
+    If optional blocksize is given and greater than zero, pad the front
+    of the byte string with binary zeros so that the length is a multiple
+    of blocksize.
+
+    For n <= 0 the conversion loop does not run and the result is a
+    single zero byte (before any blocksize padding).
+    """
+
+    # After much testing, this algorithm was deemed to be the fastest.
+    s = ''
+    pack = struct.pack
+    # Peel off 32 bits at a time, prepending so the most significant
+    # word ends up first.
+    while n > 0:
+        s = pack('>I', n & 0xffffffffL) + s
+        n = n >> 32
+
+    # Strip off leading zeros.
+    for i in range(len(s)):
+        if s[i] != '\000':
+            break
+    else:
+        # Only happens when n == 0.
+        s = '\000'
+        i = 0
+
+    s = s[i:]
+
+    # Add back some pad bytes. This could be done more efficiently
+    # w.r.t. the de-padding being done above, but sigh...
+    if blocksize > 0 and len(s) % blocksize:
+        s = (blocksize - len(s) % blocksize) * '\000' + s
+
+    return s
+
+
+def _bytelist2longBigEndian(list):
+    """Transform a list of characters into a list of longs.
+
+    Each consecutive group of four characters is combined into one
+    value, first character most significant. Characters left over after
+    the last full group of four are ignored.
+    """
+
+    # Number of complete 4-character groups (Python 2 integer division).
+    imax = len(list)/4
+    hl = [0L] * imax
+
+    j = 0
+    i = 0
+    while i < imax:
+        b0 = long(ord(list[j])) << 24
+        b1 = long(ord(list[j+1])) << 16
+        b2 = long(ord(list[j+2])) << 8
+        b3 = long(ord(list[j+3]))
+        hl[i] = b0 | b1 | b2 | b3
+        i = i+1
+        j = j+4
+
+    return hl
+
+
+def _rotateLeft(x, n):
+    "Rotate x (32 bit) left n bits circularly."
+
+    # The result is not truncated to 32 bits here; sha._transform masks
+    # it with 0xffffffffL, either directly or after summing.
+    return (x << n) | (x >> (32-n))
+
+
+# ======================================================================
+# The SHA transformation functions
+#
+# ======================================================================
+
+# Each function below combines the working values B, C and D for one
+# round of sha._transform.
+def f0_19(B, C, D):
+    return (B & (C ^ D)) ^ D
+
+def f20_39(B, C, D):
+    return B ^ C ^ D
+
+def f40_59(B, C, D):
+    return ((B | C) & D) | (B & C)
+
+def f60_79(B, C, D):
+    return B ^ C ^ D
+
+# Uses addition rather than pure bit operations, so its result can
+# exceed 32 bits; it is only installed for rounds 6 and 42 below.
+def f6_42(B, C, D):
+    return (B + C) ^ C
+
+# Round function table indexed by round number t (0..79); used as
+# f[t](B, C, D) in sha._transform.
+f = [f0_19]*20 + [f20_39]*20 + [f40_59]*20 + [f60_79]*20
+# The assignments below replace the default function for individual
+# rounds, so the table is not uniform within each group of 20.
+f[3] = f20_39
+f[6] = f6_42
+f[10] = f20_39
+f[15] = f20_39
+f[26] = f0_19
+f[31] = f40_59
+f[42] = f6_42
+f[51] = f20_39
+f[68] = f0_19
+
+# Constants to be used
+K = [
+    0x5A827999L, # ( 0 <= t <= 19)
+    0x6ED9EBA1L, # (20 <= t <= 39)
+    0x8F1BBCDCL, # (40 <= t <= 59)
+    0xCA62C1D6L  # (60 <= t <= 79)
+    ]
+
+class sha:
+    """A pure-Python SHA-1 style hash object.
+
+    The block processing follows the text of FIPS PUB 180-1, but the
+    initial chaining values set in init() and several entries of the
+    per-round function table `f` differ from the standard, so digests
+    produced here do not match standard SHA-1.
+    """
+
+    def __init__(self):
+        "Initialisation."
+
+        # Initial message length in bits(!).
+        self.length = 0L
+        # Running bit count of the message; digest() appends both
+        # entries to the final block.
+        self.count = [0, 0]
+
+        # Initial empty message as a sequence of bytes (8 bit characters).
+        self.input = []
+
+        # Call a separate init function, that can be used repeatedly
+        # to start from scratch on the same object.
+        self.init()
+
+
+    def init(self):
+        """Reset the object so that it can hash a new message from scratch.
+
+        Clears the buffered input and the bit counter and restores the
+        initial 160 bit chaining value.
+        """
+
+        self.length = 0L
+        # The bit counter has to be reset as well; otherwise a
+        # re-initialised object would compute its buffer index and the
+        # appended message length from the previous message.
+        self.count = [0, 0]
+        self.input = []
+
+        # Initial 160 bit message digest (5 times 32 bit).
+        self.H0 = 0x32107654L
+        self.H1 = 0x23016745L
+        self.H2 = 0xC4E680A2L
+        self.H3 = 0xDC679823L
+        self.H4 = 0xD0857A34L
+
+    def _transform(self, W):
+        """Mix one block into the chaining state H0..H4.
+
+        W is a list of 16 words; it is extended in place to the 80-word
+        message schedule before the rounds are run.
+        """
+        for t in range(16, 80):
+            W.append(_rotateLeft(
+                W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1) & 0xffffffffL)
+
+        A = self.H0
+        B = self.H1
+        C = self.H2
+        D = self.H3
+        E = self.H4
+
+        for t in xrange(0, 80):
+            # K[t/20] relies on Python 2 integer division to pick the
+            # constant for the current group of 20 rounds.
+            TEMP = _rotateLeft(A, 5) + f[t](B, C, D) + E + W[t] + K[t/20]
+            E = D
+            D = C
+            C = _rotateLeft(B, 30) & 0xffffffffL
+            B = A
+            A = TEMP & 0xffffffffL
+
+        self.H0 = (self.H0 + A) & 0xffffffffL
+        self.H1 = (self.H1 + B) & 0xffffffffL
+        self.H2 = (self.H2 + C) & 0xffffffffL
+        self.H3 = (self.H3 + D) & 0xffffffffL
+        self.H4 = (self.H4 + E) & 0xffffffffL
+
+
+    # Down from here all methods follow the Python Standard Library
+    # API of the sha module.
+
+    def update(self, inBuf):
+        """Add to the current message.
+
+        Update the sha object with the string arg. Repeated calls
+        are equivalent to a single call with the concatenation of all
+        the arguments, i.e. s.update(a); s.update(b) is equivalent
+        to s.update(a+b).
+
+        The hash is immediately calculated for all full blocks. The final
+        calculation is made in digest(). It will calculate 1-2 blocks,
+        depending on how much padding we have to add. This allows us to
+        keep an intermediate value for the hash, so that we only need to
+        make minimal recalculation if we call update() to add more data
+        to the hashed string.
+        """
+
+        leninBuf = long(len(inBuf))
+
+        # Compute number of bytes mod 64.
+        index = (self.count[1] >> 3) & 0x3FL
+
+        # Update number of bits.
+        self.count[1] = self.count[1] + (leninBuf << 3)
+        if self.count[1] < (leninBuf << 3):
+            self.count[0] = self.count[0] + 1
+        self.count[0] = self.count[0] + (leninBuf >> 29)
+
+        # Bytes still missing to complete the currently buffered block.
+        partLen = 64 - index
+
+        if leninBuf >= partLen:
+            # Complete and process the buffered block first.
+            self.input[index:] = list(inBuf[:partLen])
+            self._transform(_bytelist2longBigEndian(self.input))
+            i = partLen
+            # Then process every further full 64-byte block directly.
+            while i + 63 < leninBuf:
+                self._transform(_bytelist2longBigEndian(list(inBuf[i:i+64])))
+                i = i + 64
+            # Keep the unprocessed tail for the next call.
+            self.input = list(inBuf[i:leninBuf])
+        else:
+            self.input = self.input + list(inBuf)
+
+
+    def digest(self):
+        """Terminate the message-digest computation and return digest.
+
+        Return the digest of the strings passed to the update()
+        method so far. This is a 20-byte string which may contain
+        non-ASCII characters, including null bytes.
+
+        The object's state is restored before returning, so more data
+        can still be added with update() afterwards.
+        """
+
+        # Save the running state: the padding below goes through
+        # update() and _transform(), which both modify it.
+        H0 = self.H0
+        H1 = self.H1
+        H2 = self.H2
+        H3 = self.H3
+        H4 = self.H4
+        saved_input = [] + self.input
+        count = [] + self.count
+
+        index = (self.count[1] >> 3) & 0x3fL
+
+        # Pad so that 56 bytes are buffered, leaving room for the two
+        # length words in the final block.
+        if index < 56:
+            padLen = 56 - index
+        else:
+            padLen = 120 - index
+
+        padding = ['\200'] + ['\000'] * 63
+        self.update(padding[:padLen])
+
+        # Append length (before padding).
+        bits = _bytelist2longBigEndian(self.input[:56]) + count
+
+        self._transform(bits)
+
+        # Store state in digest.
+        digest = _long2bytesBigEndian(self.H0, 4) + \
+                 _long2bytesBigEndian(self.H1, 4) + \
+                 _long2bytesBigEndian(self.H2, 4) + \
+                 _long2bytesBigEndian(self.H3, 4) + \
+                 _long2bytesBigEndian(self.H4, 4)
+
+        self.H0 = H0
+        self.H1 = H1
+        self.H2 = H2
+        self.H3 = H3
+        self.H4 = H4
+        self.input = saved_input
+        self.count = count
+
+        return digest
+
+
+    def hexdigest(self):
+        """Terminate and return digest in HEX form.
+
+        Like digest() except the digest is returned as a string of
+        length 40, containing only hexadecimal digits. This may be
+        used to exchange the value safely in email or other non-
+        binary environments.
+        """
+        return ''.join(['%02x' % ord(c) for c in self.digest()])
+
+    def copy(self):
+        """Return a clone object.
+
+        Return a copy ('clone') of the sha object. This can be used
+        to efficiently compute the digests of strings that share
+        a common initial substring.
+        """
+
+        return copy.deepcopy(self)
+
+
+# ======================================================================
+# Mimic Python top-level functions from standard library API
+# for consistency with the md5 module of the standard library.
+# ======================================================================
+
+# These are mandatory variables in the module. They have constant values
+# in the SHA standard.
+
+digest_size = digestsize = 20
+blocksize = 1
+
+def new(arg=None):
+    """Return a new sha crypto object.
+
+    If arg is present, the method call update(arg) is made.
+    """
+
+    crypto = sha()
+    # A falsy arg (None or an empty string) contributes no data, so the
+    # update() call is simply skipped.
+    if arg:
+        crypto.update(arg)
+
+    return crypto
+
+if __name__ == '__main__':
+    def main():
+        """Hash FILE (or stdin when no argument is given) and print it.
+
+        The hex digest is printed upper-case on one line, in groups of
+        eight digits.
+        """
+        import sys
+        file = None
+        if len(sys.argv) > 2:
+            print "usage: %s [FILE]" % sys.argv[0]
+            return
+        elif len(sys.argv) < 2:
+            file = sys.stdin
+        else:
+            file = open(sys.argv[1], 'rb')
+        context = new()
+        # Feed the input in 16 KiB chunks rather than reading it whole.
+        data = file.read(16384)
+        while data:
+            context.update(data)
+            data = file.read(16384)
+        file.close()
+        digest = context.hexdigest().upper()
+        for i in xrange(0, 40, 8):
+            print digest[i:i+8],
+        print
+    main()
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@ -5,6 +5,7 @@ Support for reading the metadata from a lit file.
 '''

 import sys, struct, cStringIO, os
+import functools
 from itertools import repeat

 from calibre import relpath
@ -13,6 +14,31 @@ from calibre.ebooks.metadata.opf import OPFReader
 from calibre.ebooks.lit import LitError
 from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP

+# XML declaration and DOCTYPE prepended to the decoded OPF metadata.
+# The literal must open with exactly three quotes: a fourth one would
+# become a stray '"' character in front of the XML declaration.
+OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE package 
+  PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
+  "http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
+"""
+XHTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE html PUBLIC
+ "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN"
+ "http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
+"""
+
+class DirectoryEntry(object):
+    """Simple record holding the name, section, offset and size of an entry.
+
+    section, offset and size are formatted with %d in __repr__, so they
+    are expected to be integers.
+    """
+    def __init__(self, name, section, offset, size):
+        self.name = name
+        self.section = section
+        self.offset = offset
+        self.size = size
+        
+    def __repr__(self):
+        return "<DirectoryEntry name='%s' section='%d' offset='%d' size='%d'>" \
+            % (self.name, self.section, self.offset, self.size)
+        
+    def __str__(self):
+        # Same text as repr().
+        return repr(self)
+
 def u32(bytes):
    return struct.unpack('<L', bytes[:4])[0]

@ -67,7 +93,7 @@ XML_ENTITIES   = ['&amp;', '&apos;', '&lt;', '&gt;', '&quot;']
 class UnBinary(object):
    def __init__(self, bin, manifest, map=OPF_MAP):
        self.manifest = manifest
-        self.attr_map, self.tag_map, self.tag_to_attr_map = map
+        self.tag_map, self.attr_map, self.tag_to_attr_map = map
        self.opf = map is OPF_MAP
        self.bin = bin
        self.buf = cStringIO.StringIO()
@ -104,7 +130,7 @@ class UnBinary(object):
    def binary_to_text(self, base=0, depth=0):
        tag_name = current_map = None
        dynamic_tag = errors = 0
-        in_censorship = False
+        in_censorship = is_goingdown = False
        state = 'text'
        index =  base
        flags = 0
@ -136,7 +162,7 @@ class UnBinary(object):
                    tag = oc
                    self.buf.write('<')
                    if not (flags & FLAG_CLOSING):
-                        is_goingdown = 1
+                        is_goingdown = True
                    if tag == 0x8000:
                        state = 'get custom length'
                        continue
@ -167,7 +193,7 @@ class UnBinary(object):
                    else:
                        self.buf.write('>')
                        index = self.binary_to_text(base=index, depth=depth+1)
-                        is_goingdown = 0
+                        is_goingdown = False
                        if not tag_name:
                            raise LitError('Tag ends before it begins.')
                        self.buf.write('</'+tag_name+'>')
@ -222,7 +248,7 @@ class UnBinary(object):
                    if not in_censorship:
                        self.buf.write(c)
                    count -= 1
-                elif count == 0:
+                if count == 0:
                    if not in_censorship:
                        self.buf.write('"')
                    in_censorship = False
@ -268,7 +294,7 @@ class UnBinary(object):
                href += c
                count -= 1
                if count == 0:
-                    doc, m, frag = href.partition('#')
+                    doc, m, frag = href[1:].partition('#')
                    path = self.item_path(doc)
                    if m and frag:
                        path += m + frag
@ -297,100 +323,74 @@ class ManifestItem(object):
    def __repr__(self):
        return self.internal + u'->' + self.path 

+def preserve(function):
+    """Decorate a method so that the position of self._stream is restored.
+
+    The wrapper records self._stream.tell() before calling `function`
+    and seeks back to that position afterwards, whether the call returns
+    normally or raises.
+    """
+    def wrapper(self, *args, **kwargs):
+        opos = self._stream.tell()
+        try:
+            return function(self, *args, **kwargs)
+        finally:
+            self._stream.seek(opos)
+    # Copy the wrapped function's name, docstring etc. onto the wrapper.
+    functools.update_wrapper(wrapper, function)
+    return wrapper
+    
 class LitFile(object):
    PIECE_SIZE = 16

    def magic():
+        @preserve
        def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(0)
-                val = self._stream.read(8)
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(0)
+            return self._stream.read(8)
        return property(fget=fget)
    magic = magic()
    
    def version():
+        # Like the other header properties, reading the version must not
+        # move the stream: @preserve restores the position afterwards.
+        @preserve
        def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(8)
-                val = u32(self._stream.read(4))
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(8)
+            return u32(self._stream.read(4))
        return property(fget=fget)
    version = version()
    
    def hdr_len():
+        @preserve
        def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(12)
-                val = int32(self._stream.read(4))
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(12)
+            return int32(self._stream.read(4))
        return property(fget=fget)
    hdr_len = hdr_len()
    
    def num_pieces():
+        @preserve
        def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(16)
-                val = int32(self._stream.read(4))
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(16)
+            return int32(self._stream.read(4))
        return property(fget=fget)
    num_pieces = num_pieces()
    
    def sec_hdr_len():
+        @preserve
        def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(20)
-                val = int32(self._stream.read(4))
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(20)
+            return int32(self._stream.read(4))
        return property(fget=fget)
    sec_hdr_len = sec_hdr_len()
    
    def guid():
+        @preserve
        def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(24)
-                val = self._stream.read(16)
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(24)
+            return self._stream.read(16)
        return property(fget=fget)
    guid = guid()
    
    def header():
+        @preserve
        def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                size = self.hdr_len \
-                    + (self.num_pieces * self.PIECE_SIZE) \
-                    + self.sec_hdr_len
-                self._stream.seek(0)
-                val = self._stream.read(size)
-            finally:
-                self._stream.seek(opos)
-            return val
+            size = self.hdr_len \
+                + (self.num_pieces * self.PIECE_SIZE) \
+                + self.sec_hdr_len
+            self._stream.seek(0)
+            return self._stream.read(size)
        return property(fget=fget)
    header = header()        
    
@ -403,69 +403,63 @@ class LitFile(object):
        self.read_secondary_header()
        self.read_header_pieces()

+    @preserve
    def read_secondary_header(self):
-        opos = self._stream.tell()
-        try:
-            self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE)
-            bytes = self._stream.read(self.sec_hdr_len)
-            offset = int32(bytes[4:])
-            while offset < len(bytes):
-                blocktype = bytes[offset:offset+4]
-                blockver  = u32(bytes[offset+4:])
-                if blocktype == 'CAOL':
-                    if blockver != 2:
-                        raise LitError(
-                            'Unknown CAOL block format %d' % blockver)
-                    self.creator_id     = u32(bytes[offset+12:])
-                    self.entry_chunklen = u32(bytes[offset+20:])
-                    self.count_chunklen = u32(bytes[offset+24:])
-                    self.entry_unknown  = u32(bytes[offset+28:])
-                    self.count_unknown  = u32(bytes[offset+32:])
-                    offset += 48
-                elif blocktype == 'ITSF':
-                    if blockver != 4:
-                        raise LitError(
-                            'Unknown ITSF block format %d' % blockver)
-                    if u32(bytes[offset+4+16:]):
-                        raise LitError('This file has a 64bit content offset')
-                    self.content_offset = u32(bytes[offset+16:])
-                    self.timestamp      = u32(bytes[offset+24:]) 
-                    self.language_id    = u32(bytes[offset+28:])
-                    offset += 48
-            if not hasattr(self, 'content_offset'):
-                raise LitError('Could not figure out the content offset')
-        finally:
-            self._stream.seek(opos)
+        """Parse the secondary header's CAOL and ITSF blocks.
+
+        Sets creator_id, entry_chunklen, count_chunklen, entry_unknown
+        and count_unknown from a CAOL block, and content_offset,
+        timestamp and language_id from an ITSF block.
+
+        Raises LitError for an unsupported block version, a 64bit
+        content offset, or when no content offset was found.
+        """
+        self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE)
+        bytes = self._stream.read(self.sec_hdr_len)
+        offset = int32(bytes[4:])
+        while offset < len(bytes):
+            blocktype = bytes[offset:offset+4]
+            blockver  = u32(bytes[offset+4:])
+            if blocktype == 'CAOL':
+                if blockver != 2:
+                    raise LitError(
+                        'Unknown CAOL block format %d' % blockver)
+                self.creator_id     = u32(bytes[offset+12:])
+                self.entry_chunklen = u32(bytes[offset+20:])
+                self.count_chunklen = u32(bytes[offset+24:])
+                self.entry_unknown  = u32(bytes[offset+28:])
+                self.count_unknown  = u32(bytes[offset+32:])
+                offset += 48
+            elif blocktype == 'ITSF':
+                if blockver != 4:
+                    raise LitError(
+                        'Unknown ITSF block format %d' % blockver)
+                if u32(bytes[offset+4+16:]):
+                    raise LitError('This file has a 64bit content offset')
+                self.content_offset = u32(bytes[offset+16:])
+                self.timestamp      = u32(bytes[offset+24:])
+                self.language_id    = u32(bytes[offset+28:])
+                offset += 48
+            else:
+                # The size of an unrecognised block is unknown, so stop
+                # scanning instead of looping forever on the same offset.
+                break
+        if not hasattr(self, 'content_offset'):
+            raise LitError('Could not figure out the content offset')
    
+    @preserve
    def read_header_pieces(self):
-        opos = self._stream.tell()
-        try:
-            src = self.header[self.hdr_len:]
-            for i in range(self.num_pieces):
-                piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE]
-                if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
-                    raise LitError('Piece %s has 64bit value' % repr(piece))
-                offset, size = u32(piece), int32(piece[8:])
-                self._stream.seek(offset)
-                piece = self._stream.read(size)
-                if i == 0:
-                    continue # Dont need this piece
-                elif i == 1:
-                    if u32(piece[8:])  != self.entry_chunklen or \
-                       u32(piece[12:]) != self.entry_unknown:
-                        raise LitError('Secondary header does not match piece')
-                    self.read_directory(piece)
-                elif i == 2:
-                    if u32(piece[8:])  != self.count_chunklen or \
-                       u32(piece[12:]) != self.count_unknown:
-                        raise LitError('Secondary header does not match piece')
-                    continue # No data needed from this piece
-                elif i == 3:
-                    self.piece3_guid = piece
-                elif i == 4:
-                    self.piece4_guid = piece
-        finally:
-            self._stream.seek(opos)
+        src = self.header[self.hdr_len:]
+        for i in range(self.num_pieces):
+            piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE]
+            if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
+                raise LitError('Piece %s has 64bit value' % repr(piece))
+            offset, size = u32(piece), int32(piece[8:])
+            self._stream.seek(offset)
+            piece = self._stream.read(size)
+            if i == 0:
+                continue # Dont need this piece
+            elif i == 1:
+                if u32(piece[8:])  != self.entry_chunklen or \
+                   u32(piece[12:]) != self.entry_unknown:
+                    raise LitError('Secondary header does not match piece')
+                self.read_directory(piece)
+            elif i == 2:
+                if u32(piece[8:])  != self.count_chunklen or \
+                   u32(piece[12:]) != self.count_unknown:
+                    raise LitError('Secondary header does not match piece')
+                continue # No data needed from this piece
+            elif i == 3:
+                self.piece3_guid = piece
+            elif i == 4:
+                self.piece4_guid = piece
                
    def read_directory(self, piece):
        self.entries = []
@ -522,107 +516,87 @@ class LitFile(object):
            if not hasattr(self, 'manifest'):
                raise LitError('Lit file does not have a valid manifest')

+    @preserve
    def read_section_names(self, entry):
-        opos = self._stream.tell()
-        try:
-            self._stream.seek(self.content_offset + entry.offset)
-            raw = self._stream.read(entry.size)
-            if len(raw) < 4:
+        self._stream.seek(self.content_offset + entry.offset)
+        raw = self._stream.read(entry.size)
+        if len(raw) < 4:
+            raise LitError('Invalid Namelist section')
+        pos = 4
+        self.num_sections = u16(raw[2:pos])
+        
+        self.sections = {}
+        for section in range(self.num_sections):
+            size = u16(raw[pos:pos+2])
+            pos += 2
+            size = size*2 + 2
+            if pos + size > len(raw):
                raise LitError('Invalid Namelist section')
-            pos = 4
-            self.num_sections = u16(raw[2:pos])
-            
-            self.sections = {}
-            for section in range(self.num_sections):
-                size = u16(raw[pos:pos+2])
-                pos += 2
-                size = size*2 + 2
-                if pos + size > len(raw):
-                    raise LitError('Invalid Namelist section')
-                self.sections[section] = raw[pos:pos+size].decode('utf-16-le')
-                pos += size                
-        finally:
-            self._stream.seek(opos)
+            self.sections[section] = raw[pos:pos+size].decode('utf-16-le')
+            pos += size                

+    @preserve
    def read_manifest(self, entry):
-        opos = self._stream.tell()
-        try:
-            self.manifest = []
-            self._stream.seek(self.content_offset + entry.offset)
-            raw = self._stream.read(entry.size)
-            pos = 0
-            while pos < len(raw):
-                size = ord(raw[pos])
-                if size == 0: break
-                pos += 1
-                root = raw[pos:pos+size].decode('utf8')
-                pos += size
-                if pos >= len(raw):
-                    raise LitError('Truncated manifest.')
-                for state in ['spine', 'not spine', 'css', 'images']:
-                    num_files = int32(raw[pos:pos+4])
+        self.manifest = []
+        self._stream.seek(self.content_offset + entry.offset)
+        raw = self._stream.read(entry.size)
+        pos = 0
+        while pos < len(raw):
+            size = ord(raw[pos])
+            if size == 0: break
+            pos += 1
+            root = raw[pos:pos+size].decode('utf8')
+            pos += size
+            if pos >= len(raw):
+                raise LitError('Truncated manifest.')
+            for state in ['spine', 'not spine', 'css', 'images']:
+                num_files = int32(raw[pos:pos+4])
+                pos += 4
+                if num_files == 0: continue
+                
+                i = 0
+                while i < num_files:
+                    if pos+5 >= len(raw):
+                        raise LitError('Truncated manifest.')
+                    offset = u32(raw[pos:pos+4])
                    pos += 4
-                    if num_files == 0: continue
                    
-                    i = 0
-                    while i < num_files:
-                        if pos+5 >= len(raw):
-                            raise LitError('Truncated manifest.')
-                        offset = u32(raw[pos:pos+4])
-                        pos += 4
+                    slen = ord(raw[pos])
+                    pos += 1
+                    internal = raw[pos:pos+slen].decode('utf8')
+                    pos += slen
                    
-                        slen = ord(raw[pos])
-                        pos += 1
-                        internal = raw[pos:pos+slen].decode('utf8')
-                        pos += slen
+                    slen = ord(raw[pos])
+                    pos += 1
+                    original = raw[pos:pos+slen].decode('utf8')
+                    pos += slen
                    
-                        slen = ord(raw[pos])
-                        pos += 1
-                        original = raw[pos:pos+slen].decode('utf8')
-                        pos += slen
+                    slen = ord(raw[pos])
+                    pos += 1
+                    mime_type = raw[pos:pos+slen].decode('utf8')
+                    pos += slen + 1
                    
-                        slen = ord(raw[pos])
-                        pos += 1
-                        mime_type = raw[pos:pos+slen].decode('utf8')
-                        pos += slen + 1
-                        
-                        self.manifest.append(
-                            ManifestItem(original, internal, mime_type,
-                                         offset, root, state))
-                        i += 1
-        finally:
-            self._stream.seek(opos)        
+                    self.manifest.append(
+                        ManifestItem(original, internal, mime_type,
+                                     offset, root, state))
+                    i += 1

+    @preserve
    def read_meta(self, entry):
-        opos = self._stream.tell()
-        try:
-            self._stream.seek(self.content_offset + entry.offset)
-            raw = self._stream.read(entry.size)
-
-            xml = \
-'''\
-<?xml version="1.0" encoding="UTF-8" ?>
-<!DOCTYPE package
-  PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
-  "http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
-'''+\
-                unicode(UnBinary(raw, self.manifest))
-            self.meta = xml
-        finally:
-            self._stream.seek(opos)
+        self._stream.seek(self.content_offset + entry.offset)
+        raw = self._stream.read(entry.size)
+        xml = OPF_DECL + unicode(UnBinary(raw, self.manifest))
+        self.meta = xml

+    @preserve
    def read_image(self, internal_name):
        cover_entry = None
        for entry in self.entries:
            if internal_name in entry.name:
                cover_entry = entry
                break
-        opos = self._stream.tell()
-        try:
-            self._stream.seek(self.content_offset + cover_entry.offset)
-            return self._stream.read(cover_entry.size)
-        finally:
-            self._stream.seek(opos)
+        # Return the raw bytes of the first directory entry whose name
+        # contains internal_name; fail with a LitError rather than an
+        # AttributeError on None when no entry matches.
+        if cover_entry is None:
+            raise LitError('Could not find image %r in the directory'
+                           % internal_name)
+        self._stream.seek(self.content_offset + cover_entry.offset)
+        return self._stream.read(cover_entry.size)

 def get_metadata(stream):
    try:
--- a/src/calibre/ebooks/metadata/opf.py
+++ b/src/calibre/ebooks/metadata/opf.py
@ -237,7 +237,7 @@ class OPF(MetaInformation):
    
    def get_title(self):
        title = self.soup.package.metadata.find('dc:title')
-        if title and title.string:
+        if title:
+            # NOTE(review): the removed condition also required
+            # title.string to be truthy. If title.string can be None for
+            # an existing <dc:title> element, the sub() call below would
+            # raise -- confirm that dropping the guard is intended.
            return self.ENTITY_PATTERN.sub(entity_to_unicode, title.string).strip()
        return self.default_title.strip()