From 75953a47d239bcdda2b15f725610a4860c8b2c3e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 20 Sep 2007 01:54:53 +0000
Subject: [PATCH] Newer pyPdf that is hopefully bug free.

---
 src/libprs500/ebooks/pyPdf/filters.py |    4 +-
 src/libprs500/ebooks/pyPdf/generic.py | 1253 ++++++++++++++-----------
 src/libprs500/ebooks/pyPdf/pdf.py     |  440 ++++++++-
 src/libprs500/ebooks/pyPdf/utils.py   |    5 +-
 4 files changed, 1109 insertions(+), 593 deletions(-)

diff --git a/src/libprs500/ebooks/pyPdf/filters.py b/src/libprs500/ebooks/pyPdf/filters.py
index 17a325f76f..581cd52111 100644
--- a/src/libprs500/ebooks/pyPdf/filters.py
+++ b/src/libprs500/ebooks/pyPdf/filters.py
@@ -32,9 +32,8 @@
 Implementation of stream filters for PDF.
 """
 __author__ = "Mathieu Fenniak"
-__author_email__ = "mfenniak@pobox.com"
+__author_email__ = "biziqe@mathieu.fenniak.net"
 
-from generic import NameObject
 
 try:
     import zlib
@@ -208,6 +207,7 @@ class ASCII85Decode(object):
     decode = staticmethod(decode)
 
 def decodeStreamData(stream):
+    from generic import NameObject
     filters = stream.get("/Filter", ())
     if len(filters) and not isinstance(filters[0], NameObject):
         # we have a single filter instance
diff --git a/src/libprs500/ebooks/pyPdf/generic.py b/src/libprs500/ebooks/pyPdf/generic.py
index 4fea8fa640..69a9ad7b5e 100644
--- a/src/libprs500/ebooks/pyPdf/generic.py
+++ b/src/libprs500/ebooks/pyPdf/generic.py
@@ -1,542 +1,711 @@
-# vim: sw=4:expandtab:foldmethod=marker
-#
-# Copyright (c) 2006, Mathieu Fenniak
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-# * The name of the author may not be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-
-"""
-Implementation of generic PDF objects (dictionary, number, string, and so on)
-"""
-__author__ = "Mathieu Fenniak"
-__author_email__ = "mfenniak@pobox.com"
-
-import re
-from utils import readNonWhitespace, RC4_encrypt
-import filters
-
-def readObject(stream, pdf):
-    tok = stream.read(1)
-    stream.seek(-1, 1) # reset to start
-    if tok == 't' or tok == 'f':
-        # boolean object
-        return BooleanObject.readFromStream(stream)
-    elif tok == '(':
-        # string object
-        return StringObject.readFromStream(stream)
-    elif tok == '/':
-        # name object
-        return NameObject.readFromStream(stream)
-    elif tok == '[':
-        # array object
-        return ArrayObject.readFromStream(stream, pdf)
-    elif tok == 'n':
-        # null object
-        return NullObject.readFromStream(stream)
-    elif tok == '<':
-        # hexadecimal string OR dictionary
-        peek = stream.read(2)
-        stream.seek(-2, 1) # reset to start
-        if peek == '<<':
-            return DictionaryObject.readFromStream(stream, pdf)
-        else:
-            return StringObject.readHexStringFromStream(stream)
-    elif tok == '%':
-        # comment
-        while tok not in ('\r', '\n'):
-            tok = stream.read(1)
-        tok = readNonWhitespace(stream)
-        stream.seek(-1, 1)
-        return readObject(stream, pdf)
-    else:
-        # number object OR indirect reference
-        if tok == '+' or tok == '-':
-            # number
-            return NumberObject.readFromStream(stream)
-        peek = stream.read(20)
-        stream.seek(-len(peek), 1) # reset to start
-        if re.match(r"(\d+)\s(\d+)\sR[^a-zA-Z]", peek) != None:
-            return IndirectObject.readFromStream(stream, pdf)
-        else:
-            return NumberObject.readFromStream(stream)
-
-class PdfObject(object):
-    def getObject(self):
-        """Resolves indirect references."""
-        return self
-
-
-class NullObject(PdfObject):
-    def writeToStream(self, stream, encryption_key):
-        stream.write("null")
-
-    def readFromStream(stream):
-        assert stream.read(4) == "null"
-        return NullObject()
-    readFromStream = staticmethod(readFromStream)
-
-
-class BooleanObject(PdfObject):
-    def __init__(self, value):
-        self.value = value
-
-    def writeToStream(self, stream, encryption_key):
-        if self.value:
-            stream.write("true")
-        else:
-            stream.write("false")
-
-    def readFromStream(stream):
-        word = stream.read(4)
-        if word == "true":
-            return BooleanObject(True)
-        elif word == "fals":
-            stream.read(1)
-            return BooleanObject(False)
-        assert False
-    readFromStream = staticmethod(readFromStream)
-
-
-class ArrayObject(list, PdfObject):
-    def writeToStream(self, stream, encryption_key):
-        stream.write("[")
-        for data in self:
-            stream.write(" ")
-            data.writeToStream(stream, encryption_key)
-        stream.write(" ]")
-
-    def readFromStream(stream, pdf):
-        arr = ArrayObject()
-        assert stream.read(1) == "["
-        while True:
-            # skip leading whitespace
-            tok = stream.read(1)
-            while tok.isspace():
-                tok = stream.read(1)
-            stream.seek(-1, 1)
-            # check for array ending
-            peekahead = stream.read(1)
-            if peekahead == "]":
-                break
-            stream.seek(-1, 1)
-            # read and append obj
-            arr.append(readObject(stream, pdf))
-        return arr
-    readFromStream = staticmethod(readFromStream)
-
-
-class IndirectObject(PdfObject):
-    def __init__(self, idnum, generation, pdf):
-        self.idnum = idnum
-        self.generation = generation
-        self.pdf = pdf
-
-    def getObject(self):
-        return self.pdf.getObject(self).getObject()
-
-    def __repr__(self):
-        return "IndirectObject(%r, %r)" % (self.idnum, self.generation)
-
-    def __eq__(self, other):
-        return (
-            other != None and
-            isinstance(other, IndirectObject) and
-            self.idnum == other.idnum and
-            self.generation == other.generation and
-            self.pdf is other.pdf
-            )
-
-    def __ne__(self, other):
-        return not self.__eq__(other)
-
-    def writeToStream(self, stream, encryption_key):
-        stream.write("%s %s R" % (self.idnum, self.generation))
-
-    def readFromStream(stream, pdf):
-        idnum = ""
-        while True:
-            tok = stream.read(1)
-            if tok.isspace():
-                break
-            idnum += tok
-        generation = ""
-        while True:
-            tok = stream.read(1)
-            if tok.isspace():
-                break
-            generation += tok
-        r = stream.read(1)
-        #if r != "R":
-        #    stream.seek(-20, 1)
-        #    print idnum, generation
-        #    print repr(stream.read(40))
-        assert r == "R"
-        return IndirectObject(int(idnum), int(generation), pdf)
-    readFromStream = staticmethod(readFromStream)
-
-
-class FloatObject(float, PdfObject):
-    def writeToStream(self, stream, encryption_key):
-        stream.write(repr(self))
-
-
-class NumberObject(int, PdfObject):
-    def __init__(self, value):
-        int.__init__(self, value)
-
-    def writeToStream(self, stream, encryption_key):
-        stream.write(repr(self))
-
-    def readFromStream(stream):
-        name = ""
-        while True:
-            tok = stream.read(1)
-            if tok != '+' and tok != '-' and tok != '.' and not tok.isdigit():
-                stream.seek(-1, 1)
-                break
-            name += tok
-        if name.find(".") != -1:
-            return FloatObject(name)
-        else:
-            return NumberObject(name)
-    readFromStream = staticmethod(readFromStream)
-
-
-class StringObject(str, PdfObject):
-    def writeToStream(self, stream, encryption_key):
-        string = self
-        if encryption_key:
-            string = RC4_encrypt(encryption_key, string)
-        stream.write("(")
-        for c in string:
-            if not c.isalnum() and not c.isspace():
-                stream.write("\\%03o" % ord(c))
-            else:
-                stream.write(c)
-        stream.write(")")
-
-    def readHexStringFromStream(stream):
-        stream.read(1)
-        txt = ""
-        x = ""
-        while True:
-            tok = readNonWhitespace(stream)
-            if tok == ">":
-                break
-            x += tok
-            if len(x) == 2:
-                txt += chr(int(x, base=16))
-                x = ""
-        if len(x) == 1:
-            x += "0"
-        if len(x) == 2:
-            txt += chr(int(x, base=16))
-        return StringObject(txt)
-    readHexStringFromStream = staticmethod(readHexStringFromStream)
-
-    def readFromStream(stream):
-        tok = stream.read(1)
-        parens = 1
-        txt = ""
-        while True:
-            tok = stream.read(1)
-            if tok == "(":
-                parens += 1
-            elif tok == ")":
-                parens -= 1
-                if parens == 0:
-                    break
-            elif tok == "\\":
-                tok = stream.read(1)
-                if tok == "n":
-                    tok = "\n"
-                elif tok == "r":
-                    tok = "\r"
-                elif tok == "t":
-                    tok = "\t"
-                elif tok == "b":
-                    tok == "\b"
-                elif tok == "f":
-                    tok = "\f"
-                elif tok == "(":
-                    tok = "("
-                elif tok == ")":
-                    tok = ")"
-                elif tok == "\\":
-                    tok = "\\"
-                elif tok.isdigit():
-                    tok += stream.read(2)
-                    tok = chr(int(tok, base=8))
-            txt += tok
-        return StringObject(txt)
-    readFromStream = staticmethod(readFromStream)
-
-
-class NameObject(str, PdfObject):
-    delimiterCharacters = "(", ")", "<", ">", "[", "]", "{", "}", "/", "%"
-
-    def __init__(self, data):
-        str.__init__(self, data)
-
-    def writeToStream(self, stream, encryption_key):
-        stream.write(self)
-
-    def readFromStream(stream):
-        name = stream.read(1)
-        assert name == "/"
-        while True:
-            tok = stream.read(1)
-            if tok.isspace() or tok in NameObject.delimiterCharacters:
-                stream.seek(-1, 1)
-                break
-            name += tok
-        return NameObject(name)
-    readFromStream = staticmethod(readFromStream)
-
-
-class DictionaryObject(dict, PdfObject):
-    def __init__(self):
-        pass
-
-    def writeToStream(self, stream, encryption_key):
-        stream.write("<<\n")
-        for key, value in self.items():
-            key.writeToStream(stream, encryption_key)
-            stream.write(" ")
-            value.writeToStream(stream, encryption_key)
-            stream.write("\n")
-        stream.write(">>")
-
-    def readFromStream(stream, pdf):
-        assert stream.read(2) == "<<"
-        data = {}
-        while True:
-            tok = readNonWhitespace(stream)
-            if tok == ">":
-                stream.read(1)
-                break
-            stream.seek(-1, 1)
-            key = readObject(stream, pdf)
-            tok = readNonWhitespace(stream)
-            stream.seek(-1, 1)
-            value = readObject(stream, pdf)
-            if data.has_key(key):
-                # multiple definitions of key not permitted
-                assert False
-            data[key] = value
-        pos = stream.tell()
-        s = readNonWhitespace(stream)
-        if s == 's' and stream.read(5) == 'tream':
-            eol = stream.read(1)
-            # odd PDF file output has spaces after 'stream' keyword but before EOL.
-            # patch provided by Danial Sandler
-            while eol == ' ':
-                eol = stream.read(1)
-            assert eol in ("\n", "\r")
-            if eol == "\r":
-                # read \n after
-                stream.read(1)
-            # this is a stream object, not a dictionary
-            assert data.has_key("/Length")
-            length = data["/Length"]
-            if isinstance(length, IndirectObject):
-                t = stream.tell()
-                length = pdf.getObject(length)
-                stream.seek(t, 0)
-            data["__streamdata__"] = stream.read(length)
-            e = readNonWhitespace(stream)
-            ndstream = stream.read(8)
-            if (e + ndstream) != "endstream":
-                # (sigh) - the odd PDF file has a length that is too long, so
-                # we need to read backwards to find the "endstream" ending.
-                # ReportLab (unknown version) generates files with this bug,
-                # and Python users into PDF files tend to be our audience.
-                # we need to do this to correct the streamdata and chop off
-                # an extra character.
-                pos = stream.tell()
-                stream.seek(-10, 1)
-                end = stream.read(9)
-                if end == "endstream":
-                    # we found it by looking back one character further.
-                    data["__streamdata__"] = data["__streamdata__"][:-1]
-                else:
-                    stream.seek(pos, 0)
-                    raise "Unable to find 'endstream' marker after stream."
-        else:
-            stream.seek(pos, 0)
-        if data.has_key("__streamdata__"):
-            return StreamObject.initializeFromDictionary(data)
-        else:
-            retval = DictionaryObject()
-            retval.update(data)
-            return retval
-    readFromStream = staticmethod(readFromStream)
-
-
-class StreamObject(DictionaryObject):
-    def __init__(self):
-        self._data = None
-        self.decodedSelf = None
-
-    def writeToStream(self, stream, encryption_key):
-        self[NameObject("/Length")] = NumberObject(len(self._data))
-        DictionaryObject.writeToStream(self, stream, encryption_key)
-        del self["/Length"]
-        stream.write("\nstream\n")
-        data = self._data
-        if encryption_key:
-            data = RC4_encrypt(encryption_key, data)
-        stream.write(data)
-        stream.write("\nendstream")
-
-    def initializeFromDictionary(data):
-        if data.has_key("/Filter"):
-            retval = EncodedStreamObject()
-        else:
-            retval = DecodedStreamObject()
-        retval._data = data["__streamdata__"]
-        del data["__streamdata__"]
-        del data["/Length"]
-        retval.update(data)
-        return retval
-    initializeFromDictionary = staticmethod(initializeFromDictionary)
-
-    def flateEncode(self):
-        if self.has_key("/Filter"):
-            f = self["/Filter"]
-            if isinstance(f, ArrayObject):
-                f.insert(0, NameObject("/FlateDecode"))
-            else:
-                newf = ArrayObject()
-                newf.append(NameObject("/FlateDecode"))
-                newf.append(f)
-                f = newf
-        else:
-            f = NameObject("/FlateDecode")
-        retval = EncodedStreamObject()
-        retval[NameObject("/Filter")] = f
-        retval._data = filters.FlateDecode.encode(self._data)
-        return retval
-
-
-class DecodedStreamObject(StreamObject):
-    def getData(self):
-        return self._data
-
-    def setData(self, data):
-        self._data = data
-
-
-class EncodedStreamObject(StreamObject):
-    def __init__(self):
-        self.decodedSelf = None
-
-    def getData(self):
-        if self.decodedSelf:
-            # cached version of decoded object
-            return self.decodedSelf.getData()
-        else:
-            # create decoded object
-            decoded = StreamObject()
-            decoded._data = filters.decodeStreamData(self)
-            for key, value in self.items():
-                if not key in ("/Length", "/Filter", "/DecodeParms"):
-                    decoded[key] = value
-            self.decodedSelf = decoded
-            return decoded._data
-
-    def setData(self, data):
-        raise "Creating EncodedStreamObject is not currently supported"
-
-
-class RectangleObject(ArrayObject):
-    def __init__(self, arr):
-        # must have four points
-        assert len(arr) == 4
-        # automatically convert arr[x] into NumberObject(arr[x]) if necessary
-        ArrayObject.__init__(self, [self.ensureIsNumber(x) for x in arr])
-
-    def ensureIsNumber(self, value):
-        if not isinstance(value, NumberObject):
-            value = NumberObject(value)
-        return value
-
-    def __repr__(self):
-        return "RectangleObject(%s)" % repr(list(self))
-
-    def getLowerLeft_x(self):
-        return self[0]
-
-    def getLowerLeft_y(self):
-        return self[1]
-
-    def getUpperRight_x(self):
-        return self[2]
-
-    def getUpperRight_y(self):
-        return self[3]
-
-    def getUpperLeft_x(self):
-        return self.getLowerLeft_x()
-    
-    def getUpperLeft_y(self):
-        return self.getUpperRight_y()
-
-    def getLowerRight_x(self):
-        return self.getUpperRight_x()
-
-    def getLowerRight_y(self):
-        return self.getLowerLeft_y()
-
-    def getLowerLeft(self):
-        return self.getLowerLeft_x(), self.getLowerLeft_y()
-
-    def getLowerRight(self):
-        return self.getLowerRight_x(), self.getLowerRight_y()
-
-    def getUpperLeft(self):
-        return self.getUpperLeft_x(), self.getUpperLeft_y()
-
-    def getUpperRight(self):
-        return self.getUpperRight_x(), self.getUpperRight_y()
-
-    def setLowerLeft(self, value):
-        self[0], self[1] = [self.ensureIsNumber(x) for x in value]
-
-    def setLowerRight(self, value):
-        self[2], self[1] = [self.ensureIsNumber(x) for x in value]
-
-    def setUpperLeft(self, value):
-        self[0], self[3] = [self.ensureIsNumber(x) for x in value]
-
-    def setUpperRight(self, value):
-        self[2], self[3] = [self.ensureIsNumber(x) for x in value]
-
-    lowerLeft = property(getLowerLeft, setLowerLeft, None, None)
-    lowerRight = property(getLowerRight, setLowerRight, None, None)
-    upperLeft = property(getUpperLeft, setUpperLeft, None, None)
-    upperRight = property(getUpperRight, setUpperRight, None, None)
-
+# vim: sw=4:expandtab:foldmethod=marker
+#
+# Copyright (c) 2006, Mathieu Fenniak
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+
+"""
+Implementation of generic PDF objects (dictionary, number, string, and so on)
+"""
+__author__ = "Mathieu Fenniak"
+__author_email__ = "biziqe@mathieu.fenniak.net"
+
+import re
+from utils import readNonWhitespace, RC4_encrypt
+import filters
+import utils
+import decimal
+import codecs
+
+def readObject(stream, pdf):
+    tok = stream.read(1)
+    stream.seek(-1, 1) # reset to start
+    if tok == 't' or tok == 'f':
+        # boolean object
+        return BooleanObject.readFromStream(stream)
+    elif tok == '(':
+        # string object
+        return readStringFromStream(stream)
+    elif tok == '/':
+        # name object
+        return NameObject.readFromStream(stream)
+    elif tok == '[':
+        # array object
+        return ArrayObject.readFromStream(stream, pdf)
+    elif tok == 'n':
+        # null object
+        return NullObject.readFromStream(stream)
+    elif tok == '<':
+        # hexadecimal string OR dictionary
+        peek = stream.read(2)
+        stream.seek(-2, 1) # reset to start
+        if peek == '<<':
+            return DictionaryObject.readFromStream(stream, pdf)
+        else:
+            return readHexStringFromStream(stream)
+    elif tok == '%':
+        # comment
+        while tok not in ('\r', '\n'):
+            tok = stream.read(1)
+        tok = readNonWhitespace(stream)
+        stream.seek(-1, 1)
+        return readObject(stream, pdf)
+    else:
+        # number object OR indirect reference
+        if tok == '+' or tok == '-':
+            # number
+            return NumberObject.readFromStream(stream)
+        peek = stream.read(20)
+        stream.seek(-len(peek), 1) # reset to start
+        if re.match(r"(\d+)\s(\d+)\sR[^a-zA-Z]", peek) != None:
+            return IndirectObject.readFromStream(stream, pdf)
+        else:
+            return NumberObject.readFromStream(stream)
+
+class PdfObject(object):
+    def getObject(self):
+        """Resolves indirect references."""
+        return self
+
+
+class NullObject(PdfObject):
+    def writeToStream(self, stream, encryption_key):
+        stream.write("null")
+
+    def readFromStream(stream):
+        nulltxt = stream.read(4)
+        if nulltxt != "null":
+            raise utils.PdfReadError, "error reading null object"
+        return NullObject()
+    readFromStream = staticmethod(readFromStream)
+
+
+class BooleanObject(PdfObject):
+    """PDF "true"/"false" object; self.value holds the Python truth value."""
+    def __init__(self, value):
+        self.value = value
+
+    def writeToStream(self, stream, encryption_key):
+        if self.value:
+            stream.write("true")
+        else:
+            stream.write("false")
+
+    def readFromStream(stream):
+        word = stream.read(4)
+        if word == "true":
+            return BooleanObject(True)
+        elif word == "fals" and stream.read(1) == "e":  # need all of "false"
+            return BooleanObject(False)
+        raise utils.PdfReadError("error reading boolean object")
+    readFromStream = staticmethod(readFromStream)
+
+
+class ArrayObject(list, PdfObject):
+    def writeToStream(self, stream, encryption_key):
+        stream.write("[")
+        for data in self:
+            stream.write(" ")
+            data.writeToStream(stream, encryption_key)
+        stream.write(" ]")
+
+    def readFromStream(stream, pdf):
+        arr = ArrayObject()
+        tmp = stream.read(1)
+        if tmp != "[":
+            raise utils.PdfReadError, "error reading array"
+        while True:
+            # skip leading whitespace
+            tok = stream.read(1)
+            while tok.isspace():
+                tok = stream.read(1)
+            stream.seek(-1, 1)
+            # check for array ending
+            peekahead = stream.read(1)
+            if peekahead == "]":
+                break
+            stream.seek(-1, 1)
+            # read and append obj
+            arr.append(readObject(stream, pdf))
+        return arr
+    readFromStream = staticmethod(readFromStream)
+
+
+class IndirectObject(PdfObject):
+    """Reference "idnum generation R" to an object resolved through pdf."""
+    def __init__(self, idnum, generation, pdf):
+        self.idnum = idnum
+        self.generation = generation
+        self.pdf = pdf
+
+    def getObject(self):
+        return self.pdf.getObject(self).getObject()
+
+    def __repr__(self):
+        return "IndirectObject(%r, %r)" % (self.idnum, self.generation)
+
+    def __eq__(self, other):
+        return (
+            isinstance(other, IndirectObject) and  # also False for None
+            self.idnum == other.idnum and
+            self.generation == other.generation and
+            self.pdf is other.pdf
+            )
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def writeToStream(self, stream, encryption_key):
+        stream.write("%s %s R" % (self.idnum, self.generation))
+
+    def readFromStream(stream, pdf):
+        idnum = ""
+        while True:
+            tok = stream.read(1)
+            if not tok or tok.isspace():  # "" is EOF; never loop on it
+                break
+            idnum += tok
+        generation = ""
+        while True:
+            tok = stream.read(1)
+            if not tok or tok.isspace():  # EOF falls through to the R check
+                break
+            generation += tok
+        r = stream.read(1)
+        if r != "R":
+            raise utils.PdfReadError("error reading indirect object reference")
+        return IndirectObject(int(idnum), int(generation), pdf)
+    readFromStream = staticmethod(readFromStream)
+
+
+class FloatObject(decimal.Decimal, PdfObject):
+    def writeToStream(self, stream, encryption_key):
+        stream.write(str(self))
+
+
+class NumberObject(int, PdfObject):
+    def __init__(self, value):
+        int.__init__(self, value)
+
+    def writeToStream(self, stream, encryption_key):
+        stream.write(repr(self))
+
+    def readFromStream(stream):
+        name = ""
+        while True:
+            tok = stream.read(1)
+            if tok != '+' and tok != '-' and tok != '.' and not tok.isdigit():
+                stream.seek(-1, 1)
+                break
+            name += tok
+        if name.find(".") != -1:
+            return FloatObject(name)
+        else:
+            return NumberObject(name)
+    readFromStream = staticmethod(readFromStream)
+
+
+##
+# Given a string (either a "str" or "unicode"), create a ByteStringObject or a
+# TextStringObject to represent the string.
+def createStringObject(string):
+    if isinstance(string, unicode):
+        return TextStringObject(string)
+    elif isinstance(string, str):
+        if string.startswith(codecs.BOM_UTF16_BE):
+            retval = TextStringObject(string.decode("utf-16"))
+            retval.autodetect_utf16 = True
+            return retval
+        else:
+            # This is probably a big performance hit here, but we need to
+            # convert string objects into the text/unicode-aware version if
+            # possible... and the only way to check if that's possible is
+            # to try.  Some strings are strings, some are just byte arrays.
+            try:
+                retval = TextStringObject(decode_pdfdocencoding(string))
+                retval.autodetect_pdfdocencoding = True
+                return retval
+            except UnicodeDecodeError:
+                return ByteStringObject(string)
+    else:
+        raise TypeError("createStringObject should have str or unicode arg")
+
+
+def readHexStringFromStream(stream):
+    stream.read(1)
+    txt = ""
+    x = ""
+    while True:
+        tok = readNonWhitespace(stream)
+        if tok == ">":
+            break
+        x += tok
+        if len(x) == 2:
+            txt += chr(int(x, base=16))
+            x = ""
+    if len(x) == 1:
+        x += "0"
+    if len(x) == 2:
+        txt += chr(int(x, base=16))
+    return createStringObject(txt)
+
+
+def readStringFromStream(stream):
+    """Parse a literal "(...)" string, starting at its opening parenthesis,
+    and return createStringObject(text); nested parentheses stay in the text.
+    Raises utils.PdfReadError on an unknown escape or a premature EOF."""
+    escapes = {"n": "\n", "r": "\r", "t": "\t", "b": "\b", "f": "\f",
+               "(": "(", ")": ")", "\\": "\\"}
+    stream.read(1)  # skip the opening "("
+    parens = 1
+    txt = ""
+    while True:
+        tok = stream.read(1)
+        if tok == "":  # EOF: an unterminated string would loop forever
+            raise utils.PdfReadError("Unexpected end of stream in string")
+        if tok == "(":
+            parens += 1
+        elif tok == ")":
+            parens -= 1
+            if parens == 0:
+                break
+        elif tok == "\\":
+            tok = stream.read(1)
+            if tok == "":
+                raise utils.PdfReadError("Unexpected end of stream in string")
+            if tok in escapes:
+                tok = escapes[tok]
+            elif tok in "01234567":
+                # Octal escape of one to three digits: stop at the first
+                # character that is not an octal digit and push it back.
+                for i in range(2):
+                    digit = stream.read(1)
+                    if digit == "" or digit not in "01234567":
+                        stream.seek(-len(digit), 1)
+                        break
+                    tok += digit
+                tok = chr(int(tok, base=8) & 0xFF)  # overflow is ignored
+            elif tok in "\n\r":
+                # Escaped line break: a continuation that adds nothing to
+                # the string.  Only "\r\n" is a two-character EOL.
+                if tok == "\r":
+                    nxt = stream.read(1)
+                    if nxt != "\n":
+                        stream.seek(-len(nxt), 1)
+                tok = ''
+            else:
+                raise utils.PdfReadError("Unexpected escaped string")
+        txt += tok
+    return createStringObject(txt)
+
+
+##
+# Represents a string object where the text encoding could not be determined.
+# This occurs quite often, as the PDF spec doesn't provide an alternate way to
+# represent strings -- for example, the encryption data stored in files (like
+# /O) is clearly not text, but is still stored in a "String" object.
+class ByteStringObject(str, PdfObject):
+
+    ##
+    # For compatibility with TextStringObject.original_bytes.  This method
+    # returns self.
+    original_bytes = property(lambda self: self)
+
+    def writeToStream(self, stream, encryption_key):
+        bytearr = self
+        if encryption_key:
+            bytearr = RC4_encrypt(encryption_key, bytearr)
+        stream.write("<")
+        stream.write(bytearr.encode("hex"))
+        stream.write(">")
+
+
+##
+# Represents a string object that has been decoded into a real unicode string.
+# If read from a PDF document, this string appeared to match the
+# PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to
+# occur.
+class TextStringObject(unicode, PdfObject):
+    autodetect_pdfdocencoding = False
+    autodetect_utf16 = False
+
+    ##
+    # The autodetection mechanism sometimes yields a text string object where
+    # a byte string object was expected.  When that happens, this
+    # "original_bytes" property recovers the encoded bytes the object was
+    # built from.
+    original_bytes = property(lambda self: self.get_original_bytes())
+
+    def get_original_bytes(self):
+        # The library wants raw bytes from a text string object, meaning the
+        # text autodetection guessed wrong -- which is pretty common.
+        # Re-encode with whichever scheme the autodetect flags record; with
+        # neither flag set there is nothing to reconstruct the bytes from.
+        if self.autodetect_utf16:
+            return codecs.BOM_UTF16_BE + self.encode("utf-16be")
+        if self.autodetect_pdfdocencoding:
+            return encode_pdfdocencoding(self)
+        raise Exception("no information about original bytes")
+
+    def writeToStream(self, stream, encryption_key):
+        # Prefer PDFDocEncoding, which is nicer to look at in the PDF file,
+        # and fall back to UTF-16BE with a BOM when it cannot represent the
+        # text.  Sadly, the failed attempt costs some performance.
+        try:
+            encoded = encode_pdfdocencoding(self)
+        except UnicodeEncodeError:
+            encoded = codecs.BOM_UTF16_BE + self.encode("utf-16be")
+        if encryption_key:
+            # Encrypt here and let ByteStringObject emit the result; the
+            # None key keeps it from being encrypted a second time.
+            cipher = RC4_encrypt(encryption_key, encoded)
+            ByteStringObject(cipher).writeToStream(stream, None)
+            return
+        stream.write("(")
+        # Octal-escape every byte that is not alphanumeric or a space.
+        for ch in encoded:
+            if ch.isalnum() or ch == ' ':
+                stream.write(ch)
+            else:
+                stream.write("\\%03o" % ord(ch))
+        stream.write(")")
+
+
+class NameObject(str, PdfObject):
+    delimiterCharacters = "(", ")", "<", ">", "[", "]", "{", "}", "/", "%"
+
+    def __init__(self, data):
+        str.__init__(self, data)
+
+    def writeToStream(self, stream, encryption_key):
+        stream.write(self)
+
+    def readFromStream(stream):
+        name = stream.read(1)
+        if name != "/":
+            raise utils.PdfReadError, "name read error"
+        tok = stream.read(1)  # '' means end of stream and must end the name
+        while tok and not (tok.isspace() or tok in NameObject.delimiterCharacters):
+            name += tok
+            tok = stream.read(1)
+        if tok:  # un-read the terminator; at EOF nothing was consumed
+            stream.seek(-1, 1)
+        return NameObject(name)
+    readFromStream = staticmethod(readFromStream)
+
+
+class DictionaryObject(dict, PdfObject):
+    def __init__(self):
+        pass
+
+    def writeToStream(self, stream, encryption_key):
+        stream.write("<<\n")
+        for key, value in self.items():
+            key.writeToStream(stream, encryption_key)
+            stream.write(" ")
+            value.writeToStream(stream, encryption_key)
+            stream.write("\n")
+        stream.write(">>")
+
+    def readFromStream(stream, pdf):
+        tmp = stream.read(2)
+        if tmp != "<<":
+            raise utils.PdfReadError, "dictionary read error"
+        data = {}
+        while True:
+            tok = readNonWhitespace(stream)
+            if tok == ">":
+                stream.read(1)
+                break
+            stream.seek(-1, 1)
+            key = readObject(stream, pdf)
+            tok = readNonWhitespace(stream)
+            stream.seek(-1, 1)
+            value = readObject(stream, pdf)
+            if data.has_key(key):
+                # multiple definitions of key not permitted
+                raise utils.PdfReadError, "multiple definitions in dictionary"
+            data[key] = value
+        pos = stream.tell()
+        s = readNonWhitespace(stream)
+        if s == 's' and stream.read(5) == 'tream':
+            eol = stream.read(1)
+            # some PDF files have spaces after the 'stream' keyword but before
+            # the EOL; skip them.  patch provided by Danial Sandler
+            while eol == ' ':
+                eol = stream.read(1)
+            if eol not in ("\n", "\r"):
+                raise utils.PdfReadError, "stream keyword is not followed by an end-of-line"
+            if eol == "\r":
+                stream.read(1)  # read \n after
+            # this is a stream object, not a dictionary; malformed input must
+            # raise PdfReadError (an assert would vanish under "python -O")
+            if not data.has_key("/Length"):
+                raise utils.PdfReadError, "stream dictionary has no /Length entry"
+            length = data["/Length"]
+            if isinstance(length, IndirectObject):
+                t = stream.tell()
+                length = pdf.getObject(length)
+                stream.seek(t, 0)
+            data["__streamdata__"] = stream.read(length)
+            e = readNonWhitespace(stream)
+            ndstream = stream.read(8)
+            if (e + ndstream) != "endstream":
+                # (sigh) - the odd PDF file declares a length that is too
+                # long, so the marker is not where /Length says it should be.
+                # ReportLab (unknown version) generates files with this bug,
+                # and Python users working with PDF files tend to be our
+                # audience.  Look one character further back and, if the
+                # marker is there, chop the extra character off the data.
+                pos = stream.tell()
+                stream.seek(-10, 1)
+                end = stream.read(9)
+                if end == "endstream":
+                    # we found it by looking back one character further.
+                    data["__streamdata__"] = data["__streamdata__"][:-1]
+                else:
+                    stream.seek(pos, 0)
+                    raise utils.PdfReadError, "Unable to find 'endstream' marker after stream."
+        else:
+            stream.seek(pos, 0)
+        if data.has_key("__streamdata__"):
+            return StreamObject.initializeFromDictionary(data)
+        retval = DictionaryObject()
+        retval.update(data)
+        return retval
+    readFromStream = staticmethod(readFromStream)
+
+
+class StreamObject(DictionaryObject):
+    def __init__(self):
+        self._data = None
+        self.decodedSelf = None
+
+    def writeToStream(self, stream, encryption_key):
+        self[NameObject("/Length")] = NumberObject(len(self._data))
+        DictionaryObject.writeToStream(self, stream, encryption_key)
+        del self["/Length"]
+        stream.write("\nstream\n")
+        data = self._data
+        if encryption_key:
+            data = RC4_encrypt(encryption_key, data)
+        stream.write(data)
+        stream.write("\nendstream")
+
+    def initializeFromDictionary(data):
+        if data.has_key("/Filter"):
+            retval = EncodedStreamObject()
+        else:
+            retval = DecodedStreamObject()
+        retval._data = data.pop("__streamdata__")
+        del data["/Length"]
+        retval.update(data)
+        return retval
+    initializeFromDictionary = staticmethod(initializeFromDictionary)
+
+    def flateEncode(self):
+        # Returns a new flate-compressed stream; this stream is left untouched.
+        if self.has_key("/Filter"):
+            # Build a fresh filter array: inserting into our own /Filter
+            # would make this stream claim an encoding its data does not have.
+            f = ArrayObject()
+            f.append(NameObject("/FlateDecode"))
+            if isinstance(self["/Filter"], ArrayObject):
+                f.extend(self["/Filter"])
+            else:
+                f.append(self["/Filter"])
+        else:
+            f = NameObject("/FlateDecode")
+        retval = EncodedStreamObject()
+        retval[NameObject("/Filter")] = f
+        retval._data = filters.FlateDecode.encode(self._data)
+        return retval
+
+
+class DecodedStreamObject(StreamObject):
+    def getData(self):
+        return self._data
+
+    def setData(self, data):
+        self._data = data
+
+
+class EncodedStreamObject(StreamObject):
+    def __init__(self):
+        self.decodedSelf = None
+
+    def getData(self):
+        # Decode once and cache.  Test against None: the cached stream is a
+        # dict subclass, so it is falsy whenever it carries no keys.
+        if self.decodedSelf is not None:
+            return self.decodedSelf.getData()
+        # DecodedStreamObject is required: plain StreamObject has no getData().
+        decoded = DecodedStreamObject()
+        decoded._data = filters.decodeStreamData(self)
+        for key, value in self.items():
+            if not key in ("/Length", "/Filter", "/DecodeParms"):
+                decoded[key] = value
+        self.decodedSelf = decoded
+        return decoded._data
+
+    def setData(self, data):
+        raise utils.PdfReadError, "Creating EncodedStreamObject is not currently supported"
+
+
+class RectangleObject(ArrayObject):
+    # Stored as [lower-left x, lower-left y, upper-right x, upper-right y].
+    def __init__(self, arr):
+        assert len(arr) == 4  # exactly four coordinates are required
+        # wrap each entry in a NumberObject unless it already is one
+        ArrayObject.__init__(self, map(self.ensureIsNumber, arr))
+
+    def ensureIsNumber(self, value):
+        if isinstance(value, NumberObject):
+            return value
+        return NumberObject(value)
+
+    def __repr__(self):
+        return "RectangleObject(%r)" % (list(self),)
+
+    def getLowerLeft_x(self):
+        return self[0]
+
+    def getLowerLeft_y(self):
+        return self[1]
+
+    def getUpperRight_x(self):
+        return self[2]
+
+    def getUpperRight_y(self):
+        return self[3]
+
+    def getUpperLeft_x(self):
+        return self.getLowerLeft_x()
+
+    def getUpperLeft_y(self):
+        return self.getUpperRight_y()
+
+    def getLowerRight_x(self):
+        return self.getUpperRight_x()
+
+    def getLowerRight_y(self):
+        return self.getLowerLeft_y()
+
+    def getLowerLeft(self):
+        return (self.getLowerLeft_x(), self.getLowerLeft_y())
+
+    def getLowerRight(self):
+        return (self.getLowerRight_x(), self.getLowerRight_y())
+
+    def getUpperLeft(self):
+        return (self.getUpperLeft_x(), self.getUpperLeft_y())
+
+    def getUpperRight(self):
+        return (self.getUpperRight_x(), self.getUpperRight_y())
+
+    def setLowerLeft(self, value):
+        self[0], self[1] = map(self.ensureIsNumber, value)
+
+    def setLowerRight(self, value):
+        self[2], self[1] = map(self.ensureIsNumber, value)
+
+    def setUpperLeft(self, value):
+        self[0], self[3] = map(self.ensureIsNumber, value)
+
+    def setUpperRight(self, value):
+        self[2], self[3] = map(self.ensureIsNumber, value)
+
+    lowerLeft = property(getLowerLeft, setLowerLeft)
+    lowerRight = property(getLowerRight, setLowerRight)
+    upperLeft = property(getUpperLeft, setUpperLeft)
+    upperRight = property(getUpperRight, setUpperRight)
+
+
+def encode_pdfdocencoding(unicode_string):
+    # Look each character up in the reverse table and emit its byte value.
+    encoded = []
+    for ch in unicode_string:
+        if ch not in _pdfDocEncoding_rev:
+            raise UnicodeEncodeError("pdfdocencoding", ch, -1, -1,
+                    "does not exist in translation table")
+        encoded.append(chr(_pdfDocEncoding_rev[ch]))
+    return ''.join(encoded)
+
+def decode_pdfdocencoding(byte_array):
+    decoded = []
+    for byte in byte_array:
+        mapped = _pdfDocEncoding[ord(byte)]
+        if mapped == u'\u0000':  # table marker for an unmapped byte value
+            raise UnicodeDecodeError("pdfdocencoding", byte, -1, -1,
+                    "does not exist in translation table")
+        decoded.append(mapped)
+    return u''.join(decoded)
+
+_pdfDocEncoding = (
+  u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000',
+  u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000',
+  u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000',
+  u'\u02d8', u'\u02c7', u'\u02c6', u'\u02d9', u'\u02dd', u'\u02db', u'\u02da', u'\u02dc',
+  u'\u0020', u'\u0021', u'\u0022', u'\u0023', u'\u0024', u'\u0025', u'\u0026', u'\u0027',
+  u'\u0028', u'\u0029', u'\u002a', u'\u002b', u'\u002c', u'\u002d', u'\u002e', u'\u002f',
+  u'\u0030', u'\u0031', u'\u0032', u'\u0033', u'\u0034', u'\u0035', u'\u0036', u'\u0037',
+  u'\u0038', u'\u0039', u'\u003a', u'\u003b', u'\u003c', u'\u003d', u'\u003e', u'\u003f',
+  u'\u0040', u'\u0041', u'\u0042', u'\u0043', u'\u0044', u'\u0045', u'\u0046', u'\u0047',
+  u'\u0048', u'\u0049', u'\u004a', u'\u004b', u'\u004c', u'\u004d', u'\u004e', u'\u004f',
+  u'\u0050', u'\u0051', u'\u0052', u'\u0053', u'\u0054', u'\u0055', u'\u0056', u'\u0057',
+  u'\u0058', u'\u0059', u'\u005a', u'\u005b', u'\u005c', u'\u005d', u'\u005e', u'\u005f',
+  u'\u0060', u'\u0061', u'\u0062', u'\u0063', u'\u0064', u'\u0065', u'\u0066', u'\u0067',
+  u'\u0068', u'\u0069', u'\u006a', u'\u006b', u'\u006c', u'\u006d', u'\u006e', u'\u006f',
+  u'\u0070', u'\u0071', u'\u0072', u'\u0073', u'\u0074', u'\u0075', u'\u0076', u'\u0077',
+  u'\u0078', u'\u0079', u'\u007a', u'\u007b', u'\u007c', u'\u007d', u'\u007e', u'\u0000',
+  u'\u2022', u'\u2020', u'\u2021', u'\u2026', u'\u2014', u'\u2013', u'\u0192', u'\u2044',
+  u'\u2039', u'\u203a', u'\u2212', u'\u2030', u'\u201e', u'\u201c', u'\u201d', u'\u2018',
+  u'\u2019', u'\u201a', u'\u2122', u'\ufb01', u'\ufb02', u'\u0141', u'\u0152', u'\u0160',
+  u'\u0178', u'\u017d', u'\u0131', u'\u0142', u'\u0153', u'\u0161', u'\u017e', u'\u0000',
+  u'\u20ac', u'\u00a1', u'\u00a2', u'\u00a3', u'\u00a4', u'\u00a5', u'\u00a6', u'\u00a7',
+  u'\u00a8', u'\u00a9', u'\u00aa', u'\u00ab', u'\u00ac', u'\u0000', u'\u00ae', u'\u00af',
+  u'\u00b0', u'\u00b1', u'\u00b2', u'\u00b3', u'\u00b4', u'\u00b5', u'\u00b6', u'\u00b7',
+  u'\u00b8', u'\u00b9', u'\u00ba', u'\u00bb', u'\u00bc', u'\u00bd', u'\u00be', u'\u00bf',
+  u'\u00c0', u'\u00c1', u'\u00c2', u'\u00c3', u'\u00c4', u'\u00c5', u'\u00c6', u'\u00c7',
+  u'\u00c8', u'\u00c9', u'\u00ca', u'\u00cb', u'\u00cc', u'\u00cd', u'\u00ce', u'\u00cf',
+  u'\u00d0', u'\u00d1', u'\u00d2', u'\u00d3', u'\u00d4', u'\u00d5', u'\u00d6', u'\u00d7',
+  u'\u00d8', u'\u00d9', u'\u00da', u'\u00db', u'\u00dc', u'\u00dd', u'\u00de', u'\u00df',
+  u'\u00e0', u'\u00e1', u'\u00e2', u'\u00e3', u'\u00e4', u'\u00e5', u'\u00e6', u'\u00e7',
+  u'\u00e8', u'\u00e9', u'\u00ea', u'\u00eb', u'\u00ec', u'\u00ed', u'\u00ee', u'\u00ef',
+  u'\u00f0', u'\u00f1', u'\u00f2', u'\u00f3', u'\u00f4', u'\u00f5', u'\u00f6', u'\u00f7',
+  u'\u00f8', u'\u00f9', u'\u00fa', u'\u00fb', u'\u00fc', u'\u00fd', u'\u00fe', u'\u00ff'
+)
+
+assert len(_pdfDocEncoding) == 256
+
+# Reverse table (unicode character -> byte value) used for encoding.
+# Entries equal to u"\u0000" mark unmapped byte values and are left out.
+_pdfDocEncoding_rev = {}
+for i, char in enumerate(_pdfDocEncoding):
+    if char != u"\u0000":
+        assert char not in _pdfDocEncoding_rev
+        _pdfDocEncoding_rev[char] = i
+
diff --git a/src/libprs500/ebooks/pyPdf/pdf.py b/src/libprs500/ebooks/pyPdf/pdf.py
index fdaacaf574..f64c1a6c22 100644
--- a/src/libprs500/ebooks/pyPdf/pdf.py
+++ b/src/libprs500/ebooks/pyPdf/pdf.py
@@ -1,6 +1,8 @@
 # vim: sw=4:expandtab:foldmethod=marker
 #
 # Copyright (c) 2006, Mathieu Fenniak
+# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
+#
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -34,7 +36,7 @@ be able to split and merge PDF files by page, and that's about all it can do.
 It may be a solid base for future PDF file work in Python.
 """
 __author__ = "Mathieu Fenniak"
-__author_email__ = "mfenniak@pobox.com"
+__author_email__ = "biziqe@mathieu.fenniak.net"
 
 import struct
 try:
@@ -44,6 +46,7 @@ except ImportError:
 
 import filters
 import utils
+import warnings
 from generic import *
 from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList
 from sets import ImmutableSet
@@ -68,7 +71,7 @@ class PdfFileWriter(object):
         # info object
         info = DictionaryObject()
         info.update({
-                NameObject("/Producer"): StringObject("Python PDF Library - http://pybrary.net/pyPdf/")
+                NameObject("/Producer"): createStringObject(u"Python PDF Library - http://pybrary.net/pyPdf/")
                 })
         self._info = self._addObject(info)
 
@@ -128,10 +131,10 @@ class PdfFileWriter(object):
             keylen = 40 / 8
         # permit everything:
         P = -1
-        O = StringObject(_alg33(owner_pwd, user_pwd, rev, keylen))
+        O = ByteStringObject(_alg33(owner_pwd, user_pwd, rev, keylen))
         ID_1 = md5.new(repr(time.time())).digest()
         ID_2 = md5.new(repr(random.random())).digest()
-        self._ID = ArrayObject((StringObject(ID_1), StringObject(ID_2)))
+        self._ID = ArrayObject((ByteStringObject(ID_1), ByteStringObject(ID_2)))
         if rev == 2:
             U, key = _alg34(user_pwd, O, P, ID_1)
         else:
@@ -143,8 +146,8 @@ class PdfFileWriter(object):
         if V == 2:
             encrypt[NameObject("/Length")] = NumberObject(keylen * 8)
         encrypt[NameObject("/R")] = NumberObject(rev)
-        encrypt[NameObject("/O")] = StringObject(O)
-        encrypt[NameObject("/U")] = StringObject(U)
+        encrypt[NameObject("/O")] = ByteStringObject(O)
+        encrypt[NameObject("/U")] = ByteStringObject(U)
         encrypt[NameObject("/P")] = NumberObject(P)
         self._encrypt = self._addObject(encrypt)
         self._encrypt_key = key
@@ -212,8 +215,6 @@ class PdfFileWriter(object):
             for key, value in data.items():
                 origvalue = value
                 value = self._sweepIndirectReferences(externMap, value)
-                if value == None:
-                    print objects, value, origvalue
                 if isinstance(value, StreamObject):
                     # a dictionary value is a stream.  streams must be indirect
                     # objects, so we need to change this value.
@@ -271,6 +272,7 @@ class PdfFileWriter(object):
 class PdfFileReader(object):
     def __init__(self, stream):
         self.flattenedPages = None
+        self.pageNumbers = {}
         self.resolvedObjects = {}
         self.read(stream)
         self.stream = stream
@@ -329,6 +331,144 @@ class PdfFileReader(object):
             self._flatten()
         return self.flattenedPages[pageNumber]
 
+    ##
+    # Read-only property that accesses the 
+    # {@link #PdfFileReader.getNamedDestinations 
+    # getNamedDestinations} function.
+    # <p>
+    # Stability: Added in v1.10, will exist for all future v1.x releases.
+    namedDestinations = property(lambda self:
+                                  self.getNamedDestinations(), None, None)
+
+    ##
+    # Retrieves the named destinations present in the document.
+    # <p>
+    # Stability: Added in v1.10, will exist for all future v1.x releases.
+    # @return Returns a dict which maps names to {@link #Destination
+    # destinations}.
+    def getNamedDestinations(self, tree = None, map = None):
+        if self.flattenedPages is None:
+            self._flatten()
+
+        resolve = self.safeGetObject
+        if map is None:
+            # Top-level call: start a fresh result and locate the name tree.
+            map = {}
+            root = resolve(self.trailer["/Root"])
+            if root.has_key("/Dests"):
+                tree = resolve(root["/Dests"])
+            elif root.has_key("/Names"):
+                nameDict = resolve(root['/Names'])
+                if nameDict.has_key("/Dests"):
+                    tree = resolve(nameDict['/Dests'])
+
+        if tree is None:
+            return map
+
+        if tree.has_key("/Kids"):
+            # Collect from every child node into the same result dict.
+            for kid in resolve(tree["/Kids"]):
+                self.getNamedDestinations(resolve(kid), map)
+
+        if tree.has_key("/Names"):
+            # /Names is read as alternating key, value entries.
+            pairs = resolve(tree["/Names"])
+            for i in range(0, len(pairs), 2):
+                key = resolve(pairs[i])
+                val = resolve(pairs[i+1])
+                if isinstance(val, DictionaryObject) and val.has_key('/D'):
+                    val = resolve(val['/D'])
+                dest = self._buildDestination(val, key)
+                if dest is not None:
+                    map[key] = dest
+
+        return map
+
+    ##
+    # Read-only property that accesses the {@link #PdfFileReader.getOutlines
+    # getOutlines} function.
+    # <p>
+    # Stability: Added in v1.10, will exist for all future v1.x releases.
+    outlines = property(lambda self: self.getOutlines(), None, None)
+
+    ##
+    # Retrieves the document outline present in the document.
+    # <p>
+    # Stability: Added in v1.10, will exist for all future v1.x releases.
+    # @return Returns a nested list of {@link #Destination destinations}.
+    def getOutlines(self, node = None, outlines = None):
+        if self.flattenedPages is None:
+            self._flatten()
+
+        resolve = self.safeGetObject
+        if outlines is None:
+            # Top-level call: find the first outline item via the catalog's
+            # /Outlines entry and cache the named destinations for lookups.
+            outlines = []
+            root = resolve(self.trailer["/Root"])
+            if root.has_key("/Outlines"):
+                outlineDict = resolve(root["/Outlines"])
+                if outlineDict.has_key("/First"):
+                    node = resolve(outlineDict["/First"])
+            self._namedDests = self.getNamedDestinations()
+
+        if node is None:
+            return outlines
+
+        # Walk the sibling chain through /Next, starting at this node.
+        while True:
+            entry = self._buildOutline(node)
+            if entry:
+                outlines.append(entry)
+
+            # Children are gathered into their own nested list.
+            if node.has_key("/First"):
+                children = []
+                self.getOutlines(resolve(node["/First"]), children)
+                if children:
+                    outlines.append(children)
+
+            if not node.has_key("/Next"):
+                break
+            node = resolve(node["/Next"])
+
+        return outlines
+
+    def _buildDestination(self, array, title):
+        # Needs [page reference, ...] with 2+ entries and a known page.
+        if not isinstance(array, ArrayObject) or len(array) < 2:
+            return None
+        pageRef = array[0]
+        if not isinstance(pageRef, IndirectObject):
+            return None
+        pageKey = (pageRef.generation, pageRef.idnum)
+        if pageKey not in self.pageNumbers:
+            return None
+        return Destination(title, self.pageNumbers[pageKey], *array[1:])
+          
+    def _buildOutline(self, node):
+        # Returns the Destination for an outline item, or None if unsupported.
+        dest, title, outline = None, None, None
+        if node.has_key("/A") and node.has_key("/Title"):
+            # Action, section 8.5 (only type GoTo supported)
+            title  = self.safeGetObject(node["/Title"])
+            action = self.safeGetObject(node["/A"])
+            if action["/S"] == "/GoTo":
+                dest = self.safeGetObject(action["/D"])
+        elif node.has_key("/Dest") and node.has_key("/Title"):
+            # Destination, section 8.2.1
+            title = self.safeGetObject(node["/Title"])
+            dest  = self.safeGetObject(node["/Dest"])
+
+        # named destinations may be byte (str) or text (unicode) strings
+        if dest:
+            if isinstance(dest, ArrayObject):
+                outline = self._buildDestination(dest, title)
+            elif isinstance(dest, basestring) and self._namedDests.has_key(dest):
+                outline = self._namedDests[dest]
+                outline.title = title
+        return outline
+
     ##
     # Read-only property that emulates a list based upon the {@link
     # #PdfFileReader.getNumPages getNumPages} and {@link #PdfFileReader.getPage
@@ -349,14 +489,16 @@ class PdfFileReader(object):
             self.flattenedPages = []
             catalog = self.getObject(self.trailer["/Root"])
             pages = self.getObject(catalog["/Pages"])
+        indirectReference = None
         if isinstance(pages, IndirectObject):
+            indirectReference = pages
             pages = self.getObject(pages)
         t = pages["/Type"]
         if t == "/Pages":
             for attr in inheritablePageAttributes:
                 if pages.has_key(attr):
                     inherit[attr] = pages[attr]
-            for page in pages["/Kids"]:
+            for page in self.safeGetObject(pages["/Kids"]):
                 self._flatten(page, inherit)
         elif t == "/Page":
             for attr,value in inherit.items():
@@ -364,8 +506,11 @@ class PdfFileReader(object):
                 # parent's value:
                 if not pages.has_key(attr):
                     pages[attr] = value
-            pageObj = PageObject(self)
+            pageObj = PageObject(self, indirectReference)
             pageObj.update(pages)
+            if indirectReference:
+                key = (indirectReference.generation, indirectReference.idnum)
+                self.pageNumbers[key] = len(self.flattenedPages)
             self.flattenedPages.append(pageObj)
 
     def safeGetObject(self, obj):
@@ -425,8 +570,8 @@ class PdfFileReader(object):
         return retval
 
     def _decryptObject(self, obj, key):
-        if isinstance(obj, StringObject):
-            obj = StringObject(utils.RC4_encrypt(key, obj))
+        if isinstance(obj, ByteStringObject) or isinstance(obj, TextStringObject):
+            obj = createStringObject(utils.RC4_encrypt(key, obj.original_bytes))
         elif isinstance(obj, StreamObject):
             obj._data = utils.RC4_encrypt(key, obj._data)
         elif isinstance(obj, DictionaryObject):
@@ -438,6 +583,11 @@ class PdfFileReader(object):
         return obj
 
     def readObjectHeader(self, stream):
+        # Should never be necessary to read out whitespace, since the
+        # cross-reference table should put us in the right spot to read the
+        # object header.  In reality... some files have stupid cross reference
+        # tables that are off by whitespace bytes.
+        readNonWhitespace(stream); stream.seek(-1, 1)
         idnum = readUntilWhitespace(stream)
         generation = readUntilWhitespace(stream)
         obj = stream.read(3)
@@ -456,13 +606,15 @@ class PdfFileReader(object):
         line = ''
         while not line:
             line = self.readNextEndLine(stream)
-        assert line[:5] == "%%EOF"
+        if line[:5] != "%%EOF":
+            raise utils.PdfReadError, "EOF marker not found"
 
         # find startxref entry - the location of the xref table
         line = self.readNextEndLine(stream)
         startxref = int(line)
         line = self.readNextEndLine(stream)
-        assert line[:9] == "startxref"
+        if line[:9] != "startxref":
+            raise utils.PdfReadError, "startxref not found"
 
         # read all cross reference tables and their trailers
         self.xref = {}
@@ -475,7 +627,8 @@ class PdfFileReader(object):
             if x == "x":
                 # standard cross-reference table
                 ref = stream.read(4)
-                assert ref[:3] == "ref"
+                if ref[:3] != "ref":
+                    raise utils.PdfReadError, "xref table read error"
                 readNonWhitespace(stream)
                 stream.seek(-1, 1)
                 while 1:
@@ -661,7 +814,7 @@ class PdfFileReader(object):
     def _authenticateUserPassword(self, password):
         encrypt = self.safeGetObject(self.trailer['/Encrypt'])
         rev = self.safeGetObject(encrypt['/R'])
-        owner_entry = self.safeGetObject(encrypt['/O'])
+        owner_entry = self.safeGetObject(encrypt['/O']).original_bytes
         p_entry = self.safeGetObject(encrypt['/P'])
         id_entry = self.safeGetObject(self.trailer['/ID'])
         id1_entry = self.safeGetObject(id_entry[0])
@@ -672,7 +825,7 @@ class PdfFileReader(object):
                     self.safeGetObject(encrypt["/Length"]) / 8, owner_entry,
                     p_entry, id1_entry,
                     self.safeGetObject(encrypt.get("/EncryptMetadata", False)))
-        real_U = self.safeGetObject(encrypt['/U'])
+        real_U = self.safeGetObject(encrypt['/U']).original_bytes
         return U == real_U, key
 
     def getIsEncrypted(self):
@@ -721,9 +874,10 @@ def createRectangleAccessor(name, fallback):
 # will be created by accessing the {@link #PdfFileReader.getPage getPage}
 # function of the {@link #PdfFileReader PdfFileReader} class.
 class PageObject(DictionaryObject):
-    def __init__(self, pdf):
+    def __init__(self, pdf, indirectReference = None):
         DictionaryObject.__init__(self)
         self.pdf = pdf
+        self.indirectReference = indirectReference
 
     ##
     # Rotates a page clockwise by increments of 90 degrees.
@@ -856,26 +1010,35 @@ class PageObject(DictionaryObject):
     # <p>
     # Stability: Added in v1.7, will exist for all future v1.x releases.  May
     # be overhauled to provide more ordered text in the future.
-    # @return a string object
+    # @return a unicode string object
     def extractText(self):
-        text = ""
+        text = u""
         content = self["/Contents"].getObject()
         if not isinstance(content, ContentStream):
             content = ContentStream(content, self.pdf)
+        # Note: we check all strings are TextStringObjects.  ByteStringObjects
+        # are strings where the byte->string encoding was unknown, so adding
+        # them to the text here would be gibberish.
         for operands,operator in content.operations:
             if operator == "Tj":
-                text += operands[0]
+                _text = operands[0]
+                if isinstance(_text, TextStringObject):
+                    text += _text
             elif operator == "T*":
                 text += "\n"
             elif operator == "'":
                 text += "\n"
-                text += operands[0]
-            elif operator == "\"":
-                text += "\n"
-                text += operands[2]
+                _text = operands[0]
+                if isinstance(_text, TextStringObject):
+                    text += operands[0]
+            elif operator == '"':
+                _text = operands[2]
+                if isinstance(_text, TextStringObject):
+                    text += "\n"
+                    text += _text
             elif operator == "TJ":
                 for i in operands[0]:
-                    if isinstance(i, StringObject):
+                    if isinstance(i, TextStringObject):
                         text += i
         return text
 
@@ -946,7 +1109,7 @@ class ContentStream(DecodedStreamObject):
             if peek == '':
                 break
             stream.seek(-1, 1)
-            if peek.isalpha() or peek == "'" or peek == "\"":
+            if peek.isalpha() or peek == "'" or peek == '"':
                 operator = readUntilWhitespace(stream, maxchars=2)
                 if operator == "BI":
                     # begin inline image - a completely different parsing
@@ -1021,43 +1184,139 @@ class ContentStream(DecodedStreamObject):
 
 ##
 # A class representing the basic document metadata provided in a PDF File.
+# <p>
+# As of pyPdf v1.10, every text property of the document metadata comes in two
+# forms, e.g. author and author_raw.  The non-raw property returns either a
+# TextStringObject or None, making it ideal when the metadata is displayed.
+# The raw property can sometimes return a ByteStringObject, if pyPdf was
+# unable to decode the string's text encoding; this requires additional
+# safety in the caller and therefore is not as commonly accessed.
 class DocumentInformation(DictionaryObject):
     def __init__(self):
         DictionaryObject.__init__(self)
 
+    def getText(self, key):
+        retval = self.get(key, None)
+        if isinstance(retval, TextStringObject):
+            return retval
+        return None  # key absent, or value is not a TextStringObject
+
     ##
     # Read-only property accessing the document's title.  Added in v1.6, will
-    # exist for all future v1.x releases.
-    # @return A string, or None if the title is not provided.
-    title = property(lambda self: self.get("/Title", None), None, None)
+    # exist for all future v1.x releases.  Modified in v1.10 to always return a
+    # unicode string (TextStringObject).
+    # @return A unicode string; None if the title is absent or undecodable.
+    title = property(lambda self: self.getText("/Title"))
+    title_raw = property(lambda self: self.get("/Title"))
 
     ##
     # Read-only property accessing the document's author.  Added in v1.6, will
-    # exist for all future v1.x releases.
-    # @return A string, or None if the author is not provided.
-    author = property(lambda self: self.get("/Author", None), None, None)
+    # exist for all future v1.x releases.  Modified in v1.10 to always return a
+    # unicode string (TextStringObject).
+    # @return A unicode string; None if the author is absent or undecodable.
+    author = property(lambda self: self.getText("/Author"))
+    author_raw = property(lambda self: self.get("/Author"))
 
     ##
     # Read-only property accessing the subject of the document.  Added in v1.6,
-    # will exist for all future v1.x releases.
-    # @return A string, or None if the subject is not provided.
-    subject = property(lambda self: self.get("/Subject", None), None, None)
+    # will exist for all future v1.x releases.  Modified in v1.10 to always
+    # return a unicode string (TextStringObject).
+    # @return A unicode string; None if the subject is absent or undecodable.
+    subject = property(lambda self: self.getText("/Subject"))
+    subject_raw = property(lambda self: self.get("/Subject"))
 
     ##
     # Read-only property accessing the document's creator.  If the document was
     # converted to PDF from another format, the name of the application (for
     # example, OpenOffice) that created the original document from which it was
     # converted.  Added in v1.6, will exist for all future v1.x releases.
-    # @return A string, or None if the creator is not provided.
-    creator = property(lambda self: self.get("/Creator", None), None, None)
+    # Modified in v1.10 to always return a unicode string (TextStringObject).
+    # @return A unicode string; None if the creator is absent or undecodable.
+    creator = property(lambda self: self.getText("/Creator"))
+    creator_raw = property(lambda self: self.get("/Creator"))
 
     ##
     # Read-only property accessing the document's producer.  If the document
     # was converted to PDF from another format, the name of the application
     # (for example, OSX Quartz) that converted it to PDF.  Added in v1.6, will
-    # exist for all future v1.x releases.
-    # @return A string, or None if the producer is not provided.
-    producer = property(lambda self: self.get("/Producer", None), None, None)
+    # exist for all future v1.x releases.  Modified in v1.10 to always return a
+    # unicode string (TextStringObject).
+    # @return A unicode string; None if the producer is absent or undecodable.
+    producer = property(lambda self: self.getText("/Producer"))
+    producer_raw = property(lambda self: self.get("/Producer"))
+
+
+##
+# A class representing a destination within a PDF file.
+# See section 8.2.1 of the PDF 1.6 reference.
+# Stability: Added in v1.10, will exist for all v1.x releases.
+class Destination(DictionaryObject):
+    def __init__(self, *args):
+        DictionaryObject.__init__(self)
+        self.title = args[0]
+        self["/Page"], self["/Type"] = args[1], args[2]
+        # args is (title, page, type, *coords); NullObject coords become None.
+        # Coordinates expected per type: table 8.2 of the PDF 1.6 reference.
+        mapNull = lambda x: {True: None, False: x}[isinstance(x, NullObject)]
+        params = map(mapNull, args[3:])
+        type = self["/Type"]
+
+        if type == "/XYZ":
+            self["/Left"], self["/Top"], self["/Zoom"] = params
+        elif type == "/FitR":
+            self["/Left"], self["/Bottom"], \
+                self["/Right"], self["/Top"] = params
+        elif type in ["/FitH", "/FitBH"]:
+            self["/Top"], = params
+        elif type in ["/FitV", "/FitBV"]:
+            self["/Left"], = params
+        elif type in ["/Fit", "/FitB"]:
+            pass
+        else:
+            raise utils.PdfReadError, "Unknown Destination Type: " + type
+
+    def setTitle(self, title):
+        self["/Title"] = title.strip()
+
+    ##
+    # Read-write property accessing the destination title.
+    # @return A string.
+    title = property(lambda self: self.get("/Title"), setTitle, None)
+
+    ##
+    # Read-only property accessing the destination page.
+    # @return An integer.
+    page = property(lambda self: self.get("/Page"), None, None)
+
+    ##
+    # Read-only property accessing the destination type.
+    # @return A string.
+    type = property(lambda self: self.get("/Type"), None, None)
+
+    ##
+    # Read-only property accessing the zoom factor.
+    # @return A number, or None if not available.
+    zoom = property(lambda self: self.get("/Zoom", None), None, None)
+
+    ##
+    # Read-only property accessing the left horizontal coordinate.
+    # @return A number, or None if not available.
+    left = property(lambda self: self.get("/Left", None), None, None)
+
+    ##
+    # Read-only property accessing the right horizontal coordinate.
+    # @return A number, or None if not available.
+    right = property(lambda self: self.get("/Right", None), None, None)
+
+    ##
+    # Read-only property accessing the top vertical coordinate.
+    # @return A number, or None if not available.
+    top = property(lambda self: self.get("/Top", None), None, None)
+
+    ##
+    # Read-only property accessing the bottom vertical coordinate.
+    # @return A number, or None if not available.
+    bottom = property(lambda self: self.get("/Bottom", None), None, None)
 
 
 def convertToInt(d, size):
@@ -1078,65 +1337,150 @@ _encryption_padding = '\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56' + \
         '\xff\xfa\x01\x08\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c' + \
         '\xa9\xfe\x64\x53\x69\x7a'
 
+# Implementation of algorithm 3.2 of the PDF standard security handler,
+# section 3.5.2 of the PDF 1.6 reference.
 def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt=True):
-    import md5, struct
-    m = md5.new()
+    # 1. Pad or truncate the password string to exactly 32 bytes.  If the
+    # password string is more than 32 bytes long, use only its first 32 bytes;
+    # if it is less than 32 bytes long, pad it by appending the required number
+    # of additional bytes from the beginning of the padding string
+    # (_encryption_padding).
     password = (password + _encryption_padding)[:32]
-    m.update(password)
+    # 2. Initialize the MD5 hash function and pass the result of step 1 as
+    # input to this function.
+    import md5, struct
+    m = md5.new(password)
+    # 3. Pass the encryption dictionary's /O entry to the MD5 hash function.
     m.update(owner_entry)
+    # 4. Treat the value of the /P entry as an unsigned 4-byte integer and pass
+    # these bytes to the MD5 hash function, low-order byte first.  (The '<i'
+    # format packs it as a signed little-endian 4-byte integer.)
     p_entry = struct.pack('<i', p_entry)
     m.update(p_entry)
+    # 5. Pass the first element of the file's file identifier array to the MD5
+    # hash function.
     m.update(id1_entry)
+    # 6. (Revision 3 or greater) If document metadata is not being encrypted,
+    # pass 4 bytes with the value 0xFFFFFFFF to the MD5 hash function.
     if rev >= 3 and not metadata_encrypt:
         m.update("\xff\xff\xff\xff")
+    # 7. Finish the hash.
     md5_hash = m.digest()
+    # 8. (Revision 3 or greater) Do the following 50 times: Take the output
+    # from the previous MD5 hash and pass the first n bytes of the output as
+    # input into a new MD5 hash, where n is the number of bytes of the
+    # encryption key as defined by the value of the encryption dictionary's
+    # /Length entry.
     if rev >= 3:
         for i in range(50):
             md5_hash = md5.new(md5_hash[:keylen]).digest()
+    # 9. Set the encryption key to the first n bytes of the output from the
+    # final MD5 hash, where n is always 5 for revision 2 but, for revision 3 or
+    # greater, depends on the value of the encryption dictionary's /Length
+    # entry.
     return md5_hash[:keylen]
 
+# Implementation of algorithm 3.3 of the PDF standard security handler,
+# section 3.5.2 of the PDF 1.6 reference.
 def _alg33(owner_pwd, user_pwd, rev, keylen):
+    # Steps 1-4: derive the RC4 key from the owner password (see _alg33_1).
     key = _alg33_1(owner_pwd, rev, keylen)
+    # 5. Pad or truncate the user password string as described in step 1 of
+    # algorithm 3.2.
     user_pwd = (user_pwd + _encryption_padding)[:32]
+    # 6. Encrypt the result of step 5, using an RC4 encryption function with
+    # the encryption key obtained in step 4.
     val = utils.RC4_encrypt(key, user_pwd)
+    # 7. (Revision 3 or greater) Do the following 19 times: Take the output
+    # from the previous invocation of the RC4 function and pass it as input to
+    # a new invocation of the function; use an encryption key generated by
+    # taking each byte of the encryption key obtained in step 4 and performing
+    # an XOR operation between that byte and the single-byte value of the
+    # iteration counter (from 1 to 19).
     if rev >= 3:
         for i in range(1, 20):
             new_key = ''
             for l in range(len(key)):
                 new_key += chr(ord(key[l]) ^ i)
             val = utils.RC4_encrypt(new_key, val)
+    # 8. Return the output from the final invocation of the RC4; per the spec
+    # it is stored as the value of the /O entry in the encryption dictionary.
     return val
 
+# Steps 1-4 of algorithm 3.3: derive the RC4 key from the given password.
 def _alg33_1(password, rev, keylen):
-    import md5
-    m = md5.new()
+    # 1. Pad or truncate the owner password string as described in step 1 of
+    # algorithm 3.2.  If there is no owner password, use the user password
+    # instead.
     password = (password + _encryption_padding)[:32]
-    m.update(password)
+    # 2. Initialize the MD5 hash function and pass the result of step 1 as
+    # input to this function.
+    import md5
+    m = md5.new(password)
+    # 3. (Revision 3 or greater) Do the following 50 times: Take the output
+    # from the previous MD5 hash and pass it as input into a new MD5 hash.
     md5_hash = m.digest()
     if rev >= 3:
         for i in range(50):
             md5_hash = md5.new(md5_hash).digest()
+    # 4. Create an RC4 encryption key using the first n bytes of the output
+    # from the final MD5 hash, where n is always 5 for revision 2 but, for
+    # revision 3 or greater, depends on the value of the encryption
+    # dictionary's /Length entry.
     key = md5_hash[:keylen]
     return key
 
+# Implementation of algorithm 3.4 of the PDF standard security handler,
+# section 3.5.2 of the PDF 1.6 reference.
 def _alg34(password, owner_entry, p_entry, id1_entry):
+    # 1. Create an encryption key based on the user password string, as
+    # described in algorithm 3.2 (called here with revision 2, 5-byte key).
     key = _alg32(password, 2, 5, owner_entry, p_entry, id1_entry)
+    # 2. Encrypt the 32-byte padding string shown in step 1 of algorithm 3.2,
+    # using an RC4 encryption function with the encryption key from the
+    # preceding step.
     U = utils.RC4_encrypt(key, _encryption_padding)
+    # 3. Store the result of step 2 as the value of the /U entry in the
+    # encryption dictionary.
     return U, key
 
+# Implementation of algorithm 3.5 of the PDF standard security handler,
+# section 3.5.2 of the PDF 1.6 reference.
 def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt):
+    # 1. Create an encryption key based on the user password string, as
+    # described in Algorithm 3.2.  NOTE: metadata_encrypt is not forwarded.
+    key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)
+    # 2. Initialize the MD5 hash function and pass the 32-byte padding string
+    # shown in step 1 of Algorithm 3.2 as input to this function.
     import md5
     m = md5.new()
     m.update(_encryption_padding)
+    # 3. Pass the first element of the file's file identifier array (the value
+    # of the ID entry in the document's trailer dictionary; see Table 3.13 on
+    # page 73) to the hash function and finish the hash.  (See implementation
+    # note 25 in Appendix H.)
     m.update(id1_entry)
     md5_hash = m.digest()
-    key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)
+    # 4. Encrypt the 16-byte result of the hash, using an RC4 encryption
+    # function with the encryption key from step 1.
     val = utils.RC4_encrypt(key, md5_hash)
+    # 5. Do the following 19 times: Take the output from the previous
+    # invocation of the RC4 function and pass it as input to a new invocation
+    # of the function; use an encryption key generated by taking each byte of
+    # the original encryption key (obtained in step 1) and performing an XOR
+    # operation between that byte and the single-byte value of the iteration
+    # counter (from 1 to 19).
     for i in range(1, 20):
         new_key = ''
         for l in range(len(key)):
             new_key += chr(ord(key[l]) ^ i)
         val = utils.RC4_encrypt(new_key, val)
+    # 6. Append 16 bytes of arbitrary padding to the output from the final
+    # invocation of the RC4 function and store the 32-byte result as the value
+    # of the U entry in the encryption dictionary.
+    # (implementer's note: I don't know what "arbitrary padding" is supposed to
+    # mean, so I have used null bytes.  This seems to match a few other
+    # people's implementations)
     return val + ('\x00' * 16), key
 
 #if __name__ == "__main__":
diff --git a/src/libprs500/ebooks/pyPdf/utils.py b/src/libprs500/ebooks/pyPdf/utils.py
index d6769c248f..860a42e669 100644
--- a/src/libprs500/ebooks/pyPdf/utils.py
+++ b/src/libprs500/ebooks/pyPdf/utils.py
@@ -32,7 +32,7 @@
 Utility functions for PDF library.
 """
 __author__ = "Mathieu Fenniak"
-__author_email__ = "mfenniak@pobox.com"
+__author_email__ = "biziqe@mathieu.fenniak.net"
 
 def readUntilWhitespace(stream, maxchars=None):
     txt = ""
@@ -86,6 +86,9 @@ def RC4_encrypt(key, plaintext):
         retval += chr(ord(plaintext[x]) ^ t)
     return retval
 
+class PdfReadError(Exception):
+    pass  # plain marker subclass; adds nothing to Exception
+
 if __name__ == "__main__":
     # test RC4
     out = RC4_encrypt("Key", "Plaintext")