Implement pure python solution for reading PDF metadata

This commit is contained in:
Kovid Goyal 2007-09-07 15:43:39 +00:00
parent 76af4c11d0
commit f7332494ae
6 changed files with 2059 additions and 62 deletions

View File

@ -14,83 +14,41 @@
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
'''Read meta information from PDF files'''
import sys, os, copy
import sys, os
from libprs500.ebooks.metadata import MetaInformation, get_parser
from libprs500.ptempfile import PersistentTemporaryFile
from libprs500.ebooks.metadata import MetaInformation
from libprs500.ebooks.pyPdf import PdfFileReader
def get_metadata(stream):
""" Return metadata as a L{MetaInfo} object """
if hasattr(stream, 'name'):
title = stream.name
title = os.path.splitext(os.path.basename(stream.name))[0]
else:
title = 'Unknown'
mi = MetaInformation(title, 'Unknown')
stream.seek(0)
pt = PersistentTemporaryFile('.pdf')
pt.write(stream.read())
pt.close()
return get_metadata_from_file(pt.name, mi)
def set_metadata(path, options):
try:
import podofo
doc = podofo.PdfDocument()
doc.Load(path)
info = doc.GetInfo()
if options.title:
info.SetTitle(options.title)
if options.authors:
info.SetAuthor(options.authors)
if options.category:
info.SetSubject(options.category)
pt = PersistentTemporaryFile('.pdf')
pt.close()
doc.Write(pt.name)
stream = open(path, 'wb')
stream.write(open(pt.name, 'rb').read())
stream.close()
except ImportError:
return False
return True
def get_metadata_from_file(path, default_mi=None):
if default_mi is None:
title = os.path.splitext(os.path.basename(path))[0]
mi = MetaInformation(title, 'Unknown')
else:
mi = copy.copy(default_mi)
try:
import podofo
doc = podofo.PdfDocument()
doc.Load(path)
info = doc.GetInfo()
if info.GetTitle():
mi.title = info.GetTitle()
if info.GetAuthor():
mi.authors = info.GetAuthor().split(',')
if info.GetSubject():
mi.category = info.GetSubject()
except ImportError:
pass
finally:
return mi
info = PdfFileReader(stream).getDocumentInfo()
if info.title:
mi.title = title
if info.author:
src = info.author.split('&')
authors = []
for au in src:
authors += au.split(',')
mi.authors = authors
mi.author = info.author
if info.subject:
mi.category = info.subject
return mi
def main(args=sys.argv):
parser = get_parser('pdf')
options, args = parser.parse_args(args)
if len(args) != 2:
print >>sys.stderr, 'No filename specified.'
return 1
path = os.path.abspath(os.path.expanduser(args[1]))
if not set_metadata(path, options):
print >>sys.stderr, 'You do not have the podofo python extension installed. Cannot read PDF files.'
return 1
print get_metadata_from_file(path)
print get_metadata(open(path, 'rb'))
return 0
if __name__ == '__main__':

View File

@ -0,0 +1,2 @@
from pdf import PdfFileReader, PdfFileWriter
__all__ = ["pdf"]

View File

@ -0,0 +1,239 @@
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Implementation of stream filters for PDF.
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "mfenniak@pobox.com"
from generic import NameObject
try:
    import zlib

    def decompress(data):
        """Return *data* inflated with zlib."""
        inflated = zlib.decompress(data)
        return inflated

    def compress(data):
        """Return *data* deflated with zlib."""
        deflated = zlib.compress(data)
        return deflated
except ImportError:
    # Unable to import zlib.  Attempt to use the System.IO.Compression
    # library from the .NET framework. (IronPython only)
    import System
    from System import IO, Collections, Array

    def _string_to_bytearr(buf):
        """Copy the characters of *buf* into a new System.Byte array."""
        size = len(buf)
        arr = Array.CreateInstance(System.Byte, size)
        for pos in range(size):
            arr[pos] = ord(buf[pos])
        return arr

    def _bytearr_to_string(bytes):
        """Build a Python string from the elements of a byte array."""
        text = ""
        for pos in range(bytes.Length):
            text += chr(bytes[pos])
        return text

    def _read_bytes(stream):
        """Drain *stream* in 2048-byte chunks and return everything read."""
        sink = IO.MemoryStream()
        chunk = Array.CreateInstance(System.Byte, 2048)
        count = stream.Read(chunk, 0, chunk.Length)
        while count != 0:
            sink.Write(chunk, 0, count)
            count = stream.Read(chunk, 0, chunk.Length)
        collected = sink.ToArray()
        sink.Close()
        return collected

    def decompress(data):
        """Inflate *data* through a .NET DeflateStream.

        NOTE(review): DeflateStream is fed the data unchanged; confirm how
        it copes with the zlib header that PDF Flate streams carry.
        """
        raw = _string_to_bytearr(data)
        source = IO.MemoryStream()
        source.Write(raw, 0, raw.Length)
        source.Position = 0  # rewind before reading back
        inflater = IO.Compression.DeflateStream(
            source, IO.Compression.CompressionMode.Decompress)
        text = _bytearr_to_string(_read_bytes(inflater))
        inflater.Close()
        return text

    def compress(data):
        """Deflate *data* through a .NET DeflateStream."""
        raw = _string_to_bytearr(data)
        sink = IO.MemoryStream()
        deflater = IO.Compression.DeflateStream(
            sink, IO.Compression.CompressionMode.Compress, True)
        deflater.Write(raw, 0, raw.Length)
        # Closing the deflater flushes it; the memory stream is read after.
        deflater.Close()
        sink.Position = 0  # rewind before reading back
        text = _bytearr_to_string(sink.ToArray())
        sink.Close()
        return text
class FlateDecode(object):
    """Codec for the PDF /FlateDecode stream filter."""

    @staticmethod
    def decode(data, decodeParms):
        """Inflate *data* and undo the predictor named in *decodeParms*.

        data -- the compressed stream contents.
        decodeParms -- mapping that may hold "/Predictor" and "/Columns",
            or a false value when the stream has no decode parameters.

        Returns the decoded data.  Only "no predictor" (1) and the PNG
        predictors (>= 10) with row filters 0 (None), 1 (Sub) and 2 (Up)
        are handled, and the filters treat every byte as one sample.

        Raises AssertionError for an unsupported predictor or PNG filter,
        or when the data is not a whole number of rows.  These are raised
        explicitly so that they still fire under ``python -O``.
        """
        data = decompress(data)
        predictor = 1
        if decodeParms:
            predictor = decodeParms.get("/Predictor", 1)
        # predictor 1 == no predictor
        if predictor == 1:
            return data

        columns = decodeParms["/Columns"]
        if predictor < 10:
            raise AssertionError("unsupported predictor: %r" % (predictor,))

        # PNG prediction can vary from row to row: every row starts with a
        # filter-type byte followed by `columns` bytes of filtered data.
        rowlength = columns + 1
        if len(data) % rowlength != 0:
            raise AssertionError(
                "data length %d is not a multiple of the row length %d"
                % (len(data), rowlength))

        decodedRows = []
        prev_rowdata = "\x00" * rowlength
        for row in range(len(data) // rowlength):
            rowdata = list(data[row * rowlength:(row + 1) * rowlength])
            filterByte = ord(rowdata[0])
            if filterByte == 0:
                # None: the row is stored verbatim
                pass
            elif filterByte == 1:
                # Sub: add the byte to the left; index 1 has no left
                # neighbour, so the loop starts at 2
                for i in range(2, rowlength):
                    rowdata[i] = chr((ord(rowdata[i]) + ord(rowdata[i - 1])) % 256)
            elif filterByte == 2:
                # Up: add the byte at the same column of the previous row
                for i in range(1, rowlength):
                    rowdata[i] = chr((ord(rowdata[i]) + ord(prev_rowdata[i])) % 256)
            else:
                raise AssertionError("unsupported PNG filter: %r" % (filterByte,))
            prev_rowdata = rowdata
            # drop the filter-type byte from the output
            decodedRows.append(''.join(rowdata[1:]))
        return ''.join(decodedRows)

    @staticmethod
    def encode(data):
        """Return *data* compressed for use in a /FlateDecode stream."""
        return compress(data)
class ASCIIHexDecode(object):
    """Decoder for the PDF /ASCIIHexDecode stream filter."""

    @staticmethod
    def decode(data, decodeParms=None):
        """Turn pairs of hexadecimal digits in *data* into characters.

        data -- string of hex digits, optionally interleaved with
            whitespace and terminated by the ">" end-of-data marker.
        decodeParms -- accepted for interface symmetry; unused.

        Decoding stops at ">" or, if the marker is missing, at the end of
        *data*.  A trailing unpaired digit is completed with "0", as the
        PDF specification requires.

        Raises ValueError if a non-hexadecimal character is encountered.
        """
        decoded = []
        pair = ""
        for c in data:
            if c == ">":
                break
            if c.isspace():
                continue
            pair += c
            if len(pair) == 2:
                decoded.append(chr(int(pair, 16)))
                pair = ""
        if pair:
            # odd number of digits: behave as if a 0 followed the last one
            decoded.append(chr(int(pair + "0", 16)))
        return "".join(decoded)
class ASCII85Decode(object):
    """Decoder for the PDF /ASCII85Decode stream filter."""

    # Characters dropped from the input before decoding.
    _whitespace = ' \n\r\t\x0c\x00'

    @staticmethod
    def _groupToBytes(group):
        """Convert five base-85 digits into the four characters they encode.

        Raises AssertionError if the digits exceed 32 bits.
        """
        value = 0
        for digit in group:
            value = value * 85 + digit
        if value > 0xFFFFFFFF:
            raise AssertionError("ASCII85 group does not fit in 32 bits")
        return (chr(value >> 24) + chr((value >> 16) % 256) +
                chr((value >> 8) % 256) + chr(value % 256))

    @staticmethod
    def decode(data, decodeParms=None):
        """Decode ASCII85 text and return the resulting string.

        data -- the encoded text; an optional leading "<~" is skipped and
            decoding stops at the "~>" end-of-data marker, or at the end
            of *data* when the marker is missing.
        decodeParms -- accepted for interface symmetry; unused.

        Raises AssertionError on malformed input: a character outside
        "!".."u", a "z" inside a group, a group that overflows 32 bits, or
        a final group consisting of a single character.
        """
        # remove all whitespace from data
        data = [y for y in data if y not in ASCII85Decode._whitespace]
        size = len(data)
        x = 0
        # The optional "<~" prefix can only appear at the very start.
        if data[:2] == ["<", "~"]:
            x = 2

        pieces = []
        group = []
        while x < size:
            c = data[x]
            if c == "~" and data[x + 1:x + 2] == [">"]:
                break
            x += 1
            if c == "z":
                # shorthand for four zero bytes; only legal between groups
                if group:
                    raise AssertionError("'z' inside an ASCII85 group")
                pieces.append("\x00\x00\x00\x00")
                continue
            digit = ord(c) - 33
            if digit < 0 or digit >= 85:
                raise AssertionError("invalid ASCII85 character: %r" % (c,))
            group.append(digit)
            if len(group) == 5:
                pieces.append(ASCII85Decode._groupToBytes(group))
                group = []

        if group:
            # cannot have a final group of just 1 char
            if len(group) < 2:
                raise AssertionError("final ASCII85 group has a single character")
            # n+1 characters encode n bytes; pad with the highest digit
            # ("u") and keep only those n bytes.
            keep = len(group) - 1
            group += [84] * (5 - len(group))
            pieces.append(ASCII85Decode._groupToBytes(group)[:keep])
        return "".join(pieces)
def _decodeParmsFor(parms, index):
    """Return the /DecodeParms value that applies to the filter at *index*.

    A non-array value is returned unchanged.  For an array, the entry at
    *index* is resolved through getObject() when it has one and returned
    if it is a dictionary; otherwise None is returned.
    """
    if not isinstance(parms, (list, tuple)):
        return parms
    if index >= len(parms):
        return None
    entry = parms[index]
    if hasattr(entry, "getObject"):
        entry = entry.getObject()
    if isinstance(entry, dict):
        return entry
    return None


def decodeStreamData(stream):
    """Run the raw data of *stream* through each of its /Filter entries.

    stream -- object exposing get() for "/Filter" and "/DecodeParms" and
        carrying the encoded data in its _data attribute.

    Returns the decoded data.  Raises AssertionError when a filter other
    than /FlateDecode, /ASCIIHexDecode or /ASCII85Decode is named; it is
    raised explicitly so the check survives ``python -O``.
    """
    filters = stream.get("/Filter", ())
    if len(filters) and not isinstance(filters[0], NameObject):
        # we have a single filter instance
        filters = (filters,)
    allParms = stream.get("/DecodeParms")
    data = stream._data
    for index, filterType in enumerate(filters):
        if filterType == "/FlateDecode":
            # an array-valued /DecodeParms holds one entry per filter
            data = FlateDecode.decode(data, _decodeParmsFor(allParms, index))
        elif filterType == "/ASCIIHexDecode":
            data = ASCIIHexDecode.decode(data)
        elif filterType == "/ASCII85Decode":
            data = ASCII85Decode.decode(data)
        else:
            raise AssertionError("unsupported filter: %r" % (filterType,))
    return data
if __name__ == "__main__":
    # Self-test for the two ASCII decoders, run when the module is executed
    # directly.
    hexDecoded = ASCIIHexDecode.decode('61\n626\n3>')
    assert "abc" == hexDecoded

    ascii85Test = """
<~9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKF<GL>Cj@.4Gp$d7F!,L7@<6@)/0JDEF<G%<+EV:2F!,
O<DJ+*.@<*K0@<6L(Df-\\0Ec5e;DffZ(EZee.Bl.9pF"AGXBPCsi+DGm>@3BB/F*&OCAfu2/AKY
i(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF<G:8+EV:.+Cf>-FD5W8ARlolDIa
l(DId<j@<?3r@:F%a+D58'ATD4$Bl@l3De:,-DJs`8ARoFb/0JMK@qB4^F!,R<AKZ&-DfTqBG%G
>uD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c~>
"""
    ascii85_originalText="Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure."
    ascii85Decoded = ASCII85Decode.decode(ascii85Test)
    assert ascii85Decoded == ascii85_originalText

View File

@ -0,0 +1,542 @@
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Implementation of generic PDF objects (dictionary, number, string, and so on)
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "mfenniak@pobox.com"
import re
from utils import readNonWhitespace, RC4_encrypt
import filters
def readObject(stream, pdf):
    """Parse the PDF object that starts at the current stream position.

    stream -- file-like object supporting read() and relative seek().
    pdf -- handed on to the readers that may create indirect references
        (arrays, dictionaries, indirect objects).

    The first character selects the reader; it is pushed back so that the
    chosen reader sees the complete token.  Comments are skipped.

    Raises ValueError if the data ends inside a comment.
    """
    tok = stream.read(1)
    stream.seek(-1, 1)  # reset to start
    if tok == 't' or tok == 'f':
        # boolean object
        return BooleanObject.readFromStream(stream)
    elif tok == '(':
        # string object
        return StringObject.readFromStream(stream)
    elif tok == '/':
        # name object
        return NameObject.readFromStream(stream)
    elif tok == '[':
        # array object
        return ArrayObject.readFromStream(stream, pdf)
    elif tok == 'n':
        # null object
        return NullObject.readFromStream(stream)
    elif tok == '<':
        # hexadecimal string OR dictionary
        peek = stream.read(2)
        stream.seek(-2, 1)  # reset to start
        if peek == '<<':
            return DictionaryObject.readFromStream(stream, pdf)
        else:
            return StringObject.readHexStringFromStream(stream)
    elif tok == '%':
        # comment: discard everything up to the end of the line
        while tok not in ('\r', '\n'):
            tok = stream.read(1)
            if not tok:
                # read() returns '' forever at end of data; without this
                # check the loop would never finish
                raise ValueError("Unexpected end of data inside a comment.")
        tok = readNonWhitespace(stream)
        stream.seek(-1, 1)
        return readObject(stream, pdf)
    else:
        # number object OR indirect reference
        if tok == '+' or tok == '-':
            # number
            return NumberObject.readFromStream(stream)
        # Look ahead for "<id> <generation> R" without consuming anything.
        peek = stream.read(20)
        stream.seek(-len(peek), 1)  # reset to start
        if re.match(r"(\d+)\s(\d+)\sR[^a-zA-Z]", peek) is not None:
            return IndirectObject.readFromStream(stream, pdf)
        else:
            return NumberObject.readFromStream(stream)
class PdfObject(object):
    """Common base class of the PDF object types defined in this module."""

    def getObject(self):
        """Resolves indirect references.

        A direct object needs no resolving, so it returns itself.
        """
        resolved = self
        return resolved
class NullObject(PdfObject):
    """The PDF null object."""

    def writeToStream(self, stream, encryption_key):
        """Write the keyword "null" to *stream*; encryption_key is unused."""
        stream.write("null")

    @staticmethod
    def readFromStream(stream):
        """Consume the keyword "null" from *stream* and return a NullObject.

        Raises AssertionError if the next four characters are not "null".
        """
        # Read outside of any assert statement: under ``python -O`` asserts
        # are removed and the keyword would otherwise never be consumed.
        word = stream.read(4)
        if word != "null":
            raise AssertionError("expected 'null', found %r" % (word,))
        return NullObject()
class BooleanObject(PdfObject):
    """A PDF boolean; the Python truth value is kept in self.value."""

    def __init__(self, value):
        self.value = value

    def writeToStream(self, stream, encryption_key):
        """Write "true" or "false" to *stream*; encryption_key is unused."""
        if self.value:
            stream.write("true")
        else:
            stream.write("false")

    @staticmethod
    def readFromStream(stream):
        """Read the keyword "true" or "false" and return a BooleanObject.

        Raises AssertionError if neither keyword is found.  The exception
        is raised explicitly so that ``python -O`` cannot turn the failure
        into a silent ``None`` return.
        """
        word = stream.read(4)
        if word == "true":
            return BooleanObject(True)
        if word == "fals":
            # consume the fifth character of "false"
            stream.read(1)
            return BooleanObject(False)
        raise AssertionError("expected 'true' or 'false', found %r" % (word,))
class ArrayObject(list, PdfObject):
    """A PDF array, stored as a Python list of PDF objects."""

    def writeToStream(self, stream, encryption_key):
        """Write the array as "[ item item ... ]".

        encryption_key is passed on to every element's writeToStream().
        """
        stream.write("[")
        for data in self:
            stream.write(" ")
            data.writeToStream(stream, encryption_key)
        stream.write(" ]")

    @staticmethod
    def readFromStream(stream, pdf):
        """Parse an array starting at "[" and return it as an ArrayObject.

        Raises AssertionError if the data does not start with "[", and
        ValueError if it ends before the closing "]".
        """
        arr = ArrayObject()
        # Read outside of any assert so the bracket is consumed even when
        # asserts are stripped by ``python -O``.
        opener = stream.read(1)
        if opener != "[":
            raise AssertionError("expected '[', found %r" % (opener,))
        while True:
            # skip leading whitespace
            tok = stream.read(1)
            while tok.isspace():
                tok = stream.read(1)
            if not tok:
                # read() keeps returning '' at end of data; without this
                # check the last element would be re-parsed forever
                raise ValueError("Unexpected end of data inside an array.")
            # check for array ending
            if tok == "]":
                break
            stream.seek(-1, 1)
            # read and append obj
            arr.append(readObject(stream, pdf))
        return arr
class IndirectObject(PdfObject):
    """A reference ("<idnum> <generation> R") to an object held by *pdf*."""

    def __init__(self, idnum, generation, pdf):
        self.idnum = idnum
        self.generation = generation
        self.pdf = pdf

    def getObject(self):
        """Look the reference up in self.pdf and resolve the result too."""
        return self.pdf.getObject(self).getObject()

    def __repr__(self):
        return "IndirectObject(%r, %r)" % (self.idnum, self.generation)

    def __eq__(self, other):
        """Equal when id, generation and owning pdf object all match."""
        return (
            isinstance(other, IndirectObject) and
            self.idnum == other.idnum and
            self.generation == other.generation and
            self.pdf is other.pdf
        )

    def __ne__(self, other):
        return not self.__eq__(other)

    def writeToStream(self, stream, encryption_key):
        """Write "<idnum> <generation> R"; encryption_key is unused."""
        stream.write("%s %s R" % (self.idnum, self.generation))

    @staticmethod
    def readFromStream(stream, pdf):
        """Parse "<idnum> <generation> R" and return an IndirectObject.

        Raises AssertionError if the "R" keyword is missing and ValueError
        if either number cannot be converted to an integer.
        """
        def readField():
            # Collect characters up to the next whitespace.  End of data
            # also ends the field; otherwise read() returning '' would
            # keep the loop running forever.
            text = ""
            while True:
                tok = stream.read(1)
                if not tok or tok.isspace():
                    return text
                text += tok

        idnum = readField()
        generation = readField()
        r = stream.read(1)
        if r != "R":
            raise AssertionError("expected 'R', found %r" % (r,))
        return IndirectObject(int(idnum), int(generation), pdf)
class FloatObject(float, PdfObject):
    """A PDF real number."""

    def writeToStream(self, stream, encryption_key):
        """Write the number to *stream*; encryption_key is unused.

        repr() is used as long as it yields plain decimal notation.  PDF
        real numbers have no exponent form, so values that repr() renders
        with an exponent are written in fixed-point notation instead.
        """
        text = repr(self)
        if "e" in text or "E" in text:
            text = ("%.15f" % self).rstrip("0")
            if text.endswith("."):
                # keep one digit after the decimal point
                text += "0"
        stream.write(text)
class NumberObject(int, PdfObject):
    """A PDF integer."""

    def __init__(self, value):
        # The value itself is consumed by int.__new__; nothing further is
        # passed to the base initialiser.
        int.__init__(self)

    def writeToStream(self, stream, encryption_key):
        """Write the number to *stream*; encryption_key is unused."""
        stream.write(repr(self))

    @staticmethod
    def readFromStream(stream):
        """Read a numeric token and return it as a number object.

        Characters are consumed while they are digits, "+", "-" or ".".
        Returns a FloatObject when the token contains ".", otherwise a
        NumberObject.  Raises ValueError if the token is not a number.
        """
        name = ""
        while True:
            tok = stream.read(1)
            if tok in ('+', '-', '.') or tok.isdigit():
                name += tok
                continue
            # Push back the terminating character.  At end of data nothing
            # was read, so stepping back would un-read the last digit.
            if tok:
                stream.seek(-1, 1)
            break
        if name.find(".") != -1:
            return FloatObject(name)
        else:
            return NumberObject(name)
class StringObject(str, PdfObject):
    """A PDF string, readable from literal "(...)" or hex "<...>" syntax."""

    def writeToStream(self, stream, encryption_key):
        """Write the string in literal "(...)" form.

        When encryption_key is set the contents are RC4-encrypted first.
        Every character that is neither alphanumeric nor whitespace is
        written as a three-digit octal escape.
        """
        string = self
        if encryption_key:
            string = RC4_encrypt(encryption_key, string)
        stream.write("(")
        for c in string:
            if not c.isalnum() and not c.isspace():
                stream.write("\\%03o" % ord(c))
            else:
                stream.write(c)
        stream.write(")")

    @staticmethod
    def readHexStringFromStream(stream):
        """Parse a "<hex digits>" string and return a StringObject.

        Whitespace between digits is skipped and a trailing unpaired digit
        is completed with "0".  Raises ValueError on a non-hex character
        or when the data ends before the closing ">".
        """
        stream.read(1)  # the opening "<"
        chars = []
        pair = ""
        while True:
            tok = readNonWhitespace(stream)
            if not tok:
                # '' is returned forever at end of data
                raise ValueError("Unexpected end of data inside a hex string.")
            if tok == ">":
                break
            pair += tok
            if len(pair) == 2:
                chars.append(chr(int(pair, 16)))
                pair = ""
        if len(pair) == 1:
            chars.append(chr(int(pair + "0", 16)))
        return StringObject("".join(chars))

    @staticmethod
    def readFromStream(stream):
        """Parse a literal "(...)" string and return a StringObject.

        Balanced unescaped parentheses are kept as part of the text.
        Backslash escapes are decoded: n, r, t, b, f, parentheses,
        backslash, one to three octal digits, and a backslash followed by
        an end-of-line (line continuation, which produces nothing).  For
        any other character the backslash is simply dropped.

        Raises ValueError when the data ends before the closing ")".
        """
        escapes = {
            "n": "\n", "r": "\r", "t": "\t", "b": "\b", "f": "\f",
            "(": "(", ")": ")", "\\": "\\",
        }
        stream.read(1)  # the opening "("
        parens = 1
        chars = []
        while True:
            tok = stream.read(1)
            if not tok:
                raise ValueError("Unexpected end of data inside a string.")
            if tok == "(":
                parens += 1
            elif tok == ")":
                parens -= 1
                if parens == 0:
                    break
            elif tok == "\\":
                tok = stream.read(1)
                if not tok:
                    raise ValueError("Unexpected end of data inside a string.")
                if tok in escapes:
                    tok = escapes[tok]
                elif tok in "01234567":
                    # octal escape of one to three digits
                    digits = tok
                    for _ in range(2):
                        nxt = stream.read(1)
                        if nxt and nxt in "01234567":
                            digits += nxt
                        else:
                            if nxt:
                                stream.seek(-1, 1)
                            break
                    # high-order overflow is ignored
                    tok = chr(int(digits, 8) % 256)
                elif tok in ("\r", "\n"):
                    # line continuation; "\r\n" counts as one end-of-line
                    if tok == "\r":
                        nxt = stream.read(1)
                        if nxt and nxt != "\n":
                            stream.seek(-1, 1)
                    tok = ""
            chars.append(tok)
        return StringObject("".join(chars))
class NameObject(str, PdfObject):
    """A PDF name; the stored text includes the leading "/"."""

    # characters that end a name without being part of it
    delimiterCharacters = "(", ")", "<", ">", "[", "]", "{", "}", "/", "%"

    def __init__(self, data):
        # The text itself is consumed by str.__new__; nothing further is
        # passed to the base initialiser.
        str.__init__(self)

    def writeToStream(self, stream, encryption_key):
        """Write the name to *stream* unchanged; encryption_key is unused."""
        stream.write(self)

    @staticmethod
    def readFromStream(stream):
        """Read a name starting at "/" and return it as a NameObject.

        The name ends before the next whitespace or delimiter character,
        which is left unread, or at the end of the data.

        Raises AssertionError if the data does not start with "/".
        """
        name = stream.read(1)
        if name != "/":
            raise AssertionError("expected '/', found %r" % (name,))
        while True:
            tok = stream.read(1)
            if not tok:
                # end of data: read() would return '' forever
                break
            if tok.isspace() or tok in NameObject.delimiterCharacters:
                stream.seek(-1, 1)
                break
            name += tok
        return NameObject(name)
class DictionaryObject(dict, PdfObject):
    """A PDF dictionary."""

    def __init__(self):
        pass

    def writeToStream(self, stream, encryption_key):
        """Write the dictionary as "<<", one "key value" line per entry, ">>".

        encryption_key is passed on to every key's and value's
        writeToStream().
        """
        stream.write("<<\n")
        for key, value in self.items():
            key.writeToStream(stream, encryption_key)
            stream.write(" ")
            value.writeToStream(stream, encryption_key)
            stream.write("\n")
        stream.write(">>")

    @staticmethod
    def readFromStream(stream, pdf):
        """Parse a dictionary, together with the stream data that may follow.

        stream -- file-like object positioned at "<<".
        pdf -- used to resolve an indirect /Length and handed to
            readObject() for nested objects.

        Returns a DictionaryObject, or the result of
        StreamObject.initializeFromDictionary() when the dictionary is
        followed by the "stream" keyword.

        Raises AssertionError on malformed syntax (missing "<<", duplicate
        key, bad end-of-line after "stream", missing /Length) and
        ValueError when the data ends early or "endstream" is not found.
        """
        # Read outside of any assert so the marker is consumed even when
        # asserts are stripped by ``python -O``.
        opener = stream.read(2)
        if opener != "<<":
            raise AssertionError("expected '<<', found %r" % (opener,))
        data = {}
        while True:
            tok = readNonWhitespace(stream)
            if not tok:
                raise ValueError("Unexpected end of data inside a dictionary.")
            if tok == ">":
                # second character of the closing ">>"
                stream.read(1)
                break
            stream.seek(-1, 1)
            key = readObject(stream, pdf)
            tok = readNonWhitespace(stream)
            stream.seek(-1, 1)
            value = readObject(stream, pdf)
            if key in data:
                # multiple definitions of key not permitted
                raise AssertionError("duplicate dictionary key: %r" % (key,))
            data[key] = value

        pos = stream.tell()
        s = readNonWhitespace(stream)
        if s == 's' and stream.read(5) == 'tream':
            eol = stream.read(1)
            # odd PDF file output has spaces after 'stream' keyword but before EOL.
            # patch provided by Danial Sandler
            while eol == ' ':
                eol = stream.read(1)
            if eol not in ("\n", "\r"):
                raise AssertionError(
                    "expected end-of-line after 'stream', found %r" % (eol,))
            if eol == "\r":
                # read \n after; a character that is not "\n" already
                # belongs to the stream data and is pushed back
                follower = stream.read(1)
                if follower and follower != "\n":
                    stream.seek(-1, 1)
            # this is a stream object, not a dictionary
            if "/Length" not in data:
                raise AssertionError("stream dictionary has no /Length entry")
            length = data["/Length"]
            if isinstance(length, IndirectObject):
                # resolving the reference moves the stream position
                t = stream.tell()
                length = pdf.getObject(length)
                stream.seek(t, 0)
            data["__streamdata__"] = stream.read(length)
            e = readNonWhitespace(stream)
            ndstream = stream.read(8)
            if (e + ndstream) != "endstream":
                # (sigh) - the odd PDF file has a length that is too long, so
                # we need to read backwards to find the "endstream" ending.
                # ReportLab (unknown version) generates files with this bug,
                # and Python users into PDF files tend to be our audience.
                # we need to do this to correct the streamdata and chop off
                # an extra character.
                pos = stream.tell()
                stream.seek(-10, 1)
                end = stream.read(9)
                if end == "endstream":
                    # we found it by looking back one character further.
                    data["__streamdata__"] = data["__streamdata__"][:-1]
                else:
                    stream.seek(pos, 0)
                    raise ValueError(
                        "Unable to find 'endstream' marker after stream.")
        else:
            stream.seek(pos, 0)

        if "__streamdata__" in data:
            return StreamObject.initializeFromDictionary(data)
        retval = DictionaryObject()
        retval.update(data)
        return retval
class StreamObject(DictionaryObject):
    """A PDF stream: a dictionary plus the data kept in self._data."""

    def __init__(self):
        self._data = None
        self.decodedSelf = None

    def writeToStream(self, stream, encryption_key):
        """Write the dictionary followed by "stream", the data, "endstream".

        /Length is set from the current data for the duration of the write
        and removed again afterwards, even when writing fails.  When
        encryption_key is set the data is RC4-encrypted before writing.
        """
        self[NameObject("/Length")] = NumberObject(len(self._data))
        try:
            DictionaryObject.writeToStream(self, stream, encryption_key)
        finally:
            del self["/Length"]
        stream.write("\nstream\n")
        data = self._data
        if encryption_key:
            data = RC4_encrypt(encryption_key, data)
        stream.write(data)
        stream.write("\nendstream")

    @staticmethod
    def initializeFromDictionary(data):
        """Build a stream object from a parsed dictionary.

        data -- dict holding "__streamdata__" and "/Length" next to the
            ordinary entries; both keys are removed from *data*.

        Returns an EncodedStreamObject when "/Filter" is present and a
        DecodedStreamObject otherwise.
        """
        if "/Filter" in data:
            retval = EncodedStreamObject()
        else:
            retval = DecodedStreamObject()
        retval._data = data["__streamdata__"]
        del data["__streamdata__"]
        del data["/Length"]
        retval.update(data)
        return retval

    def flateEncode(self):
        """Return a new EncodedStreamObject holding this data Flate-compressed.

        The new object's /Filter is /FlateDecode followed by any filters
        this stream already names.  This object is left unmodified.
        """
        if "/Filter" in self:
            existing = self["/Filter"]
            # Build a fresh array; inserting into the existing one would
            # also change this stream's own /Filter entry.
            f = ArrayObject()
            f.append(NameObject("/FlateDecode"))
            if isinstance(existing, ArrayObject):
                f.extend(existing)
            else:
                f.append(existing)
        else:
            f = NameObject("/FlateDecode")
        retval = EncodedStreamObject()
        retval[NameObject("/Filter")] = f
        retval._data = filters.FlateDecode.encode(self._data)
        return retval
class DecodedStreamObject(StreamObject):
    """Stream whose _data attribute is handed out and replaced as-is."""

    def getData(self):
        """Return the stream data."""
        payload = self._data
        return payload

    def setData(self, data):
        """Replace the stream data with *data*."""
        self._data = data
class EncodedStreamObject(StreamObject):
    """Stream whose _data is still encoded by the filters in /Filter."""

    def __init__(self):
        self._data = None
        self.decodedSelf = None

    def getData(self):
        """Return the decoded stream data, decoding it on first use.

        The decoded form is cached in self.decodedSelf as a
        DecodedStreamObject carrying every entry of this dictionary except
        /Length, /Filter and /DecodeParms.
        """
        # Compare with None: the cached object is a dictionary and is
        # false whenever it has no entries.
        if self.decodedSelf is not None:
            # cached version of decoded object
            return self.decodedSelf.getData()
        # create decoded object
        decoded = DecodedStreamObject()
        decoded._data = filters.decodeStreamData(self)
        for key, value in self.items():
            if key not in ("/Length", "/Filter", "/DecodeParms"):
                decoded[key] = value
        self.decodedSelf = decoded
        return decoded._data

    def setData(self, data):
        """Not supported; always raises NotImplementedError."""
        raise NotImplementedError(
            "Creating EncodedStreamObject is not currently supported")
class RectangleObject(ArrayObject):
    """Four-element array read as [lowerLeft_x, lowerLeft_y, upperRight_x, upperRight_y]."""

    def __init__(self, arr):
        # must have four points
        assert len(arr) == 4
        # automatically convert arr[x] into NumberObject(arr[x]) if necessary
        numbers = [self.ensureIsNumber(x) for x in arr]
        ArrayObject.__init__(self, numbers)

    def ensureIsNumber(self, value):
        """Return *value* unchanged if it is a NumberObject, else wrapped in one."""
        if isinstance(value, NumberObject):
            return value
        return NumberObject(value)

    def __repr__(self):
        return "RectangleObject(%s)" % repr(list(self))

    # --- single coordinates -------------------------------------------------

    def getLowerLeft_x(self):
        return self[0]

    def getLowerLeft_y(self):
        return self[1]

    def getUpperRight_x(self):
        return self[2]

    def getUpperRight_y(self):
        return self[3]

    # The remaining corners are derived from the two stored ones.

    def getUpperLeft_x(self):
        return self.getLowerLeft_x()

    def getUpperLeft_y(self):
        return self.getUpperRight_y()

    def getLowerRight_x(self):
        return self.getUpperRight_x()

    def getLowerRight_y(self):
        return self.getLowerLeft_y()

    # --- corners as (x, y) pairs ---------------------------------------------

    def getLowerLeft(self):
        return self.getLowerLeft_x(), self.getLowerLeft_y()

    def getLowerRight(self):
        return self.getLowerRight_x(), self.getLowerRight_y()

    def getUpperLeft(self):
        return self.getUpperLeft_x(), self.getUpperLeft_y()

    def getUpperRight(self):
        return self.getUpperRight_x(), self.getUpperRight_y()

    # --- corner setters; *value* must unpack into exactly (x, y) --------------

    def setLowerLeft(self, value):
        x, y = [self.ensureIsNumber(v) for v in value]
        self[0], self[1] = x, y

    def setLowerRight(self, value):
        x, y = [self.ensureIsNumber(v) for v in value]
        self[2], self[1] = x, y

    def setUpperLeft(self, value):
        x, y = [self.ensureIsNumber(v) for v in value]
        self[0], self[3] = x, y

    def setUpperRight(self, value):
        x, y = [self.ensureIsNumber(v) for v in value]
        self[2], self[3] = x, y

    lowerLeft = property(getLowerLeft, setLowerLeft, None, None)
    lowerRight = property(getLowerRight, setLowerRight, None, None)
    upperLeft = property(getUpperLeft, setUpperLeft, None, None)
    upperRight = property(getUpperRight, setUpperRight, None, None)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,94 @@
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Utility functions for PDF library.
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "mfenniak@pobox.com"
def readUntilWhitespace(stream, maxchars=None):
    """Read characters from *stream* up to the next whitespace.

    Reading stops at a whitespace character (which is consumed but not
    returned), at the end of the data, or once the collected text is
    *maxchars* characters long.  Returns the collected text.
    """
    collected = ""
    while True:
        ch = stream.read(1)
        if not ch or ch.isspace():
            break
        collected += ch
        if len(collected) == maxchars:
            break
    return collected
def readNonWhitespace(stream):
    """Return the next character of *stream* that is not LF, CR, space or tab.

    Returns '' when the data ends before such a character is found.
    """
    blanks = ('\n', '\r', ' ', '\t')
    tok = stream.read(1)
    while tok in blanks:
        tok = stream.read(1)
    return tok
class ConvertFunctionsToVirtualList(object):
    """Read-only sequence backed by a length callable and an item callable."""

    def __init__(self, lengthFunction, getFunction):
        self.lengthFunction = lengthFunction
        self.getFunction = getFunction

    def __len__(self):
        return self.lengthFunction()

    def __getitem__(self, index):
        """Return getFunction(index) after normalising and checking *index*.

        Raises TypeError for a non-integer index and IndexError when the
        index lies outside the current length.
        """
        if not isinstance(index, int):
            raise TypeError("sequence indices must be integers")
        size = len(self)
        position = index
        if position < 0:
            # support negative indexes
            position += size
        if not (0 <= position < size):
            raise IndexError("sequence index out of range")
        return self.getFunction(position)
def RC4_encrypt(key, plaintext):
    """Return *plaintext* XOR-ed with the RC4 keystream derived from *key*.

    Both arguments are strings whose characters are treated as byte
    values.  Applying the function twice with the same key gives back the
    original text.
    """
    # key scheduling: permute the identity table under control of the key
    S = list(range(256))
    j = 0
    for i in range(256):
        j = (j + S[i] + ord(key[i % len(key)])) % 256
        S[i], S[j] = S[j], S[i]

    # keystream generation, one output character per input character
    i = j = 0
    output = []
    for symbol in plaintext:
        i = (i + 1) % 256
        j = (j + S[i]) % 256
        S[i], S[j] = S[j], S[i]
        t = S[(S[i] + S[j]) % 256]
        output.append(chr(ord(symbol) ^ t))
    return "".join(output)
if __name__ == "__main__":
    # test RC4: encrypt a sample, then run the result through the same key
    # again and print both stages
    out = RC4_encrypt("Key", "Plaintext")
    print(repr(out))
    pt = RC4_encrypt("Key", out)
    print(repr(pt))