mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Remove the unmaintained pdfmanipulate command line utility
This commit is contained in:
parent
0a22c291b7
commit
b69fb230c5
@ -444,23 +444,6 @@ class CurrentDir(object):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class StreamReadWrapper(object):
|
|
||||||
'''
|
|
||||||
Used primarily with pyPdf to ensure the stream is properly closed.
|
|
||||||
'''
|
|
||||||
|
|
||||||
def __init__(self, stream):
|
|
||||||
for x in ('read', 'seek', 'tell'):
|
|
||||||
setattr(self, x, getattr(stream, x))
|
|
||||||
|
|
||||||
def __exit__(self, *args):
|
|
||||||
for x in ('read', 'seek', 'tell'):
|
|
||||||
setattr(self, x, None)
|
|
||||||
|
|
||||||
def __enter__(self):
|
|
||||||
return self
|
|
||||||
|
|
||||||
|
|
||||||
def detect_ncpus():
|
def detect_ncpus():
|
||||||
"""Detects the number of effective CPUs in the system"""
|
"""Detects the number of effective CPUs in the system"""
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
|
@ -1,72 +0,0 @@
|
|||||||
from __future__ import with_statement
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
'''
|
|
||||||
Command line interface to run pdf manipulation commands.
|
|
||||||
'''
|
|
||||||
|
|
||||||
import string, sys
|
|
||||||
|
|
||||||
from calibre.utils.config import OptionParser
|
|
||||||
from calibre.utils.logging import Log
|
|
||||||
from calibre.constants import preferred_encoding
|
|
||||||
from calibre.ebooks.pdf.manipulate import crop, decrypt, encrypt, \
|
|
||||||
info, merge, reverse, rotate, split
|
|
||||||
|
|
||||||
COMMANDS = {
|
|
||||||
'crop' : crop,
|
|
||||||
'decrypt' : decrypt,
|
|
||||||
'encrypt' : encrypt,
|
|
||||||
'info' : info,
|
|
||||||
'merge' : merge,
|
|
||||||
'reverse' : reverse,
|
|
||||||
'rotate' : rotate,
|
|
||||||
'split' : split,
|
|
||||||
}
|
|
||||||
|
|
||||||
USAGE = '%prog ' + _('''command ...
|
|
||||||
|
|
||||||
command can be one of the following:
|
|
||||||
[%%commands]
|
|
||||||
|
|
||||||
Use %prog command --help to get more information about a specific command
|
|
||||||
|
|
||||||
Manipulate a PDF.
|
|
||||||
''').replace('%%commands', string.join(sorted(COMMANDS.keys()), ', '))
|
|
||||||
|
|
||||||
def print_help(parser, log):
|
|
||||||
help = parser.format_help().encode(preferred_encoding, 'replace')
|
|
||||||
log(help)
|
|
||||||
|
|
||||||
def option_parser():
|
|
||||||
return OptionParser(usage=USAGE)
|
|
||||||
|
|
||||||
def main(args=sys.argv):
|
|
||||||
log = Log()
|
|
||||||
parser = option_parser()
|
|
||||||
|
|
||||||
if len(args) < 2:
|
|
||||||
print 'Error: No command sepecified.\n'
|
|
||||||
print_help(parser, log)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
command = args[1].lower().strip()
|
|
||||||
|
|
||||||
if command in COMMANDS.keys():
|
|
||||||
del args[1]
|
|
||||||
return COMMANDS[command].main(args, command)
|
|
||||||
else:
|
|
||||||
parser.parse_args(args)
|
|
||||||
print 'Unknown command %s.\n' % command
|
|
||||||
print_help(parser, log)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
# We should never get here.
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(main())
|
|
@ -1,150 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, James Beal <james_@catbus.co.uk>, ' \
|
|
||||||
'2009, John Schember <john@nachtimwald.com>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
'''
|
|
||||||
Crop a pdf file
|
|
||||||
'''
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import re
|
|
||||||
from decimal import Decimal
|
|
||||||
from optparse import OptionGroup, Option
|
|
||||||
|
|
||||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
|
||||||
from calibre.ebooks.metadata import authors_to_string
|
|
||||||
from calibre.utils.config import OptionParser
|
|
||||||
from calibre.utils.logging import Log
|
|
||||||
from calibre.constants import preferred_encoding
|
|
||||||
from calibre.customize.conversion import OptionRecommendation
|
|
||||||
from calibre.ebooks.pdf.verify import is_valid_pdf, is_encrypted
|
|
||||||
|
|
||||||
from pyPdf import PdfFileWriter, PdfFileReader
|
|
||||||
|
|
||||||
DEFAULT_CROP = 10
|
|
||||||
|
|
||||||
USAGE = '\n%prog %%name ' + _('''\
|
|
||||||
[options] file.pdf
|
|
||||||
|
|
||||||
Crop a PDF file.
|
|
||||||
''')
|
|
||||||
|
|
||||||
OPTIONS = set([
|
|
||||||
OptionRecommendation(name='output', recommended_value='cropped.pdf',
|
|
||||||
level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
|
|
||||||
help=_('Path to output file. By default a file is created in the current directory.')),
|
|
||||||
OptionRecommendation(name='bottom_left_x', recommended_value=DEFAULT_CROP,
|
|
||||||
level=OptionRecommendation.LOW, long_switch='left-x', short_switch='x',
|
|
||||||
help=_('Number of pixels to crop from the left most x (default is %s)') % DEFAULT_CROP),
|
|
||||||
OptionRecommendation(name='bottom_left_y', recommended_value=DEFAULT_CROP,
|
|
||||||
level=OptionRecommendation.LOW, long_switch='left-y', short_switch='y',
|
|
||||||
help=_('Number of pixels to crop from the left most y (default is %s)') % DEFAULT_CROP),
|
|
||||||
OptionRecommendation(name='top_right_x', recommended_value=DEFAULT_CROP,
|
|
||||||
level=OptionRecommendation.LOW, long_switch='right-x', short_switch='v',
|
|
||||||
help=_('Number of pixels to crop from the right most x (default is %s)') % DEFAULT_CROP),
|
|
||||||
OptionRecommendation(name='top_right_y', recommended_value=DEFAULT_CROP,
|
|
||||||
level=OptionRecommendation.LOW, long_switch='right-y', short_switch='w',
|
|
||||||
help=_('Number of pixels to crop from the right most y (default is %s)') % DEFAULT_CROP),
|
|
||||||
OptionRecommendation(name='bounding', recommended_value=None,
|
|
||||||
level=OptionRecommendation.LOW, long_switch='bounding', short_switch='b',
|
|
||||||
help=_('A file generated by ghostscript which allows each page to be individually cropped `gs -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox file.pdf 2> bounding`')),
|
|
||||||
])
|
|
||||||
|
|
||||||
def print_help(parser, log):
|
|
||||||
help = parser.format_help().encode(preferred_encoding, 'replace')
|
|
||||||
log(help)
|
|
||||||
|
|
||||||
def option_parser(name):
|
|
||||||
usage = USAGE.replace('%%name', name)
|
|
||||||
return OptionParser(usage=usage)
|
|
||||||
|
|
||||||
def option_recommendation_to_cli_option(add_option, rec):
|
|
||||||
opt = rec.option
|
|
||||||
switches = ['-'+opt.short_switch] if opt.short_switch else []
|
|
||||||
switches.append('--'+opt.long_switch)
|
|
||||||
attrs = dict(dest=opt.name, help=opt.help,
|
|
||||||
choices=opt.choices, default=rec.recommended_value)
|
|
||||||
add_option(Option(*switches, **attrs))
|
|
||||||
|
|
||||||
def add_options(parser):
|
|
||||||
group = OptionGroup(parser, _('Crop Options:'), _('Options to control the transformation of pdf'))
|
|
||||||
parser.add_option_group(group)
|
|
||||||
add_option = group.add_option
|
|
||||||
|
|
||||||
for rec in OPTIONS:
|
|
||||||
option_recommendation_to_cli_option(add_option, rec)
|
|
||||||
|
|
||||||
def crop_pdf(pdf_path, opts, metadata=None):
|
|
||||||
if metadata == None:
|
|
||||||
title = _('Unknown')
|
|
||||||
author = _('Unknown')
|
|
||||||
else:
|
|
||||||
title = metadata.title
|
|
||||||
author = authors_to_string(metadata.authors)
|
|
||||||
|
|
||||||
input_pdf = PdfFileReader(open(pdf_path, 'rb'))
|
|
||||||
|
|
||||||
bounding_lines = []
|
|
||||||
if opts.bounding != None:
|
|
||||||
try:
|
|
||||||
bounding = open(opts.bounding , 'r')
|
|
||||||
bounding_regex = re.compile('%%BoundingBox: (?P<bottom_x>\d+) (?P<bottom_y>\d+) (?P<top_x>\d+) (?P<top_y>\d+)')
|
|
||||||
except:
|
|
||||||
raise Exception('Error reading %s' % opts.bounding)
|
|
||||||
|
|
||||||
lines = bounding.readlines()
|
|
||||||
for line in lines:
|
|
||||||
if line.startswith('%%BoundingBox:'):
|
|
||||||
bounding_lines.append(line)
|
|
||||||
if len(bounding_lines) != input_pdf.numPages:
|
|
||||||
raise Exception('Error bounding file %s page count does not correspond to specified pdf' % opts.bounding)
|
|
||||||
|
|
||||||
output_pdf = PdfFileWriter(title=title,author=author)
|
|
||||||
blines = iter(bounding_lines)
|
|
||||||
for page in input_pdf.pages:
|
|
||||||
if bounding_lines != []:
|
|
||||||
mo = bounding_regex.search(blines.next())
|
|
||||||
if mo == None:
|
|
||||||
raise Exception('Error in bounding file %s' % opts.bounding)
|
|
||||||
page.mediaBox.upperRight = (float(mo.group('top_x')), Decimal(mo.group('top_y')))
|
|
||||||
page.mediaBox.lowerLeft = (float(mo.group('bottom_x')), Decimal(mo.group('bottom_y')))
|
|
||||||
else:
|
|
||||||
page.mediaBox.upperRight = (page.bleedBox.getUpperRight_x() - Decimal(opts.top_right_x), page.bleedBox.getUpperRight_y() - Decimal(opts.top_right_y))
|
|
||||||
page.mediaBox.lowerLeft = (page.bleedBox.getLowerLeft_x() + Decimal(opts.bottom_left_x), page.bleedBox.getLowerLeft_y() + Decimal(opts.bottom_left_y))
|
|
||||||
output_pdf.addPage(page)
|
|
||||||
|
|
||||||
with open(opts.output, 'wb') as output_file:
|
|
||||||
output_pdf.write(output_file)
|
|
||||||
|
|
||||||
def main(args=sys.argv, name=''):
|
|
||||||
log = Log()
|
|
||||||
parser = option_parser(name)
|
|
||||||
add_options(parser)
|
|
||||||
|
|
||||||
opts, args = parser.parse_args(args)
|
|
||||||
args = args[1:]
|
|
||||||
|
|
||||||
if len(args) < 1:
|
|
||||||
print 'Error: A PDF file is required.\n'
|
|
||||||
print_help(parser, log)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
if not is_valid_pdf(args[0]):
|
|
||||||
print 'Error: Could not read file `%s`.' % args[0]
|
|
||||||
return 1
|
|
||||||
|
|
||||||
if is_encrypted(args[0]):
|
|
||||||
print 'Error: file `%s` is encrypted.' % args[0]
|
|
||||||
return 1
|
|
||||||
|
|
||||||
mi = metadata_from_formats([args[0]])
|
|
||||||
|
|
||||||
crop_pdf(args[0], opts, mi)
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(main())
|
|
@ -1,113 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
from __future__ import with_statement
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
'''
|
|
||||||
Decrypt content of PDF.
|
|
||||||
'''
|
|
||||||
|
|
||||||
import os, sys
|
|
||||||
from optparse import OptionGroup, Option
|
|
||||||
|
|
||||||
from calibre.utils.config import OptionParser
|
|
||||||
from calibre.utils.logging import Log
|
|
||||||
from calibre.constants import preferred_encoding
|
|
||||||
from calibre.customize.conversion import OptionRecommendation
|
|
||||||
from calibre.ebooks.pdf.verify import is_valid_pdf, is_encrypted
|
|
||||||
|
|
||||||
from pyPdf import PdfFileWriter, PdfFileReader
|
|
||||||
|
|
||||||
USAGE = '\n%prog %%name ' + _('''\
|
|
||||||
[options] file.pdf password
|
|
||||||
|
|
||||||
Decrypt a PDF.
|
|
||||||
''')
|
|
||||||
|
|
||||||
OPTIONS = set([
|
|
||||||
OptionRecommendation(name='output', recommended_value='decrypted.pdf',
|
|
||||||
level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
|
|
||||||
help=_('Path to output file. By default a file is created in the current directory.')),
|
|
||||||
])
|
|
||||||
|
|
||||||
class DecryptionError(Exception):
|
|
||||||
def __init__(self, pdf_path):
|
|
||||||
self.value = 'Unable to decrypt file `%s`.' % pdf_path
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return repr(self.value)
|
|
||||||
|
|
||||||
|
|
||||||
def print_help(parser, log):
|
|
||||||
help = parser.format_help().encode(preferred_encoding, 'replace')
|
|
||||||
log(help)
|
|
||||||
|
|
||||||
def option_parser(name):
|
|
||||||
usage = USAGE.replace('%%name', name)
|
|
||||||
return OptionParser(usage=usage)
|
|
||||||
|
|
||||||
def option_recommendation_to_cli_option(add_option, rec):
|
|
||||||
opt = rec.option
|
|
||||||
switches = ['-'+opt.short_switch] if opt.short_switch else []
|
|
||||||
switches.append('--'+opt.long_switch)
|
|
||||||
attrs = dict(dest=opt.name, help=opt.help,
|
|
||||||
choices=opt.choices, default=rec.recommended_value)
|
|
||||||
add_option(Option(*switches, **attrs))
|
|
||||||
|
|
||||||
def add_options(parser):
|
|
||||||
group = OptionGroup(parser, _('Decrypt Options:'), _('Options to control the transformation of pdf'))
|
|
||||||
parser.add_option_group(group)
|
|
||||||
add_option = group.add_option
|
|
||||||
|
|
||||||
for rec in OPTIONS:
|
|
||||||
option_recommendation_to_cli_option(add_option, rec)
|
|
||||||
|
|
||||||
def decrypt(pdf_path, out_path, password):
|
|
||||||
pdf = PdfFileReader(open(os.path.abspath(pdf_path), 'rb'))
|
|
||||||
|
|
||||||
if pdf.decrypt(str(password)) == 0:
|
|
||||||
raise DecryptionError(pdf_path)
|
|
||||||
|
|
||||||
title = pdf.documentInfo.title if pdf.documentInfo.title else _('Unknown')
|
|
||||||
author = pdf.documentInfo.author if pdf.documentInfo.author else _('Unknown')
|
|
||||||
out_pdf = PdfFileWriter(title=title, author=author)
|
|
||||||
|
|
||||||
for page in pdf.pages:
|
|
||||||
out_pdf.addPage(page)
|
|
||||||
|
|
||||||
with open(out_path, 'wb') as out_file:
|
|
||||||
out_pdf.write(out_file)
|
|
||||||
|
|
||||||
def main(args=sys.argv, name=''):
|
|
||||||
log = Log()
|
|
||||||
parser = option_parser(name)
|
|
||||||
add_options(parser)
|
|
||||||
|
|
||||||
opts, args = parser.parse_args(args)
|
|
||||||
args = args[1:]
|
|
||||||
|
|
||||||
if len(args) < 2:
|
|
||||||
print 'Error: A PDF file and decryption password is required.\n'
|
|
||||||
print_help(parser, log)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
if not is_valid_pdf(args[0]):
|
|
||||||
print 'Error: Could not read file `%s`.' % args[0]
|
|
||||||
return 1
|
|
||||||
|
|
||||||
if not is_encrypted(args[0]):
|
|
||||||
print 'Error: file `%s` is not encrypted.' % args[0]
|
|
||||||
return 1
|
|
||||||
|
|
||||||
try:
|
|
||||||
decrypt(args[0], opts.output, args[1])
|
|
||||||
except DecryptionError as e:
|
|
||||||
print e.value
|
|
||||||
return 1
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(main())
|
|
@ -1,107 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
from __future__ import with_statement
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
'''
|
|
||||||
Encrypt a PDF.
|
|
||||||
'''
|
|
||||||
|
|
||||||
import os, sys
|
|
||||||
from optparse import OptionGroup, Option
|
|
||||||
|
|
||||||
from calibre.utils.config import OptionParser
|
|
||||||
from calibre.utils.logging import Log
|
|
||||||
from calibre.constants import preferred_encoding
|
|
||||||
from calibre.customize.conversion import OptionRecommendation
|
|
||||||
from calibre.ebooks.pdf.verify import is_valid_pdf, is_encrypted
|
|
||||||
from calibre.ebooks.metadata import authors_to_string
|
|
||||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
|
||||||
|
|
||||||
from pyPdf import PdfFileWriter, PdfFileReader
|
|
||||||
|
|
||||||
USAGE = '\n%prog %%name ' + _('''\
|
|
||||||
[options] file.pdf password
|
|
||||||
|
|
||||||
Encrypt a PDF.
|
|
||||||
''')
|
|
||||||
|
|
||||||
OPTIONS = set([
|
|
||||||
OptionRecommendation(name='output', recommended_value='encrypted.pdf',
|
|
||||||
level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
|
|
||||||
help=_('Path to output file. By default a file is created in the current directory.')),
|
|
||||||
])
|
|
||||||
|
|
||||||
def print_help(parser, log):
|
|
||||||
help = parser.format_help().encode(preferred_encoding, 'replace')
|
|
||||||
log(help)
|
|
||||||
|
|
||||||
def option_parser(name):
|
|
||||||
usage = USAGE.replace('%%name', name)
|
|
||||||
return OptionParser(usage=usage)
|
|
||||||
|
|
||||||
def option_recommendation_to_cli_option(add_option, rec):
|
|
||||||
opt = rec.option
|
|
||||||
switches = ['-'+opt.short_switch] if opt.short_switch else []
|
|
||||||
switches.append('--'+opt.long_switch)
|
|
||||||
attrs = dict(dest=opt.name, help=opt.help,
|
|
||||||
choices=opt.choices, default=rec.recommended_value)
|
|
||||||
add_option(Option(*switches, **attrs))
|
|
||||||
|
|
||||||
def add_options(parser):
|
|
||||||
group = OptionGroup(parser, _('Encrypt Options:'), _('Options to control the transformation of pdf'))
|
|
||||||
parser.add_option_group(group)
|
|
||||||
add_option = group.add_option
|
|
||||||
|
|
||||||
for rec in OPTIONS:
|
|
||||||
option_recommendation_to_cli_option(add_option, rec)
|
|
||||||
|
|
||||||
def encrypt(pdf_path, out_path, password, metadata=None):
|
|
||||||
if metadata == None:
|
|
||||||
title = _('Unknown')
|
|
||||||
author = _('Unknown')
|
|
||||||
else:
|
|
||||||
title = metadata.title
|
|
||||||
author = authors_to_string(metadata.authors)
|
|
||||||
|
|
||||||
out_pdf = PdfFileWriter(title=title, author=author)
|
|
||||||
|
|
||||||
pdf = PdfFileReader(open(os.path.abspath(pdf_path), 'rb'))
|
|
||||||
for page in pdf.pages:
|
|
||||||
out_pdf.addPage(page)
|
|
||||||
|
|
||||||
with open(out_path, 'wb') as out_file:
|
|
||||||
out_pdf.encrypt(str(password))
|
|
||||||
out_pdf.write(out_file)
|
|
||||||
|
|
||||||
def main(args=sys.argv, name=''):
|
|
||||||
log = Log()
|
|
||||||
parser = option_parser(name)
|
|
||||||
add_options(parser)
|
|
||||||
|
|
||||||
opts, args = parser.parse_args(args)
|
|
||||||
args = args[1:]
|
|
||||||
|
|
||||||
if len(args) < 2:
|
|
||||||
print 'Error: A PDF file and decryption password is required.\n'
|
|
||||||
print_help(parser, log)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
if not is_valid_pdf(args[0]):
|
|
||||||
print 'Error: Could not read file `%s`.' % args[0]
|
|
||||||
return 1
|
|
||||||
|
|
||||||
if is_encrypted(args[0]):
|
|
||||||
print 'Error: file `%s` is already encrypted.' % args[0]
|
|
||||||
return 1
|
|
||||||
|
|
||||||
mi = metadata_from_formats([args[0]])
|
|
||||||
|
|
||||||
encrypt(args[0], opts.output, args[1], mi)
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(main())
|
|
@ -1,85 +0,0 @@
|
|||||||
from __future__ import with_statement
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
'''
|
|
||||||
Merge PDF files into a single PDF document.
|
|
||||||
'''
|
|
||||||
|
|
||||||
import os, sys
|
|
||||||
|
|
||||||
from calibre.utils.config import OptionParser
|
|
||||||
from calibre.utils.logging import Log
|
|
||||||
from calibre.constants import preferred_encoding
|
|
||||||
from calibre.ebooks.pdf.verify import is_valid_pdfs, is_encrypted
|
|
||||||
from calibre.utils.podofo import get_podofo
|
|
||||||
from calibre import prints
|
|
||||||
|
|
||||||
USAGE = '\n%prog %%name ' + _('''\
|
|
||||||
file.pdf ...
|
|
||||||
|
|
||||||
Get info about a PDF.
|
|
||||||
''')
|
|
||||||
|
|
||||||
def print_help(parser, log):
|
|
||||||
help = parser.format_help().encode(preferred_encoding, 'replace')
|
|
||||||
log(help)
|
|
||||||
|
|
||||||
def option_parser(name):
|
|
||||||
usage = USAGE.replace('%%name', name)
|
|
||||||
return OptionParser(usage=usage)
|
|
||||||
|
|
||||||
def print_info(pdf_path):
|
|
||||||
podofo = get_podofo()
|
|
||||||
p = podofo.PDFDoc()
|
|
||||||
p.open(pdf_path)
|
|
||||||
|
|
||||||
fmt = lambda x, y: '%-20s: %s'%(x, y)
|
|
||||||
|
|
||||||
print
|
|
||||||
|
|
||||||
prints(fmt(_('Title'), p.title))
|
|
||||||
prints(fmt(_('Author'), p.author))
|
|
||||||
prints(fmt(_('Subject'), p.subject))
|
|
||||||
prints(fmt(_('Creator'), p.creator))
|
|
||||||
prints(fmt(_('Producer'), p.producer))
|
|
||||||
prints(fmt(_('Pages'), p.pages))
|
|
||||||
prints(fmt(_('File Size'), os.stat(pdf_path).st_size))
|
|
||||||
prints(fmt(_('PDF Version'), p.version if p.version else _('Unknown')))
|
|
||||||
|
|
||||||
def main(args=sys.argv, name=''):
|
|
||||||
log = Log()
|
|
||||||
parser = option_parser(name)
|
|
||||||
|
|
||||||
opts, args = parser.parse_args(args)
|
|
||||||
args = args[1:]
|
|
||||||
|
|
||||||
if len(args) < 1:
|
|
||||||
print 'Error: No PDF sepecified.\n'
|
|
||||||
print_help(parser, log)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
bad_pdfs = is_valid_pdfs(args)
|
|
||||||
if bad_pdfs != []:
|
|
||||||
for pdf in bad_pdfs:
|
|
||||||
print 'Error: Could not read file `%s`.' % pdf
|
|
||||||
return 1
|
|
||||||
|
|
||||||
enc = False
|
|
||||||
for pdf in args:
|
|
||||||
if is_encrypted(pdf):
|
|
||||||
enc = True
|
|
||||||
print 'Error: file `%s` is encrypted. Please decrypt first.' % pdf
|
|
||||||
if enc:
|
|
||||||
return 1
|
|
||||||
|
|
||||||
for pdf in args:
|
|
||||||
print_info(pdf)
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(main())
|
|
@ -1,115 +0,0 @@
|
|||||||
from __future__ import with_statement
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
'''
|
|
||||||
Merge PDF files into a single PDF document.
|
|
||||||
'''
|
|
||||||
|
|
||||||
import os, sys
|
|
||||||
from optparse import OptionGroup, Option
|
|
||||||
|
|
||||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
|
||||||
from calibre.ebooks.metadata import authors_to_string
|
|
||||||
from calibre.utils.config import OptionParser
|
|
||||||
from calibre.utils.logging import Log
|
|
||||||
from calibre.constants import preferred_encoding
|
|
||||||
from calibre.customize.conversion import OptionRecommendation
|
|
||||||
from calibre.ebooks.pdf.verify import is_valid_pdfs, is_encrypted
|
|
||||||
|
|
||||||
from pyPdf import PdfFileWriter, PdfFileReader
|
|
||||||
|
|
||||||
USAGE = '\n%prog %%name ' + _('''\
|
|
||||||
[options] file1.pdf file2.pdf ...
|
|
||||||
|
|
||||||
Metadata will be used from the first PDF specified.
|
|
||||||
|
|
||||||
Merges individual PDFs.
|
|
||||||
''')
|
|
||||||
|
|
||||||
OPTIONS = set([
|
|
||||||
OptionRecommendation(name='output', recommended_value='merged.pdf',
|
|
||||||
level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
|
|
||||||
help=_('Path to output file. By default a file is created in the current directory.')),
|
|
||||||
])
|
|
||||||
|
|
||||||
def print_help(parser, log):
|
|
||||||
help = parser.format_help().encode(preferred_encoding, 'replace')
|
|
||||||
log(help)
|
|
||||||
|
|
||||||
def option_parser(name):
|
|
||||||
usage = USAGE.replace('%%name', name)
|
|
||||||
return OptionParser(usage=usage)
|
|
||||||
|
|
||||||
def option_recommendation_to_cli_option(add_option, rec):
|
|
||||||
opt = rec.option
|
|
||||||
switches = ['-'+opt.short_switch] if opt.short_switch else []
|
|
||||||
switches.append('--'+opt.long_switch)
|
|
||||||
attrs = dict(dest=opt.name, help=opt.help,
|
|
||||||
choices=opt.choices, default=rec.recommended_value)
|
|
||||||
add_option(Option(*switches, **attrs))
|
|
||||||
|
|
||||||
def add_options(parser):
|
|
||||||
group = OptionGroup(parser, _('Merge Options:'), _('Options to control the transformation of pdf'))
|
|
||||||
parser.add_option_group(group)
|
|
||||||
add_option = group.add_option
|
|
||||||
|
|
||||||
for rec in OPTIONS:
|
|
||||||
option_recommendation_to_cli_option(add_option, rec)
|
|
||||||
|
|
||||||
def merge_files(in_paths, out_path, metadata=None):
|
|
||||||
if metadata == None:
|
|
||||||
title = _('Unknown')
|
|
||||||
author = _('Unknown')
|
|
||||||
else:
|
|
||||||
title = metadata.title
|
|
||||||
author = authors_to_string(metadata.authors)
|
|
||||||
|
|
||||||
out_pdf = PdfFileWriter(title=title, author=author)
|
|
||||||
|
|
||||||
for pdf_path in in_paths:
|
|
||||||
pdf = PdfFileReader(open(os.path.abspath(pdf_path), 'rb'))
|
|
||||||
for page in pdf.pages:
|
|
||||||
out_pdf.addPage(page)
|
|
||||||
|
|
||||||
with open(out_path, 'wb') as out_file:
|
|
||||||
out_pdf.write(out_file)
|
|
||||||
|
|
||||||
def main(args=sys.argv, name=''):
|
|
||||||
log = Log()
|
|
||||||
parser = option_parser(name)
|
|
||||||
add_options(parser)
|
|
||||||
|
|
||||||
opts, args = parser.parse_args(args)
|
|
||||||
args = args[1:]
|
|
||||||
|
|
||||||
if len(args) < 2:
|
|
||||||
print 'Error: Two or more PDF files are required.\n'
|
|
||||||
print_help(parser, log)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
bad_pdfs = is_valid_pdfs(args)
|
|
||||||
if bad_pdfs != []:
|
|
||||||
for pdf in bad_pdfs:
|
|
||||||
print 'Error: Could not read file `%s`.' % pdf
|
|
||||||
return 1
|
|
||||||
|
|
||||||
enc = False
|
|
||||||
for pdf in args:
|
|
||||||
if is_encrypted(pdf):
|
|
||||||
enc = True
|
|
||||||
print 'Error: file `%s` is encrypted.' % pdf
|
|
||||||
if enc:
|
|
||||||
return 1
|
|
||||||
|
|
||||||
mi = metadata_from_formats([args[0]])
|
|
||||||
|
|
||||||
merge_files(args, opts.output, mi)
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(main())
|
|
@ -1,106 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
from __future__ import with_statement
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
'''
|
|
||||||
Reverse content of PDF.
|
|
||||||
'''
|
|
||||||
|
|
||||||
import os, sys
|
|
||||||
from optparse import OptionGroup, Option
|
|
||||||
|
|
||||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
|
||||||
from calibre.ebooks.metadata import authors_to_string
|
|
||||||
from calibre.utils.config import OptionParser
|
|
||||||
from calibre.utils.logging import Log
|
|
||||||
from calibre.constants import preferred_encoding
|
|
||||||
from calibre.customize.conversion import OptionRecommendation
|
|
||||||
from calibre.ebooks.pdf.verify import is_valid_pdf, is_encrypted
|
|
||||||
|
|
||||||
from pyPdf import PdfFileWriter, PdfFileReader
|
|
||||||
|
|
||||||
USAGE = '\n%prog %%name ' + _('''\
|
|
||||||
[options] file.pdf
|
|
||||||
|
|
||||||
Reverse a PDF.
|
|
||||||
''')
|
|
||||||
|
|
||||||
OPTIONS = set([
|
|
||||||
OptionRecommendation(name='output', recommended_value='reversed.pdf',
|
|
||||||
level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
|
|
||||||
help=_('Path to output file. By default a file is created in the current directory.')),
|
|
||||||
])
|
|
||||||
|
|
||||||
def print_help(parser, log):
|
|
||||||
help = parser.format_help().encode(preferred_encoding, 'replace')
|
|
||||||
log(help)
|
|
||||||
|
|
||||||
def option_parser(name):
|
|
||||||
usage = USAGE.replace('%%name', name)
|
|
||||||
return OptionParser(usage=usage)
|
|
||||||
|
|
||||||
def option_recommendation_to_cli_option(add_option, rec):
|
|
||||||
opt = rec.option
|
|
||||||
switches = ['-'+opt.short_switch] if opt.short_switch else []
|
|
||||||
switches.append('--'+opt.long_switch)
|
|
||||||
attrs = dict(dest=opt.name, help=opt.help,
|
|
||||||
choices=opt.choices, default=rec.recommended_value)
|
|
||||||
add_option(Option(*switches, **attrs))
|
|
||||||
|
|
||||||
def add_options(parser):
|
|
||||||
group = OptionGroup(parser, _('Reverse Options:'), _('Options to control the transformation of pdf'))
|
|
||||||
parser.add_option_group(group)
|
|
||||||
add_option = group.add_option
|
|
||||||
|
|
||||||
for rec in OPTIONS:
|
|
||||||
option_recommendation_to_cli_option(add_option, rec)
|
|
||||||
|
|
||||||
def reverse(pdf_path, out_path, metadata=None):
|
|
||||||
if metadata == None:
|
|
||||||
title = _('Unknown')
|
|
||||||
author = _('Unknown')
|
|
||||||
else:
|
|
||||||
title = metadata.title
|
|
||||||
author = authors_to_string(metadata.authors)
|
|
||||||
|
|
||||||
out_pdf = PdfFileWriter(title=title, author=author)
|
|
||||||
|
|
||||||
pdf = PdfFileReader(open(os.path.abspath(pdf_path), 'rb'))
|
|
||||||
for page in reversed(pdf.pages):
|
|
||||||
out_pdf.addPage(page)
|
|
||||||
|
|
||||||
with open(out_path, 'wb') as out_file:
|
|
||||||
out_pdf.write(out_file)
|
|
||||||
|
|
||||||
def main(args=sys.argv, name=''):
|
|
||||||
log = Log()
|
|
||||||
parser = option_parser(name)
|
|
||||||
add_options(parser)
|
|
||||||
|
|
||||||
opts, args = parser.parse_args(args)
|
|
||||||
args = args[1:]
|
|
||||||
|
|
||||||
if len(args) < 1:
|
|
||||||
print 'Error: A PDF file is required.\n'
|
|
||||||
print_help(parser, log)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
if not is_valid_pdf(args[0]):
|
|
||||||
print 'Error: Could not read file `%s`.' % args[0]
|
|
||||||
return 1
|
|
||||||
|
|
||||||
if is_encrypted(args[0]):
|
|
||||||
print 'Error: file `%s` is encrypted.' % args[0]
|
|
||||||
return 1
|
|
||||||
|
|
||||||
mi = metadata_from_formats([args[0]])
|
|
||||||
|
|
||||||
reverse(args[0], opts.output, mi)
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(main())
|
|
@ -1,105 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
'''
|
|
||||||
Rotate pages of a PDF.
|
|
||||||
'''
|
|
||||||
|
|
||||||
import os, sys
|
|
||||||
from optparse import OptionGroup, Option
|
|
||||||
|
|
||||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
|
||||||
from calibre.ebooks.metadata import authors_to_string
|
|
||||||
from calibre.utils.config import OptionParser
|
|
||||||
from calibre.utils.logging import Log
|
|
||||||
from calibre.constants import preferred_encoding
|
|
||||||
from calibre.customize.conversion import OptionRecommendation
|
|
||||||
from calibre.ebooks.pdf.verify import is_valid_pdf, is_encrypted
|
|
||||||
|
|
||||||
from pyPdf import PdfFileWriter, PdfFileReader
|
|
||||||
|
|
||||||
USAGE = '\n%prog %%name ' + _('''\
|
|
||||||
file.pdf degrees
|
|
||||||
|
|
||||||
Rotate pages of a PDF clockwise.
|
|
||||||
''')
|
|
||||||
|
|
||||||
OPTIONS = set([
|
|
||||||
OptionRecommendation(name='output', recommended_value='rotated.pdf',
|
|
||||||
level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
|
|
||||||
help=_('Path to output file. By default a file is created in the current directory.')),
|
|
||||||
])
|
|
||||||
|
|
||||||
def print_help(parser, log):
|
|
||||||
help = parser.format_help().encode(preferred_encoding, 'replace')
|
|
||||||
log(help)
|
|
||||||
|
|
||||||
def option_parser(name):
|
|
||||||
usage = USAGE.replace('%%name', name)
|
|
||||||
return OptionParser(usage=usage)
|
|
||||||
|
|
||||||
def option_recommendation_to_cli_option(add_option, rec):
|
|
||||||
opt = rec.option
|
|
||||||
switches = ['-'+opt.short_switch] if opt.short_switch else []
|
|
||||||
switches.append('--'+opt.long_switch)
|
|
||||||
attrs = dict(dest=opt.name, help=opt.help,
|
|
||||||
choices=opt.choices, default=rec.recommended_value)
|
|
||||||
add_option(Option(*switches, **attrs))
|
|
||||||
|
|
||||||
def add_options(parser):
|
|
||||||
group = OptionGroup(parser, _('Rotate Options:'), _('Options to control the transformation of pdf'))
|
|
||||||
parser.add_option_group(group)
|
|
||||||
add_option = group.add_option
|
|
||||||
|
|
||||||
for rec in OPTIONS:
|
|
||||||
option_recommendation_to_cli_option(add_option, rec)
|
|
||||||
|
|
||||||
def rotate(pdf_path, out_path, degrees, metadata=None):
|
|
||||||
if metadata == None:
|
|
||||||
title = _('Unknown')
|
|
||||||
author = _('Unknown')
|
|
||||||
else:
|
|
||||||
title = metadata.title
|
|
||||||
author = authors_to_string(metadata.authors)
|
|
||||||
|
|
||||||
out_pdf = PdfFileWriter(title=title, author=author)
|
|
||||||
|
|
||||||
pdf = PdfFileReader(open(os.path.abspath(pdf_path), 'rb'))
|
|
||||||
for page in pdf.pages:
|
|
||||||
out_pdf.addPage(page.rotateClockwise(int(degrees)))
|
|
||||||
|
|
||||||
with open(out_path, 'wb') as out_file:
|
|
||||||
out_pdf.write(out_file)
|
|
||||||
|
|
||||||
def main(args=sys.argv, name=''):
|
|
||||||
log = Log()
|
|
||||||
parser = option_parser(name)
|
|
||||||
add_options(parser)
|
|
||||||
|
|
||||||
opts, args = parser.parse_args(args)
|
|
||||||
args = args[1:]
|
|
||||||
|
|
||||||
if len(args) < 2:
|
|
||||||
print 'Error: A PDF file and how many degrees to rotate is required.\n'
|
|
||||||
print_help(parser, log)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
if not is_valid_pdf(args[0]):
|
|
||||||
print 'Error: Could not read file `%s`.' % args[0]
|
|
||||||
return 1
|
|
||||||
|
|
||||||
if is_encrypted(args[0]):
|
|
||||||
print 'Error: file `%s` is encrypted.' % args[0]
|
|
||||||
return 1
|
|
||||||
|
|
||||||
mi = metadata_from_formats([args[0]])
|
|
||||||
|
|
||||||
rotate(args[0], opts.output, args[1], mi)
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(main())
|
|
@ -1,204 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
from __future__ import with_statement
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
'''
|
|
||||||
Split PDF file into multiple PDF documents.
|
|
||||||
'''
|
|
||||||
|
|
||||||
import os, sys, re
|
|
||||||
from optparse import OptionGroup, Option
|
|
||||||
|
|
||||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
|
||||||
from calibre.ebooks.metadata import authors_to_string
|
|
||||||
from calibre.utils.config import OptionParser
|
|
||||||
from calibre.utils.logging import Log
|
|
||||||
from calibre.constants import preferred_encoding
|
|
||||||
from calibre.customize.conversion import OptionRecommendation
|
|
||||||
from calibre.ebooks.pdf.verify import is_valid_pdf, is_encrypted
|
|
||||||
|
|
||||||
from pyPdf import PdfFileWriter, PdfFileReader
|
|
||||||
|
|
||||||
USAGE = _('''
|
|
||||||
%prog %%name [options] file.pdf page_to_split_on ...
|
|
||||||
%prog %%name [options] file.pdf page_range_to_split_on ...
|
|
||||||
|
|
||||||
Ex.
|
|
||||||
|
|
||||||
%prog %%name file.pdf 6
|
|
||||||
%prog %%name file.pdf 6-12
|
|
||||||
%prog %%name file.pdf 6-12 8 10 9-20
|
|
||||||
|
|
||||||
Split a PDF.
|
|
||||||
''')
|
|
||||||
|
|
||||||
OPTIONS = set([
|
|
||||||
OptionRecommendation(name='output', recommended_value='split.pdf',
|
|
||||||
level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
|
|
||||||
help=_('Path to output file. By default a file is created in the current directory.')),
|
|
||||||
])
|
|
||||||
|
|
||||||
def print_help(parser, log):
|
|
||||||
help = parser.format_help().encode(preferred_encoding, 'replace')
|
|
||||||
log(help)
|
|
||||||
|
|
||||||
def option_parser(name):
|
|
||||||
usage = USAGE.replace('%%name', name)
|
|
||||||
return OptionParser(usage=usage)
|
|
||||||
|
|
||||||
def option_recommendation_to_cli_option(add_option, rec):
|
|
||||||
opt = rec.option
|
|
||||||
switches = ['-'+opt.short_switch] if opt.short_switch else []
|
|
||||||
switches.append('--'+opt.long_switch)
|
|
||||||
attrs = dict(dest=opt.name, help=opt.help,
|
|
||||||
choices=opt.choices, default=rec.recommended_value)
|
|
||||||
add_option(Option(*switches, **attrs))
|
|
||||||
|
|
||||||
def add_options(parser):
|
|
||||||
group = OptionGroup(parser, _('Split Options:'), _('Options to control the transformation of pdf'))
|
|
||||||
parser.add_option_group(group)
|
|
||||||
add_option = group.add_option
|
|
||||||
|
|
||||||
for rec in OPTIONS:
|
|
||||||
option_recommendation_to_cli_option(add_option, rec)
|
|
||||||
|
|
||||||
def split_pdf(in_path, pages, page_ranges, out_name, metadata=None):
|
|
||||||
pdf = PdfFileReader(open(os.path.abspath(in_path), 'rb'))
|
|
||||||
total_pages = pdf.numPages - 1
|
|
||||||
|
|
||||||
for index in pages+page_ranges:
|
|
||||||
if index in pages:
|
|
||||||
write_pdf(pdf, out_name, '%s' % (index + 1), index, total_pages, metadata)
|
|
||||||
else:
|
|
||||||
|
|
||||||
write_pdf(pdf, out_name, '%s-%s' % (index[0] + 1, index[1] + 1), index[0], index[1], metadata)
|
|
||||||
|
|
||||||
def write_pdf(pdf, name, suffix, start, end, metadata=None):
|
|
||||||
if metadata == None:
|
|
||||||
title = _('Unknown')
|
|
||||||
author = _('Unknown')
|
|
||||||
else:
|
|
||||||
title = metadata.title
|
|
||||||
author = authors_to_string(metadata.authors)
|
|
||||||
|
|
||||||
out_pdf = PdfFileWriter(title=title, author=author)
|
|
||||||
for page_num in range(start, end + 1):
|
|
||||||
out_pdf.addPage(pdf.getPage(page_num))
|
|
||||||
with open('%s%s.pdf' % (name, suffix), 'wb') as out_file:
|
|
||||||
out_pdf.write(out_file)
|
|
||||||
|
|
||||||
def split_args(args):
|
|
||||||
pdf = ''
|
|
||||||
pages = []
|
|
||||||
page_ranges = []
|
|
||||||
bad = []
|
|
||||||
|
|
||||||
for arg in args:
|
|
||||||
arg = arg.strip()
|
|
||||||
# Find the pdf input
|
|
||||||
if re.search('(?iu)^.*?\.pdf[ ]*$', arg) != None:
|
|
||||||
if pdf == '':
|
|
||||||
pdf = arg
|
|
||||||
else:
|
|
||||||
bad.append(arg)
|
|
||||||
# Find single indexes
|
|
||||||
elif re.search('^[ ]*\d+[ ]*$', arg) != None:
|
|
||||||
pages.append(arg)
|
|
||||||
# Find index ranges
|
|
||||||
elif re.search('^[ ]*\d+[ ]*-[ ]*\d+[ ]*$', arg) != None:
|
|
||||||
mo = re.search('^[ ]*(?P<start>\d+)[ ]*-[ ]*(?P<end>\d+)[ ]*$', arg)
|
|
||||||
start = mo.group('start')
|
|
||||||
end = mo.group('end')
|
|
||||||
|
|
||||||
# check to see if the range is really a single index
|
|
||||||
if start == end:
|
|
||||||
pages.append(start)
|
|
||||||
else:
|
|
||||||
page_ranges.append([start, end])
|
|
||||||
else:
|
|
||||||
bad.append(arg)
|
|
||||||
|
|
||||||
bad = sorted(list(set(bad)))
|
|
||||||
|
|
||||||
return pdf, pages, page_ranges, bad
|
|
||||||
|
|
||||||
# Remove duplicates from pages and page_ranges.
|
|
||||||
# Set pages higher than the total number of pages in the pdf to the last page.
|
|
||||||
# Return pages and page_ranges as lists of ints.
|
|
||||||
def clean_page_list(pdf_path, pages, page_ranges):
|
|
||||||
pdf = PdfFileReader(open(os.path.abspath(pdf_path), 'rb'))
|
|
||||||
|
|
||||||
total_pages = pdf.numPages
|
|
||||||
sorted_pages = []
|
|
||||||
sorted_ranges = []
|
|
||||||
|
|
||||||
for index in pages:
|
|
||||||
index = int(index)
|
|
||||||
if index > total_pages:
|
|
||||||
sorted_pages.append(total_pages - 1)
|
|
||||||
else:
|
|
||||||
sorted_pages.append(index - 1)
|
|
||||||
|
|
||||||
for start, end in page_ranges:
|
|
||||||
start = int(start)
|
|
||||||
end = int(end)
|
|
||||||
|
|
||||||
if start > total_pages and end > total_pages:
|
|
||||||
sorted_pages.append(total_pages - 1)
|
|
||||||
continue
|
|
||||||
|
|
||||||
if start > total_pages:
|
|
||||||
start = total_pages
|
|
||||||
if end > total_pages:
|
|
||||||
end = total_pages
|
|
||||||
page_range = sorted([start - 1, end - 1])
|
|
||||||
if page_range not in sorted_ranges:
|
|
||||||
sorted_ranges.append(page_range)
|
|
||||||
|
|
||||||
# Remove duplicates and sort
|
|
||||||
pages = sorted(list(set(sorted_pages)))
|
|
||||||
page_ranges = sorted(sorted_ranges)
|
|
||||||
|
|
||||||
return pages, page_ranges
|
|
||||||
|
|
||||||
def main(args=sys.argv, name=''):
|
|
||||||
log = Log()
|
|
||||||
parser = option_parser(name)
|
|
||||||
add_options(parser)
|
|
||||||
|
|
||||||
opts, args = parser.parse_args(args)
|
|
||||||
|
|
||||||
pdf, pages, page_ranges, unknown = split_args(args[1:])
|
|
||||||
|
|
||||||
if pdf == '' and (pages == [] or page_ranges == []):
|
|
||||||
print 'Error: PDF and where to split is required.\n'
|
|
||||||
print_help(parser, log)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
if unknown != []:
|
|
||||||
for arg in unknown:
|
|
||||||
print 'Error: Unknown argument `%s`' % arg
|
|
||||||
print_help(parser, log)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
if not is_valid_pdf(pdf):
|
|
||||||
print 'Error: Could not read file `%s`.' % pdf
|
|
||||||
return 1
|
|
||||||
|
|
||||||
if is_encrypted(pdf):
|
|
||||||
print 'Error: file `%s` is encrypted.' % args[0]
|
|
||||||
return 1
|
|
||||||
|
|
||||||
pages, page_ranges = clean_page_list(pdf, pages, page_ranges)
|
|
||||||
|
|
||||||
mi = metadata_from_formats([pdf])
|
|
||||||
|
|
||||||
split_pdf(pdf, pages, page_ranges, os.path.splitext(opts.output)[0], mi)
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(main())
|
|
@ -1,44 +0,0 @@
|
|||||||
from __future__ import with_statement
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
'''
|
|
||||||
Verify PDF files.
|
|
||||||
'''
|
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
from pyPdf import PdfFileReader
|
|
||||||
|
|
||||||
def is_valid_pdf(pdf_path):
|
|
||||||
'''
|
|
||||||
Returns True if the pdf file is valid.
|
|
||||||
'''
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
|
|
||||||
PdfFileReader(pdf_file)
|
|
||||||
except:
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
def is_valid_pdfs(pdf_paths):
|
|
||||||
'''
|
|
||||||
Returns a list of invalid pdf files.
|
|
||||||
'''
|
|
||||||
|
|
||||||
invalid = []
|
|
||||||
for pdf_path in pdf_paths:
|
|
||||||
if not is_valid_pdf(pdf_path):
|
|
||||||
invalid.append(pdf_path)
|
|
||||||
return invalid
|
|
||||||
|
|
||||||
def is_encrypted(pdf_path):
|
|
||||||
with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
|
|
||||||
pdf = PdfFileReader(pdf_file)
|
|
||||||
if pdf.isEncrypted:
|
|
||||||
return True
|
|
||||||
return False
|
|
@ -29,7 +29,6 @@ entry_points = {
|
|||||||
'calibre-parallel = calibre.utils.ipc.worker:main',
|
'calibre-parallel = calibre.utils.ipc.worker:main',
|
||||||
'calibre-customize = calibre.customize.ui:main',
|
'calibre-customize = calibre.customize.ui:main',
|
||||||
'calibre-complete = calibre.utils.complete:main',
|
'calibre-complete = calibre.utils.complete:main',
|
||||||
'pdfmanipulate = calibre.ebooks.pdf.manipulate.cli:main',
|
|
||||||
'fetch-ebook-metadata = calibre.ebooks.metadata.sources.cli:main',
|
'fetch-ebook-metadata = calibre.ebooks.metadata.sources.cli:main',
|
||||||
'epub-fix = calibre.ebooks.epub.fix.main:main',
|
'epub-fix = calibre.ebooks.epub.fix.main:main',
|
||||||
'calibre-smtp = calibre.utils.smtp:main',
|
'calibre-smtp = calibre.utils.smtp:main',
|
||||||
|
@ -1,2 +0,0 @@
|
|||||||
from pdf import PdfFileReader, PdfFileWriter
|
|
||||||
__all__ = ["pdf"]
|
|
@ -1,252 +0,0 @@
|
|||||||
# vim: sw=4:expandtab:foldmethod=marker
|
|
||||||
#
|
|
||||||
# Copyright (c) 2006, Mathieu Fenniak
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions are
|
|
||||||
# met:
|
|
||||||
#
|
|
||||||
# * Redistributions of source code must retain the above copyright notice,
|
|
||||||
# this list of conditions and the following disclaimer.
|
|
||||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
||||||
# this list of conditions and the following disclaimer in the documentation
|
|
||||||
# and/or other materials provided with the distribution.
|
|
||||||
# * The name of the author may not be used to endorse or promote products
|
|
||||||
# derived from this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
# POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
Implementation of stream filters for PDF.
|
|
||||||
"""
|
|
||||||
__author__ = "Mathieu Fenniak"
|
|
||||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
|
||||||
|
|
||||||
from utils import PdfReadError
|
|
||||||
try:
|
|
||||||
from cStringIO import StringIO
|
|
||||||
except ImportError:
|
|
||||||
from StringIO import StringIO
|
|
||||||
|
|
||||||
try:
|
|
||||||
import zlib
|
|
||||||
def decompress(data):
|
|
||||||
return zlib.decompress(data)
|
|
||||||
def compress(data):
|
|
||||||
return zlib.compress(data)
|
|
||||||
except ImportError:
|
|
||||||
# Unable to import zlib. Attempt to use the System.IO.Compression
|
|
||||||
# library from the .NET framework. (IronPython only)
|
|
||||||
import System
|
|
||||||
from System import IO, Collections, Array
|
|
||||||
def _string_to_bytearr(buf):
|
|
||||||
retval = Array.CreateInstance(System.Byte, len(buf))
|
|
||||||
for i in range(len(buf)):
|
|
||||||
retval[i] = ord(buf[i])
|
|
||||||
return retval
|
|
||||||
def _bytearr_to_string(bytes):
|
|
||||||
retval = ""
|
|
||||||
for i in range(bytes.Length):
|
|
||||||
retval += chr(bytes[i])
|
|
||||||
return retval
|
|
||||||
def _read_bytes(stream):
|
|
||||||
ms = IO.MemoryStream()
|
|
||||||
buf = Array.CreateInstance(System.Byte, 2048)
|
|
||||||
while True:
|
|
||||||
bytes = stream.Read(buf, 0, buf.Length)
|
|
||||||
if bytes == 0:
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
ms.Write(buf, 0, bytes)
|
|
||||||
retval = ms.ToArray()
|
|
||||||
ms.Close()
|
|
||||||
return retval
|
|
||||||
def decompress(data):
|
|
||||||
bytes = _string_to_bytearr(data)
|
|
||||||
ms = IO.MemoryStream()
|
|
||||||
ms.Write(bytes, 0, bytes.Length)
|
|
||||||
ms.Position = 0 # fseek 0
|
|
||||||
gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
|
|
||||||
bytes = _read_bytes(gz)
|
|
||||||
retval = _bytearr_to_string(bytes)
|
|
||||||
gz.Close()
|
|
||||||
return retval
|
|
||||||
def compress(data):
|
|
||||||
bytes = _string_to_bytearr(data)
|
|
||||||
ms = IO.MemoryStream()
|
|
||||||
gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True)
|
|
||||||
gz.Write(bytes, 0, bytes.Length)
|
|
||||||
gz.Close()
|
|
||||||
ms.Position = 0 # fseek 0
|
|
||||||
bytes = ms.ToArray()
|
|
||||||
retval = _bytearr_to_string(bytes)
|
|
||||||
ms.Close()
|
|
||||||
return retval
|
|
||||||
|
|
||||||
|
|
||||||
class FlateDecode(object):
|
|
||||||
def decode(data, decodeParms):
|
|
||||||
data = decompress(data)
|
|
||||||
predictor = 1
|
|
||||||
if decodeParms:
|
|
||||||
predictor = decodeParms.get("/Predictor", 1)
|
|
||||||
# predictor 1 == no predictor
|
|
||||||
if predictor != 1:
|
|
||||||
columns = decodeParms["/Columns"]
|
|
||||||
# PNG prediction:
|
|
||||||
if predictor >= 10 and predictor <= 15:
|
|
||||||
output = StringIO()
|
|
||||||
# PNG prediction can vary from row to row
|
|
||||||
rowlength = columns + 1
|
|
||||||
assert len(data) % rowlength == 0
|
|
||||||
prev_rowdata = (0,) * rowlength
|
|
||||||
for row in xrange(len(data) / rowlength):
|
|
||||||
rowdata = [ord(x) for x in data[(row*rowlength):((row+1)*rowlength)]]
|
|
||||||
filterByte = rowdata[0]
|
|
||||||
if filterByte == 0:
|
|
||||||
pass
|
|
||||||
elif filterByte == 1:
|
|
||||||
for i in range(2, rowlength):
|
|
||||||
rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256
|
|
||||||
elif filterByte == 2:
|
|
||||||
for i in range(1, rowlength):
|
|
||||||
rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
|
|
||||||
else:
|
|
||||||
# unsupported PNG filter
|
|
||||||
raise PdfReadError("Unsupported PNG filter %r" % filterByte)
|
|
||||||
prev_rowdata = rowdata
|
|
||||||
output.write(''.join([chr(x) for x in rowdata[1:]]))
|
|
||||||
data = output.getvalue()
|
|
||||||
else:
|
|
||||||
# unsupported predictor
|
|
||||||
raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)
|
|
||||||
return data
|
|
||||||
decode = staticmethod(decode)
|
|
||||||
|
|
||||||
def encode(data):
|
|
||||||
return compress(data)
|
|
||||||
encode = staticmethod(encode)
|
|
||||||
|
|
||||||
class ASCIIHexDecode(object):
|
|
||||||
def decode(data, decodeParms=None):
|
|
||||||
retval = ""
|
|
||||||
char = ""
|
|
||||||
x = 0
|
|
||||||
while True:
|
|
||||||
c = data[x]
|
|
||||||
if c == ">":
|
|
||||||
break
|
|
||||||
elif c.isspace():
|
|
||||||
x += 1
|
|
||||||
continue
|
|
||||||
char += c
|
|
||||||
if len(char) == 2:
|
|
||||||
retval += chr(int(char, base=16))
|
|
||||||
char = ""
|
|
||||||
x += 1
|
|
||||||
assert char == ""
|
|
||||||
return retval
|
|
||||||
decode = staticmethod(decode)
|
|
||||||
|
|
||||||
class ASCII85Decode(object):
|
|
||||||
def decode(data, decodeParms=None):
|
|
||||||
retval = ""
|
|
||||||
group = []
|
|
||||||
x = 0
|
|
||||||
hitEod = False
|
|
||||||
# remove all whitespace from data
|
|
||||||
data = [y for y in data if not (y in ' \n\r\t')]
|
|
||||||
while not hitEod:
|
|
||||||
c = data[x]
|
|
||||||
if len(retval) == 0 and c == "<" and data[x+1] == "~":
|
|
||||||
x += 2
|
|
||||||
continue
|
|
||||||
#elif c.isspace():
|
|
||||||
# x += 1
|
|
||||||
# continue
|
|
||||||
elif c == 'z':
|
|
||||||
assert len(group) == 0
|
|
||||||
retval += '\x00\x00\x00\x00'
|
|
||||||
continue
|
|
||||||
elif c == "~" and data[x+1] == ">":
|
|
||||||
if len(group) != 0:
|
|
||||||
# cannot have a final group of just 1 char
|
|
||||||
assert len(group) > 1
|
|
||||||
cnt = len(group) - 1
|
|
||||||
group += [ 85, 85, 85 ]
|
|
||||||
hitEod = cnt
|
|
||||||
else:
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
c = ord(c) - 33
|
|
||||||
assert c >= 0 and c < 85
|
|
||||||
group += [ c ]
|
|
||||||
if len(group) >= 5:
|
|
||||||
b = group[0] * (85**4) + \
|
|
||||||
group[1] * (85**3) + \
|
|
||||||
group[2] * (85**2) + \
|
|
||||||
group[3] * 85 + \
|
|
||||||
group[4]
|
|
||||||
assert b < (2**32 - 1)
|
|
||||||
c4 = chr((b >> 0) % 256)
|
|
||||||
c3 = chr((b >> 8) % 256)
|
|
||||||
c2 = chr((b >> 16) % 256)
|
|
||||||
c1 = chr(b >> 24)
|
|
||||||
retval += (c1 + c2 + c3 + c4)
|
|
||||||
if hitEod:
|
|
||||||
retval = retval[:-4+hitEod]
|
|
||||||
group = []
|
|
||||||
x += 1
|
|
||||||
return retval
|
|
||||||
decode = staticmethod(decode)
|
|
||||||
|
|
||||||
def decodeStreamData(stream):
|
|
||||||
from generic import NameObject
|
|
||||||
filters = stream.get("/Filter", ())
|
|
||||||
if len(filters) and not isinstance(filters[0], NameObject):
|
|
||||||
# we have a single filter instance
|
|
||||||
filters = (filters,)
|
|
||||||
data = stream._data
|
|
||||||
for filterType in filters:
|
|
||||||
if filterType == "/FlateDecode":
|
|
||||||
data = FlateDecode.decode(data, stream.get("/DecodeParms"))
|
|
||||||
elif filterType == "/ASCIIHexDecode":
|
|
||||||
data = ASCIIHexDecode.decode(data)
|
|
||||||
elif filterType == "/ASCII85Decode":
|
|
||||||
data = ASCII85Decode.decode(data)
|
|
||||||
elif filterType == "/Crypt":
|
|
||||||
decodeParams = stream.get("/DecodeParams", {})
|
|
||||||
if "/Name" not in decodeParams and "/Type" not in decodeParams:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
|
|
||||||
else:
|
|
||||||
# unsupported filter
|
|
||||||
raise NotImplementedError("unsupported filter %s" % filterType)
|
|
||||||
return data
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
assert "abc" == ASCIIHexDecode.decode('61\n626\n3>')
|
|
||||||
|
|
||||||
ascii85Test = """
|
|
||||||
<~9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKF<GL>Cj@.4Gp$d7F!,L7@<6@)/0JDEF<G%<+EV:2F!,
|
|
||||||
O<DJ+*.@<*K0@<6L(Df-\\0Ec5e;DffZ(EZee.Bl.9pF"AGXBPCsi+DGm>@3BB/F*&OCAfu2/AKY
|
|
||||||
i(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF<G:8+EV:.+Cf>-FD5W8ARlolDIa
|
|
||||||
l(DId<j@<?3r@:F%a+D58'ATD4$Bl@l3De:,-DJs`8ARoFb/0JMK@qB4^F!,R<AKZ&-DfTqBG%G
|
|
||||||
>uD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c~>
|
|
||||||
"""
|
|
||||||
ascii85_originalText="Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure."
|
|
||||||
assert ASCII85Decode.decode(ascii85Test) == ascii85_originalText
|
|
||||||
|
|
@ -1,780 +0,0 @@
|
|||||||
# vim: sw=4:expandtab:foldmethod=marker
|
|
||||||
#
|
|
||||||
# Copyright (c) 2006, Mathieu Fenniak
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions are
|
|
||||||
# met:
|
|
||||||
#
|
|
||||||
# * Redistributions of source code must retain the above copyright notice,
|
|
||||||
# this list of conditions and the following disclaimer.
|
|
||||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
||||||
# this list of conditions and the following disclaimer in the documentation
|
|
||||||
# and/or other materials provided with the distribution.
|
|
||||||
# * The name of the author may not be used to endorse or promote products
|
|
||||||
# derived from this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
# POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
Implementation of generic PDF objects (dictionary, number, string, and so on)
|
|
||||||
"""
|
|
||||||
__author__ = "Mathieu Fenniak"
|
|
||||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
|
||||||
|
|
||||||
import re
|
|
||||||
from utils import readNonWhitespace, RC4_encrypt
|
|
||||||
import filters
|
|
||||||
import utils
|
|
||||||
import decimal
|
|
||||||
import codecs
|
|
||||||
|
|
||||||
def readObject(stream, pdf):
|
|
||||||
tok = stream.read(1)
|
|
||||||
stream.seek(-1, 1) # reset to start
|
|
||||||
if tok == 't' or tok == 'f':
|
|
||||||
# boolean object
|
|
||||||
return BooleanObject.readFromStream(stream)
|
|
||||||
elif tok == '(':
|
|
||||||
# string object
|
|
||||||
return readStringFromStream(stream)
|
|
||||||
elif tok == '/':
|
|
||||||
# name object
|
|
||||||
return NameObject.readFromStream(stream)
|
|
||||||
elif tok == '[':
|
|
||||||
# array object
|
|
||||||
return ArrayObject.readFromStream(stream, pdf)
|
|
||||||
elif tok == 'n':
|
|
||||||
# null object
|
|
||||||
return NullObject.readFromStream(stream)
|
|
||||||
elif tok == '<':
|
|
||||||
# hexadecimal string OR dictionary
|
|
||||||
peek = stream.read(2)
|
|
||||||
stream.seek(-2, 1) # reset to start
|
|
||||||
if peek == '<<':
|
|
||||||
return DictionaryObject.readFromStream(stream, pdf)
|
|
||||||
else:
|
|
||||||
return readHexStringFromStream(stream)
|
|
||||||
elif tok == '%':
|
|
||||||
# comment
|
|
||||||
while tok not in ('\r', '\n'):
|
|
||||||
tok = stream.read(1)
|
|
||||||
tok = readNonWhitespace(stream)
|
|
||||||
stream.seek(-1, 1)
|
|
||||||
return readObject(stream, pdf)
|
|
||||||
else:
|
|
||||||
# number object OR indirect reference
|
|
||||||
if tok == '+' or tok == '-':
|
|
||||||
# number
|
|
||||||
return NumberObject.readFromStream(stream)
|
|
||||||
peek = stream.read(20)
|
|
||||||
stream.seek(-len(peek), 1) # reset to start
|
|
||||||
if re.match(r"(\d+)\s(\d+)\sR[^a-zA-Z]", peek) != None:
|
|
||||||
return IndirectObject.readFromStream(stream, pdf)
|
|
||||||
else:
|
|
||||||
return NumberObject.readFromStream(stream)
|
|
||||||
|
|
||||||
class PdfObject(object):
|
|
||||||
def getObject(self):
|
|
||||||
"""Resolves indirect references."""
|
|
||||||
return self
|
|
||||||
|
|
||||||
|
|
||||||
class NullObject(PdfObject):
|
|
||||||
def writeToStream(self, stream, encryption_key):
|
|
||||||
stream.write("null")
|
|
||||||
|
|
||||||
def readFromStream(stream):
|
|
||||||
nulltxt = stream.read(4)
|
|
||||||
if nulltxt != "null":
|
|
||||||
raise utils.PdfReadError, "error reading null object"
|
|
||||||
return NullObject()
|
|
||||||
readFromStream = staticmethod(readFromStream)
|
|
||||||
|
|
||||||
|
|
||||||
class BooleanObject(PdfObject):
|
|
||||||
def __init__(self, value):
|
|
||||||
self.value = value
|
|
||||||
|
|
||||||
def writeToStream(self, stream, encryption_key):
|
|
||||||
if self.value:
|
|
||||||
stream.write("true")
|
|
||||||
else:
|
|
||||||
stream.write("false")
|
|
||||||
|
|
||||||
def readFromStream(stream):
|
|
||||||
word = stream.read(4)
|
|
||||||
if word == "true":
|
|
||||||
return BooleanObject(True)
|
|
||||||
elif word == "fals":
|
|
||||||
stream.read(1)
|
|
||||||
return BooleanObject(False)
|
|
||||||
assert False
|
|
||||||
readFromStream = staticmethod(readFromStream)
|
|
||||||
|
|
||||||
|
|
||||||
class ArrayObject(list, PdfObject):
|
|
||||||
def writeToStream(self, stream, encryption_key):
|
|
||||||
stream.write("[")
|
|
||||||
for data in self:
|
|
||||||
stream.write(" ")
|
|
||||||
data.writeToStream(stream, encryption_key)
|
|
||||||
stream.write(" ]")
|
|
||||||
|
|
||||||
def readFromStream(stream, pdf):
|
|
||||||
arr = ArrayObject()
|
|
||||||
tmp = stream.read(1)
|
|
||||||
if tmp != "[":
|
|
||||||
raise utils.PdfReadError, "error reading array"
|
|
||||||
while True:
|
|
||||||
# skip leading whitespace
|
|
||||||
tok = stream.read(1)
|
|
||||||
while tok.isspace():
|
|
||||||
tok = stream.read(1)
|
|
||||||
stream.seek(-1, 1)
|
|
||||||
# check for array ending
|
|
||||||
peekahead = stream.read(1)
|
|
||||||
if peekahead == "]":
|
|
||||||
break
|
|
||||||
stream.seek(-1, 1)
|
|
||||||
# read and append obj
|
|
||||||
arr.append(readObject(stream, pdf))
|
|
||||||
return arr
|
|
||||||
readFromStream = staticmethod(readFromStream)
|
|
||||||
|
|
||||||
|
|
||||||
class IndirectObject(PdfObject):
|
|
||||||
def __init__(self, idnum, generation, pdf):
|
|
||||||
self.idnum = idnum
|
|
||||||
self.generation = generation
|
|
||||||
self.pdf = pdf
|
|
||||||
|
|
||||||
def getObject(self):
|
|
||||||
return self.pdf.getObject(self).getObject()
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return "IndirectObject(%r, %r)" % (self.idnum, self.generation)
|
|
||||||
|
|
||||||
def __eq__(self, other):
|
|
||||||
return (
|
|
||||||
other != None and
|
|
||||||
isinstance(other, IndirectObject) and
|
|
||||||
self.idnum == other.idnum and
|
|
||||||
self.generation == other.generation and
|
|
||||||
self.pdf is other.pdf
|
|
||||||
)
|
|
||||||
|
|
||||||
def __ne__(self, other):
|
|
||||||
return not self.__eq__(other)
|
|
||||||
|
|
||||||
def writeToStream(self, stream, encryption_key):
|
|
||||||
stream.write("%s %s R" % (self.idnum, self.generation))
|
|
||||||
|
|
||||||
def readFromStream(stream, pdf):
|
|
||||||
idnum = ""
|
|
||||||
while True:
|
|
||||||
tok = stream.read(1)
|
|
||||||
if tok.isspace():
|
|
||||||
break
|
|
||||||
idnum += tok
|
|
||||||
generation = ""
|
|
||||||
while True:
|
|
||||||
tok = stream.read(1)
|
|
||||||
if tok.isspace():
|
|
||||||
break
|
|
||||||
generation += tok
|
|
||||||
r = stream.read(1)
|
|
||||||
if r != "R":
|
|
||||||
raise utils.PdfReadError("error reading indirect object reference")
|
|
||||||
return IndirectObject(int(idnum), int(generation), pdf)
|
|
||||||
readFromStream = staticmethod(readFromStream)
|
|
||||||
|
|
||||||
|
|
||||||
class FloatObject(decimal.Decimal, PdfObject):
|
|
||||||
def __new__(cls, value="0", context=None):
|
|
||||||
return decimal.Decimal.__new__(cls, str(value), context)
|
|
||||||
def __repr__(self):
|
|
||||||
return str(self)
|
|
||||||
def writeToStream(self, stream, encryption_key):
|
|
||||||
stream.write(str(self))
|
|
||||||
|
|
||||||
|
|
||||||
class NumberObject(int, PdfObject):
|
|
||||||
def __init__(self, value):
|
|
||||||
int.__init__(self, value)
|
|
||||||
|
|
||||||
def writeToStream(self, stream, encryption_key):
|
|
||||||
stream.write(repr(self))
|
|
||||||
|
|
||||||
def readFromStream(stream):
|
|
||||||
name = ""
|
|
||||||
while True:
|
|
||||||
tok = stream.read(1)
|
|
||||||
if tok != '+' and tok != '-' and tok != '.' and not tok.isdigit():
|
|
||||||
stream.seek(-1, 1)
|
|
||||||
break
|
|
||||||
name += tok
|
|
||||||
if name.find(".") != -1:
|
|
||||||
return FloatObject(name)
|
|
||||||
else:
|
|
||||||
return NumberObject(name)
|
|
||||||
readFromStream = staticmethod(readFromStream)
|
|
||||||
|
|
||||||
|
|
||||||
##
|
|
||||||
# Given a string (either a "str" or "unicode"), create a ByteStringObject or a
|
|
||||||
# TextStringObject to represent the string.
|
|
||||||
def createStringObject(string):
|
|
||||||
if isinstance(string, unicode):
|
|
||||||
return TextStringObject(string)
|
|
||||||
elif isinstance(string, str):
|
|
||||||
if string.startswith(codecs.BOM_UTF16_BE):
|
|
||||||
retval = TextStringObject(string.decode("utf-16"))
|
|
||||||
retval.autodetect_utf16 = True
|
|
||||||
return retval
|
|
||||||
else:
|
|
||||||
# This is probably a big performance hit here, but we need to
|
|
||||||
# convert string objects into the text/unicode-aware version if
|
|
||||||
# possible... and the only way to check if that's possible is
|
|
||||||
# to try. Some strings are strings, some are just byte arrays.
|
|
||||||
try:
|
|
||||||
retval = TextStringObject(decode_pdfdocencoding(string))
|
|
||||||
retval.autodetect_pdfdocencoding = True
|
|
||||||
return retval
|
|
||||||
except UnicodeDecodeError:
|
|
||||||
return ByteStringObject(string)
|
|
||||||
else:
|
|
||||||
raise TypeError("createStringObject should have str or unicode arg")
|
|
||||||
|
|
||||||
|
|
||||||
def readHexStringFromStream(stream):
|
|
||||||
stream.read(1)
|
|
||||||
txt = ""
|
|
||||||
x = ""
|
|
||||||
while True:
|
|
||||||
tok = readNonWhitespace(stream)
|
|
||||||
if tok == ">":
|
|
||||||
break
|
|
||||||
x += tok
|
|
||||||
if len(x) == 2:
|
|
||||||
txt += chr(int(x, base=16))
|
|
||||||
x = ""
|
|
||||||
if len(x) == 1:
|
|
||||||
x += "0"
|
|
||||||
if len(x) == 2:
|
|
||||||
txt += chr(int(x, base=16))
|
|
||||||
return createStringObject(txt)
|
|
||||||
|
|
||||||
|
|
||||||
def readStringFromStream(stream):
    # Read a PDF literal string "(...)"; the opening '(' is consumed (and
    # discarded) by the first read.  Balanced nested parentheses are tracked
    # via *parens*; backslash escape sequences are expanded inline.
    tok = stream.read(1)
    parens = 1
    txt = ""
    while True:
        tok = stream.read(1)
        if tok == "(":
            parens += 1
        elif tok == ")":
            parens -= 1
            if parens == 0:
                # Matching close of the outermost '(' -- string complete.
                break
        elif tok == "\\":
            # Escape sequence: translate the character(s) after the backslash.
            tok = stream.read(1)
            if tok == "n":
                tok = "\n"
            elif tok == "r":
                tok = "\r"
            elif tok == "t":
                tok = "\t"
            elif tok == "b":
                tok = "\b"
            elif tok == "f":
                tok = "\f"
            elif tok == "(":
                tok = "("
            elif tok == ")":
                tok = ")"
            elif tok == "\\":
                tok = "\\"
            elif tok.isdigit():
                # Octal escape: two more digits are always consumed here.
                # NOTE(review): the PDF spec also permits 1- or 2-digit octal
                # escapes; this path assumes exactly three digits -- confirm.
                tok += stream.read(2)
                tok = chr(int(tok, base=8))
            elif tok in "\n\r":
                # This case is hit when a backslash followed by a line
                # break occurs.  If it's a multi-char EOL, consume the
                # second character:
                tok = stream.read(1)
                if not tok in "\n\r":
                    stream.seek(-1, 1)
                # Then don't add anything to the actual string, since this
                # line break was escaped:
                tok = ''
            else:
                raise utils.PdfReadError("Unexpected escaped string")
        txt += tok
    return createStringObject(txt)
|
|
||||||
|
|
||||||
|
|
||||||
##
# Represents a string object where the text encoding could not be determined.
# This occurs quite often, as the PDF spec doesn't provide an alternate way to
# represent strings -- for example, the encryption data stored in files (like
# /O) is clearly not text, but is still stored in a "String" object.
class ByteStringObject(str, PdfObject):

    ##
    # For compatibility with TextStringObject.original_bytes.  This method
    # returns self.
    original_bytes = property(lambda self: self)

    def writeToStream(self, stream, encryption_key):
        # Serialize as a hex string "<...>"; encrypt first when a key is given.
        bytearr = self
        if encryption_key:
            bytearr = RC4_encrypt(encryption_key, bytearr)
        stream.write("<")
        stream.write(bytearr.encode("hex"))
        stream.write(">")
|
|
||||||
|
|
||||||
|
|
||||||
##
# Represents a string object that has been decoded into a real unicode string.
# If read from a PDF document, this string appeared to match the
# PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to
# occur.
class TextStringObject(unicode, PdfObject):
    # Flags recording how the string was auto-detected on read; consulted by
    # get_original_bytes to reconstruct the exact on-disk byte form.
    autodetect_pdfdocencoding = False
    autodetect_utf16 = False

    ##
    # It is occasionally possible that a text string object gets created where
    # a byte string object was expected due to the autodetection mechanism --
    # if that occurs, this "original_bytes" property can be used to
    # back-calculate what the original encoded bytes were.
    original_bytes = property(lambda self: self.get_original_bytes())

    def get_original_bytes(self):
        # We're a text string object, but the library is trying to get our raw
        # bytes.  This can happen if we auto-detected this string as text, but
        # we were wrong.  It's pretty common.  Return the original bytes that
        # would have been used to create this object, based upon the autodetect
        # method.
        if self.autodetect_utf16:
            return codecs.BOM_UTF16_BE + self.encode("utf-16be")
        elif self.autodetect_pdfdocencoding:
            return encode_pdfdocencoding(self)
        else:
            raise Exception("no information about original bytes")

    def writeToStream(self, stream, encryption_key):
        # Try to write the string out as a PDFDocEncoding encoded string.  It's
        # nicer to look at in the PDF file.  Sadly, we take a performance hit
        # here for trying...
        try:
            bytearr = encode_pdfdocencoding(self)
        except UnicodeEncodeError:
            # Fall back to UTF-16BE with a BOM for non-PDFDocEncoding text.
            bytearr = codecs.BOM_UTF16_BE + self.encode("utf-16be")
        if encryption_key:
            # Encrypted strings are emitted in hex form via ByteStringObject.
            bytearr = RC4_encrypt(encryption_key, bytearr)
            obj = ByteStringObject(bytearr)
            obj.writeToStream(stream, None)
        else:
            stream.write("(")
            for c in bytearr:
                # Escape everything that is not alphanumeric or a space as a
                # 3-digit octal escape so delimiters never leak through.
                if not c.isalnum() and c != ' ':
                    stream.write("\\%03o" % ord(c))
                else:
                    stream.write(c)
            stream.write(")")
|
|
||||||
|
|
||||||
|
|
||||||
class NameObject(str, PdfObject):
    # Characters that terminate a name token (the PDF delimiter set).
    delimiterCharacters = "(", ")", "<", ">", "[", "]", "{", "}", "/", "%"

    def __init__(self, data):
        str.__init__(self, data)

    def writeToStream(self, stream, encryption_key):
        # A name is written verbatim, including its leading '/'.
        stream.write(self)

    def readFromStream(stream):
        # Parse a '/Name' token: a leading '/' is mandatory; the name runs
        # until whitespace or a delimiter character.
        name = stream.read(1)
        if name != "/":
            raise utils.PdfReadError, "name read error"
        while True:
            tok = stream.read(1)
            if tok.isspace() or tok in NameObject.delimiterCharacters:
                # Push the terminator back for the caller to consume.
                stream.seek(-1, 1)
                break
            name += tok
        return NameObject(name)
    readFromStream = staticmethod(readFromStream)
|
|
||||||
|
|
||||||
|
|
||||||
class DictionaryObject(dict, PdfObject):
    # A PDF dictionary "<< ... >>".  Both keys and values must be PdfObject
    # instances (enforced in __setitem__/setdefault); __getitem__ resolves
    # indirect references via getObject().

    def __init__(self, *args, **kwargs):
        if len(args) == 0:
            self.update(kwargs)
        elif len(args) == 1:
            arr = args[0]
            # If we're passed a list/tuple, make a dict out of it
            if not hasattr(arr, "iteritems"):
                newarr = {}
                for k, v in arr:
                    newarr[k] = v
                arr = newarr
            self.update(arr)
        else:
            # NOTE(review): message hard-codes "got 3" regardless of the
            # actual number of positional arguments.
            raise TypeError("dict expected at most 1 argument, got 3")

    def update(self, arr):
        # note, a ValueError halfway through copying values
        # will leave half the values in this dict.
        for k, v in arr.iteritems():
            self.__setitem__(k, v)

    def raw_get(self, key):
        # Fetch the stored value without resolving indirect references.
        return dict.__getitem__(self, key)

    def __setitem__(self, key, value):
        if not isinstance(key, PdfObject):
            raise ValueError("key must be PdfObject")
        if not isinstance(value, PdfObject):
            raise ValueError("value must be PdfObject")
        return dict.__setitem__(self, key, value)

    def setdefault(self, key, value=None):
        if not isinstance(key, PdfObject):
            raise ValueError("key must be PdfObject")
        if not isinstance(value, PdfObject):
            raise ValueError("value must be PdfObject")
        return dict.setdefault(self, key, value)

    def __getitem__(self, key):
        # Resolve IndirectObject values transparently on lookup.
        return dict.__getitem__(self, key).getObject()

    ##
    # Retrieves XMP (Extensible Metadata Platform) data relevant to the
    # this object, if available.
    # <p>
    # Stability: Added in v1.12, will exist for all future v1.x releases.
    # @return Returns a {@link #xmp.XmpInformation XmlInformation} instance
    # that can be used to access XMP metadata from the document.  Can also
    # return None if no metadata was found on the document root.
    def getXmpMetadata(self):
        metadata = self.get("/Metadata", None)
        if metadata == None:
            return None
        metadata = metadata.getObject()
        import xmp
        if not isinstance(metadata, xmp.XmpInformation):
            # Wrap the raw stream once and cache the wrapper back into self.
            metadata = xmp.XmpInformation(metadata)
            self[NameObject("/Metadata")] = metadata
        return metadata

    ##
    # Read-only property that accesses the {@link
    # #DictionaryObject.getXmpData getXmpData} function.
    # <p>
    # Stability: Added in v1.12, will exist for all future v1.x releases.
    xmpMetadata = property(lambda self: self.getXmpMetadata(), None, None)

    def writeToStream(self, stream, encryption_key):
        stream.write("<<\n")
        for key, value in self.items():
            key.writeToStream(stream, encryption_key)
            stream.write(" ")
            value.writeToStream(stream, encryption_key)
            stream.write("\n")
        stream.write(">>")

    def readFromStream(stream, pdf):
        # Parse "<< ... >>" from *stream*; *pdf* is the reader used to resolve
        # an indirect /Length.  Returns a DictionaryObject, or a StreamObject
        # when the dictionary is followed by the "stream" keyword.
        tmp = stream.read(2)
        if tmp != "<<":
            raise utils.PdfReadError, "dictionary read error"
        data = {}
        while True:
            tok = readNonWhitespace(stream)
            if tok == ">":
                # Consume the second '>' of the '>>' terminator.
                stream.read(1)
                break
            stream.seek(-1, 1)
            key = readObject(stream, pdf)
            tok = readNonWhitespace(stream)
            stream.seek(-1, 1)
            value = readObject(stream, pdf)
            if data.has_key(key):
                # multiple definitions of key not permitted
                raise utils.PdfReadError, "multiple definitions in dictionary"
            data[key] = value
        pos = stream.tell()
        s = readNonWhitespace(stream)
        if s == 's' and stream.read(5) == 'tream':
            eol = stream.read(1)
            # odd PDF file output has spaces after 'stream' keyword but before EOL.
            # patch provided by Danial Sandler
            while eol == ' ':
                eol = stream.read(1)
            assert eol in ("\n", "\r")
            if eol == "\r":
                # read \n after
                stream.read(1)
            # this is a stream object, not a dictionary
            assert data.has_key("/Length")
            length = data["/Length"]
            if isinstance(length, IndirectObject):
                # /Length may be an indirect object; resolve it, then restore
                # the current stream position.
                t = stream.tell()
                length = pdf.getObject(length)
                stream.seek(t, 0)
            data["__streamdata__"] = stream.read(length)
            e = readNonWhitespace(stream)
            ndstream = stream.read(8)
            if (e + ndstream) != "endstream":
                # (sigh) - the odd PDF file has a length that is too long, so
                # we need to read backwards to find the "endstream" ending.
                # ReportLab (unknown version) generates files with this bug,
                # and Python users into PDF files tend to be our audience.
                # we need to do this to correct the streamdata and chop off
                # an extra character.
                pos = stream.tell()
                stream.seek(-10, 1)
                end = stream.read(9)
                if end == "endstream":
                    # we found it by looking back one character further.
                    data["__streamdata__"] = data["__streamdata__"][:-1]
                else:
                    stream.seek(pos, 0)
                    raise utils.PdfReadError, "Unable to find 'endstream' marker after stream."
        else:
            # No "stream" keyword followed: rewind past what we peeked at.
            stream.seek(pos, 0)
        if data.has_key("__streamdata__"):
            return StreamObject.initializeFromDictionary(data)
        else:
            retval = DictionaryObject()
            retval.update(data)
            return retval
    readFromStream = staticmethod(readFromStream)
|
|
||||||
|
|
||||||
|
|
||||||
class StreamObject(DictionaryObject):
    # Base class for PDF stream objects: a dictionary plus a raw data payload
    # held in self._data.

    def __init__(self):
        self._data = None
        self.decodedSelf = None

    def writeToStream(self, stream, encryption_key):
        # /Length is derived from the current data, written with the
        # dictionary, then deleted again so self is left unchanged.
        self[NameObject("/Length")] = NumberObject(len(self._data))
        DictionaryObject.writeToStream(self, stream, encryption_key)
        del self["/Length"]
        stream.write("\nstream\n")
        data = self._data
        if encryption_key:
            data = RC4_encrypt(encryption_key, data)
        stream.write(data)
        stream.write("\nendstream")

    def initializeFromDictionary(data):
        # Factory used by DictionaryObject.readFromStream: a /Filter entry
        # means the payload is still encoded.  Consumes the bookkeeping keys
        # "__streamdata__" and "/Length" out of *data*.
        if data.has_key("/Filter"):
            retval = EncodedStreamObject()
        else:
            retval = DecodedStreamObject()
        retval._data = data["__streamdata__"]
        del data["__streamdata__"]
        del data["/Length"]
        retval.update(data)
        return retval
    initializeFromDictionary = staticmethod(initializeFromDictionary)

    def flateEncode(self):
        # Return a new EncodedStreamObject whose data is the flate-compressed
        # form of self._data, with /FlateDecode prepended to any existing
        # filter chain.  self is not modified.
        if self.has_key("/Filter"):
            f = self["/Filter"]
            if isinstance(f, ArrayObject):
                f.insert(0, NameObject("/FlateDecode"))
            else:
                newf = ArrayObject()
                newf.append(NameObject("/FlateDecode"))
                newf.append(f)
                f = newf
        else:
            f = NameObject("/FlateDecode")
        retval = EncodedStreamObject()
        retval[NameObject("/Filter")] = f
        retval._data = filters.FlateDecode.encode(self._data)
        return retval
|
|
||||||
|
|
||||||
|
|
||||||
class DecodedStreamObject(StreamObject):
    # Stream object whose payload is already in decoded (plain) form.

    def getData(self):
        # Data is stored decoded; return it directly.
        return self._data

    def setData(self, data):
        # Replace the decoded payload.
        self._data = data
|
|
||||||
|
|
||||||
|
|
||||||
class EncodedStreamObject(StreamObject):
    # Stream object whose payload is still compressed/encoded per /Filter.

    def __init__(self):
        # NOTE(review): unlike StreamObject.__init__, _data is not initialized
        # here; it is assigned externally by initializeFromDictionary.
        self.decodedSelf = None

    def getData(self):
        if self.decodedSelf:
            # cached version of decoded object
            return self.decodedSelf.getData()
        else:
            # create decoded object
            decoded = DecodedStreamObject()
            decoded._data = filters.decodeStreamData(self)
            for key, value in self.items():
                # Copy everything except the encoding bookkeeping entries.
                if not key in ("/Length", "/Filter", "/DecodeParms"):
                    decoded[key] = value
            self.decodedSelf = decoded
            return decoded._data

    def setData(self, data):
        raise utils.PdfReadError, "Creating EncodedStreamObject is not currently supported"
|
|
||||||
|
|
||||||
|
|
||||||
class RectangleObject(ArrayObject):
    # A PDF rectangle: [lower-left-x, lower-left-y, upper-right-x,
    # upper-right-y].  The other two corners are derived from these four
    # stored coordinates.

    def __init__(self, arr):
        # must have four points
        assert len(arr) == 4
        # automatically convert arr[x] into NumberObject(arr[x]) if necessary
        ArrayObject.__init__(self, [self.ensureIsNumber(x) for x in arr])

    def ensureIsNumber(self, value):
        # Wrap plain numbers as FloatObject; pass Number/FloatObject through.
        if not isinstance(value, (NumberObject, FloatObject)):
            value = FloatObject(value)
        return value

    def __repr__(self):
        return "RectangleObject(%s)" % repr(list(self))

    def getLowerLeft_x(self):
        return self[0]

    def getLowerLeft_y(self):
        return self[1]

    def getUpperRight_x(self):
        return self[2]

    def getUpperRight_y(self):
        return self[3]

    # The remaining corner coordinates are aliases of the stored ones.
    def getUpperLeft_x(self):
        return self.getLowerLeft_x()

    def getUpperLeft_y(self):
        return self.getUpperRight_y()

    def getLowerRight_x(self):
        return self.getUpperRight_x()

    def getLowerRight_y(self):
        return self.getLowerLeft_y()

    def getLowerLeft(self):
        return self.getLowerLeft_x(), self.getLowerLeft_y()

    def getLowerRight(self):
        return self.getLowerRight_x(), self.getLowerRight_y()

    def getUpperLeft(self):
        return self.getUpperLeft_x(), self.getUpperLeft_y()

    def getUpperRight(self):
        return self.getUpperRight_x(), self.getUpperRight_y()

    def setLowerLeft(self, value):
        self[0], self[1] = [self.ensureIsNumber(x) for x in value]

    def setLowerRight(self, value):
        self[2], self[1] = [self.ensureIsNumber(x) for x in value]

    def setUpperLeft(self, value):
        self[0], self[3] = [self.ensureIsNumber(x) for x in value]

    def setUpperRight(self, value):
        self[2], self[3] = [self.ensureIsNumber(x) for x in value]

    lowerLeft = property(getLowerLeft, setLowerLeft, None, None)
    lowerRight = property(getLowerRight, setLowerRight, None, None)
    upperLeft = property(getUpperLeft, setUpperLeft, None, None)
    upperRight = property(getUpperRight, setUpperRight, None, None)
|
|
||||||
|
|
||||||
|
|
||||||
def encode_pdfdocencoding(unicode_string):
    # Encode *unicode_string* into a PDFDocEncoding byte string using the
    # module-level reverse table.  Raises UnicodeEncodeError for characters
    # that have no PDFDocEncoding mapping.
    encoded = []
    for ch in unicode_string:
        try:
            encoded.append(chr(_pdfDocEncoding_rev[ch]))
        except KeyError:
            raise UnicodeEncodeError("pdfdocencoding", ch, -1, -1,
                    "does not exist in translation table")
    return ''.join(encoded)
|
|
||||||
|
|
||||||
def decode_pdfdocencoding(byte_array):
    # Decode a PDFDocEncoding byte string into a unicode string using the
    # module-level forward table.  Raises UnicodeDecodeError for bytes whose
    # table entry is the u'\u0000' placeholder (no mapping defined).
    decoded = []
    for byte in byte_array:
        mapped = _pdfDocEncoding[ord(byte)]
        if mapped == u'\u0000':
            raise UnicodeDecodeError("pdfdocencoding", byte, -1, -1,
                    "does not exist in translation table")
        decoded.append(mapped)
    return u''.join(decoded)
|
|
||||||
|
|
||||||
# Forward PDFDocEncoding table: index = byte value, value = unicode character.
# u'\u0000' entries mark byte values with no defined mapping.
_pdfDocEncoding = (
  u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000',
  u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000',
  u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000',
  u'\u02d8', u'\u02c7', u'\u02c6', u'\u02d9', u'\u02dd', u'\u02db', u'\u02da', u'\u02dc',
  u'\u0020', u'\u0021', u'\u0022', u'\u0023', u'\u0024', u'\u0025', u'\u0026', u'\u0027',
  u'\u0028', u'\u0029', u'\u002a', u'\u002b', u'\u002c', u'\u002d', u'\u002e', u'\u002f',
  u'\u0030', u'\u0031', u'\u0032', u'\u0033', u'\u0034', u'\u0035', u'\u0036', u'\u0037',
  u'\u0038', u'\u0039', u'\u003a', u'\u003b', u'\u003c', u'\u003d', u'\u003e', u'\u003f',
  u'\u0040', u'\u0041', u'\u0042', u'\u0043', u'\u0044', u'\u0045', u'\u0046', u'\u0047',
  u'\u0048', u'\u0049', u'\u004a', u'\u004b', u'\u004c', u'\u004d', u'\u004e', u'\u004f',
  u'\u0050', u'\u0051', u'\u0052', u'\u0053', u'\u0054', u'\u0055', u'\u0056', u'\u0057',
  u'\u0058', u'\u0059', u'\u005a', u'\u005b', u'\u005c', u'\u005d', u'\u005e', u'\u005f',
  u'\u0060', u'\u0061', u'\u0062', u'\u0063', u'\u0064', u'\u0065', u'\u0066', u'\u0067',
  u'\u0068', u'\u0069', u'\u006a', u'\u006b', u'\u006c', u'\u006d', u'\u006e', u'\u006f',
  u'\u0070', u'\u0071', u'\u0072', u'\u0073', u'\u0074', u'\u0075', u'\u0076', u'\u0077',
  u'\u0078', u'\u0079', u'\u007a', u'\u007b', u'\u007c', u'\u007d', u'\u007e', u'\u0000',
  u'\u2022', u'\u2020', u'\u2021', u'\u2026', u'\u2014', u'\u2013', u'\u0192', u'\u2044',
  u'\u2039', u'\u203a', u'\u2212', u'\u2030', u'\u201e', u'\u201c', u'\u201d', u'\u2018',
  u'\u2019', u'\u201a', u'\u2122', u'\ufb01', u'\ufb02', u'\u0141', u'\u0152', u'\u0160',
  u'\u0178', u'\u017d', u'\u0131', u'\u0142', u'\u0153', u'\u0161', u'\u017e', u'\u0000',
  u'\u20ac', u'\u00a1', u'\u00a2', u'\u00a3', u'\u00a4', u'\u00a5', u'\u00a6', u'\u00a7',
  u'\u00a8', u'\u00a9', u'\u00aa', u'\u00ab', u'\u00ac', u'\u0000', u'\u00ae', u'\u00af',
  u'\u00b0', u'\u00b1', u'\u00b2', u'\u00b3', u'\u00b4', u'\u00b5', u'\u00b6', u'\u00b7',
  u'\u00b8', u'\u00b9', u'\u00ba', u'\u00bb', u'\u00bc', u'\u00bd', u'\u00be', u'\u00bf',
  u'\u00c0', u'\u00c1', u'\u00c2', u'\u00c3', u'\u00c4', u'\u00c5', u'\u00c6', u'\u00c7',
  u'\u00c8', u'\u00c9', u'\u00ca', u'\u00cb', u'\u00cc', u'\u00cd', u'\u00ce', u'\u00cf',
  u'\u00d0', u'\u00d1', u'\u00d2', u'\u00d3', u'\u00d4', u'\u00d5', u'\u00d6', u'\u00d7',
  u'\u00d8', u'\u00d9', u'\u00da', u'\u00db', u'\u00dc', u'\u00dd', u'\u00de', u'\u00df',
  u'\u00e0', u'\u00e1', u'\u00e2', u'\u00e3', u'\u00e4', u'\u00e5', u'\u00e6', u'\u00e7',
  u'\u00e8', u'\u00e9', u'\u00ea', u'\u00eb', u'\u00ec', u'\u00ed', u'\u00ee', u'\u00ef',
  u'\u00f0', u'\u00f1', u'\u00f2', u'\u00f3', u'\u00f4', u'\u00f5', u'\u00f6', u'\u00f7',
  u'\u00f8', u'\u00f9', u'\u00fa', u'\u00fb', u'\u00fc', u'\u00fd', u'\u00fe', u'\u00ff'
)

# The table must cover every possible byte value exactly once.
assert len(_pdfDocEncoding) == 256
|
|
||||||
|
|
||||||
# Build the inverse mapping (unicode char -> byte value) used by
# encode_pdfdocencoding.  u"\u0000" placeholder entries are excluded; the
# assert guarantees the forward table maps no character twice.
_pdfDocEncoding_rev = {}
for i in xrange(256):
    char = _pdfDocEncoding[i]
    if char == u"\u0000":
        continue
    assert char not in _pdfDocEncoding_rev
    _pdfDocEncoding_rev[char] = i
|
|
||||||
|
|
1530
src/pyPdf/pdf.py
1530
src/pyPdf/pdf.py
File diff suppressed because it is too large
Load Diff
@ -1,110 +0,0 @@
|
|||||||
# vim: sw=4:expandtab:foldmethod=marker
|
|
||||||
#
|
|
||||||
# Copyright (c) 2006, Mathieu Fenniak
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions are
|
|
||||||
# met:
|
|
||||||
#
|
|
||||||
# * Redistributions of source code must retain the above copyright notice,
|
|
||||||
# this list of conditions and the following disclaimer.
|
|
||||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
||||||
# this list of conditions and the following disclaimer in the documentation
|
|
||||||
# and/or other materials provided with the distribution.
|
|
||||||
# * The name of the author may not be used to endorse or promote products
|
|
||||||
# derived from this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
# POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
Utility functions for PDF library.
|
|
||||||
"""
|
|
||||||
__author__ = "Mathieu Fenniak"
|
|
||||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
|
||||||
|
|
||||||
#ENABLE_PSYCO = False
|
|
||||||
#if ENABLE_PSYCO:
|
|
||||||
# try:
|
|
||||||
# import psyco
|
|
||||||
# except ImportError:
|
|
||||||
# ENABLE_PSYCO = False
|
|
||||||
#
|
|
||||||
#if not ENABLE_PSYCO:
|
|
||||||
# class psyco:
|
|
||||||
# def proxy(func):
|
|
||||||
# return func
|
|
||||||
# proxy = staticmethod(proxy)
|
|
||||||
|
|
||||||
def readUntilWhitespace(stream, maxchars=None):
    # Accumulate characters from *stream* until a whitespace character or EOF
    # is hit, or until *maxchars* characters have been collected.  The
    # terminating whitespace (if any) is consumed but not returned.
    chars = []
    while True:
        ch = stream.read(1)
        if not ch or ch.isspace():
            break
        chars.append(ch)
        if len(chars) == maxchars:
            break
    return "".join(chars)
|
|
||||||
|
|
||||||
def readNonWhitespace(stream):
    # Skip over '\n', '\r', ' ' and '\t' and return the first other character
    # read from *stream* ('' at EOF).
    skip = ('\n', '\r', ' ', '\t')
    ch = stream.read(1)
    while ch in skip:
        ch = stream.read(1)
    return ch
|
|
||||||
|
|
||||||
class ConvertFunctionsToVirtualList(object):
    # Adapts a pair of callables (length, item-getter) into a read-only,
    # list-like object supporting len() and integer indexing, including
    # negative indexes.

    def __init__(self, lengthFunction, getFunction):
        self.lengthFunction = lengthFunction
        self.getFunction = getFunction

    def __len__(self):
        return self.lengthFunction()

    def __getitem__(self, index):
        if not isinstance(index, int):
            raise TypeError, "sequence indices must be integers"
        len_self = len(self)
        if index < 0:
            # support negative indexes
            index = len_self + index
        if index < 0 or index >= len_self:
            raise IndexError, "sequence index out of range"
        return self.getFunction(index)
|
|
||||||
|
|
||||||
def RC4_encrypt(key, plaintext):
    # RC4 stream cipher.  Symmetric: applying it twice with the same key
    # returns the original input.  *key* and *plaintext* are byte-per-char
    # strings; the result is built the same way.
    # --- key-scheduling algorithm (KSA) ---
    state = list(range(256))
    key_len = len(key)
    j = 0
    for i in range(256):
        j = (j + state[i] + ord(key[i % key_len])) % 256
        state[i], state[j] = state[j], state[i]
    # --- pseudo-random generation, XORed against the input ---
    i = j = 0
    out = []
    for ch in plaintext:
        i = (i + 1) % 256
        j = (j + state[i]) % 256
        state[i], state[j] = state[j], state[i]
        keystream = state[(state[i] + state[j]) % 256]
        out.append(chr(ord(ch) ^ keystream))
    return "".join(out)
|
|
||||||
|
|
||||||
class PdfReadError(Exception):
    """Raised when a PDF file or stream cannot be parsed as expected."""
    pass
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # test RC4
    # RC4 is symmetric, so encrypting the ciphertext with the same key must
    # round-trip back to the original plaintext.
    out = RC4_encrypt("Key", "Plaintext")
    print repr(out)
    pt = RC4_encrypt("Key", out)
    print repr(pt)
|
|
355
src/pyPdf/xmp.py
355
src/pyPdf/xmp.py
@ -1,355 +0,0 @@
|
|||||||
import re
|
|
||||||
import datetime
|
|
||||||
import decimal
|
|
||||||
from generic import PdfObject
|
|
||||||
from xml.dom import getDOMImplementation
|
|
||||||
from xml.dom.minidom import parseString
|
|
||||||
|
|
||||||
# XML namespace URIs used when navigating the XMP packet's DOM.
RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"
XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/"
PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/"
XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/"

# What is the PDFX namespace, you might ask?  I might ask that too.  It's
# a completely undocumented namespace used to place "custom metadata"
# properties, which are arbitrary metadata properties with no semantic or
# documented meaning.  Elements in the namespace are key/value-style storage,
# where the element name is the key and the content is the value.  The keys
# are transformed into valid XML identifiers by substituting an invalid
# identifier character with \u2182 followed by the unicode hex ID of the
# original character.  A key like "my car" is therefore "my\u21820020car".
#
# \u2182, in case you're wondering, is the unicode character
# \u{ROMAN NUMERAL TEN THOUSAND}, a straightforward and obvious choice for
# escaping characters.
#
# Intentional users of the pdfx namespace should be shot on sight.  A
# custom data schema and sensical XML elements could be used instead, as is
# suggested by Adobe's own documentation on XMP (under "Extensibility of
# Schemas").
#
# Information presented here on the /pdfx/ schema is a result of limited
# reverse engineering, and does not constitute a full specification.
PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"
|
|
||||||
|
|
||||||
iso8601 = re.compile("""
|
|
||||||
(?P<year>[0-9]{4})
|
|
||||||
(-
|
|
||||||
(?P<month>[0-9]{2})
|
|
||||||
(-
|
|
||||||
(?P<day>[0-9]+)
|
|
||||||
(T
|
|
||||||
(?P<hour>[0-9]{2}):
|
|
||||||
(?P<minute>[0-9]{2})
|
|
||||||
(:(?P<second>[0-9]{2}(.[0-9]+)?))?
|
|
||||||
(?P<tzd>Z|[-+][0-9]{2}:[0-9]{2})
|
|
||||||
)?
|
|
||||||
)?
|
|
||||||
)?
|
|
||||||
""", re.VERBOSE)
|
|
||||||
|
|
||||||
##
|
|
||||||
# An object that represents Adobe XMP metadata.
|
|
||||||
class XmpInformation(PdfObject):
|
|
||||||
|
|
||||||
def __init__(self, stream):
|
|
||||||
self.stream = stream
|
|
||||||
docRoot = parseString(self.stream.getData())
|
|
||||||
self.rdfRoot = docRoot.getElementsByTagNameNS(RDF_NAMESPACE, "RDF")[0]
|
|
||||||
self.cache = {}
|
|
||||||
|
|
||||||
def writeToStream(self, stream, encryption_key):
|
|
||||||
self.stream.writeToStream(stream, encryption_key)
|
|
||||||
|
|
||||||
def getElement(self, aboutUri, namespace, name):
|
|
||||||
for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
|
|
||||||
if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
|
|
||||||
attr = desc.getAttributeNodeNS(namespace, name)
|
|
||||||
if attr != None:
|
|
||||||
yield attr
|
|
||||||
for element in desc.getElementsByTagNameNS(namespace, name):
|
|
||||||
yield element
|
|
||||||
|
|
||||||
def getNodesInNamespace(self, aboutUri, namespace):
|
|
||||||
for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
|
|
||||||
if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
|
|
||||||
for i in range(desc.attributes.length):
|
|
||||||
attr = desc.attributes.item(i)
|
|
||||||
if attr.namespaceURI == namespace:
|
|
||||||
yield attr
|
|
||||||
for child in desc.childNodes:
|
|
||||||
if child.namespaceURI == namespace:
|
|
||||||
yield child
|
|
||||||
|
|
||||||
def _getText(self, element):
|
|
||||||
text = ""
|
|
||||||
for child in element.childNodes:
|
|
||||||
if child.nodeType == child.TEXT_NODE:
|
|
||||||
text += child.data
|
|
||||||
return text
|
|
||||||
|
|
||||||
def _converter_string(value):
|
|
||||||
return value
|
|
||||||
|
|
||||||
def _converter_date(value):
|
|
||||||
m = iso8601.match(value)
|
|
||||||
year = int(m.group("year"))
|
|
||||||
month = int(m.group("month") or "1")
|
|
||||||
day = int(m.group("day") or "1")
|
|
||||||
hour = int(m.group("hour") or "0")
|
|
||||||
minute = int(m.group("minute") or "0")
|
|
||||||
second = decimal.Decimal(m.group("second") or "0")
|
|
||||||
seconds = second.to_integral(decimal.ROUND_FLOOR)
|
|
||||||
milliseconds = (second - seconds) * 1000000
|
|
||||||
tzd = m.group("tzd") or "Z"
|
|
||||||
dt = datetime.datetime(year, month, day, hour, minute, seconds, milliseconds)
|
|
||||||
if tzd != "Z":
|
|
||||||
tzd_hours, tzd_minutes = [int(x) for x in tzd.split(":")]
|
|
||||||
tzd_hours *= -1
|
|
||||||
if tzd_hours < 0:
|
|
||||||
tzd_minutes *= -1
|
|
||||||
dt = dt + datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes)
|
|
||||||
return dt
|
|
||||||
_test_converter_date = staticmethod(_converter_date)
|
|
||||||
|
|
||||||
def _getter_bag(namespace, name, converter):
|
|
||||||
def get(self):
|
|
||||||
cached = self.cache.get(namespace, {}).get(name)
|
|
||||||
if cached:
|
|
||||||
return cached
|
|
||||||
retval = []
|
|
||||||
for element in self.getElement("", namespace, name):
|
|
||||||
bags = element.getElementsByTagNameNS(RDF_NAMESPACE, "Bag")
|
|
||||||
if len(bags):
|
|
||||||
for bag in bags:
|
|
||||||
for item in bag.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
|
|
||||||
value = self._getText(item)
|
|
||||||
value = converter(value)
|
|
||||||
retval.append(value)
|
|
||||||
ns_cache = self.cache.setdefault(namespace, {})
|
|
||||||
ns_cache[name] = retval
|
|
||||||
return retval
|
|
||||||
return get
|
|
||||||
|
|
||||||
def _getter_seq(namespace, name, converter):
|
|
||||||
def get(self):
|
|
||||||
cached = self.cache.get(namespace, {}).get(name)
|
|
||||||
if cached:
|
|
||||||
return cached
|
|
||||||
retval = []
|
|
||||||
for element in self.getElement("", namespace, name):
|
|
||||||
seqs = element.getElementsByTagNameNS(RDF_NAMESPACE, "Seq")
|
|
||||||
if len(seqs):
|
|
||||||
for seq in seqs:
|
|
||||||
for item in seq.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
|
|
||||||
value = self._getText(item)
|
|
||||||
value = converter(value)
|
|
||||||
retval.append(value)
|
|
||||||
else:
|
|
||||||
value = converter(self._getText(element))
|
|
||||||
retval.append(value)
|
|
||||||
ns_cache = self.cache.setdefault(namespace, {})
|
|
||||||
ns_cache[name] = retval
|
|
||||||
return retval
|
|
||||||
return get
|
|
||||||
|
|
||||||
def _getter_langalt(namespace, name, converter):
|
|
||||||
def get(self):
|
|
||||||
cached = self.cache.get(namespace, {}).get(name)
|
|
||||||
if cached:
|
|
||||||
return cached
|
|
||||||
retval = {}
|
|
||||||
for element in self.getElement("", namespace, name):
|
|
||||||
alts = element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt")
|
|
||||||
if len(alts):
|
|
||||||
for alt in alts:
|
|
||||||
for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
|
|
||||||
value = self._getText(item)
|
|
||||||
value = converter(value)
|
|
||||||
retval[item.getAttribute("xml:lang")] = value
|
|
||||||
else:
|
|
||||||
retval["x-default"] = converter(self._getText(element))
|
|
||||||
ns_cache = self.cache.setdefault(namespace, {})
|
|
||||||
ns_cache[name] = retval
|
|
||||||
return retval
|
|
||||||
return get
|
|
||||||
|
|
||||||
def _getter_single(namespace, name, converter):
|
|
||||||
def get(self):
|
|
||||||
cached = self.cache.get(namespace, {}).get(name)
|
|
||||||
if cached:
|
|
||||||
return cached
|
|
||||||
value = None
|
|
||||||
for element in self.getElement("", namespace, name):
|
|
||||||
if element.nodeType == element.ATTRIBUTE_NODE:
|
|
||||||
value = element.nodeValue
|
|
||||||
else:
|
|
||||||
value = self._getText(element)
|
|
||||||
break
|
|
||||||
if value != None:
|
|
||||||
value = converter(value)
|
|
||||||
ns_cache = self.cache.setdefault(namespace, {})
|
|
||||||
ns_cache[name] = value
|
|
||||||
return value
|
|
||||||
return get
|
|
||||||
|
|
||||||
##
# Contributors to the resource (other than the authors). An unsorted
# array of names.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_contributor = property(_getter_bag(DC_NAMESPACE, "contributor", _converter_string))

##
# Text describing the extent or scope of the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_coverage = property(_getter_single(DC_NAMESPACE, "coverage", _converter_string))

##
# A sorted array of names of the authors of the resource, listed in order
# of precedence.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_creator = property(_getter_seq(DC_NAMESPACE, "creator", _converter_string))

##
# A sorted array of dates (datetime.datetime instances) of significance to
# the resource. The dates and times are in UTC.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_date = property(_getter_seq(DC_NAMESPACE, "date", _converter_date))

##
# A language-keyed dictionary of textual descriptions of the content of the
# resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_description = property(_getter_langalt(DC_NAMESPACE, "description", _converter_string))

##
# The mime-type of the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_format = property(_getter_single(DC_NAMESPACE, "format", _converter_string))

##
# Unique identifier of the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_identifier = property(_getter_single(DC_NAMESPACE, "identifier", _converter_string))

##
# An unordered array specifying the languages used in the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_language = property(_getter_bag(DC_NAMESPACE, "language", _converter_string))

##
# An unordered array of publisher names.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_publisher = property(_getter_bag(DC_NAMESPACE, "publisher", _converter_string))

##
# An unordered array of text descriptions of relationships to other
# documents.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_relation = property(_getter_bag(DC_NAMESPACE, "relation", _converter_string))

##
# A language-keyed dictionary of textual descriptions of the rights the
# user has to this resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_rights = property(_getter_langalt(DC_NAMESPACE, "rights", _converter_string))

##
# Unique identifier of the work from which this resource was derived.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_source = property(_getter_single(DC_NAMESPACE, "source", _converter_string))

##
# An unordered array of descriptive phrases or keywords that specify the
# topic of the content of the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_subject = property(_getter_bag(DC_NAMESPACE, "subject", _converter_string))

##
# A language-keyed dictionary of the title of the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_title = property(_getter_langalt(DC_NAMESPACE, "title", _converter_string))

##
# An unordered array of textual descriptions of the document type.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
dc_type = property(_getter_bag(DC_NAMESPACE, "type", _converter_string))

##
# An unformatted text string representing document keywords.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
pdf_keywords = property(_getter_single(PDF_NAMESPACE, "Keywords", _converter_string))

##
# The PDF file version, for example 1.0, 1.3.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
pdf_pdfversion = property(_getter_single(PDF_NAMESPACE, "PDFVersion", _converter_string))

##
# The name of the tool that created the PDF document.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
pdf_producer = property(_getter_single(PDF_NAMESPACE, "Producer", _converter_string))

##
# The date and time the resource was originally created. The date and
# time are returned as a UTC datetime.datetime object.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
xmp_createDate = property(_getter_single(XMP_NAMESPACE, "CreateDate", _converter_date))

##
# The date and time the resource was last modified. The date and time
# are returned as a UTC datetime.datetime object.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
xmp_modifyDate = property(_getter_single(XMP_NAMESPACE, "ModifyDate", _converter_date))

##
# The date and time that any metadata for this resource was last
# changed. The date and time are returned as a UTC datetime.datetime
# object.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
xmp_metadataDate = property(_getter_single(XMP_NAMESPACE, "MetadataDate", _converter_date))

##
# The name of the first known tool used to create the resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
xmp_creatorTool = property(_getter_single(XMP_NAMESPACE, "CreatorTool", _converter_string))

##
# The common identifier for all versions and renditions of this resource.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
xmpmm_documentId = property(_getter_single(XMPMM_NAMESPACE, "DocumentID", _converter_string))

##
# An identifier for a specific incarnation of a document, updated each
# time a file is saved.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
xmpmm_instanceId = property(_getter_single(XMPMM_NAMESPACE, "InstanceID", _converter_string))
|
|
||||||
|
|
||||||
def custom_properties(self):
    """Return custom metadata from the undocumented pdfx schema.

    The result is computed once and memoized on ``self._custom_properties``;
    subsequent accesses return the same dictionary.
    """
    if not hasattr(self, "_custom_properties"):
        self._custom_properties = {}
        for node in self.getNodesInNamespace("", PDFX_NAMESPACE):
            raw = node.localName
            # Undo the pdfx key escaping (see the PDFX_NAMESPACE notes
            # earlier in this file): every U+2182 marker is followed by
            # four hex digits encoding the original character.
            while u"\u2182" in raw:
                pos = raw.index(u"\u2182")
                raw = raw[:pos] + chr(int(raw[pos + 1:pos + 5], base=16)) + raw[pos + 5:]
            if node.nodeType == node.ATTRIBUTE_NODE:
                self._custom_properties[raw] = node.nodeValue
            else:
                self._custom_properties[raw] = self._getText(node)
    return self._custom_properties
|
|
||||||
|
|
||||||
##
# Retrieves custom metadata properties defined in the undocumented pdfx
# metadata schema.
# <p>Stability: Added in v1.12, will exist for all future v1.x releases.
# @return Returns a dictionary of key/value items for custom metadata
# properties.
# NOTE: rebinds the name so the method above becomes a read-only property.
custom_properties = property(custom_properties)
|
|
||||||
|
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user