pdfmanipulate moved to new command line option framework

2025-07-09 03:04:10 -04:00 · 2009-04-03 21:06:31 -04:00 · 2009-04-03 21:06:31 -04:00 · 7f5a619ad9
commit 7f5a619ad9
parent 64683d4bc3
10 changed files with 368 additions and 225 deletions
--- a/src/calibre/ebooks/init.py
+++ b/src/calibre/ebooks/init.py
@ -1,3 +1,4 @@
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
--- a/src/calibre/ebooks/pdf/manipulate/init.py
+++ b/src/calibre/ebooks/pdf/manipulate/init.py
--- a/src/calibre/ebooks/pdf/manipulate/cli.py
+++ b/src/calibre/ebooks/pdf/manipulate/cli.py
@ -1,38 +1,31 @@
 '''
 Command line interface to run pdf manipulation commands.
 '''
 from __future__ import with_statement
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 '''
 Command line interface to run pdf manipulation commands.
 '''
 import string, sys
-from calibre.utils.config import Config, StringConfig
+from calibre.utils.config import OptionParser
-from calibre.ebooks.pdf.manipulate import info, merge, reverse, split, trim
+from calibre.utils.logging import Log
 from calibre.constants import preferred_encoding
 from calibre.customize.conversion import OptionRecommendation
 from calibre.ebooks.pdf.manipulate import crop, info, merge, reverse, split
 COMMANDS = {
             'crop'    : crop,
             'info'    : info,
             'merge'   : merge,
             'reverse' : reverse,
             'split'   : split,
             'trim'    : trim,
           }
-def config(defaults=None):
+USAGE = '%prog ' + _('''command ...
    desc = _('Options to control the transformation of pdf')
    if defaults is None:
        c = Config('manipulatepdf', desc)
    else:
        c = StringConfig(defaults, desc)
    return c
 def option_parser():
    c = config()
    return c.option_parser(usage=_('''\
 	%prog command ...
 command can be one of the following:
 [%%commands]
@ -40,15 +33,23 @@ def option_parser():
 Use %prog command --help to get more information about a specific command
 Manipulate a PDF.
-	'''.replace('%%commands', string.join(sorted(COMMANDS.keys()), ', '))))
+'''.replace('%%commands', string.join(sorted(COMMANDS.keys()), ', ')))
 def print_help(parser, log):
    '''
    Send the full help text of parser to log.
    The text is encoded with preferred_encoding; characters that cannot be
    encoded are replaced.
    '''
    help_text = parser.format_help()
    log(help_text.encode(preferred_encoding, 'replace'))
 def option_parser():
    '''
    Build the top level option parser, using the module level USAGE text.
    '''
    parser = OptionParser(usage=USAGE)
    return parser
 def main(args=sys.argv):
    log = Log()
    parser = option_parser()
    if len(args) < 2:
        print 'Error: No command sepecified.\n'
-        print parser.get_usage()
+        print_help(parser, log)
-        return 2
+        return 1
    command = args[1].lower().strip()
@ -58,12 +59,11 @@ def main(args=sys.argv):
    else:
        parser.parse_args(args)
        print 'Unknown command %s.\n' % command
-        print parser.get_usage()
+        print_help(parser, log)
-        return 2
+        return 1
    # We should never get here.
    return 0
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/pdf/manipulate/crop.py
+++ b/src/calibre/ebooks/pdf/manipulate/crop.py
@ -0,0 +1,155 @@
 # -*- coding: utf-8 -*-
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, James Beal <james_@catbus.co.uk>, ' \
                '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 '''
 Crop a pdf file
 '''
 import os, sys, re
 from optparse import OptionGroup, Option
 from calibre.ebooks.metadata.meta import metadata_from_formats
 from calibre.ebooks.metadata import authors_to_string
 from calibre.utils.config import OptionParser
 from calibre.utils.logging import Log
 from calibre.constants import preferred_encoding
 from calibre.customize.conversion import OptionRecommendation
 from pyPdf import PdfFileWriter, PdfFileReader
 DEFAULT_CROP = '10'
 USAGE = '%prog %%name ' + _('''
 [options] file.pdf
 Crop a PDF file.
 ''')
 # Command line options of the crop command. add_options() turns every entry
 # into an optparse option. The default is substituted after the _() call so
 # the translation lookup is done on the unformatted message.
 OPTIONS = set([
    OptionRecommendation(name='output', recommended_value='cropped.pdf',
        level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
        help=_('Path to output file. By default a file is created in the current directory.')),
    OptionRecommendation(name='bottom_left_x', recommended_value=DEFAULT_CROP,
        level=OptionRecommendation.LOW, long_switch='leftx', short_switch='x',
        help=_('Number of pixels to crop from the left most x (default is %s) ') % DEFAULT_CROP),
    OptionRecommendation(name='bottom_left_y', recommended_value=DEFAULT_CROP,
        level=OptionRecommendation.LOW, long_switch='lefty', short_switch='y',
        help=_('Number of pixels to crop from the left most y (default is %s) ') % DEFAULT_CROP),
    OptionRecommendation(name='top_right_x', recommended_value=DEFAULT_CROP,
        level=OptionRecommendation.LOW, long_switch='rightx', short_switch='v',
        help=_('Number of pixels to crop from the right most x (default is %s) ') % DEFAULT_CROP),
    # The switch must not contain whitespace: it is used as '--righty'.
    OptionRecommendation(name='top_right_y', recommended_value=DEFAULT_CROP,
        level=OptionRecommendation.LOW, long_switch='righty', short_switch='w',
        help=_('Number of pixels to crop from the right most y (default is %s)') % DEFAULT_CROP),
    OptionRecommendation(name='bounding', recommended_value=None,
        level=OptionRecommendation.LOW, long_switch='bounding', short_switch='b',
        help=_('A file generated by ghostscript which allows each page to be individually cropped `gs -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox file.pdf 2> bounding`')),
 ])
 def print_help(parser, log):
    '''
    Send the full help text of parser to log.
    The text is encoded with preferred_encoding; characters that cannot be
    encoded are replaced.
    '''
    help_text = parser.format_help()
    log(help_text.encode(preferred_encoding, 'replace'))
 def option_parser(name):
    '''
    Build the option parser for this command.
    name replaces the %%name placeholder of the module level USAGE text.
    '''
    return OptionParser(usage=USAGE.replace('%%name', name))
 def option_recommendation_to_cli_option(add_option, rec):
    '''
    Turn the option recommendation rec into an optparse Option and pass it
    to add_option. The short switch is only used when rec.option defines
    one; rec.recommended_value becomes the default of the option.
    '''
    option = rec.option
    flags = []
    if option.short_switch:
        flags.append('-' + option.short_switch)
    flags.append('--' + option.long_switch)
    cli_option = Option(*flags, dest=option.name, help=option.help,
                        choices=option.choices, default=rec.recommended_value)
    add_option(cli_option)
 def add_options(parser):
    '''
    Add a 'Crop Options:' group to parser and register every entry of
    OPTIONS in that group.
    '''
    crop_group = OptionGroup(parser, _('Crop Options:'), _('Options to control the transformation of pdf'))
    parser.add_option_group(crop_group)
    for recommendation in OPTIONS:
        option_recommendation_to_cli_option(crop_group.add_option, recommendation)
 def crop_pdf(pdf_path, opts, metadata=None):
    '''
    Crop every page of the PDF at pdf_path and write the result to
    opts.output.
    When opts.bounding names a file, its '%%BoundingBox:' lines are used, one
    per page and in page order, as the new media box of each page. Otherwise
    the media box of each page is set to its bleed box shrunk by
    opts.bottom_left_x, opts.bottom_left_y, opts.top_right_x and
    opts.top_right_y.
    metadata, when given, supplies the title and authors of the output
    document; both default to 'Unknown'.
    Raises Exception when the bounding file cannot be read, does not contain
    exactly one bounding box line per page, or contains a bounding box line
    that cannot be parsed.
    '''
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)
    bounding_regex = re.compile(r'%%BoundingBox: (?P<bottom_x>\d+) (?P<bottom_y>\d+) (?P<top_x>\d+) (?P<top_y>\d+)')
    # Keep the input open until the output is written: the pages added to
    # output_pdf still belong to input_pdf.
    with open(pdf_path, 'rb') as pdf_file:
        input_pdf = PdfFileReader(pdf_file)
        bounding_lines = []
        if opts.bounding is not None:
            try:
                # The file is closed again as soon as its lines are collected.
                with open(opts.bounding, 'r') as bounding:
                    bounding_lines = [line for line in bounding if line.startswith('%%BoundingBox:')]
            except IOError:
                raise Exception('Error reading %s' % opts.bounding)
            if len(bounding_lines) != input_pdf.numPages:
                raise Exception('Error bounding file %s page count does not correspond to specified pdf' % opts.bounding)
        else:
            # The margins default to the string DEFAULT_CROP and the options
            # declare no numeric type, so convert them before doing
            # arithmetic with the page coordinates.
            left, bottom = int(opts.bottom_left_x), int(opts.bottom_left_y)
            right, top = int(opts.top_right_x), int(opts.top_right_y)
        output_pdf = PdfFileWriter(title=title,author=author)
        for index, page in enumerate(input_pdf.pages):
            if opts.bounding is not None:
                # The count check above guarantees one line per page.
                mo = bounding_regex.search(bounding_lines[index])
                if mo is None:
                    raise Exception('Error in bounding file %s' % opts.bounding)
                page.mediaBox.upperRight = (mo.group('top_x'), mo.group('top_y'))
                page.mediaBox.lowerLeft  = (mo.group('bottom_x'), mo.group('bottom_y'))
            else:
                page.mediaBox.upperRight = (page.bleedBox.getUpperRight_x() - right, page.bleedBox.getUpperRight_y() - top)
                page.mediaBox.lowerLeft  = (page.bleedBox.getLowerLeft_x() + left, page.bleedBox.getLowerLeft_y() + bottom)
            output_pdf.addPage(page)
        with open(opts.output, 'wb') as output_file:
            output_pdf.write(output_file)
 # Return True if the pdf is valid.
 def valid_pdf(pdf_path):
    '''
    Report whether pdf_path can be opened as a PDF that is not encrypted and
    has at least one page. Any error while opening or reading counts as
    not valid.
    '''
    try:
        with open(os.path.abspath(pdf_path), 'rb') as stream:
            reader = PdfFileReader(stream)
            readable = not (reader.isEncrypted or reader.numPages <= 0)
    except:
        return False
    return readable
 def main(args=sys.argv, name=''):
    '''
    Command line entry point of the crop command.
    args is the full argument list including the program name in first
    position; name is the command name shown in the usage text.
    Returns 0 on success and 1 when no PDF was given or the PDF could not
    be read.
    '''
    log = Log()
    parser = option_parser(name)
    add_options(parser)
    opts, args = parser.parse_args(args)
    # Drop the program name; what is left are the positional arguments.
    args = args[1:]
    if len(args) < 1:
        print 'Error: A PDF file is required.\n'
        print_help(parser, log)
        return 1
    if not valid_pdf(args[0]):
        print 'Error: Could not read file `%s`. Is it a valid PDF file or is it encrypted/DRMed?' % args[0]
        return 1
    mi = metadata_from_formats([args[0]])
    crop_pdf(args[0], opts, mi)
    return 0
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/pdf/manipulate/info.py
+++ b/src/calibre/ebooks/pdf/manipulate/info.py
@ -1,34 +1,37 @@
 '''
 Merge PDF files into a single PDF document.
 '''
 from __future__ import with_statement
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
-import os, re, sys, time
+'''
 Get information about a PDF file.
 '''
-from calibre.utils.config import Config, StringConfig
+import os, re, sys, time
 from optparse import OptionGroup, Option
 from calibre.utils.config import OptionParser
 from calibre.utils.logging import Log
 from calibre.constants import preferred_encoding
 from calibre.customize.conversion import OptionRecommendation
 from pyPdf import PdfFileWriter, PdfFileReader
-
+USAGE = '%prog %%name ' + _('''
-def config(defaults=None):
+file.pdf ...
    desc = _('Options to control the transformation of pdf')
    if defaults is None:
        c = Config('manipulatepdf', desc)
    else:
        c = StringConfig(defaults, desc)
    return c
 def option_parser(name):
    c = config()
    return c.option_parser(usage=_('''\
 	%prog %%name [options] file.pdf ...
 Get info about a PDF.
-	'''.replace('%%name', name)))
+''')
 def print_help(parser, log):
    '''
    Send the full help text of parser to log.
    The text is encoded with preferred_encoding; characters that cannot be
    encoded are replaced.
    '''
    help_text = parser.format_help()
    log(help_text.encode(preferred_encoding, 'replace'))
 def option_parser(name):
    '''
    Build the option parser for this command.
    name replaces the %%name placeholder of the module level USAGE text.
    '''
    return OptionParser(usage=USAGE.replace('%%name', name))
 def print_info(pdf_path):
    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
@ -65,20 +68,22 @@ def verify_files(files):
    return invalid
 def main(args=sys.argv, name=''):
    log = Log()
    parser = option_parser(name)
    opts, args = parser.parse_args(args)
    args = args[1:]
    if len(args) < 1:
        print 'Error: No PDF sepecified.\n'
-        print parser.get_usage()
+        print_help(parser, log)
-        return 2
+        return 1
    bad_pdfs = verify_files(args)
    if bad_pdfs != []:
        for pdf in bad_pdfs:
            print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf
-        return 2
+        return 1
    for pdf in args:
        print_info(pdf)
@ -87,4 +92,3 @@ def main(args=sys.argv, name=''):
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/pdf/manipulate/merge.py
+++ b/src/calibre/ebooks/pdf/manipulate/merge.py
@ -1,37 +1,63 @@
 '''
 Merge PDF files into a single PDF document.
 '''
 from __future__ import with_statement
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 '''
 Merge PDF files into a single PDF document.
 '''
 import os, sys
 from optparse import OptionGroup, Option
 from calibre.ebooks.metadata.meta import metadata_from_formats
 from calibre.ebooks.metadata import authors_to_string
-from calibre.utils.config import Config, StringConfig
+from calibre.utils.config import OptionParser
 from calibre.utils.logging import Log
 from calibre.constants import preferred_encoding
 from calibre.customize.conversion import OptionRecommendation
 from pyPdf import PdfFileWriter, PdfFileReader
-def config(defaults=None):
+USAGE = '%prog %%name ' + _('''
-    desc = _('Options to control the transformation of pdf')
+[options] file1.pdf file2.pdf ...
-    if defaults is None:
+
-        c = Config('mergepdf', desc)
+Metadata will be used from the first PDF specified.
-    else:
+
-        c = StringConfig(defaults, desc)
+Merges individual PDFs.
-    c.add_opt('output', ['-o', '--output'], default='merged.pdf',
+''')
-          help=_('Path to output file. By default a file is created in the current directory.'))
+
-    return c
+OPTIONS = set([
    OptionRecommendation(name='output', recommended_value='merged.pdf',
        level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
        help=_('Path to output file. By default a file is created in the current directory.')),
 ])
 def print_help(parser, log):
    help = parser.format_help().encode(preferred_encoding, 'replace')
    log(help)
 def option_parser(name):
-    c = config()
+    usage = USAGE.replace('%%name', name)
-    return c.option_parser(usage=_('''\
+    return OptionParser(usage=usage)
 	%prog %%name [options] file1.pdf file2.pdf ...
-	Merges individual PDFs. Metadata will be used from the first PDF specified.
+def option_recommendation_to_cli_option(add_option, rec):
-	'''.replace('%%name', name)))
+    opt = rec.option
    switches = ['-'+opt.short_switch] if opt.short_switch else []
    switches.append('--'+opt.long_switch)
    attrs = dict(dest=opt.name, help=opt.help,
                     choices=opt.choices, default=rec.recommended_value)
    add_option(Option(*switches, **attrs))
 def add_options(parser):
    '''
    Add a 'Merge Options:' group to parser and register every entry of
    OPTIONS in that group.
    '''
    merge_group = OptionGroup(parser, _('Merge Options:'), _('Options to control the transformation of pdf'))
    parser.add_option_group(merge_group)
    for recommendation in OPTIONS:
        option_recommendation_to_cli_option(merge_group.add_option, recommendation)
 def merge_files(in_paths, out_path, metadata=None):
    if metadata == None:
@ -65,20 +91,23 @@ def verify_files(files):
    return invalid
 def main(args=sys.argv, name=''):
    log = Log()
    parser = option_parser(name)
    add_options(parser)
    opts, args = parser.parse_args(args)
    args = args[1:]
    if len(args) < 2:
-        print 'Error: Two or more PDF files are required.\n\n'
+        print 'Error: Two or more PDF files are required.\n'
-        print parser.get_usage()
+        print_help(parser, log)
-        return 2
+        return 1
    bad_pdfs = verify_files(args)
    if bad_pdfs != []:
        for pdf in bad_pdfs:
            print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf
-        return 2
+        return 1
    mi = metadata_from_formats([args[0]])
@ -88,4 +117,3 @@ def main(args=sys.argv, name=''):
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/pdf/manipulate/reverse.py
+++ b/src/calibre/ebooks/pdf/manipulate/reverse.py
@ -10,30 +10,52 @@ Reverse content of PDF.
 '''
 import os, sys
 from optparse import OptionGroup, Option
 from calibre.ebooks.metadata.meta import metadata_from_formats
 from calibre.ebooks.metadata import authors_to_string
-from calibre.utils.config import Config, StringConfig
+from calibre.utils.config import OptionParser
 from calibre.utils.logging import Log
 from calibre.constants import preferred_encoding
 from calibre.customize.conversion import OptionRecommendation
 from pyPdf import PdfFileWriter, PdfFileReader
-def config(defaults=None):
+USAGE = '%prog %%name ' + _('''
-    desc = _('Options to control the transformation of pdf')
+[options] file.pdf
    if defaults is None:
        c = Config('reversepdf', desc)
    else:
        c = StringConfig(defaults, desc)
    c.add_opt('output', ['-o', '--output'], default='reversed.pdf',
          help=_('Path to output file. By default a file is created in the current directory.'))
    return c
 def option_parser(name):
    c = config()
    return c.option_parser(usage=_('''\
 	%prog %%name [options] file1.pdf
 Reverse PDF.
-	'''.replace('%%name', name)))
+''')
 # Command line options of the reverse command. add_options() turns every
 # entry into an optparse option; the recommended value is used as default.
 OPTIONS = set([
    OptionRecommendation(name='output', recommended_value='reversed.pdf',
        level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
        help=_('Path to output file. By default a file is created in the current directory.')),
 ])
 def print_help(parser, log):
    '''
    Send the full help text of parser to log.
    The text is encoded with preferred_encoding; characters that cannot be
    encoded are replaced.
    '''
    help_text = parser.format_help()
    log(help_text.encode(preferred_encoding, 'replace'))
 def option_parser(name):
    '''
    Build the option parser for this command.
    name replaces the %%name placeholder of the module level USAGE text.
    '''
    return OptionParser(usage=USAGE.replace('%%name', name))
 def option_recommendation_to_cli_option(add_option, rec):
    '''
    Turn the option recommendation rec into an optparse Option and pass it
    to add_option. The short switch is only used when rec.option defines
    one; rec.recommended_value becomes the default of the option.
    '''
    option = rec.option
    flags = []
    if option.short_switch:
        flags.append('-' + option.short_switch)
    flags.append('--' + option.long_switch)
    cli_option = Option(*flags, dest=option.name, help=option.help,
                        choices=option.choices, default=rec.recommended_value)
    add_option(cli_option)
 def add_options(parser):
    '''
    Add a 'Reverse Options:' group to parser and register every entry of
    OPTIONS in that group.
    '''
    reverse_group = OptionGroup(parser, _('Reverse Options:'), _('Options to control the transformation of pdf'))
    parser.add_option_group(reverse_group)
    for recommendation in OPTIONS:
        option_recommendation_to_cli_option(reverse_group.add_option, recommendation)
 def reverse(pdf_path, out_path, metadata=None):
    if metadata == None:
@ -63,20 +85,22 @@ def valid_pdf(pdf_path):
        return False
    return True
 def main(args=sys.argv, name=''):
    log = Log()
    parser = option_parser(name)
    add_options(parser)
    opts, args = parser.parse_args(args)
    args = args[1:]
    if len(args) < 1:
-        print 'Error: A PDF file is required.\n\n'
+        print 'Error: A PDF file is required.\n'
-        print parser.get_usage()
+        print_help(parser, log)
-        return 2
+        return 1
    if not valid_pdf(args[0]):
        print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % args[0]
-        return 2
+        return 1
    mi = metadata_from_formats([args[0]])
--- a/src/calibre/ebooks/pdf/manipulate/split.py
+++ b/src/calibre/ebooks/pdf/manipulate/split.py
@ -1,35 +1,27 @@
-'''
+# -*- coding: utf-8 -*-
 Split PDF file into multiple PDF documents.
 '''
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 '''
 Split PDF file into multiple PDF documents.
 '''
 import os, sys, re
 from optparse import OptionGroup, Option
 from calibre.ebooks.metadata.meta import metadata_from_formats
 from calibre.ebooks.metadata import authors_to_string
-from calibre.utils.config import Config, StringConfig
+from calibre.utils.config import OptionParser
 from calibre.utils.logging import Log
 from calibre.constants import preferred_encoding
 from calibre.customize.conversion import OptionRecommendation
 from pyPdf import PdfFileWriter, PdfFileReader
-def config(defaults=None):
+USAGE = _('''
    desc = _('Options to control the transformation of pdf')
    if defaults is None:
        c = Config('splitpdf', desc)
    else:
        c = StringConfig(defaults, desc)
    c.add_opt('output', ['-o', '--output'], default='split.pdf',
          help=_('Path to output file. By default a file is created in the current directory. \
            The file name will be the base name for the output.'))
    return c
 def option_parser(name):
    c = config()
    return c.option_parser(usage=_('''\
 %prog %%name [options] file.pdf page_to_split_on ...
 %prog %%name [options] file.pdf page_range_to_split_on ...
@ -40,7 +32,37 @@ def option_parser(name):
 %prog %%name file.pdf 6-12 8 10 9-20
 Split a PDF.
-	'''.replace('%%name', name)))
+''')
 # Command line options of the split command. add_options() turns every
 # entry into an optparse option; the recommended value is used as default.
 OPTIONS = set([
    OptionRecommendation(name='output', recommended_value='split.pdf',
        level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
        help=_('Path to output file. By default a file is created in the current directory.')),
 ])
 def print_help(parser, log):
    '''
    Send the full help text of parser to log.
    The text is encoded with preferred_encoding; characters that cannot be
    encoded are replaced.
    '''
    help_text = parser.format_help()
    log(help_text.encode(preferred_encoding, 'replace'))
 def option_parser(name):
    '''
    Build the option parser for this command.
    name replaces every %%name placeholder of the module level USAGE text.
    '''
    return OptionParser(usage=USAGE.replace('%%name', name))
 def option_recommendation_to_cli_option(add_option, rec):
    '''
    Turn the option recommendation rec into an optparse Option and pass it
    to add_option. The short switch is only used when rec.option defines
    one; rec.recommended_value becomes the default of the option.
    '''
    option = rec.option
    flags = []
    if option.short_switch:
        flags.append('-' + option.short_switch)
    flags.append('--' + option.long_switch)
    cli_option = Option(*flags, dest=option.name, help=option.help,
                        choices=option.choices, default=rec.recommended_value)
    add_option(cli_option)
 def add_options(parser):
    '''
    Add a 'Split Options:' group to parser and register every entry of
    OPTIONS in that group.
    '''
    split_group = OptionGroup(parser, _('Split Options:'), _('Options to control the transformation of pdf'))
    parser.add_option_group(split_group)
    for recommendation in OPTIONS:
        option_recommendation_to_cli_option(split_group.add_option, recommendation)
 def split_pdf(in_path, pages, page_ranges, out_name, metadata=None):
    pdf = PdfFileReader(open(os.path.abspath(in_path), 'rb'))
@ -153,25 +175,28 @@ def valid_pdf(pdf_path):
    return True
 def main(args=sys.argv, name=''):
    log = Log()
    parser = option_parser(name)
    add_options(parser)
    opts, args = parser.parse_args(args)
    pdf, pages, page_ranges, unknown = split_args(args[1:])
    if pdf == '' and (pages == [] or page_ranges == []):
-        print 'Error: PDF and where to split is required.\n\n'
+        print 'Error: PDF and where to split is required.\n'
-        print parser.get_usage()
+        print_help(parser, log)
-        return 2
+        return 1
    if unknown != []:
        for arg in unknown:
            print 'Error: Unknown argument `%s`' % arg
-        print parser.get_usage()
+        print_help(parser, log)
-        return 2
+        return 1
    if not valid_pdf(pdf):
        print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf
-        return 2
+        return 1
    pages, page_ranges = clean_page_list(pdf, pages, page_ranges)
@ -183,4 +208,3 @@ def main(args=sys.argv, name=''):
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/pdf/manipulate/trim.py
+++ b/src/calibre/ebooks/pdf/manipulate/trim.py
@ -1,93 +0,0 @@
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, James Beal, james_@catbus.co.uk'
 __docformat__ = 'restructuredtext en'
 'crop a pdf file'
 import os, sys, re
 from calibre.utils.config import Config, StringConfig
 from pyPdf import PdfFileWriter, PdfFileReader
 def config(defaults=None):
    desc = _('Options to control the transformation of pdf')
    default_crop=10
    if defaults is None:
        c = Config('trimpdf', desc)
    else:
        c = StringConfig(defaults, desc)
    c.add_opt('output', ['-o', '--output'],default='cropped.pdf',
          help=_('Path to output file. By default a file is created in the current directory.'))
    c.add_opt('bottom_left_x', [ '-x', '--leftx'], default=default_crop,
          help=_('Number of pixels to crop from the left most x (default is %d) ')%default_crop )
    c.add_opt('bottom_left_y', [ '-y', '--lefty'], default=default_crop,
          help=_('Number of pixels to crop from the left most y (default is %d) ')%default_crop )
    c.add_opt('top_right_x', [ '-v', '--rightx'], default=default_crop,
          help=_('Number of pixels to crop from the right most x (default is %d) ')%default_crop )
    c.add_opt('top_right_y', [ '-w', '--righty'], default=default_crop,
          help=_('Number of pixels to crop from the right most y (default is %d)')%default_crop )
    c.add_opt('bounding', ['-b', '--bounding'],
          help=_('A file generated by ghostscript which allows each page to be individually cropped [gs -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox > bounding] '))
    return c
 def option_parser(name):
    c = config()
    return c.option_parser(usage=_('''\
 	%prog %%name [options] file.pdf
 	Crops a pdf. 
 	'''.replace('%%name', name)))
 def main(args=sys.argv, name=''):
    parser = option_parser(name)
    opts, args = parser.parse_args(args)
    try:
        source = os.path.abspath(args[1])
        input_pdf = PdfFileReader(file(source, "rb"))
    except:
        print "Unable to read input"
        return 2
    title   = _('Unknown')
    author  = _('Unknown')
    try:
        info = input_pdf.getDocumentInfo()
        if info.title:
            title   = info.title
        if info.author:
            author  = info.author
    except:
        pass
    if opts.bounding != None:
        try:
            bounding = open( opts.bounding , 'r' )
            bounding_regex= re.compile('%%BoundingBox: (?P<bottom_x>[0-9]+) (?P<bottom_y>[0-9]+) (?P<top_x>[0-9]+) (?P<top_y>[0-9]+)')
        except:
            print 'Error opening %s' % opts.bounding 
            return 1
    output_pdf = PdfFileWriter(title=title,author=author)
    for page_number in range (0, input_pdf.getNumPages() ):
        page = input_pdf.getPage(page_number)
        if opts.bounding != None:
            while True:
                line=bounding.readline()
                match=bounding_regex.search(line)
                if match !=None:
                    break
            page.mediaBox.upperRight = (match.group('top_x'),match.group('top_y'))
            page.mediaBox.lowerLeft  = (match.group('bottom_x'),match.group('bottom_y'))
        else:
            page.mediaBox.upperRight = (page.bleedBox.getUpperRight_x()-opts.top_right_x,page.bleedBox.getUpperRight_y()-opts.top_right_y)
            page.mediaBox.lowerLeft  = (page.bleedBox.getLowerLeft_x()+opts.bottom_left_x,page.bleedBox.getLowerLeft_y()+opts.bottom_left_y)
        output_pdf.addPage(page)
    if opts.bounding != None:
        bounding.close()
    output_file = file(opts.output, "wb")
    output_pdf.write(output_file)
    output_file.close()
    return 0
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/pdf/pdftohtml.py
+++ b/src/calibre/ebooks/pdf/pdftohtml.py
@ -2,8 +2,8 @@
 from __future__ import with_statement
 __license__ = 'GPL 3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> \
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>, ' \
-                 2009, John Schember <john@nachtimwald.com>'
+                '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import errno, os, sys, subprocess