From 70e3ea15bba8c1143508b435b3871cdb6b2cebee Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Thu, 2 Apr 2009 20:55:46 -0400
Subject: [PATCH 1/3] move pdf manipulate into its own module

---
 src/calibre/ebooks/pdf/{manipulate.py => manipulate/cli.py} | 2 +-
 src/calibre/ebooks/pdf/{ => manipulate}/info.py             | 0
 src/calibre/ebooks/pdf/{ => manipulate}/merge.py            | 0
 src/calibre/ebooks/pdf/{ => manipulate}/reverse.py          | 0
 src/calibre/ebooks/pdf/{ => manipulate}/split.py            | 0
 src/calibre/ebooks/pdf/{ => manipulate}/trim.py             | 0
 src/calibre/linux.py                                        | 2 +-
 7 files changed, 2 insertions(+), 2 deletions(-)
 rename src/calibre/ebooks/pdf/{manipulate.py => manipulate/cli.py} (95%)
 rename src/calibre/ebooks/pdf/{ => manipulate}/info.py (100%)
 rename src/calibre/ebooks/pdf/{ => manipulate}/merge.py (100%)
 rename src/calibre/ebooks/pdf/{ => manipulate}/reverse.py (100%)
 rename src/calibre/ebooks/pdf/{ => manipulate}/split.py (100%)
 rename src/calibre/ebooks/pdf/{ => manipulate}/trim.py (100%)

diff --git a/src/calibre/ebooks/pdf/manipulate.py b/src/calibre/ebooks/pdf/manipulate/cli.py
similarity index 95%
rename from src/calibre/ebooks/pdf/manipulate.py
rename to src/calibre/ebooks/pdf/manipulate/cli.py
index 8c49650730..e82946f2ea 100644
--- a/src/calibre/ebooks/pdf/manipulate.py
+++ b/src/calibre/ebooks/pdf/manipulate/cli.py
@@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
 import string, sys
 
 from calibre.utils.config import Config, StringConfig
-from calibre.ebooks.pdf import info, merge, reverse, split, trim
+from calibre.ebooks.pdf.manipulate import info, merge, reverse, split, trim
 
 COMMANDS = {
              'info'    : info,
diff --git a/src/calibre/ebooks/pdf/info.py b/src/calibre/ebooks/pdf/manipulate/info.py
similarity index 100%
rename from src/calibre/ebooks/pdf/info.py
rename to src/calibre/ebooks/pdf/manipulate/info.py
diff --git a/src/calibre/ebooks/pdf/merge.py b/src/calibre/ebooks/pdf/manipulate/merge.py
similarity index 100%
rename from src/calibre/ebooks/pdf/merge.py
rename to src/calibre/ebooks/pdf/manipulate/merge.py
diff --git a/src/calibre/ebooks/pdf/reverse.py b/src/calibre/ebooks/pdf/manipulate/reverse.py
similarity index 100%
rename from src/calibre/ebooks/pdf/reverse.py
rename to src/calibre/ebooks/pdf/manipulate/reverse.py
diff --git a/src/calibre/ebooks/pdf/split.py b/src/calibre/ebooks/pdf/manipulate/split.py
similarity index 100%
rename from src/calibre/ebooks/pdf/split.py
rename to src/calibre/ebooks/pdf/manipulate/split.py
diff --git a/src/calibre/ebooks/pdf/trim.py b/src/calibre/ebooks/pdf/manipulate/trim.py
similarity index 100%
rename from src/calibre/ebooks/pdf/trim.py
rename to src/calibre/ebooks/pdf/manipulate/trim.py
diff --git a/src/calibre/linux.py b/src/calibre/linux.py
index b680ecc304..592a25c170 100644
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@@ -39,7 +39,7 @@ entry_points = {
              'calibre-fontconfig = calibre.utils.fontconfig:main',
              'calibre-parallel   = calibre.parallel:main',
              'calibre-customize  = calibre.customize.ui:main',
-             'pdfmanipulate      = calibre.ebooks.pdf.manipulate:main',
+             'pdfmanipulate      = calibre.ebooks.pdf.manipulate.cli:main',
              'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
              'calibre-smtp = calibre.utils.smtp:main',
         ],

From 7f5a619ad9e65cdf64b3d2a825d4516575c0e7c6 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Fri, 3 Apr 2009 21:06:31 -0400
Subject: [PATCH 2/3] pdfmanipulate moved to new command line option framework

---
 src/calibre/ebooks/__init__.py                |   1 +
 src/calibre/ebooks/pdf/manipulate/__init__.py |   0
 src/calibre/ebooks/pdf/manipulate/cli.py      |  62 +++----
 src/calibre/ebooks/pdf/manipulate/crop.py     | 155 ++++++++++++++++++
 src/calibre/ebooks/pdf/manipulate/info.py     |  48 +++---
 src/calibre/ebooks/pdf/manipulate/merge.py    |  74 ++++++---
 src/calibre/ebooks/pdf/manipulate/reverse.py  |  64 +++++---
 src/calibre/ebooks/pdf/manipulate/split.py    |  92 +++++++----
 src/calibre/ebooks/pdf/manipulate/trim.py     |  93 -----------
 src/calibre/ebooks/pdf/pdftohtml.py           |   4 +-
 10 files changed, 368 insertions(+), 225 deletions(-)
 create mode 100644 src/calibre/ebooks/pdf/manipulate/__init__.py
 create mode 100644 src/calibre/ebooks/pdf/manipulate/crop.py
 delete mode 100644 src/calibre/ebooks/pdf/manipulate/trim.py

diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py
index e208b5a688..26d2394818 100644
--- a/src/calibre/ebooks/__init__.py
+++ b/src/calibre/ebooks/__init__.py
@@ -1,3 +1,4 @@
+from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 
diff --git a/src/calibre/ebooks/pdf/manipulate/__init__.py b/src/calibre/ebooks/pdf/manipulate/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/calibre/ebooks/pdf/manipulate/cli.py b/src/calibre/ebooks/pdf/manipulate/cli.py
index e82946f2ea..e3fcef559c 100644
--- a/src/calibre/ebooks/pdf/manipulate/cli.py
+++ b/src/calibre/ebooks/pdf/manipulate/cli.py
@@ -1,69 +1,69 @@
-'''
-Command line interface to run pdf manipulation commands.
-'''
 from __future__ import with_statement
+# -*- coding: utf-8 -*-
 
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
+'''
+Command line interface to run pdf manipulation commands.
+'''
+
 import string, sys
 
-from calibre.utils.config import Config, StringConfig
-from calibre.ebooks.pdf.manipulate import info, merge, reverse, split, trim
+from calibre.utils.config import OptionParser
+from calibre.utils.logging import Log
+from calibre.constants import preferred_encoding
+from calibre.customize.conversion import OptionRecommendation
+from calibre.ebooks.pdf.manipulate import crop, info, merge, reverse, split
 
 COMMANDS = {
+             'crop'    : crop,
              'info'    : info,
              'merge'   : merge,
              'reverse' : reverse,
              'split'   : split,
-             'trim'    : trim,
            }
 
-def config(defaults=None):
-    desc = _('Options to control the transformation of pdf')
-    if defaults is None:
-        c = Config('manipulatepdf', desc)
-    else:
-        c = StringConfig(defaults, desc)
-    return c
+USAGE = '%prog ' + _('''command ...
+	
+command can be one of the following:
+[%%commands]
+
+Use %prog command --help to get more information about a specific command
+
+Manipulate a PDF.
+''').replace('%%commands', string.join(sorted(COMMANDS.keys()), ', '))  # substitute after translating so the msgid matches the catalog
+
+def print_help(parser, log):
+    help = parser.format_help().encode(preferred_encoding, 'replace')
+    log(help)
 
 def option_parser():
-    c = config()
-    return c.option_parser(usage=_('''\
-    
-	%prog command ...
-	
-	command can be one of the following:
-	[%%commands]
-	
-	Use %prog command --help to get more information about a specific command
-	
-	Manipulate a PDF.
-	'''.replace('%%commands', string.join(sorted(COMMANDS.keys()), ', '))))
+    return OptionParser(usage=USAGE)
 
 def main(args=sys.argv):
+    log = Log()
     parser = option_parser()
 
     if len(args) < 2:
         print 'Error: No command sepecified.\n'
-        print parser.get_usage()
-        return 2
+        print_help(parser, log)
+        return 1
     
     command = args[1].lower().strip()
     
-    if command in COMMANDS.keys():    
+    if command in COMMANDS.keys():
         del args[1]
         return COMMANDS[command].main(args, command)
     else:
         parser.parse_args(args)
         print 'Unknown command %s.\n' % command
-        print parser.get_usage()
-        return 2
+        print_help(parser, log)
+        return 1
     
     # We should never get here.
     return 0
 
 if __name__ == '__main__':
     sys.exit(main())
-
diff --git a/src/calibre/ebooks/pdf/manipulate/crop.py b/src/calibre/ebooks/pdf/manipulate/crop.py
new file mode 100644
index 0000000000..c3eb70c56d
--- /dev/null
+++ b/src/calibre/ebooks/pdf/manipulate/crop.py
@@ -0,0 +1,155 @@
+# -*- coding: utf-8 -*-
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, James Beal <james_@catbus.co.uk>, ' \
+                '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Crop a pdf file
+'''
+
+import os, sys, re
+from optparse import OptionGroup, Option
+
+from calibre.ebooks.metadata.meta import metadata_from_formats
+from calibre.ebooks.metadata import authors_to_string
+from calibre.utils.config import OptionParser
+from calibre.utils.logging import Log
+from calibre.constants import preferred_encoding
+from calibre.customize.conversion import OptionRecommendation
+
+from pyPdf import PdfFileWriter, PdfFileReader
+
+DEFAULT_CROP = '10'
+
+USAGE = '%prog %%name ' + _('''
+[options] file.pdf
+
+Crop a PDF file.
+''')
+
+OPTIONS = set([
+    OptionRecommendation(name='output', recommended_value='cropped.pdf',
+        level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
+        help=_('Path to output file. By default a file is created in the current directory.')),
+    OptionRecommendation(name='bottom_left_x', recommended_value=DEFAULT_CROP,
+        level=OptionRecommendation.LOW, long_switch='leftx', short_switch='x',
+        help=_('Number of pixels to crop from the left most x (default is %s) ') % DEFAULT_CROP),  # interpolate after translating so the msgid stays constant
+    OptionRecommendation(name='bottom_left_y', recommended_value=DEFAULT_CROP,
+        level=OptionRecommendation.LOW, long_switch='lefty', short_switch='y',
+        help=_('Number of pixels to crop from the left most y (default is %s) ') % DEFAULT_CROP),
+    OptionRecommendation(name='top_right_x', recommended_value=DEFAULT_CROP,
+        level=OptionRecommendation.LOW, long_switch='rightx', short_switch='v',
+        help=_('Number of pixels to crop from the right most x (default is %s) ') % DEFAULT_CROP),
+    OptionRecommendation(name='top_right_y', recommended_value=DEFAULT_CROP,
+        level=OptionRecommendation.LOW, long_switch='righty', short_switch='w',  # no space: the switch must be typeable as --righty
+        help=_('Number of pixels to crop from the right most y (default is %s)') % DEFAULT_CROP),
+    OptionRecommendation(name='bounding', recommended_value=None,
+        level=OptionRecommendation.LOW, long_switch='bounding', short_switch='b',
+        help=_('A file generated by ghostscript which allows each page to be individually cropped `gs -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox file.pdf 2> bounding`')),
+])
+
+def print_help(parser, log):
+    help = parser.format_help().encode(preferred_encoding, 'replace')
+    log(help)
+
+def option_parser(name):
+    usage = USAGE.replace('%%name', name)
+    return OptionParser(usage=usage)
+
+def option_recommendation_to_cli_option(add_option, rec):
+    opt = rec.option
+    switches = ['-'+opt.short_switch] if opt.short_switch else []
+    switches.append('--'+opt.long_switch)
+    attrs = dict(dest=opt.name, help=opt.help,
+                     choices=opt.choices, default=rec.recommended_value)
+    add_option(Option(*switches, **attrs))
+
+def add_options(parser):
+    group = OptionGroup(parser, _('Crop Options:'), _('Options to control the transformation of pdf'))
+    parser.add_option_group(group)
+    add_option = group.add_option
+    
+    for rec in OPTIONS:
+        option_recommendation_to_cli_option(add_option, rec)
+
+def crop_pdf(pdf_path, opts, metadata=None):
+    '''
+    Crop every page of pdf_path and write the result to opts.output.
+
+    With opts.bounding set, each page is cropped to the matching %%BoundingBox
+    line of that file; otherwise the opts margins are trimmed off the bleed box.
+    metadata, when given, supplies the title and authors of the output.
+    '''
+    title = author = _('Unknown')
+    if metadata is not None:
+        title, author = metadata.title, authors_to_string(metadata.authors)
+
+    input_pdf = PdfFileReader(open(pdf_path, 'rb'))
+    bounding_regex = re.compile(r'%%BoundingBox: (?P<bottom_x>\d+) (?P<bottom_y>\d+) (?P<top_x>\d+) (?P<top_y>\d+)')
+    bounding_lines = []
+    if opts.bounding is not None:
+        try:
+            with open(opts.bounding, 'r') as bounding:
+                bounding_lines = [line for line in bounding if line.startswith('%%BoundingBox:')]
+        except (IOError, OSError):
+            raise Exception('Error reading %s' % opts.bounding)
+        if len(bounding_lines) != input_pdf.numPages:
+            raise Exception('Error bounding file %s page count does not correspond to specified pdf' % opts.bounding)
+
+    output_pdf = PdfFileWriter(title=title,author=author)
+    blines = iter(bounding_lines)
+    for page in input_pdf.pages:
+        if bounding_lines:
+            mo = bounding_regex.search(blines.next())
+            if mo is None:
+                raise Exception('Error in bounding file %s' % opts.bounding)
+            page.mediaBox.upperRight = (mo.group('top_x'), mo.group('top_y'))
+            page.mediaBox.lowerLeft  = (mo.group('bottom_x'), mo.group('bottom_y'))
+        else:
+            # The margins arrive from optparse as strings; convert before the arithmetic.
+            page.mediaBox.upperRight = (page.bleedBox.getUpperRight_x() - int(opts.top_right_x), page.bleedBox.getUpperRight_y() - int(opts.top_right_y))
+            page.mediaBox.lowerLeft  = (page.bleedBox.getLowerLeft_x() + int(opts.bottom_left_x), page.bleedBox.getLowerLeft_y() + int(opts.bottom_left_y))
+        output_pdf.addPage(page)
+        
+    with open(opts.output, 'wb') as output_file:
+        output_pdf.write(output_file)
+
+# Return True if the pdf is valid.
+def valid_pdf(pdf_path):
+    try:
+        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
+            pdf = PdfFileReader(pdf_file)
+            if pdf.isEncrypted or pdf.numPages <= 0:
+                raise Exception
+    except:
+        return False
+    return True
+    
+def main(args=sys.argv, name=''):
+    log = Log()
+    parser = option_parser(name)
+    add_options(parser)
+    
+    opts, args = parser.parse_args(args)
+    args = args[1:]
+    
+    if len(args) < 1:
+        print 'Error: A PDF file is required.\n'
+        print_help(parser, log)
+        return 1
+    
+    if not valid_pdf(args[0]):
+        print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % args[0]
+        return 1
+    
+    mi = metadata_from_formats([args[0]])
+    
+    crop_pdf(args[0], opts, mi)
+
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/src/calibre/ebooks/pdf/manipulate/info.py b/src/calibre/ebooks/pdf/manipulate/info.py
index 115e411ce4..4aff524330 100644
--- a/src/calibre/ebooks/pdf/manipulate/info.py
+++ b/src/calibre/ebooks/pdf/manipulate/info.py
@@ -1,34 +1,37 @@
-'''
-Merge PDF files into a single PDF document.
-'''
 from __future__ import with_statement
+# -*- coding: utf-8 -*-
 
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-import os, re, sys, time
+'''
+Get information about PDF files.
+'''
 
-from calibre.utils.config import Config, StringConfig
+import os, re, sys, time
+from optparse import OptionGroup, Option
+
+from calibre.utils.config import OptionParser
+from calibre.utils.logging import Log
+from calibre.constants import preferred_encoding
+from calibre.customize.conversion import OptionRecommendation
 
 from pyPdf import PdfFileWriter, PdfFileReader
 
+USAGE = '%prog %%name ' + _('''
+file.pdf ...
 
-def config(defaults=None):
-    desc = _('Options to control the transformation of pdf')
-    if defaults is None:
-        c = Config('manipulatepdf', desc)
-    else:
-        c = StringConfig(defaults, desc)
-    return c
+Get info about a PDF.
+''')
+
+def print_help(parser, log):
+    help = parser.format_help().encode(preferred_encoding, 'replace')
+    log(help)
 
 def option_parser(name):
-    c = config()
-    return c.option_parser(usage=_('''\
-	%prog %%name [options] file.pdf ...
-
-	Get info about a PDF.
-	'''.replace('%%name', name)))
+    usage = USAGE.replace('%%name', name)
+    return OptionParser(usage=usage)
 
 def print_info(pdf_path):
     with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
@@ -65,20 +68,22 @@ def verify_files(files):
     return invalid
 
 def main(args=sys.argv, name=''):
+    log = Log()
     parser = option_parser(name)
+    
     opts, args = parser.parse_args(args)
     args = args[1:]
     
     if len(args) < 1:
         print 'Error: No PDF sepecified.\n'
-        print parser.get_usage()
-        return 2
+        print_help(parser, log)
+        return 1
     
     bad_pdfs = verify_files(args)
     if bad_pdfs != []:
         for pdf in bad_pdfs:
             print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf
-        return 2
+        return 1
         
     for pdf in args:
         print_info(pdf)
@@ -87,4 +92,3 @@ def main(args=sys.argv, name=''):
 
 if __name__ == '__main__':
     sys.exit(main())
-
diff --git a/src/calibre/ebooks/pdf/manipulate/merge.py b/src/calibre/ebooks/pdf/manipulate/merge.py
index c0385080ad..f0ecb9bd7a 100644
--- a/src/calibre/ebooks/pdf/manipulate/merge.py
+++ b/src/calibre/ebooks/pdf/manipulate/merge.py
@@ -1,37 +1,63 @@
-'''
-Merge PDF files into a single PDF document.
-'''
 from __future__ import with_statement
+# -*- coding: utf-8 -*-
 
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
+'''
+Merge PDF files into a single PDF document.
+'''
+
 import os, sys
+from optparse import OptionGroup, Option
 
 from calibre.ebooks.metadata.meta import metadata_from_formats
 from calibre.ebooks.metadata import authors_to_string
-from calibre.utils.config import Config, StringConfig
+from calibre.utils.config import OptionParser
+from calibre.utils.logging import Log
+from calibre.constants import preferred_encoding
+from calibre.customize.conversion import OptionRecommendation
 
 from pyPdf import PdfFileWriter, PdfFileReader
 
-def config(defaults=None):
-    desc = _('Options to control the transformation of pdf')
-    if defaults is None:
-        c = Config('mergepdf', desc)
-    else:
-        c = StringConfig(defaults, desc)
-    c.add_opt('output', ['-o', '--output'], default='merged.pdf',
-          help=_('Path to output file. By default a file is created in the current directory.'))
-    return c
+USAGE = '%prog %%name ' + _('''
+[options] file1.pdf file2.pdf ...
+
+Metadata will be used from the first PDF specified.
+
+Merges individual PDFs.
+''')
+
+OPTIONS = set([
+    OptionRecommendation(name='output', recommended_value='merged.pdf',
+        level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
+        help=_('Path to output file. By default a file is created in the current directory.')),
+])
+
+def print_help(parser, log):
+    help = parser.format_help().encode(preferred_encoding, 'replace')
+    log(help)
 
 def option_parser(name):
-    c = config()
-    return c.option_parser(usage=_('''\
-	%prog %%name [options] file1.pdf file2.pdf ...
+    usage = USAGE.replace('%%name', name)
+    return OptionParser(usage=usage)
 
-	Merges individual PDFs. Metadata will be used from the first PDF specified.
-	'''.replace('%%name', name)))
+def option_recommendation_to_cli_option(add_option, rec):
+    opt = rec.option
+    switches = ['-'+opt.short_switch] if opt.short_switch else []
+    switches.append('--'+opt.long_switch)
+    attrs = dict(dest=opt.name, help=opt.help,
+                     choices=opt.choices, default=rec.recommended_value)
+    add_option(Option(*switches, **attrs))
+
+def add_options(parser):
+    group = OptionGroup(parser, _('Merge Options:'), _('Options to control the transformation of pdf'))
+    parser.add_option_group(group)
+    add_option = group.add_option
+    
+    for rec in OPTIONS:
+        option_recommendation_to_cli_option(add_option, rec)
 
 def merge_files(in_paths, out_path, metadata=None):
     if metadata == None:
@@ -65,20 +91,23 @@ def verify_files(files):
     return invalid
 
 def main(args=sys.argv, name=''):
+    log = Log()
     parser = option_parser(name)
+    add_options(parser)
+    
     opts, args = parser.parse_args(args)
     args = args[1:]
     
     if len(args) < 2:
-        print 'Error: Two or more PDF files are required.\n\n'
-        print parser.get_usage()
-        return 2
+        print 'Error: Two or more PDF files are required.\n'
+        print_help(parser, log)
+        return 1
     
     bad_pdfs = verify_files(args)
     if bad_pdfs != []:
         for pdf in bad_pdfs:
             print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf
-        return 2
+        return 1
         
     mi = metadata_from_formats([args[0]])
 
@@ -88,4 +117,3 @@ def main(args=sys.argv, name=''):
 
 if __name__ == '__main__':
     sys.exit(main())
-
diff --git a/src/calibre/ebooks/pdf/manipulate/reverse.py b/src/calibre/ebooks/pdf/manipulate/reverse.py
index 87bb9018c1..189cbf009b 100644
--- a/src/calibre/ebooks/pdf/manipulate/reverse.py
+++ b/src/calibre/ebooks/pdf/manipulate/reverse.py
@@ -10,30 +10,52 @@ Reverse content of PDF.
 '''
 
 import os, sys
+from optparse import OptionGroup, Option
 
 from calibre.ebooks.metadata.meta import metadata_from_formats
 from calibre.ebooks.metadata import authors_to_string
-from calibre.utils.config import Config, StringConfig
+from calibre.utils.config import OptionParser
+from calibre.utils.logging import Log
+from calibre.constants import preferred_encoding
+from calibre.customize.conversion import OptionRecommendation
 
 from pyPdf import PdfFileWriter, PdfFileReader
 
-def config(defaults=None):
-    desc = _('Options to control the transformation of pdf')
-    if defaults is None:
-        c = Config('reversepdf', desc)
-    else:
-        c = StringConfig(defaults, desc)
-    c.add_opt('output', ['-o', '--output'], default='reversed.pdf',
-          help=_('Path to output file. By default a file is created in the current directory.'))
-    return c
+USAGE = '%prog %%name ' + _('''
+[options] file.pdf
+
+Reverse PDF.
+''')
+
+OPTIONS = set([
+    OptionRecommendation(name='output', recommended_value='reversed.pdf',
+        level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
+        help=_('Path to output file. By default a file is created in the current directory.')),
+])
+
+def print_help(parser, log):
+    help = parser.format_help().encode(preferred_encoding, 'replace')
+    log(help)
 
 def option_parser(name):
-    c = config()
-    return c.option_parser(usage=_('''\
-	%prog %%name [options] file1.pdf
+    usage = USAGE.replace('%%name', name)
+    return OptionParser(usage=usage)
 
-	Reverse PDF.
-	'''.replace('%%name', name)))
+def option_recommendation_to_cli_option(add_option, rec):
+    opt = rec.option
+    switches = ['-'+opt.short_switch] if opt.short_switch else []
+    switches.append('--'+opt.long_switch)
+    attrs = dict(dest=opt.name, help=opt.help,
+                     choices=opt.choices, default=rec.recommended_value)
+    add_option(Option(*switches, **attrs))
+
+def add_options(parser):
+    group = OptionGroup(parser, _('Reverse Options:'), _('Options to control the transformation of pdf'))
+    parser.add_option_group(group)
+    add_option = group.add_option
+    
+    for rec in OPTIONS:
+        option_recommendation_to_cli_option(add_option, rec)
 
 def reverse(pdf_path, out_path, metadata=None):
     if metadata == None:
@@ -63,20 +85,22 @@ def valid_pdf(pdf_path):
         return False
     return True
 
-
 def main(args=sys.argv, name=''):
+    log = Log()
     parser = option_parser(name)
+    add_options(parser)
+    
     opts, args = parser.parse_args(args)
     args = args[1:]
     
     if len(args) < 1:
-        print 'Error: A PDF file is required.\n\n'
-        print parser.get_usage()
-        return 2
+        print 'Error: A PDF file is required.\n'
+        print_help(parser, log)
+        return 1
     
     if not valid_pdf(args[0]):
         print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % args[0]
-        return 2
+        return 1
     
     mi = metadata_from_formats([args[0]])
 
diff --git a/src/calibre/ebooks/pdf/manipulate/split.py b/src/calibre/ebooks/pdf/manipulate/split.py
index cc6965dd68..8996a4cb6b 100644
--- a/src/calibre/ebooks/pdf/manipulate/split.py
+++ b/src/calibre/ebooks/pdf/manipulate/split.py
@@ -1,46 +1,68 @@
-'''
-Split PDF file into multiple PDF documents.
-'''
+# -*- coding: utf-8 -*-
 from __future__ import with_statement
 
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
+'''
+Split PDF file into multiple PDF documents.
+'''
+
 import os, sys, re
+from optparse import OptionGroup, Option
 
 from calibre.ebooks.metadata.meta import metadata_from_formats
 from calibre.ebooks.metadata import authors_to_string
-from calibre.utils.config import Config, StringConfig
+from calibre.utils.config import OptionParser
+from calibre.utils.logging import Log
+from calibre.constants import preferred_encoding
+from calibre.customize.conversion import OptionRecommendation
 
 from pyPdf import PdfFileWriter, PdfFileReader
 
-def config(defaults=None):
-    desc = _('Options to control the transformation of pdf')
-    if defaults is None:
-        c = Config('splitpdf', desc)
-    else:
-        c = StringConfig(defaults, desc)
-    c.add_opt('output', ['-o', '--output'], default='split.pdf',
-          help=_('Path to output file. By default a file is created in the current directory. \
-            The file name will be the base name for the output.'))
-    return c
+USAGE = _('''
+%prog %%name [options] file.pdf page_to_split_on ...
+%prog %%name [options] file.pdf page_range_to_split_on ...
+	
+Ex.
+	
+%prog %%name file.pdf 6
+%prog %%name file.pdf 6-12
+%prog %%name file.pdf 6-12 8 10 9-20
+
+Split a PDF.
+''')
+
+OPTIONS = set([
+    OptionRecommendation(name='output', recommended_value='split.pdf',
+        level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
+        help=_('Path to output file. By default a file is created in the current directory.')),
+])
+
+def print_help(parser, log):
+    help = parser.format_help().encode(preferred_encoding, 'replace')
+    log(help)
 
 def option_parser(name):
-    c = config()
-    return c.option_parser(usage=_('''\
-    
-	%prog %%name [options] file.pdf page_to_split_on ...
-	%prog %%name [options] file.pdf page_range_to_split_on ...
-	
-	Ex.
-	
-	%prog %%name file.pdf 6
-	%prog %%name file.pdf 6-12
-	%prog %%name file.pdf 6-12 8 10 9-20
+    usage = USAGE.replace('%%name', name)
+    return OptionParser(usage=usage)
 
-	Split a PDF.
-	'''.replace('%%name', name)))
+def option_recommendation_to_cli_option(add_option, rec):
+    opt = rec.option
+    switches = ['-'+opt.short_switch] if opt.short_switch else []
+    switches.append('--'+opt.long_switch)
+    attrs = dict(dest=opt.name, help=opt.help,
+                     choices=opt.choices, default=rec.recommended_value)
+    add_option(Option(*switches, **attrs))
+
+def add_options(parser):
+    group = OptionGroup(parser, _('Split Options:'), _('Options to control the transformation of pdf'))
+    parser.add_option_group(group)
+    add_option = group.add_option
+    
+    for rec in OPTIONS:
+        option_recommendation_to_cli_option(add_option, rec)
 
 def split_pdf(in_path, pages, page_ranges, out_name, metadata=None):
     pdf = PdfFileReader(open(os.path.abspath(in_path), 'rb'))
@@ -153,25 +175,28 @@ def valid_pdf(pdf_path):
     return True
 
 def main(args=sys.argv, name=''):
+    log = Log()
     parser = option_parser(name)
+    add_options(parser)
+    
     opts, args = parser.parse_args(args)
     
     pdf, pages, page_ranges, unknown = split_args(args[1:])
     
     if pdf == '' and (pages == [] or page_ranges == []):
-        print 'Error: PDF and where to split is required.\n\n'
-        print parser.get_usage()
-        return 2
+        print 'Error: PDF and where to split is required.\n'
+        print_help(parser, log)
+        return 1
     
     if unknown != []:
         for arg in unknown:
             print 'Error: Unknown argument `%s`' % arg
-        print parser.get_usage()
-        return 2
+        print_help(parser, log)
+        return 1
     
     if not valid_pdf(pdf):
         print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf
-        return 2
+        return 1
         
     pages, page_ranges = clean_page_list(pdf, pages, page_ranges)
         
@@ -183,4 +208,3 @@ def main(args=sys.argv, name=''):
 
 if __name__ == '__main__':
     sys.exit(main())
-
diff --git a/src/calibre/ebooks/pdf/manipulate/trim.py b/src/calibre/ebooks/pdf/manipulate/trim.py
deleted file mode 100644
index b32312fee8..0000000000
--- a/src/calibre/ebooks/pdf/manipulate/trim.py
+++ /dev/null
@@ -1,93 +0,0 @@
-from __future__ import with_statement
-__license__   = 'GPL v3'
-__copyright__ = '2009, James Beal, james_@catbus.co.uk'
-__docformat__ = 'restructuredtext en'
-
-'crop a pdf file'
-
-import os, sys, re
-from calibre.utils.config import Config, StringConfig
-from pyPdf import PdfFileWriter, PdfFileReader
-
-def config(defaults=None):
-    desc = _('Options to control the transformation of pdf')
-    default_crop=10
-    if defaults is None:
-        c = Config('trimpdf', desc)
-    else:
-        c = StringConfig(defaults, desc)
-    c.add_opt('output', ['-o', '--output'],default='cropped.pdf',
-          help=_('Path to output file. By default a file is created in the current directory.'))
-    c.add_opt('bottom_left_x', [ '-x', '--leftx'], default=default_crop,
-          help=_('Number of pixels to crop from the left most x (default is %d) ')%default_crop )
-    c.add_opt('bottom_left_y', [ '-y', '--lefty'], default=default_crop,
-          help=_('Number of pixels to crop from the left most y (default is %d) ')%default_crop )
-    c.add_opt('top_right_x', [ '-v', '--rightx'], default=default_crop,
-          help=_('Number of pixels to crop from the right most x (default is %d) ')%default_crop )
-    c.add_opt('top_right_y', [ '-w', '--righty'], default=default_crop,
-          help=_('Number of pixels to crop from the right most y (default is %d)')%default_crop )
-    c.add_opt('bounding', ['-b', '--bounding'],
-          help=_('A file generated by ghostscript which allows each page to be individually cropped [gs -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox > bounding] '))
-    return c
-
-
-def option_parser(name):
-    c = config()
-    return c.option_parser(usage=_('''\
-	%prog %%name [options] file.pdf
-
-	Crops a pdf. 
-	'''.replace('%%name', name)))
-
-def main(args=sys.argv, name=''):
-    parser = option_parser(name)
-    opts, args = parser.parse_args(args)
-    try:
-        source = os.path.abspath(args[1])
-        input_pdf = PdfFileReader(file(source, "rb"))
-    except:
-        print "Unable to read input"
-        return 2
-    title   = _('Unknown')
-    author  = _('Unknown')
-    try:
-        info = input_pdf.getDocumentInfo()
-        if info.title:
-            title   = info.title
-        if info.author:
-            author  = info.author
-    except:
-        pass
-    if opts.bounding != None:
-        try:
-            bounding = open( opts.bounding , 'r' )
-            bounding_regex= re.compile('%%BoundingBox: (?P<bottom_x>[0-9]+) (?P<bottom_y>[0-9]+) (?P<top_x>[0-9]+) (?P<top_y>[0-9]+)')
-        except:
-            print 'Error opening %s' % opts.bounding 
-            return 1
-    output_pdf = PdfFileWriter(title=title,author=author)
-    for page_number in range (0, input_pdf.getNumPages() ):
-        page = input_pdf.getPage(page_number)
-        if opts.bounding != None:
-            while True:
-                line=bounding.readline()
-                match=bounding_regex.search(line)
-                if match !=None:
-                    break
-            page.mediaBox.upperRight = (match.group('top_x'),match.group('top_y'))
-            page.mediaBox.lowerLeft  = (match.group('bottom_x'),match.group('bottom_y'))
-        else:
-            page.mediaBox.upperRight = (page.bleedBox.getUpperRight_x()-opts.top_right_x,page.bleedBox.getUpperRight_y()-opts.top_right_y)
-            page.mediaBox.lowerLeft  = (page.bleedBox.getLowerLeft_x()+opts.bottom_left_x,page.bleedBox.getLowerLeft_y()+opts.bottom_left_y)
-        output_pdf.addPage(page)
-    if opts.bounding != None:
-        bounding.close()
-    output_file = file(opts.output, "wb")
-    output_pdf.write(output_file)
-    output_file.close()
-
-
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
diff --git a/src/calibre/ebooks/pdf/pdftohtml.py b/src/calibre/ebooks/pdf/pdftohtml.py
index 27cdb3f691..e7707479c3 100644
--- a/src/calibre/ebooks/pdf/pdftohtml.py
+++ b/src/calibre/ebooks/pdf/pdftohtml.py
@@ -2,8 +2,8 @@
 from __future__ import with_statement
 
 __license__ = 'GPL 3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> \
-                 2009, John Schember <john@nachtimwald.com>'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>, ' \
+                '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
 import errno, os, sys, subprocess

From 697eabe9ae74a897f1ca2026fb9d0e45b8caf7ce Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Fri, 3 Apr 2009 22:05:41 -0400
Subject: [PATCH 3/3] Refactor pdf manipulate commands

---
 src/calibre/ebooks/pdf/manipulate/crop.py    | 14 ++------
 src/calibre/ebooks/pdf/manipulate/info.py    | 14 ++------
 src/calibre/ebooks/pdf/manipulate/merge.py   | 16 ++-------
 src/calibre/ebooks/pdf/manipulate/reverse.py | 14 ++------
 src/calibre/ebooks/pdf/manipulate/split.py   | 14 ++------
 src/calibre/ebooks/pdf/verify.py             | 37 ++++++++++++++++++++
 6 files changed, 47 insertions(+), 62 deletions(-)
 create mode 100644 src/calibre/ebooks/pdf/verify.py

diff --git a/src/calibre/ebooks/pdf/manipulate/crop.py b/src/calibre/ebooks/pdf/manipulate/crop.py
index c3eb70c56d..fa996b754f 100644
--- a/src/calibre/ebooks/pdf/manipulate/crop.py
+++ b/src/calibre/ebooks/pdf/manipulate/crop.py
@@ -19,6 +19,7 @@ from calibre.utils.config import OptionParser
 from calibre.utils.logging import Log
 from calibre.constants import preferred_encoding
 from calibre.customize.conversion import OptionRecommendation
+from calibre.ebooks.pdf.verify import is_valid_pdf
 
 from pyPdf import PdfFileWriter, PdfFileReader
 
@@ -116,17 +117,6 @@ def crop_pdf(pdf_path, opts, metadata=None):
         
     with open(opts.output, 'wb') as output_file:
         output_pdf.write(output_file)
-
-# Return True if the pdf is valid.
-def valid_pdf(pdf_path):
-    try:
-        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
-            pdf = PdfFileReader(pdf_file)
-            if pdf.isEncrypted or pdf.numPages <= 0:
-                raise Exception
-    except:
-        return False
-    return True
     
 def main(args=sys.argv, name=''):
     log = Log()
@@ -141,7 +131,7 @@ def main(args=sys.argv, name=''):
         print_help(parser, log)
         return 1
     
-    if not valid_pdf(args[0]):
+    if not is_valid_pdf(args[0]):
         print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % args[0]
         return 1
     
diff --git a/src/calibre/ebooks/pdf/manipulate/info.py b/src/calibre/ebooks/pdf/manipulate/info.py
index 4aff524330..21a07fdeff 100644
--- a/src/calibre/ebooks/pdf/manipulate/info.py
+++ b/src/calibre/ebooks/pdf/manipulate/info.py
@@ -16,6 +16,7 @@ from calibre.utils.config import OptionParser
 from calibre.utils.logging import Log
 from calibre.constants import preferred_encoding
 from calibre.customize.conversion import OptionRecommendation
+from calibre.ebooks.pdf.verify import is_valid_pdfs
 
 from pyPdf import PdfFileWriter, PdfFileReader
 
@@ -56,17 +57,6 @@ def print_info(pdf_path):
                 print _('PDF Version:           %s' % mo.group('version'))
         except: pass
 
-def verify_files(files):
-    invalid = []
-
-    for pdf_path in files:
-        try:
-            with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
-                pdf = PdfFileReader(pdf_file)
-        except:
-            invalid.append(pdf_path)
-    return invalid
-
 def main(args=sys.argv, name=''):
     log = Log()
     parser = option_parser(name)
@@ -79,7 +69,7 @@ def main(args=sys.argv, name=''):
         print_help(parser, log)
         return 1
     
-    bad_pdfs = verify_files(args)
+    bad_pdfs = is_valid_pdfs(args)
     if bad_pdfs != []:
         for pdf in bad_pdfs:
             print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf
diff --git a/src/calibre/ebooks/pdf/manipulate/merge.py b/src/calibre/ebooks/pdf/manipulate/merge.py
index f0ecb9bd7a..1e285e3bdf 100644
--- a/src/calibre/ebooks/pdf/manipulate/merge.py
+++ b/src/calibre/ebooks/pdf/manipulate/merge.py
@@ -18,6 +18,7 @@ from calibre.utils.config import OptionParser
 from calibre.utils.logging import Log
 from calibre.constants import preferred_encoding
 from calibre.customize.conversion import OptionRecommendation
+from calibre.ebooks.pdf.verify import is_valid_pdfs
 
 from pyPdf import PdfFileWriter, PdfFileReader
 
@@ -76,19 +77,6 @@ def merge_files(in_paths, out_path, metadata=None):
 
     with open(out_path, 'wb') as out_file:
         out_pdf.write(out_file)
-    
-def verify_files(files):
-    invalid = []
-
-    for pdf_path in files:
-        try:
-            with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
-                pdf = PdfFileReader(pdf_file)
-                if pdf.isEncrypted or pdf.numPages <= 0:
-                    raise Exception
-        except:
-            invalid.append(pdf_path)
-    return invalid
 
 def main(args=sys.argv, name=''):
     log = Log()
@@ -103,7 +91,7 @@ def main(args=sys.argv, name=''):
         print_help(parser, log)
         return 1
     
-    bad_pdfs = verify_files(args)
+    bad_pdfs = is_valid_pdfs(args)
     if bad_pdfs != []:
         for pdf in bad_pdfs:
             print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf
diff --git a/src/calibre/ebooks/pdf/manipulate/reverse.py b/src/calibre/ebooks/pdf/manipulate/reverse.py
index 189cbf009b..564e523ae3 100644
--- a/src/calibre/ebooks/pdf/manipulate/reverse.py
+++ b/src/calibre/ebooks/pdf/manipulate/reverse.py
@@ -18,6 +18,7 @@ from calibre.utils.config import OptionParser
 from calibre.utils.logging import Log
 from calibre.constants import preferred_encoding
 from calibre.customize.conversion import OptionRecommendation
+from calibre.ebooks.pdf.verify import is_valid_pdf
 
 from pyPdf import PdfFileWriter, PdfFileReader
 
@@ -74,17 +75,6 @@ def reverse(pdf_path, out_path, metadata=None):
     with open(out_path, 'wb') as out_file:
         out_pdf.write(out_file)
 
-# Return True if the pdf is valid.
-def valid_pdf(pdf_path):
-    try:
-        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
-            pdf = PdfFileReader(pdf_file)
-            if pdf.isEncrypted or pdf.numPages <= 0:
-                raise Exception
-    except:
-        return False
-    return True
-
 def main(args=sys.argv, name=''):
     log = Log()
     parser = option_parser(name)
@@ -98,7 +88,7 @@ def main(args=sys.argv, name=''):
         print_help(parser, log)
         return 1
     
-    if not valid_pdf(args[0]):
+    if not is_valid_pdf(args[0]):
         print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % args[0]
         return 1
     
diff --git a/src/calibre/ebooks/pdf/manipulate/split.py b/src/calibre/ebooks/pdf/manipulate/split.py
index 8996a4cb6b..fb7e4d06d7 100644
--- a/src/calibre/ebooks/pdf/manipulate/split.py
+++ b/src/calibre/ebooks/pdf/manipulate/split.py
@@ -18,6 +18,7 @@ from calibre.utils.config import OptionParser
 from calibre.utils.logging import Log
 from calibre.constants import preferred_encoding
 from calibre.customize.conversion import OptionRecommendation
+from calibre.ebooks.pdf.verify import is_valid_pdf
 
 from pyPdf import PdfFileWriter, PdfFileReader
 
@@ -163,17 +164,6 @@ def clean_page_list(pdf_path, pages, page_ranges):
     
     return pages, page_ranges
 
-# Return True if the pdf is valid.
-def valid_pdf(pdf_path):
-    try:
-        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
-            pdf = PdfFileReader(pdf_file)
-            if pdf.isEncrypted or pdf.numPages <= 0:
-                raise Exception
-    except:
-        return False
-    return True
-
 def main(args=sys.argv, name=''):
     log = Log()
     parser = option_parser(name)
@@ -194,7 +184,7 @@ def main(args=sys.argv, name=''):
         print_help(parser, log)
         return 1
     
-    if not valid_pdf(pdf):
+    if not is_valid_pdf(pdf):
         print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf
         return 1
         
diff --git a/src/calibre/ebooks/pdf/verify.py b/src/calibre/ebooks/pdf/verify.py
new file mode 100644
index 0000000000..35f7edf0be
--- /dev/null
+++ b/src/calibre/ebooks/pdf/verify.py
@@ -0,0 +1,37 @@
+from __future__ import with_statement
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Verify PDF files.
+'''
+
+import os
+
+from pyPdf import PdfFileWriter, PdfFileReader
+
+def is_valid_pdf(pdf_path):
+    '''
+    Returns True only if pyPdf can parse the file at pdf_path, the file is
+    not encrypted and it has at least one page. Returns False otherwise.
+    '''
+    try:
+        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
+            pdf = PdfFileReader(pdf_file)  # checked while the file is open
+            return not pdf.isEncrypted and pdf.numPages > 0
+    except:  # deliberately broad: any open/parse failure means "not valid"
+        return False
+    
+def is_valid_pdfs(pdf_paths):
+    '''
+    Run is_valid_pdf over every path in pdf_paths.
+
+    Despite the name, the result is the list of paths that FAILED the
+    check, in input order; an empty list means all files are valid.
+    '''
+    # A comprehension keeps only the rejected paths; nothing is raised
+    # here because is_valid_pdf reports problems via its return value.
+    return [path for path in pdf_paths if not is_valid_pdf(path)]