From 70e3ea15bba8c1143508b435b3871cdb6b2cebee Mon Sep 17 00:00:00 2001 From: John Schember Date: Thu, 2 Apr 2009 20:55:46 -0400 Subject: [PATCH 1/3] move pdf manipulate into its own module --- src/calibre/ebooks/pdf/{manipulate.py => manipulate/cli.py} | 2 +- src/calibre/ebooks/pdf/{ => manipulate}/info.py | 0 src/calibre/ebooks/pdf/{ => manipulate}/merge.py | 0 src/calibre/ebooks/pdf/{ => manipulate}/reverse.py | 0 src/calibre/ebooks/pdf/{ => manipulate}/split.py | 0 src/calibre/ebooks/pdf/{ => manipulate}/trim.py | 0 src/calibre/linux.py | 2 +- 7 files changed, 2 insertions(+), 2 deletions(-) rename src/calibre/ebooks/pdf/{manipulate.py => manipulate/cli.py} (95%) rename src/calibre/ebooks/pdf/{ => manipulate}/info.py (100%) rename src/calibre/ebooks/pdf/{ => manipulate}/merge.py (100%) rename src/calibre/ebooks/pdf/{ => manipulate}/reverse.py (100%) rename src/calibre/ebooks/pdf/{ => manipulate}/split.py (100%) rename src/calibre/ebooks/pdf/{ => manipulate}/trim.py (100%) diff --git a/src/calibre/ebooks/pdf/manipulate.py b/src/calibre/ebooks/pdf/manipulate/cli.py similarity index 95% rename from src/calibre/ebooks/pdf/manipulate.py rename to src/calibre/ebooks/pdf/manipulate/cli.py index 8c49650730..e82946f2ea 100644 --- a/src/calibre/ebooks/pdf/manipulate.py +++ b/src/calibre/ebooks/pdf/manipulate/cli.py @@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en' import string, sys from calibre.utils.config import Config, StringConfig -from calibre.ebooks.pdf import info, merge, reverse, split, trim +from calibre.ebooks.pdf.manipulate import info, merge, reverse, split, trim COMMANDS = { 'info' : info, diff --git a/src/calibre/ebooks/pdf/info.py b/src/calibre/ebooks/pdf/manipulate/info.py similarity index 100% rename from src/calibre/ebooks/pdf/info.py rename to src/calibre/ebooks/pdf/manipulate/info.py diff --git a/src/calibre/ebooks/pdf/merge.py b/src/calibre/ebooks/pdf/manipulate/merge.py similarity index 100% rename from src/calibre/ebooks/pdf/merge.py rename to src/calibre/ebooks/pdf/manipulate/merge.py diff --git a/src/calibre/ebooks/pdf/reverse.py b/src/calibre/ebooks/pdf/manipulate/reverse.py similarity index 100% rename from src/calibre/ebooks/pdf/reverse.py rename to src/calibre/ebooks/pdf/manipulate/reverse.py diff --git a/src/calibre/ebooks/pdf/split.py b/src/calibre/ebooks/pdf/manipulate/split.py similarity index 100% rename from src/calibre/ebooks/pdf/split.py rename to src/calibre/ebooks/pdf/manipulate/split.py diff --git a/src/calibre/ebooks/pdf/trim.py b/src/calibre/ebooks/pdf/manipulate/trim.py similarity index 100% rename from src/calibre/ebooks/pdf/trim.py rename to src/calibre/ebooks/pdf/manipulate/trim.py diff --git a/src/calibre/linux.py b/src/calibre/linux.py index b680ecc304..592a25c170 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -39,7 +39,7 @@ entry_points = { 'calibre-fontconfig = calibre.utils.fontconfig:main', 'calibre-parallel = calibre.parallel:main', 'calibre-customize = calibre.customize.ui:main', - 'pdfmanipulate = calibre.ebooks.pdf.manipulate:main', + 'pdfmanipulate = calibre.ebooks.pdf.manipulate.cli:main', 'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main', 'calibre-smtp = calibre.utils.smtp:main', ], From 7f5a619ad9e65cdf64b3d2a825d4516575c0e7c6 Mon Sep 17 00:00:00 2001 From: John Schember Date: Fri, 3 Apr 2009 21:06:31 -0400 Subject: [PATCH 2/3] pdfmanipulate moved to new command line option framework --- src/calibre/ebooks/__init__.py | 1 + src/calibre/ebooks/pdf/manipulate/__init__.py | 0 src/calibre/ebooks/pdf/manipulate/cli.py | 62 +++---- src/calibre/ebooks/pdf/manipulate/crop.py | 155 ++++++++++++++++++ src/calibre/ebooks/pdf/manipulate/info.py | 48 +++--- src/calibre/ebooks/pdf/manipulate/merge.py | 74 ++++++--- src/calibre/ebooks/pdf/manipulate/reverse.py | 64 +++++--- src/calibre/ebooks/pdf/manipulate/split.py | 92 +++++++---- src/calibre/ebooks/pdf/manipulate/trim.py | 93 ----------- src/calibre/ebooks/pdf/pdftohtml.py | 4 +- 10 files changed, 368 insertions(+), 225 deletions(-) create mode 100644 src/calibre/ebooks/pdf/manipulate/__init__.py create mode 100644 src/calibre/ebooks/pdf/manipulate/crop.py delete mode 100644 src/calibre/ebooks/pdf/manipulate/trim.py diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index e208b5a688..26d2394818 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -1,3 +1,4 @@ +from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' diff --git a/src/calibre/ebooks/pdf/manipulate/__init__.py b/src/calibre/ebooks/pdf/manipulate/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/calibre/ebooks/pdf/manipulate/cli.py b/src/calibre/ebooks/pdf/manipulate/cli.py index e82946f2ea..e3fcef559c 100644 --- a/src/calibre/ebooks/pdf/manipulate/cli.py +++ b/src/calibre/ebooks/pdf/manipulate/cli.py @@ -1,69 +1,69 @@ -''' -Command line interface to run pdf manipulation commands. -''' from __future__ import with_statement +# -*- coding: utf-8 -*- __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' __docformat__ = 'restructuredtext en' +''' +Command line interface to run pdf manipulation commands. +''' + import string, sys -from calibre.utils.config import Config, StringConfig -from calibre.ebooks.pdf.manipulate import info, merge, reverse, split, trim +from calibre.utils.config import OptionParser +from calibre.utils.logging import Log +from calibre.constants import preferred_encoding +from calibre.customize.conversion import OptionRecommendation +from calibre.ebooks.pdf.manipulate import crop, info, merge, reverse, split COMMANDS = { + 'crop' : crop, 'info' : info, 'merge' : merge, 'reverse' : reverse, 'split' : split, - 'trim' : trim, } -def config(defaults=None): - desc = _('Options to control the transformation of pdf') - if defaults is None: - c = Config('manipulatepdf', desc) - else: - c = StringConfig(defaults, desc) - return c +USAGE = '%prog ' + _('''command ... + +command can be one of the following: +[%%commands] + +Use %prog command --help to get more information about a specific command + +Manipulate a PDF. +'''.replace('%%commands', string.join(sorted(COMMANDS.keys()), ', '))) + +def print_help(parser, log): + help = parser.format_help().encode(preferred_encoding, 'replace') + log(help) def option_parser(): - c = config() - return c.option_parser(usage=_('''\ - - %prog command ... - - command can be one of the following: - [%%commands] - - Use %prog command --help to get more information about a specific command - - Manipulate a PDF. - '''.replace('%%commands', string.join(sorted(COMMANDS.keys()), ', ')))) + return OptionParser(usage=USAGE) def main(args=sys.argv): + log = Log() parser = option_parser() if len(args) < 2: print 'Error: No command sepecified.\n' - print parser.get_usage() - return 2 + print_help(parser, log) + return 1 command = args[1].lower().strip() - if command in COMMANDS.keys(): + if command in COMMANDS.keys(): del args[1] return COMMANDS[command].main(args, command) else: parser.parse_args(args) print 'Unknown command %s.\n' % command - print parser.get_usage() - return 2 + print_help(parser, log) + return 1 # We should never get here. return 0 if __name__ == '__main__': sys.exit(main()) - diff --git a/src/calibre/ebooks/pdf/manipulate/crop.py b/src/calibre/ebooks/pdf/manipulate/crop.py new file mode 100644 index 0000000000..c3eb70c56d --- /dev/null +++ b/src/calibre/ebooks/pdf/manipulate/crop.py @@ -0,0 +1,155 @@ +# -*- coding: utf-8 -*- +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2009, James Beal , ' \ + '2009, John Schember ' +__docformat__ = 'restructuredtext en' + +''' +Crop a pdf file +''' + +import os, sys, re +from optparse import OptionGroup, Option + +from calibre.ebooks.metadata.meta import metadata_from_formats +from calibre.ebooks.metadata import authors_to_string +from calibre.utils.config import OptionParser +from calibre.utils.logging import Log +from calibre.constants import preferred_encoding +from calibre.customize.conversion import OptionRecommendation + +from pyPdf import PdfFileWriter, PdfFileReader + +DEFAULT_CROP = '10' + +USAGE = '%prog %%name ' + _(''' +[options] file.pdf + +Crop a PDF file. +''') + +OPTIONS = set([ + OptionRecommendation(name='output', recommended_value='cropped.pdf', + level=OptionRecommendation.HIGH, long_switch='output', short_switch='o', + help=_('Path to output file. By default a file is created in the current directory.')), + OptionRecommendation(name='bottom_left_x', recommended_value=DEFAULT_CROP, + level=OptionRecommendation.LOW, long_switch='leftx', short_switch='x', + help=_('Number of pixels to crop from the left most x (default is %s) ' % DEFAULT_CROP)), + OptionRecommendation(name='bottom_left_y', recommended_value=DEFAULT_CROP, + level=OptionRecommendation.LOW, long_switch='lefty', short_switch='y', + help=_('Number of pixels to crop from the left most y (default is %s) ' % DEFAULT_CROP)), + OptionRecommendation(name='top_right_x', recommended_value=DEFAULT_CROP, + level=OptionRecommendation.LOW, long_switch='rightx', short_switch='v', + help=_('Number of pixels to crop from the right most x (default is %s) ' % DEFAULT_CROP)), + OptionRecommendation(name='top_right_y', recommended_value=DEFAULT_CROP, + level=OptionRecommendation.LOW, long_switch='right y', short_switch='w', + help=_('Number of pixels to crop from the right most y (default is %s)' % DEFAULT_CROP)), + OptionRecommendation(name='bounding', recommended_value=None, + level=OptionRecommendation.LOW, long_switch='bounding', short_switch='b', + help=_('A file generated by ghostscript which allows each page to be individually cropped `gs -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox file.pdf 2> bounding`')), +]) + +def print_help(parser, log): + help = parser.format_help().encode(preferred_encoding, 'replace') + log(help) + +def option_parser(name): + usage = USAGE.replace('%%name', name) + return OptionParser(usage=usage) + +def option_recommendation_to_cli_option(add_option, rec): + opt = rec.option + switches = ['-'+opt.short_switch] if opt.short_switch else [] + switches.append('--'+opt.long_switch) + attrs = dict(dest=opt.name, help=opt.help, + choices=opt.choices, default=rec.recommended_value) + add_option(Option(*switches, **attrs)) + +def add_options(parser): + group = OptionGroup(parser, _('Crop Options:'), _('Options to control the transformation of pdf')) + parser.add_option_group(group) + add_option = group.add_option + + for rec in OPTIONS: + option_recommendation_to_cli_option(add_option, rec) + +def crop_pdf(pdf_path, opts, metadata=None): + if metadata == None: + title = _('Unknown') + author = _('Unknown') + else: + title = metadata.title + author = authors_to_string(metadata.authors) + + input_pdf = PdfFileReader(open(pdf_path, 'rb')) + + bounding_lines = [] + if opts.bounding != None: + try: + bounding = open(opts.bounding , 'r') + bounding_regex = re.compile('%%BoundingBox: (?P\d+) (?P\d+) (?P\d+) (?P\d+)') + except: + raise Exception('Error reading %s' % opts.bounding) + + lines = bounding.readlines() + for line in lines: + if line.startswith('%%BoundingBox:'): + bounding_lines.append(line) + if len(bounding_lines) != input_pdf.numPages: + raise Exception('Error bounding file %s page count does not correspond to specified pdf' % opts.bounding) + + output_pdf = PdfFileWriter(title=title,author=author) + blines = iter(bounding_lines) + for page in input_pdf.pages: + if bounding_lines != []: + mo = bounding_regex.search(blines.next()) + if mo == None: + raise Exception('Error in bounding file %s' % opts.bounding) + page.mediaBox.upperRight = (mo.group('top_x'), mo.group('top_y')) + page.mediaBox.lowerLeft = (mo.group('bottom_x'), mo.group('bottom_y')) + else: + page.mediaBox.upperRight = (page.bleedBox.getUpperRight_x() - opts.top_right_x, page.bleedBox.getUpperRight_y() - opts.top_right_y) + page.mediaBox.lowerLeft = (page.bleedBox.getLowerLeft_x() + opts.bottom_left_x, page.bleedBox.getLowerLeft_y() + opts.bottom_left_y) + output_pdf.addPage(page) + + with open(opts.output, 'wb') as output_file: + output_pdf.write(output_file) + +# Return True if the pdf is valid. +def valid_pdf(pdf_path): + try: + with open(os.path.abspath(pdf_path), 'rb') as pdf_file: + pdf = PdfFileReader(pdf_file) + if pdf.isEncrypted or pdf.numPages <= 0: + raise Exception + except: + return False + return True + +def main(args=sys.argv, name=''): + log = Log() + parser = option_parser(name) + add_options(parser) + + opts, args = parser.parse_args(args) + args = args[1:] + + if len(args) < 1: + print 'Error: A PDF file is required.\n' + print_help(parser, log) + return 1 + + if not valid_pdf(args[0]): + print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % args[0] + return 1 + + mi = metadata_from_formats([args[0]]) + + crop_pdf(args[0], opts, mi) + + return 0 + +if __name__ == '__main__': + sys.exit(main()) diff --git a/src/calibre/ebooks/pdf/manipulate/info.py b/src/calibre/ebooks/pdf/manipulate/info.py index 115e411ce4..4aff524330 100644 --- a/src/calibre/ebooks/pdf/manipulate/info.py +++ b/src/calibre/ebooks/pdf/manipulate/info.py @@ -1,34 +1,37 @@ -''' -Merge PDF files into a single PDF document. -''' from __future__ import with_statement +# -*- coding: utf-8 -*- __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' __docformat__ = 'restructuredtext en' -import os, re, sys, time +''' +Merge PDF files into a single PDF document. +''' -from calibre.utils.config import Config, StringConfig +import os, re, sys, time +from optparse import OptionGroup, Option + +from calibre.utils.config import OptionParser +from calibre.utils.logging import Log +from calibre.constants import preferred_encoding +from calibre.customize.conversion import OptionRecommendation from pyPdf import PdfFileWriter, PdfFileReader +USAGE = '%prog %%name ' + _(''' +file.pdf ... -def config(defaults=None): - desc = _('Options to control the transformation of pdf') - if defaults is None: - c = Config('manipulatepdf', desc) - else: - c = StringConfig(defaults, desc) - return c +Get info about a PDF. +''') + +def print_help(parser, log): + help = parser.format_help().encode(preferred_encoding, 'replace') + log(help) def option_parser(name): - c = config() - return c.option_parser(usage=_('''\ - %prog %%name [options] file.pdf ... - - Get info about a PDF. - '''.replace('%%name', name))) + usage = USAGE.replace('%%name', name) + return OptionParser(usage=usage) def print_info(pdf_path): with open(os.path.abspath(pdf_path), 'rb') as pdf_file: @@ -65,20 +68,22 @@ def verify_files(files): return invalid def main(args=sys.argv, name=''): + log = Log() parser = option_parser(name) + opts, args = parser.parse_args(args) args = args[1:] if len(args) < 1: print 'Error: No PDF sepecified.\n' - print parser.get_usage() - return 2 + print_help(parser, log) + return 1 bad_pdfs = verify_files(args) if bad_pdfs != []: for pdf in bad_pdfs: print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf - return 2 + return 1 for pdf in args: print_info(pdf) @@ -87,4 +92,3 @@ def main(args=sys.argv, name=''): if __name__ == '__main__': sys.exit(main()) - diff --git a/src/calibre/ebooks/pdf/manipulate/merge.py b/src/calibre/ebooks/pdf/manipulate/merge.py index c0385080ad..f0ecb9bd7a 100644 --- a/src/calibre/ebooks/pdf/manipulate/merge.py +++ b/src/calibre/ebooks/pdf/manipulate/merge.py @@ -1,37 +1,63 @@ -''' -Merge PDF files into a single PDF document. -''' from __future__ import with_statement +# -*- coding: utf-8 -*- __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' __docformat__ = 'restructuredtext en' +''' +Merge PDF files into a single PDF document. +''' + import os, sys +from optparse import OptionGroup, Option from calibre.ebooks.metadata.meta import metadata_from_formats from calibre.ebooks.metadata import authors_to_string -from calibre.utils.config import Config, StringConfig +from calibre.utils.config import OptionParser +from calibre.utils.logging import Log +from calibre.constants import preferred_encoding +from calibre.customize.conversion import OptionRecommendation from pyPdf import PdfFileWriter, PdfFileReader -def config(defaults=None): - desc = _('Options to control the transformation of pdf') - if defaults is None: - c = Config('mergepdf', desc) - else: - c = StringConfig(defaults, desc) - c.add_opt('output', ['-o', '--output'], default='merged.pdf', - help=_('Path to output file. By default a file is created in the current directory.')) - return c +USAGE = '%prog %%name ' + _(''' +[options] file1.pdf file2.pdf ... + +Metadata will be used from the first PDF specified. + +Merges individual PDFs. +''') + +OPTIONS = set([ + OptionRecommendation(name='output', recommended_value='merged.pdf', + level=OptionRecommendation.HIGH, long_switch='output', short_switch='o', + help=_('Path to output file. By default a file is created in the current directory.')), +]) + +def print_help(parser, log): + help = parser.format_help().encode(preferred_encoding, 'replace') + log(help) def option_parser(name): - c = config() - return c.option_parser(usage=_('''\ - %prog %%name [options] file1.pdf file2.pdf ... + usage = USAGE.replace('%%name', name) + return OptionParser(usage=usage) - Merges individual PDFs. Metadata will be used from the first PDF specified. - '''.replace('%%name', name))) +def option_recommendation_to_cli_option(add_option, rec): + opt = rec.option + switches = ['-'+opt.short_switch] if opt.short_switch else [] + switches.append('--'+opt.long_switch) + attrs = dict(dest=opt.name, help=opt.help, + choices=opt.choices, default=rec.recommended_value) + add_option(Option(*switches, **attrs)) + +def add_options(parser): + group = OptionGroup(parser, _('Merge Options:'), _('Options to control the transformation of pdf')) + parser.add_option_group(group) + add_option = group.add_option + + for rec in OPTIONS: + option_recommendation_to_cli_option(add_option, rec) def merge_files(in_paths, out_path, metadata=None): if metadata == None: @@ -65,20 +91,23 @@ def verify_files(files): return invalid def main(args=sys.argv, name=''): + log = Log() parser = option_parser(name) + add_options(parser) + opts, args = parser.parse_args(args) args = args[1:] if len(args) < 2: - print 'Error: Two or more PDF files are required.\n\n' - print parser.get_usage() - return 2 + print 'Error: Two or more PDF files are required.\n' + print_help(parser, log) + return 1 bad_pdfs = verify_files(args) if bad_pdfs != []: for pdf in bad_pdfs: print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf - return 2 + return 1 mi = metadata_from_formats([args[0]]) @@ -88,4 +117,3 @@ def main(args=sys.argv, name=''): if __name__ == '__main__': sys.exit(main()) - diff --git a/src/calibre/ebooks/pdf/manipulate/reverse.py b/src/calibre/ebooks/pdf/manipulate/reverse.py index 87bb9018c1..189cbf009b 100644 --- a/src/calibre/ebooks/pdf/manipulate/reverse.py +++ b/src/calibre/ebooks/pdf/manipulate/reverse.py @@ -10,30 +10,52 @@ Reverse content of PDF. ''' import os, sys +from optparse import OptionGroup, Option from calibre.ebooks.metadata.meta import metadata_from_formats from calibre.ebooks.metadata import authors_to_string -from calibre.utils.config import Config, StringConfig +from calibre.utils.config import OptionParser +from calibre.utils.logging import Log +from calibre.constants import preferred_encoding +from calibre.customize.conversion import OptionRecommendation from pyPdf import PdfFileWriter, PdfFileReader -def config(defaults=None): - desc = _('Options to control the transformation of pdf') - if defaults is None: - c = Config('reversepdf', desc) - else: - c = StringConfig(defaults, desc) - c.add_opt('output', ['-o', '--output'], default='reversed.pdf', - help=_('Path to output file. By default a file is created in the current directory.')) - return c +USAGE = '%prog %%name ' + _(''' +[options] file.pdf + +Reverse PDF. +''') + +OPTIONS = set([ + OptionRecommendation(name='output', recommended_value='reversed.pdf', + level=OptionRecommendation.HIGH, long_switch='output', short_switch='o', + help=_('Path to output file. By default a file is created in the current directory.')), +]) + +def print_help(parser, log): + help = parser.format_help().encode(preferred_encoding, 'replace') + log(help) def option_parser(name): - c = config() - return c.option_parser(usage=_('''\ - %prog %%name [options] file1.pdf + usage = USAGE.replace('%%name', name) + return OptionParser(usage=usage) - Reverse PDF. - '''.replace('%%name', name))) +def option_recommendation_to_cli_option(add_option, rec): + opt = rec.option + switches = ['-'+opt.short_switch] if opt.short_switch else [] + switches.append('--'+opt.long_switch) + attrs = dict(dest=opt.name, help=opt.help, + choices=opt.choices, default=rec.recommended_value) + add_option(Option(*switches, **attrs)) + +def add_options(parser): + group = OptionGroup(parser, _('Reverse Options:'), _('Options to control the transformation of pdf')) + parser.add_option_group(group) + add_option = group.add_option + + for rec in OPTIONS: + option_recommendation_to_cli_option(add_option, rec) def reverse(pdf_path, out_path, metadata=None): if metadata == None: @@ -63,20 +85,22 @@ def valid_pdf(pdf_path): return False return True - def main(args=sys.argv, name=''): + log = Log() parser = option_parser(name) + add_options(parser) + opts, args = parser.parse_args(args) args = args[1:] if len(args) < 1: - print 'Error: A PDF file is required.\n\n' - print parser.get_usage() - return 2 + print 'Error: A PDF file is required.\n' + print_help(parser, log) + return 1 if not valid_pdf(args[0]): print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % args[0] - return 2 + return 1 mi = metadata_from_formats([args[0]]) diff --git a/src/calibre/ebooks/pdf/manipulate/split.py b/src/calibre/ebooks/pdf/manipulate/split.py index cc6965dd68..8996a4cb6b 100644 --- a/src/calibre/ebooks/pdf/manipulate/split.py +++ b/src/calibre/ebooks/pdf/manipulate/split.py @@ -1,46 +1,68 @@ -''' -Split PDF file into multiple PDF documents. -''' +# -*- coding: utf-8 -*- from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' __docformat__ = 'restructuredtext en' +''' +Split PDF file into multiple PDF documents. +''' + import os, sys, re +from optparse import OptionGroup, Option from calibre.ebooks.metadata.meta import metadata_from_formats from calibre.ebooks.metadata import authors_to_string -from calibre.utils.config import Config, StringConfig +from calibre.utils.config import OptionParser +from calibre.utils.logging import Log +from calibre.constants import preferred_encoding +from calibre.customize.conversion import OptionRecommendation from pyPdf import PdfFileWriter, PdfFileReader -def config(defaults=None): - desc = _('Options to control the transformation of pdf') - if defaults is None: - c = Config('splitpdf', desc) - else: - c = StringConfig(defaults, desc) - c.add_opt('output', ['-o', '--output'], default='split.pdf', - help=_('Path to output file. By default a file is created in the current directory. \ - The file name will be the base name for the output.')) - return c +USAGE = _(''' +%prog %%name [options] file.pdf page_to_split_on ... +%prog %%name [options] file.pdf page_range_to_split_on ... + +Ex. + +%prog %%name file.pdf 6 +%prog %%name file.pdf 6-12 +%prog %%name file.pdf 6-12 8 10 9-20 + +Split a PDF. +''') + +OPTIONS = set([ + OptionRecommendation(name='output', recommended_value='split.pdf', + level=OptionRecommendation.HIGH, long_switch='output', short_switch='o', + help=_('Path to output file. By default a file is created in the current directory.')), +]) + +def print_help(parser, log): + help = parser.format_help().encode(preferred_encoding, 'replace') + log(help) def option_parser(name): - c = config() - return c.option_parser(usage=_('''\ - - %prog %%name [options] file.pdf page_to_split_on ... - %prog %%name [options] file.pdf page_range_to_split_on ... - - Ex. - - %prog %%name file.pdf 6 - %prog %%name file.pdf 6-12 - %prog %%name file.pdf 6-12 8 10 9-20 + usage = USAGE.replace('%%name', name) + return OptionParser(usage=usage) - Split a PDF. - '''.replace('%%name', name))) +def option_recommendation_to_cli_option(add_option, rec): + opt = rec.option + switches = ['-'+opt.short_switch] if opt.short_switch else [] + switches.append('--'+opt.long_switch) + attrs = dict(dest=opt.name, help=opt.help, + choices=opt.choices, default=rec.recommended_value) + add_option(Option(*switches, **attrs)) + +def add_options(parser): + group = OptionGroup(parser, _('Split Options:'), _('Options to control the transformation of pdf')) + parser.add_option_group(group) + add_option = group.add_option + + for rec in OPTIONS: + option_recommendation_to_cli_option(add_option, rec) def split_pdf(in_path, pages, page_ranges, out_name, metadata=None): pdf = PdfFileReader(open(os.path.abspath(in_path), 'rb')) @@ -153,25 +175,28 @@ def valid_pdf(pdf_path): return True def main(args=sys.argv, name=''): + log = Log() parser = option_parser(name) + add_options(parser) + opts, args = parser.parse_args(args) pdf, pages, page_ranges, unknown = split_args(args[1:]) if pdf == '' and (pages == [] or page_ranges == []): - print 'Error: PDF and where to split is required.\n\n' - print parser.get_usage() - return 2 + print 'Error: PDF and where to split is required.\n' + print_help(parser, log) + return 1 if unknown != []: for arg in unknown: print 'Error: Unknown argument `%s`' % arg - print parser.get_usage() - return 2 + print_help(parser, log) + return 1 if not valid_pdf(pdf): print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf - return 2 + return 1 pages, page_ranges = clean_page_list(pdf, pages, page_ranges) @@ -183,4 +208,3 @@ def main(args=sys.argv, name=''): if __name__ == '__main__': sys.exit(main()) - diff --git a/src/calibre/ebooks/pdf/manipulate/trim.py b/src/calibre/ebooks/pdf/manipulate/trim.py deleted file mode 100644 index b32312fee8..0000000000 --- a/src/calibre/ebooks/pdf/manipulate/trim.py +++ /dev/null @@ -1,93 +0,0 @@ -from __future__ import with_statement -__license__ = 'GPL v3' -__copyright__ = '2009, James Beal, james_@catbus.co.uk' -__docformat__ = 'restructuredtext en' - -'crop a pdf file' - -import os, sys, re -from calibre.utils.config import Config, StringConfig -from pyPdf import PdfFileWriter, PdfFileReader - -def config(defaults=None): - desc = _('Options to control the transformation of pdf') - default_crop=10 - if defaults is None: - c = Config('trimpdf', desc) - else: - c = StringConfig(defaults, desc) - c.add_opt('output', ['-o', '--output'],default='cropped.pdf', - help=_('Path to output file. By default a file is created in the current directory.')) - c.add_opt('bottom_left_x', [ '-x', '--leftx'], default=default_crop, - help=_('Number of pixels to crop from the left most x (default is %d) ')%default_crop ) - c.add_opt('bottom_left_y', [ '-y', '--lefty'], default=default_crop, - help=_('Number of pixels to crop from the left most y (default is %d) ')%default_crop ) - c.add_opt('top_right_x', [ '-v', '--rightx'], default=default_crop, - help=_('Number of pixels to crop from the right most x (default is %d) ')%default_crop ) - c.add_opt('top_right_y', [ '-w', '--righty'], default=default_crop, - help=_('Number of pixels to crop from the right most y (default is %d)')%default_crop ) - c.add_opt('bounding', ['-b', '--bounding'], - help=_('A file generated by ghostscript which allows each page to be individually cropped [gs -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox > bounding] ')) - return c - - -def option_parser(name): - c = config() - return c.option_parser(usage=_('''\ - %prog %%name [options] file.pdf - - Crops a pdf. - '''.replace('%%name', name))) - -def main(args=sys.argv, name=''): - parser = option_parser(name) - opts, args = parser.parse_args(args) - try: - source = os.path.abspath(args[1]) - input_pdf = PdfFileReader(file(source, "rb")) - except: - print "Unable to read input" - return 2 - title = _('Unknown') - author = _('Unknown') - try: - info = input_pdf.getDocumentInfo() - if info.title: - title = info.title - if info.author: - author = info.author - except: - pass - if opts.bounding != None: - try: - bounding = open( opts.bounding , 'r' ) - bounding_regex= re.compile('%%BoundingBox: (?P[0-9]+) (?P[0-9]+) (?P[0-9]+) (?P[0-9]+)') - except: - print 'Error opening %s' % opts.bounding - return 1 - output_pdf = PdfFileWriter(title=title,author=author) - for page_number in range (0, input_pdf.getNumPages() ): - page = input_pdf.getPage(page_number) - if opts.bounding != None: - while True: - line=bounding.readline() - match=bounding_regex.search(line) - if match !=None: - break - page.mediaBox.upperRight = (match.group('top_x'),match.group('top_y')) - page.mediaBox.lowerLeft = (match.group('bottom_x'),match.group('bottom_y')) - else: - page.mediaBox.upperRight = (page.bleedBox.getUpperRight_x()-opts.top_right_x,page.bleedBox.getUpperRight_y()-opts.top_right_y) - page.mediaBox.lowerLeft = (page.bleedBox.getLowerLeft_x()+opts.bottom_left_x,page.bleedBox.getLowerLeft_y()+opts.bottom_left_y) - output_pdf.addPage(page) - if opts.bounding != None: - bounding.close() - output_file = file(opts.output, "wb") - output_pdf.write(output_file) - output_file.close() - - - return 0 - -if __name__ == '__main__': - sys.exit(main()) diff --git a/src/calibre/ebooks/pdf/pdftohtml.py b/src/calibre/ebooks/pdf/pdftohtml.py index 27cdb3f691..e7707479c3 100644 --- a/src/calibre/ebooks/pdf/pdftohtml.py +++ b/src/calibre/ebooks/pdf/pdftohtml.py @@ -2,8 +2,8 @@ from __future__ import with_statement __license__ = 'GPL 3' -__copyright__ = '2008, Kovid Goyal \ - 2009, John Schember ' +__copyright__ = '2008, Kovid Goyal , ' \ + '2009, John Schember ' __docformat__ = 'restructuredtext en' import errno, os, sys, subprocess From 697eabe9ae74a897f1ca2026fb9d0e45b8caf7ce Mon Sep 17 00:00:00 2001 From: John Schember Date: Fri, 3 Apr 2009 22:05:41 -0400 Subject: [PATCH 3/3] Refactor pdf manipulate commands --- src/calibre/ebooks/pdf/manipulate/crop.py | 14 ++------ src/calibre/ebooks/pdf/manipulate/info.py | 14 ++------ src/calibre/ebooks/pdf/manipulate/merge.py | 16 ++------- src/calibre/ebooks/pdf/manipulate/reverse.py | 14 ++------ src/calibre/ebooks/pdf/manipulate/split.py | 14 ++------ src/calibre/ebooks/pdf/verify.py | 37 ++++++++++++++++++++ 6 files changed, 47 insertions(+), 62 deletions(-) create mode 100644 src/calibre/ebooks/pdf/verify.py diff --git a/src/calibre/ebooks/pdf/manipulate/crop.py b/src/calibre/ebooks/pdf/manipulate/crop.py index c3eb70c56d..fa996b754f 100644 --- a/src/calibre/ebooks/pdf/manipulate/crop.py +++ b/src/calibre/ebooks/pdf/manipulate/crop.py @@ -19,6 +19,7 @@ from calibre.utils.config import OptionParser from calibre.utils.logging import Log from calibre.constants import preferred_encoding from calibre.customize.conversion import OptionRecommendation +from calibre.ebooks.pdf.verify import is_valid_pdf from pyPdf import PdfFileWriter, PdfFileReader @@ -116,17 +117,6 @@ def crop_pdf(pdf_path, opts, metadata=None): with open(opts.output, 'wb') as output_file: output_pdf.write(output_file) - -# Return True if the pdf is valid. -def valid_pdf(pdf_path): - try: - with open(os.path.abspath(pdf_path), 'rb') as pdf_file: - pdf = PdfFileReader(pdf_file) - if pdf.isEncrypted or pdf.numPages <= 0: - raise Exception - except: - return False - return True def main(args=sys.argv, name=''): log = Log() @@ -141,7 +131,7 @@ def main(args=sys.argv, name=''): print_help(parser, log) return 1 - if not valid_pdf(args[0]): + if not is_valid_pdf(args[0]): print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % args[0] return 1 diff --git a/src/calibre/ebooks/pdf/manipulate/info.py b/src/calibre/ebooks/pdf/manipulate/info.py index 4aff524330..21a07fdeff 100644 --- a/src/calibre/ebooks/pdf/manipulate/info.py +++ b/src/calibre/ebooks/pdf/manipulate/info.py @@ -16,6 +16,7 @@ from calibre.utils.config import OptionParser from calibre.utils.logging import Log from calibre.constants import preferred_encoding from calibre.customize.conversion import OptionRecommendation +from calibre.ebooks.pdf.verify import is_valid_pdfs from pyPdf import PdfFileWriter, PdfFileReader @@ -56,17 +57,6 @@ def print_info(pdf_path): print _('PDF Version: %s' % mo.group('version')) except: pass -def verify_files(files): - invalid = [] - - for pdf_path in files: - try: - with open(os.path.abspath(pdf_path), 'rb') as pdf_file: - pdf = PdfFileReader(pdf_file) - except: - invalid.append(pdf_path) - return invalid - def main(args=sys.argv, name=''): log = Log() parser = option_parser(name) @@ -79,7 +69,7 @@ def main(args=sys.argv, name=''): print_help(parser, log) return 1 - bad_pdfs = verify_files(args) + bad_pdfs = is_valid_pdfs(args) if bad_pdfs != []: for pdf in bad_pdfs: print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf diff --git a/src/calibre/ebooks/pdf/manipulate/merge.py b/src/calibre/ebooks/pdf/manipulate/merge.py index f0ecb9bd7a..1e285e3bdf 100644 --- a/src/calibre/ebooks/pdf/manipulate/merge.py +++ b/src/calibre/ebooks/pdf/manipulate/merge.py @@ -18,6 +18,7 @@ from calibre.utils.config import OptionParser from calibre.utils.logging import Log from calibre.constants import preferred_encoding from calibre.customize.conversion import OptionRecommendation +from calibre.ebooks.pdf.verify import is_valid_pdfs from pyPdf import PdfFileWriter, PdfFileReader @@ -76,19 +77,6 @@ def merge_files(in_paths, out_path, metadata=None): with open(out_path, 'wb') as out_file: out_pdf.write(out_file) - -def verify_files(files): - invalid = [] - - for pdf_path in files: - try: - with open(os.path.abspath(pdf_path), 'rb') as pdf_file: - pdf = PdfFileReader(pdf_file) - if pdf.isEncrypted or pdf.numPages <= 0: - raise Exception - except: - invalid.append(pdf_path) - return invalid def main(args=sys.argv, name=''): log = Log() @@ -103,7 +91,7 @@ def main(args=sys.argv, name=''): print_help(parser, log) return 1 - bad_pdfs = verify_files(args) + bad_pdfs = is_valid_pdfs(args) if bad_pdfs != []: for pdf in bad_pdfs: print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf diff --git a/src/calibre/ebooks/pdf/manipulate/reverse.py b/src/calibre/ebooks/pdf/manipulate/reverse.py index 189cbf009b..564e523ae3 100644 --- a/src/calibre/ebooks/pdf/manipulate/reverse.py +++ b/src/calibre/ebooks/pdf/manipulate/reverse.py @@ -18,6 +18,7 @@ from calibre.utils.config import OptionParser from calibre.utils.logging import Log from calibre.constants import preferred_encoding from calibre.customize.conversion import OptionRecommendation +from calibre.ebooks.pdf.verify import is_valid_pdf from pyPdf import PdfFileWriter, PdfFileReader @@ -74,17 +75,6 @@ def reverse(pdf_path, out_path, metadata=None): with open(out_path, 'wb') as out_file: out_pdf.write(out_file) -# Return True if the pdf is valid. -def valid_pdf(pdf_path): - try: - with open(os.path.abspath(pdf_path), 'rb') as pdf_file: - pdf = PdfFileReader(pdf_file) - if pdf.isEncrypted or pdf.numPages <= 0: - raise Exception - except: - return False - return True - def main(args=sys.argv, name=''): log = Log() parser = option_parser(name) @@ -98,7 +88,7 @@ def main(args=sys.argv, name=''): print_help(parser, log) return 1 - if not valid_pdf(args[0]): + if not is_valid_pdf(args[0]): print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % args[0] return 1 diff --git a/src/calibre/ebooks/pdf/manipulate/split.py b/src/calibre/ebooks/pdf/manipulate/split.py index 8996a4cb6b..fb7e4d06d7 100644 --- a/src/calibre/ebooks/pdf/manipulate/split.py +++ b/src/calibre/ebooks/pdf/manipulate/split.py @@ -18,6 +18,7 @@ from calibre.utils.config import OptionParser from calibre.utils.logging import Log from calibre.constants import preferred_encoding from calibre.customize.conversion import OptionRecommendation +from calibre.ebooks.pdf.verify import is_valid_pdf from pyPdf import PdfFileWriter, PdfFileReader @@ -163,17 +164,6 @@ def clean_page_list(pdf_path, pages, page_ranges): return pages, page_ranges -# Return True if the pdf is valid. -def valid_pdf(pdf_path): - try: - with open(os.path.abspath(pdf_path), 'rb') as pdf_file: - pdf = PdfFileReader(pdf_file) - if pdf.isEncrypted or pdf.numPages <= 0: - raise Exception - except: - return False - return True - def main(args=sys.argv, name=''): log = Log() parser = option_parser(name) @@ -194,7 +184,7 @@ def main(args=sys.argv, name=''): print_help(parser, log) return 1 - if not valid_pdf(pdf): + if not is_valid_pdf(pdf): print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf return 1 diff --git a/src/calibre/ebooks/pdf/verify.py b/src/calibre/ebooks/pdf/verify.py new file mode 100644 index 0000000000..35f7edf0be --- /dev/null +++ b/src/calibre/ebooks/pdf/verify.py @@ -0,0 +1,37 @@ +from __future__ import with_statement +# -*- coding: utf-8 -*- + +__license__ = 'GPL v3' +__copyright__ = '2009, John Schember ' +__docformat__ = 'restructuredtext en' + +''' +Verify PDF files. +''' + +import os + +from pyPdf import PdfFileWriter, PdfFileReader + +def is_valid_pdf(pdf_path): + ''' + Returns True if the pdf file is valid. + ''' + + try: + with open(os.path.abspath(pdf_path), 'rb') as pdf_file: + pdf = PdfFileReader(pdf_file) + except: + return False + return True + +def is_valid_pdfs(pdf_paths): + ''' + Returns a list of invalid pdf files. + ''' + + invalid = [] + for pdf_path in pdf_paths: + if not is_valid_pdf(pdf_path): + invalid.append(pdf_path) + return invalid