mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
pdfmanipulate moved to new command line option framework
This commit is contained in:
parent
64683d4bc3
commit
7f5a619ad9
@ -1,3 +1,4 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
|
0
src/calibre/ebooks/pdf/manipulate/__init__.py
Normal file
0
src/calibre/ebooks/pdf/manipulate/__init__.py
Normal file
@ -1,69 +1,69 @@
|
||||
'''
|
||||
Command line interface to run pdf manipulation commands.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Command line interface to run pdf manipulation commands.
|
||||
'''
|
||||
|
||||
import string, sys
|
||||
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
from calibre.ebooks.pdf.manipulate import info, merge, reverse, split, trim
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.utils.logging import Log
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
from calibre.ebooks.pdf.manipulate import crop, info, merge, reverse, split
|
||||
|
||||
COMMANDS = {
|
||||
'crop' : crop,
|
||||
'info' : info,
|
||||
'merge' : merge,
|
||||
'reverse' : reverse,
|
||||
'split' : split,
|
||||
'trim' : trim,
|
||||
}
|
||||
|
||||
def config(defaults=None):
|
||||
desc = _('Options to control the transformation of pdf')
|
||||
if defaults is None:
|
||||
c = Config('manipulatepdf', desc)
|
||||
else:
|
||||
c = StringConfig(defaults, desc)
|
||||
return c
|
||||
USAGE = '%prog ' + _('''command ...
|
||||
|
||||
command can be one of the following:
|
||||
[%%commands]
|
||||
|
||||
Use %prog command --help to get more information about a specific command
|
||||
|
||||
Manipulate a PDF.
|
||||
'''.replace('%%commands', string.join(sorted(COMMANDS.keys()), ', ')))
|
||||
|
||||
def print_help(parser, log):
|
||||
help = parser.format_help().encode(preferred_encoding, 'replace')
|
||||
log(help)
|
||||
|
||||
def option_parser():
|
||||
c = config()
|
||||
return c.option_parser(usage=_('''\
|
||||
|
||||
%prog command ...
|
||||
|
||||
command can be one of the following:
|
||||
[%%commands]
|
||||
|
||||
Use %prog command --help to get more information about a specific command
|
||||
|
||||
Manipulate a PDF.
|
||||
'''.replace('%%commands', string.join(sorted(COMMANDS.keys()), ', '))))
|
||||
return OptionParser(usage=USAGE)
|
||||
|
||||
def main(args=sys.argv):
|
||||
log = Log()
|
||||
parser = option_parser()
|
||||
|
||||
if len(args) < 2:
|
||||
print 'Error: No command sepecified.\n'
|
||||
print parser.get_usage()
|
||||
return 2
|
||||
print_help(parser, log)
|
||||
return 1
|
||||
|
||||
command = args[1].lower().strip()
|
||||
|
||||
if command in COMMANDS.keys():
|
||||
if command in COMMANDS.keys():
|
||||
del args[1]
|
||||
return COMMANDS[command].main(args, command)
|
||||
else:
|
||||
parser.parse_args(args)
|
||||
print 'Unknown command %s.\n' % command
|
||||
print parser.get_usage()
|
||||
return 2
|
||||
print_help(parser, log)
|
||||
return 1
|
||||
|
||||
# We should never get here.
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
||||
|
155
src/calibre/ebooks/pdf/manipulate/crop.py
Normal file
155
src/calibre/ebooks/pdf/manipulate/crop.py
Normal file
@ -0,0 +1,155 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, James Beal <james_@catbus.co.uk>, ' \
|
||||
'2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Crop a pdf file
|
||||
'''
|
||||
|
||||
import os, sys, re
|
||||
from optparse import OptionGroup, Option
|
||||
|
||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.utils.logging import Log
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
DEFAULT_CROP = '10'
|
||||
|
||||
USAGE = '%prog %%name ' + _('''
|
||||
[options] file.pdf
|
||||
|
||||
Crop a PDF file.
|
||||
''')
|
||||
|
||||
# Command line options understood by the crop command. add_options() turns
# each entry into an optparse option.
#
# The default value is interpolated into the help text *after* translation so
# that the translation lookup is done on the constant template string.
OPTIONS = set([
    OptionRecommendation(name='output', recommended_value='cropped.pdf',
        level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
        help=_('Path to output file. By default a file is created in the current directory.')),
    OptionRecommendation(name='bottom_left_x', recommended_value=DEFAULT_CROP,
        level=OptionRecommendation.LOW, long_switch='leftx', short_switch='x',
        help=_('Number of pixels to crop from the left most x (default is %s) ') % DEFAULT_CROP),
    OptionRecommendation(name='bottom_left_y', recommended_value=DEFAULT_CROP,
        level=OptionRecommendation.LOW, long_switch='lefty', short_switch='y',
        help=_('Number of pixels to crop from the left most y (default is %s) ') % DEFAULT_CROP),
    OptionRecommendation(name='top_right_x', recommended_value=DEFAULT_CROP,
        level=OptionRecommendation.LOW, long_switch='rightx', short_switch='v',
        help=_('Number of pixels to crop from the right most x (default is %s) ') % DEFAULT_CROP),
    # The long switch must not contain whitespace: it is prefixed with '--'
    # and used verbatim as the command line flag.
    OptionRecommendation(name='top_right_y', recommended_value=DEFAULT_CROP,
        level=OptionRecommendation.LOW, long_switch='righty', short_switch='w',
        help=_('Number of pixels to crop from the right most y (default is %s)') % DEFAULT_CROP),
    OptionRecommendation(name='bounding', recommended_value=None,
        level=OptionRecommendation.LOW, long_switch='bounding', short_switch='b',
        help=_('A file generated by ghostscript which allows each page to be individually cropped `gs -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox file.pdf 2> bounding`')),
    ])
|
||||
|
||||
def print_help(parser, log):
    '''
    Send the formatted help text of ``parser`` to ``log``.

    The text is encoded with ``preferred_encoding``; characters that can
    not be represented in that encoding are replaced.
    '''
    log(parser.format_help().encode(preferred_encoding, 'replace'))
|
||||
|
||||
def option_parser(name):
    '''
    Create the option parser for this command.

    ``name`` replaces the ``%%name`` placeholder in the usage text.
    '''
    return OptionParser(usage=USAGE.replace('%%name', name))
|
||||
|
||||
def option_recommendation_to_cli_option(add_option, rec):
    '''
    Build an optparse ``Option`` from the recommendation ``rec`` and pass
    it to the ``add_option`` callable.

    The short switch is only included when the recommendation defines
    one; the recommended value becomes the option's default.
    '''
    option = rec.option

    flags = []
    if option.short_switch:
        flags.append('-' + option.short_switch)
    flags.append('--' + option.long_switch)

    add_option(Option(*flags, dest=option.name, help=option.help,
                      choices=option.choices,
                      default=rec.recommended_value))
|
||||
|
||||
def add_options(parser):
    '''
    Add a "Crop Options:" group to ``parser`` and register every entry
    of ``OPTIONS`` in that group.
    '''
    crop_group = OptionGroup(parser, _('Crop Options:'), _('Options to control the transformation of pdf'))
    parser.add_option_group(crop_group)

    register = crop_group.add_option
    for recommendation in OPTIONS:
        option_recommendation_to_cli_option(register, recommendation)
|
||||
|
||||
def _read_bounding_lines(path):
    '''
    Return the lines of the file at ``path`` that start with
    ``%%BoundingBox:``.

    Raises ``Exception`` when the file can not be read.
    '''
    try:
        with open(path, 'r') as bounding_file:
            return [line for line in bounding_file
                    if line.startswith('%%BoundingBox:')]
    except IOError:
        raise Exception('Error reading %s' % path)


def crop_pdf(pdf_path, opts, metadata=None):
    '''
    Crop every page of the PDF at ``pdf_path`` and write the result to
    ``opts.output``.

    When ``opts.bounding`` is set, it names a file with one
    ``%%BoundingBox:`` line per page and each page's media box is set
    from the matching line. Otherwise the media box is the page's bleed
    box shrunk by ``opts.bottom_left_x``, ``opts.bottom_left_y``,
    ``opts.top_right_x`` and ``opts.top_right_y``.

    ``metadata`` supplies the title and authors of the output document;
    when it is None both are set to the translated string 'Unknown'.

    Raises ``Exception`` when the bounding file can not be read, when its
    bounding box count differs from the page count, or when one of its
    bounding box lines can not be parsed. A non numeric margin raises
    ``decimal.InvalidOperation``.
    '''
    from decimal import Decimal

    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    # The input stays open until the output has been written; the original
    # code never closed it at all.
    with open(pdf_path, 'rb') as pdf_file:
        input_pdf = PdfFileReader(pdf_file)

        bounding_lines = []
        if opts.bounding is not None:
            bounding_lines = _read_bounding_lines(opts.bounding)
            if len(bounding_lines) != input_pdf.numPages:
                raise Exception('Error bounding file %s page count does not correspond to specified pdf' % opts.bounding)

        bounding_regex = re.compile(r'%%BoundingBox: (?P<bottom_x>\d+) (?P<bottom_y>\d+) (?P<top_x>\d+) (?P<top_y>\d+)')

        output_pdf = PdfFileWriter(title=title, author=author)
        for index, page in enumerate(input_pdf.pages):
            if bounding_lines:
                mo = bounding_regex.search(bounding_lines[index])
                if mo is None:
                    raise Exception('Error in bounding file %s' % opts.bounding)
                page.mediaBox.upperRight = (mo.group('top_x'), mo.group('top_y'))
                page.mediaBox.lowerLeft = (mo.group('bottom_x'), mo.group('bottom_y'))
            else:
                # Option values arrive from the command line as strings, so
                # they are converted before doing arithmetic with them.
                # NOTE(review): Decimal is used on the assumption that pyPdf
                # coordinates are int or Decimal based - confirm.
                left = Decimal(str(opts.bottom_left_x))
                bottom = Decimal(str(opts.bottom_left_y))
                right = Decimal(str(opts.top_right_x))
                top = Decimal(str(opts.top_right_y))
                page.mediaBox.upperRight = (page.bleedBox.getUpperRight_x() - right, page.bleedBox.getUpperRight_y() - top)
                page.mediaBox.lowerLeft = (page.bleedBox.getLowerLeft_x() + left, page.bleedBox.getLowerLeft_y() + bottom)
            output_pdf.addPage(page)

        with open(opts.output, 'wb') as output_file:
            output_pdf.write(output_file)
|
||||
|
||||
# Return True if the pdf is valid.
def valid_pdf(pdf_path):
    '''
    Check whether the file at ``pdf_path`` can be used as input.

    Returns True when the file can be opened and parsed, is not encrypted
    and has at least one page. Any error while opening or parsing the
    file yields False.
    '''
    try:
        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
            pdf = PdfFileReader(pdf_file)
            return not pdf.isEncrypted and pdf.numPages > 0
    except Exception:
        # Deliberately broad: every read or parse failure means "not a
        # usable pdf". KeyboardInterrupt and SystemExit still propagate.
        return False
|
||||
|
||||
def main(args=sys.argv, name=''):
    '''
    Command line entry point of the crop command.

    ``args`` is the argument list including the program name and
    ``name`` is the command name shown in the usage text. Returns 0
    after cropping, or 1 when no PDF was given or the PDF can not be
    read.
    '''
    log = Log()
    parser = option_parser(name)
    add_options(parser)

    opts, positional = parser.parse_args(args)
    # The first positional argument is the program name.
    pdf_files = positional[1:]

    if not pdf_files:
        print('Error: A PDF file is required.\n')
        print_help(parser, log)
        return 1

    pdf_path = pdf_files[0]
    if not valid_pdf(pdf_path):
        print('Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf_path)
        return 1

    metadata = metadata_from_formats([pdf_path])
    crop_pdf(pdf_path, opts, metadata)

    return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
@ -1,34 +1,37 @@
|
||||
'''
|
||||
Merge PDF files into a single PDF document.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, re, sys, time
|
||||
'''
|
||||
Merge PDF files into a single PDF document.
|
||||
'''
|
||||
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
import os, re, sys, time
|
||||
from optparse import OptionGroup, Option
|
||||
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.utils.logging import Log
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
USAGE = '%prog %%name ' + _('''
|
||||
file.pdf ...
|
||||
|
||||
def config(defaults=None):
|
||||
desc = _('Options to control the transformation of pdf')
|
||||
if defaults is None:
|
||||
c = Config('manipulatepdf', desc)
|
||||
else:
|
||||
c = StringConfig(defaults, desc)
|
||||
return c
|
||||
Get info about a PDF.
|
||||
''')
|
||||
|
||||
def print_help(parser, log):
|
||||
help = parser.format_help().encode(preferred_encoding, 'replace')
|
||||
log(help)
|
||||
|
||||
def option_parser(name):
|
||||
c = config()
|
||||
return c.option_parser(usage=_('''\
|
||||
%prog %%name [options] file.pdf ...
|
||||
|
||||
Get info about a PDF.
|
||||
'''.replace('%%name', name)))
|
||||
usage = USAGE.replace('%%name', name)
|
||||
return OptionParser(usage=usage)
|
||||
|
||||
def print_info(pdf_path):
|
||||
with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
|
||||
@ -65,20 +68,22 @@ def verify_files(files):
|
||||
return invalid
|
||||
|
||||
def main(args=sys.argv, name=''):
|
||||
log = Log()
|
||||
parser = option_parser(name)
|
||||
|
||||
opts, args = parser.parse_args(args)
|
||||
args = args[1:]
|
||||
|
||||
if len(args) < 1:
|
||||
print 'Error: No PDF sepecified.\n'
|
||||
print parser.get_usage()
|
||||
return 2
|
||||
print_help(parser, log)
|
||||
return 1
|
||||
|
||||
bad_pdfs = verify_files(args)
|
||||
if bad_pdfs != []:
|
||||
for pdf in bad_pdfs:
|
||||
print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf
|
||||
return 2
|
||||
return 1
|
||||
|
||||
for pdf in args:
|
||||
print_info(pdf)
|
||||
@ -87,4 +92,3 @@ def main(args=sys.argv, name=''):
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
||||
|
@ -1,37 +1,63 @@
|
||||
'''
|
||||
Merge PDF files into a single PDF document.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Merge PDF files into a single PDF document.
|
||||
'''
|
||||
|
||||
import os, sys
|
||||
from optparse import OptionGroup, Option
|
||||
|
||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.utils.logging import Log
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
def config(defaults=None):
|
||||
desc = _('Options to control the transformation of pdf')
|
||||
if defaults is None:
|
||||
c = Config('mergepdf', desc)
|
||||
else:
|
||||
c = StringConfig(defaults, desc)
|
||||
c.add_opt('output', ['-o', '--output'], default='merged.pdf',
|
||||
help=_('Path to output file. By default a file is created in the current directory.'))
|
||||
return c
|
||||
USAGE = '%prog %%name ' + _('''
|
||||
[options] file1.pdf file2.pdf ...
|
||||
|
||||
Metadata will be used from the first PDF specified.
|
||||
|
||||
Merges individual PDFs.
|
||||
''')
|
||||
|
||||
OPTIONS = set([
|
||||
OptionRecommendation(name='output', recommended_value='merged.pdf',
|
||||
level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
|
||||
help=_('Path to output file. By default a file is created in the current directory.')),
|
||||
])
|
||||
|
||||
def print_help(parser, log):
|
||||
help = parser.format_help().encode(preferred_encoding, 'replace')
|
||||
log(help)
|
||||
|
||||
def option_parser(name):
|
||||
c = config()
|
||||
return c.option_parser(usage=_('''\
|
||||
%prog %%name [options] file1.pdf file2.pdf ...
|
||||
usage = USAGE.replace('%%name', name)
|
||||
return OptionParser(usage=usage)
|
||||
|
||||
Merges individual PDFs. Metadata will be used from the first PDF specified.
|
||||
'''.replace('%%name', name)))
|
||||
def option_recommendation_to_cli_option(add_option, rec):
|
||||
opt = rec.option
|
||||
switches = ['-'+opt.short_switch] if opt.short_switch else []
|
||||
switches.append('--'+opt.long_switch)
|
||||
attrs = dict(dest=opt.name, help=opt.help,
|
||||
choices=opt.choices, default=rec.recommended_value)
|
||||
add_option(Option(*switches, **attrs))
|
||||
|
||||
def add_options(parser):
|
||||
group = OptionGroup(parser, _('Merge Options:'), _('Options to control the transformation of pdf'))
|
||||
parser.add_option_group(group)
|
||||
add_option = group.add_option
|
||||
|
||||
for rec in OPTIONS:
|
||||
option_recommendation_to_cli_option(add_option, rec)
|
||||
|
||||
def merge_files(in_paths, out_path, metadata=None):
|
||||
if metadata == None:
|
||||
@ -65,20 +91,23 @@ def verify_files(files):
|
||||
return invalid
|
||||
|
||||
def main(args=sys.argv, name=''):
|
||||
log = Log()
|
||||
parser = option_parser(name)
|
||||
add_options(parser)
|
||||
|
||||
opts, args = parser.parse_args(args)
|
||||
args = args[1:]
|
||||
|
||||
if len(args) < 2:
|
||||
print 'Error: Two or more PDF files are required.\n\n'
|
||||
print parser.get_usage()
|
||||
return 2
|
||||
print 'Error: Two or more PDF files are required.\n'
|
||||
print_help(parser, log)
|
||||
return 1
|
||||
|
||||
bad_pdfs = verify_files(args)
|
||||
if bad_pdfs != []:
|
||||
for pdf in bad_pdfs:
|
||||
print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf
|
||||
return 2
|
||||
return 1
|
||||
|
||||
mi = metadata_from_formats([args[0]])
|
||||
|
||||
@ -88,4 +117,3 @@ def main(args=sys.argv, name=''):
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
||||
|
@ -10,30 +10,52 @@ Reverse content of PDF.
|
||||
'''
|
||||
|
||||
import os, sys
|
||||
from optparse import OptionGroup, Option
|
||||
|
||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.utils.logging import Log
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
def config(defaults=None):
|
||||
desc = _('Options to control the transformation of pdf')
|
||||
if defaults is None:
|
||||
c = Config('reversepdf', desc)
|
||||
else:
|
||||
c = StringConfig(defaults, desc)
|
||||
c.add_opt('output', ['-o', '--output'], default='reversed.pdf',
|
||||
help=_('Path to output file. By default a file is created in the current directory.'))
|
||||
return c
|
||||
USAGE = '%prog %%name ' + _('''
|
||||
[options] file.pdf
|
||||
|
||||
Reverse PDF.
|
||||
''')
|
||||
|
||||
OPTIONS = set([
|
||||
OptionRecommendation(name='output', recommended_value='reversed.pdf',
|
||||
level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
|
||||
help=_('Path to output file. By default a file is created in the current directory.')),
|
||||
])
|
||||
|
||||
def print_help(parser, log):
|
||||
help = parser.format_help().encode(preferred_encoding, 'replace')
|
||||
log(help)
|
||||
|
||||
def option_parser(name):
|
||||
c = config()
|
||||
return c.option_parser(usage=_('''\
|
||||
%prog %%name [options] file1.pdf
|
||||
usage = USAGE.replace('%%name', name)
|
||||
return OptionParser(usage=usage)
|
||||
|
||||
Reverse PDF.
|
||||
'''.replace('%%name', name)))
|
||||
def option_recommendation_to_cli_option(add_option, rec):
|
||||
opt = rec.option
|
||||
switches = ['-'+opt.short_switch] if opt.short_switch else []
|
||||
switches.append('--'+opt.long_switch)
|
||||
attrs = dict(dest=opt.name, help=opt.help,
|
||||
choices=opt.choices, default=rec.recommended_value)
|
||||
add_option(Option(*switches, **attrs))
|
||||
|
||||
def add_options(parser):
|
||||
group = OptionGroup(parser, _('Reverse Options:'), _('Options to control the transformation of pdf'))
|
||||
parser.add_option_group(group)
|
||||
add_option = group.add_option
|
||||
|
||||
for rec in OPTIONS:
|
||||
option_recommendation_to_cli_option(add_option, rec)
|
||||
|
||||
def reverse(pdf_path, out_path, metadata=None):
|
||||
if metadata == None:
|
||||
@ -63,20 +85,22 @@ def valid_pdf(pdf_path):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def main(args=sys.argv, name=''):
|
||||
log = Log()
|
||||
parser = option_parser(name)
|
||||
add_options(parser)
|
||||
|
||||
opts, args = parser.parse_args(args)
|
||||
args = args[1:]
|
||||
|
||||
if len(args) < 1:
|
||||
print 'Error: A PDF file is required.\n\n'
|
||||
print parser.get_usage()
|
||||
return 2
|
||||
print 'Error: A PDF file is required.\n'
|
||||
print_help(parser, log)
|
||||
return 1
|
||||
|
||||
if not valid_pdf(args[0]):
|
||||
print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % args[0]
|
||||
return 2
|
||||
return 1
|
||||
|
||||
mi = metadata_from_formats([args[0]])
|
||||
|
||||
|
@ -1,46 +1,68 @@
|
||||
'''
|
||||
Split PDF file into multiple PDF documents.
|
||||
'''
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Split PDF file into multiple PDF documents.
|
||||
'''
|
||||
|
||||
import os, sys, re
|
||||
from optparse import OptionGroup, Option
|
||||
|
||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.utils.logging import Log
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
def config(defaults=None):
|
||||
desc = _('Options to control the transformation of pdf')
|
||||
if defaults is None:
|
||||
c = Config('splitpdf', desc)
|
||||
else:
|
||||
c = StringConfig(defaults, desc)
|
||||
c.add_opt('output', ['-o', '--output'], default='split.pdf',
|
||||
help=_('Path to output file. By default a file is created in the current directory. \
|
||||
The file name will be the base name for the output.'))
|
||||
return c
|
||||
USAGE = _('''
|
||||
%prog %%name [options] file.pdf page_to_split_on ...
|
||||
%prog %%name [options] file.pdf page_range_to_split_on ...
|
||||
|
||||
Ex.
|
||||
|
||||
%prog %%name file.pdf 6
|
||||
%prog %%name file.pdf 6-12
|
||||
%prog %%name file.pdf 6-12 8 10 9-20
|
||||
|
||||
Split a PDF.
|
||||
''')
|
||||
|
||||
OPTIONS = set([
|
||||
OptionRecommendation(name='output', recommended_value='split.pdf',
|
||||
level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
|
||||
help=_('Path to output file. By default a file is created in the current directory.')),
|
||||
])
|
||||
|
||||
def print_help(parser, log):
|
||||
help = parser.format_help().encode(preferred_encoding, 'replace')
|
||||
log(help)
|
||||
|
||||
def option_parser(name):
|
||||
c = config()
|
||||
return c.option_parser(usage=_('''\
|
||||
|
||||
%prog %%name [options] file.pdf page_to_split_on ...
|
||||
%prog %%name [options] file.pdf page_range_to_split_on ...
|
||||
|
||||
Ex.
|
||||
|
||||
%prog %%name file.pdf 6
|
||||
%prog %%name file.pdf 6-12
|
||||
%prog %%name file.pdf 6-12 8 10 9-20
|
||||
usage = USAGE.replace('%%name', name)
|
||||
return OptionParser(usage=usage)
|
||||
|
||||
Split a PDF.
|
||||
'''.replace('%%name', name)))
|
||||
def option_recommendation_to_cli_option(add_option, rec):
|
||||
opt = rec.option
|
||||
switches = ['-'+opt.short_switch] if opt.short_switch else []
|
||||
switches.append('--'+opt.long_switch)
|
||||
attrs = dict(dest=opt.name, help=opt.help,
|
||||
choices=opt.choices, default=rec.recommended_value)
|
||||
add_option(Option(*switches, **attrs))
|
||||
|
||||
def add_options(parser):
|
||||
group = OptionGroup(parser, _('Split Options:'), _('Options to control the transformation of pdf'))
|
||||
parser.add_option_group(group)
|
||||
add_option = group.add_option
|
||||
|
||||
for rec in OPTIONS:
|
||||
option_recommendation_to_cli_option(add_option, rec)
|
||||
|
||||
def split_pdf(in_path, pages, page_ranges, out_name, metadata=None):
|
||||
pdf = PdfFileReader(open(os.path.abspath(in_path), 'rb'))
|
||||
@ -153,25 +175,28 @@ def valid_pdf(pdf_path):
|
||||
return True
|
||||
|
||||
def main(args=sys.argv, name=''):
|
||||
log = Log()
|
||||
parser = option_parser(name)
|
||||
add_options(parser)
|
||||
|
||||
opts, args = parser.parse_args(args)
|
||||
|
||||
pdf, pages, page_ranges, unknown = split_args(args[1:])
|
||||
|
||||
if pdf == '' and (pages == [] or page_ranges == []):
|
||||
print 'Error: PDF and where to split is required.\n\n'
|
||||
print parser.get_usage()
|
||||
return 2
|
||||
print 'Error: PDF and where to split is required.\n'
|
||||
print_help(parser, log)
|
||||
return 1
|
||||
|
||||
if unknown != []:
|
||||
for arg in unknown:
|
||||
print 'Error: Unknown argument `%s`' % arg
|
||||
print parser.get_usage()
|
||||
return 2
|
||||
print_help(parser, log)
|
||||
return 1
|
||||
|
||||
if not valid_pdf(pdf):
|
||||
print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf
|
||||
return 2
|
||||
return 1
|
||||
|
||||
pages, page_ranges = clean_page_list(pdf, pages, page_ranges)
|
||||
|
||||
@ -183,4 +208,3 @@ def main(args=sys.argv, name=''):
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
||||
|
@ -1,93 +0,0 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, James Beal, james_@catbus.co.uk'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'crop a pdf file'
|
||||
|
||||
import os, sys, re
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
def config(defaults=None):
    '''
    Create the configuration object holding the options of the trim
    command.

    A ``Config`` named 'trimpdf' is used when ``defaults`` is None,
    otherwise a ``StringConfig`` built from ``defaults``.
    '''
    desc = _('Options to control the transformation of pdf')
    default_crop = 10

    c = Config('trimpdf', desc) if defaults is None else StringConfig(defaults, desc)

    c.add_opt('output', ['-o', '--output'], default='cropped.pdf',
        help=_('Path to output file. By default a file is created in the current directory.'))

    # The four margin options share the same default value.
    margin_options = (
        ('bottom_left_x', ['-x', '--leftx'],
            _('Number of pixels to crop from the left most x (default is %d) ')),
        ('bottom_left_y', ['-y', '--lefty'],
            _('Number of pixels to crop from the left most y (default is %d) ')),
        ('top_right_x', ['-v', '--rightx'],
            _('Number of pixels to crop from the right most x (default is %d) ')),
        ('top_right_y', ['-w', '--righty'],
            _('Number of pixels to crop from the right most y (default is %d)')),
    )
    for opt_name, switches, help_template in margin_options:
        c.add_opt(opt_name, switches, default=default_crop,
            help=help_template % default_crop)

    c.add_opt('bounding', ['-b', '--bounding'],
        help=_('A file generated by ghostscript which allows each page to be individually cropped [gs -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox > bounding] '))
    return c
|
||||
|
||||
|
||||
def option_parser(name):
    '''
    Return the option parser of the trim command, with ``name``
    substituted for the ``%%name`` placeholder in the usage text.
    '''
    usage = '''\
%prog %%name [options] file.pdf

Crops a pdf.
'''.replace('%%name', name)
    return config().option_parser(usage=_(usage))
|
||||
|
||||
def _load_bounding_boxes(path):
    '''
    Return the regex match of every ``%%BoundingBox:`` line in the file
    at ``path``, in file order. Raises ``IOError`` if it can not be read.
    '''
    bounding_regex = re.compile(r'%%BoundingBox: (?P<bottom_x>[0-9]+) (?P<bottom_y>[0-9]+) (?P<top_x>[0-9]+) (?P<top_y>[0-9]+)')
    with open(path, 'r') as bounding:
        return [match for match in map(bounding_regex.search, bounding)
                if match is not None]


def main(args=sys.argv, name=''):
    '''
    Command line entry point: crop the PDF named by ``args[1]`` and write
    the result to ``opts.output``.

    Returns 0 on success, 2 when the input can not be read and 1 when the
    bounding file can not be opened or holds fewer bounding boxes than
    the PDF has pages.
    '''
    parser = option_parser(name)
    opts, args = parser.parse_args(args)

    input_file = None
    try:
        try:
            input_file = open(os.path.abspath(args[1]), "rb")
            input_pdf = PdfFileReader(input_file)
        except Exception:
            print("Unable to read input")
            return 2

        title = _('Unknown')
        author = _('Unknown')
        try:
            info = input_pdf.getDocumentInfo()
            if info.title:
                title = info.title
            if info.author:
                author = info.author
        except Exception:
            # Best effort only: keep the 'Unknown' placeholders when the
            # document information can not be read.
            pass

        boxes = None
        if opts.bounding is not None:
            try:
                boxes = _load_bounding_boxes(opts.bounding)
            except IOError:
                print('Error opening %s' % opts.bounding)
                return 1
            # Reading line by line until a match is found would never
            # terminate once the end of the file is reached, so a file
            # that is too short is rejected up front.
            if len(boxes) < input_pdf.getNumPages():
                print('Error: %s contains fewer bounding boxes than the pdf has pages' % opts.bounding)
                return 1

        output_pdf = PdfFileWriter(title=title, author=author)
        for page_number in range(0, input_pdf.getNumPages()):
            page = input_pdf.getPage(page_number)
            if boxes is not None:
                match = boxes[page_number]
                page.mediaBox.upperRight = (match.group('top_x'), match.group('top_y'))
                page.mediaBox.lowerLeft = (match.group('bottom_x'), match.group('bottom_y'))
            else:
                page.mediaBox.upperRight = (page.bleedBox.getUpperRight_x() - opts.top_right_x, page.bleedBox.getUpperRight_y() - opts.top_right_y)
                page.mediaBox.lowerLeft = (page.bleedBox.getLowerLeft_x() + opts.bottom_left_x, page.bleedBox.getLowerLeft_y() + opts.bottom_left_y)
            output_pdf.addPage(page)

        with open(opts.output, "wb") as output_file:
            output_pdf.write(output_file)

        return 0
    finally:
        if input_file is not None:
            input_file.close()
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
@ -2,8 +2,8 @@
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> \
|
||||
2009, John Schember <john@nachtimwald.com>'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>, ' \
|
||||
'2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import errno, os, sys, subprocess
|
||||
|
Loading…
x
Reference in New Issue
Block a user