This commit is contained in:
Kovid Goyal 2009-04-03 22:30:26 -07:00
commit 1d7e56c9d8
15 changed files with 552 additions and 424 deletions

View File

@ -1,3 +1,4 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

View File

@ -1,69 +0,0 @@
'''
Command line interface to run pdf manipulation commands.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import string, sys
from calibre.utils.config import Config, StringConfig
from calibre.ebooks.pdf import info, merge, reverse, split, trim
# Sub-command name (as typed on the command line) -> module implementing it.
COMMANDS = dict(
    info=info,
    merge=merge,
    reverse=reverse,
    split=split,
    trim=trim,
)
def config(defaults=None):
    '''
    Return the configuration object for the pdf manipulation tool.

    A ``Config`` named 'manipulatepdf' is created when ``defaults`` is None,
    otherwise a ``StringConfig`` is built from ``defaults``.
    '''
    desc = _('Options to control the transformation of pdf')
    if defaults is not None:
        return StringConfig(defaults, desc)
    return Config('manipulatepdf', desc)
def option_parser():
    '''
    Return the option parser of the top level command; its usage text lists
    the sorted names of all entries in COMMANDS.
    '''
    command_names = ', '.join(sorted(COMMANDS.keys()))
    make_parser = config().option_parser
    return make_parser(usage=_('''\
%prog command ...
command can be one of the following:
[%%commands]
Use %prog command --help to get more information about a specific command
Manipulate a PDF.
'''.replace('%%commands', command_names)))
def main(args=sys.argv):
    '''
    Dispatch ``args[1]`` to the matching entry of COMMANDS.

    Returns the exit status of the sub-command, or 2 when no command or an
    unknown command was given.
    '''
    parser = option_parser()
    # Work on a copy: the default argument is sys.argv itself and removing
    # the command name below must not alter it for the rest of the process.
    args = list(args)
    if len(args) < 2:
        print('Error: No command specified.\n')
        print(parser.get_usage())
        return 2

    command = args[1].lower().strip()
    if command in COMMANDS:
        # The sub-command receives the arguments without its own name.
        del args[1]
        return COMMANDS[command].main(args, command)

    # Parse first so switches handled by the parser itself take effect
    # before the error is reported (presumably --help and friends).
    parser.parse_args(args)
    print('Unknown command %s.\n' % command)
    print(parser.get_usage())
    return 2


if __name__ == '__main__':
    sys.exit(main())

View File

@ -0,0 +1,69 @@
from __future__ import with_statement
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Command line interface to run pdf manipulation commands.
'''
import string, sys
from calibre.utils.config import OptionParser
from calibre.utils.logging import Log
from calibre.constants import preferred_encoding
from calibre.customize.conversion import OptionRecommendation
from calibre.ebooks.pdf.manipulate import crop, info, merge, reverse, split
# Sub-command name (as typed on the command line) -> module implementing it.
# main() below calls the selected module as ``module.main(args, name)``.
COMMANDS = {
    'crop'    : crop,
    'info'    : info,
    'merge'   : merge,
    'reverse' : reverse,
    'split'   : split,
}

# The '[%%commands]' placeholder is filled in AFTER the translation lookup so
# that the string handed to _() is a constant a message catalog can match.
USAGE = '%prog ' + _('''command ...
command can be one of the following:
[%%commands]
Use %prog command --help to get more information about a specific command
Manipulate a PDF.
''').replace('%%commands', ', '.join(sorted(COMMANDS)))
def print_help(parser, log):
    '''
    Pass the parser's formatted help text to ``log``, encoded with
    ``preferred_encoding`` (characters that cannot be encoded are replaced).
    '''
    text = parser.format_help()
    log(text.encode(preferred_encoding, 'replace'))
def option_parser():
    '''
    Return the top level OptionParser, configured with the module USAGE text.
    '''
    parser = OptionParser(usage=USAGE)
    return parser
def main(args=sys.argv):
    '''
    Dispatch ``args[1]`` to the matching entry of COMMANDS.

    Returns the exit status of the sub-command, or 1 when no command or an
    unknown command was given.
    '''
    log = Log()
    parser = option_parser()
    # Work on a copy: the default argument is sys.argv itself and removing
    # the command name below must not alter it for the rest of the process.
    args = list(args)
    if len(args) < 2:
        print('Error: No command specified.\n')
        print_help(parser, log)
        return 1

    command = args[1].lower().strip()
    if command in COMMANDS:
        # The sub-command receives the arguments without its own name.
        del args[1]
        return COMMANDS[command].main(args, command)

    # Parse first so switches handled by the parser itself take effect
    # before the error is reported (presumably --help and friends).
    parser.parse_args(args)
    print('Unknown command %s.\n' % command)
    print_help(parser, log)
    return 1


if __name__ == '__main__':
    sys.exit(main())

View File

@ -0,0 +1,145 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, James Beal <james_@catbus.co.uk>, ' \
'2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Crop a pdf file
'''
import os, sys, re
from optparse import OptionGroup, Option
from calibre.ebooks.metadata.meta import metadata_from_formats
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.config import OptionParser
from calibre.utils.logging import Log
from calibre.constants import preferred_encoding
from calibre.customize.conversion import OptionRecommendation
from calibre.ebooks.pdf.verify import is_valid_pdf
from pyPdf import PdfFileWriter, PdfFileReader
# Default margin removed from every edge. Kept as a string because it is used
# both as the option default and inside the help texts.
DEFAULT_CROP = '10'

# '%%name' is replaced with the sub-command name by option_parser().
USAGE = '%prog %%name ' + _('''
[options] file.pdf
Crop a PDF file.
''')

# Command line options of the crop sub-command. The default value is
# interpolated AFTER the translation lookup so the string passed to _() is a
# constant that a message catalog can match.
OPTIONS = set([
    OptionRecommendation(name='output', recommended_value='cropped.pdf',
        level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
        help=_('Path to output file. By default a file is created in the current directory.')),
    OptionRecommendation(name='bottom_left_x', recommended_value=DEFAULT_CROP,
        level=OptionRecommendation.LOW, long_switch='leftx', short_switch='x',
        help=_('Number of pixels to crop from the left most x (default is %s) ') % DEFAULT_CROP),
    OptionRecommendation(name='bottom_left_y', recommended_value=DEFAULT_CROP,
        level=OptionRecommendation.LOW, long_switch='lefty', short_switch='y',
        help=_('Number of pixels to crop from the left most y (default is %s) ') % DEFAULT_CROP),
    OptionRecommendation(name='top_right_x', recommended_value=DEFAULT_CROP,
        level=OptionRecommendation.LOW, long_switch='rightx', short_switch='v',
        help=_('Number of pixels to crop from the right most x (default is %s) ') % DEFAULT_CROP),
    # The switch is '--righty'; a space inside the name would make it
    # impossible to type on the command line.
    OptionRecommendation(name='top_right_y', recommended_value=DEFAULT_CROP,
        level=OptionRecommendation.LOW, long_switch='righty', short_switch='w',
        help=_('Number of pixels to crop from the right most y (default is %s)') % DEFAULT_CROP),
    OptionRecommendation(name='bounding', recommended_value=None,
        level=OptionRecommendation.LOW, long_switch='bounding', short_switch='b',
        help=_('A file generated by ghostscript which allows each page to be individually cropped `gs -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox file.pdf 2> bounding`')),
])
def print_help(parser, log):
    '''
    Pass the parser's formatted help text to ``log``, encoded with
    ``preferred_encoding`` (characters that cannot be encoded are replaced).
    '''
    text = parser.format_help()
    log(text.encode(preferred_encoding, 'replace'))
def option_parser(name):
    '''
    Return an OptionParser whose usage text is USAGE with the ``%%name``
    placeholder replaced by ``name``.
    '''
    return OptionParser(usage=USAGE.replace('%%name', name))
def option_recommendation_to_cli_option(add_option, rec):
    '''
    Build an optparse ``Option`` from the recommendation ``rec`` and register
    it by calling ``add_option`` with it.

    The short switch is optional; the recommended value becomes the default.
    '''
    spec = rec.option
    flags = []
    if spec.short_switch:
        flags.append('-' + spec.short_switch)
    flags.append('--' + spec.long_switch)
    settings = {
        'dest': spec.name,
        'help': spec.help,
        'choices': spec.choices,
        'default': rec.recommended_value,
    }
    add_option(Option(*flags, **settings))
def add_options(parser):
    '''
    Add a 'Crop Options:' group to ``parser`` holding one command line option
    for every entry of OPTIONS.
    '''
    crop_group = OptionGroup(parser, _('Crop Options:'), _('Options to control the transformation of pdf'))
    parser.add_option_group(crop_group)
    register = crop_group.add_option
    for recommendation in OPTIONS:
        option_recommendation_to_cli_option(register, recommendation)
def crop_pdf(pdf_path, opts, metadata=None):
    '''
    Write a cropped copy of ``pdf_path`` to ``opts.output``.

    When ``opts.bounding`` names a file, each page takes its media box from
    the corresponding ``%%BoundingBox:`` line of that file. Otherwise every
    page is shrunk relative to its bleed box by the four margins given in
    ``opts`` (bottom_left_x, bottom_left_y, top_right_x, top_right_y).

    ``metadata`` supplies title and authors of the output; 'Unknown' is used
    for both when it is None.

    Raises Exception when the bounding file cannot be read, contains a
    malformed line, or does not hold exactly one bounding box per page.
    Raises ValueError when a margin is not an integer.
    '''
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    bounding_regex = re.compile(r'%%BoundingBox: (?P<bottom_x>\d+) (?P<bottom_y>\d+) (?P<top_x>\d+) (?P<top_y>\d+)')

    # The input stays open until the output has been written, as the pages
    # added to the writer still belong to this reader.
    with open(pdf_path, 'rb') as pdf_file:
        input_pdf = PdfFileReader(pdf_file)

        bounding_lines = []
        if opts.bounding is not None:
            try:
                with open(opts.bounding, 'r') as bounding:
                    bounding_lines = [line for line in bounding
                                      if line.startswith('%%BoundingBox:')]
            except (IOError, OSError):
                raise Exception('Error reading %s' % opts.bounding)
            if len(bounding_lines) != input_pdf.numPages:
                raise Exception('Error bounding file %s page count does not correspond to specified pdf' % opts.bounding)
        else:
            # Option values (including the defaults) are strings; convert
            # them once so they can be used in the box arithmetic below.
            left, bottom = int(opts.bottom_left_x), int(opts.bottom_left_y)
            right, top = int(opts.top_right_x), int(opts.top_right_y)

        output_pdf = PdfFileWriter(title=title, author=author)
        for index, page in enumerate(input_pdf.pages):
            if bounding_lines:
                mo = bounding_regex.search(bounding_lines[index])
                if mo is None:
                    raise Exception('Error in bounding file %s' % opts.bounding)
                page.mediaBox.upperRight = (mo.group('top_x'), mo.group('top_y'))
                page.mediaBox.lowerLeft = (mo.group('bottom_x'), mo.group('bottom_y'))
            else:
                page.mediaBox.upperRight = (page.bleedBox.getUpperRight_x() - right,
                                            page.bleedBox.getUpperRight_y() - top)
                page.mediaBox.lowerLeft = (page.bleedBox.getLowerLeft_x() + left,
                                           page.bleedBox.getLowerLeft_y() + bottom)
            output_pdf.addPage(page)

        with open(opts.output, 'wb') as output_file:
            output_pdf.write(output_file)
def main(args=sys.argv, name=''):
    '''
    Entry point of the crop sub-command.

    Parses ``args``, checks the first remaining argument with is_valid_pdf()
    and crops it. Returns 0 on success and 1 on a usage or read error.
    '''
    log = Log()
    parser = option_parser(name)
    add_options(parser)

    opts, positional = parser.parse_args(args)
    # The first positional argument is skipped, as in the original.
    pdf_files = positional[1:]
    if len(pdf_files) < 1:
        print('Error: A PDF file is required.\n')
        print_help(parser, log)
        return 1

    pdf_path = pdf_files[0]
    if not is_valid_pdf(pdf_path):
        print('Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf_path)
        return 1

    crop_pdf(pdf_path, opts, metadata_from_formats([pdf_path]))
    return 0


if __name__ == '__main__':
    sys.exit(main())

View File

@ -1,34 +1,38 @@
'''
Merge PDF files into a single PDF document.
'''
from __future__ import with_statement
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os, re, sys, time
'''
Merge PDF files into a single PDF document.
'''
from calibre.utils.config import Config, StringConfig
import os, re, sys, time
from optparse import OptionGroup, Option
from calibre.utils.config import OptionParser
from calibre.utils.logging import Log
from calibre.constants import preferred_encoding
from calibre.customize.conversion import OptionRecommendation
from calibre.ebooks.pdf.verify import is_valid_pdfs
from pyPdf import PdfFileWriter, PdfFileReader
def config(defaults=None):
desc = _('Options to control the transformation of pdf')
if defaults is None:
c = Config('manipulatepdf', desc)
else:
c = StringConfig(defaults, desc)
return c
def option_parser(name):
c = config()
return c.option_parser(usage=_('''\
%prog %%name [options] file.pdf ...
USAGE = '%prog %%name ' + _('''
file.pdf ...
Get info about a PDF.
'''.replace('%%name', name)))
''')
def print_help(parser, log):
help = parser.format_help().encode(preferred_encoding, 'replace')
log(help)
def option_parser(name):
usage = USAGE.replace('%%name', name)
return OptionParser(usage=usage)
def print_info(pdf_path):
with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
@ -53,32 +57,23 @@ def print_info(pdf_path):
print _('PDF Version: %s' % mo.group('version'))
except: pass
def verify_files(files):
invalid = []
for pdf_path in files:
try:
with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
pdf = PdfFileReader(pdf_file)
except:
invalid.append(pdf_path)
return invalid
def main(args=sys.argv, name=''):
log = Log()
parser = option_parser(name)
opts, args = parser.parse_args(args)
args = args[1:]
if len(args) < 1:
print 'Error: No PDF sepecified.\n'
print parser.get_usage()
return 2
print_help(parser, log)
return 1
bad_pdfs = verify_files(args)
bad_pdfs = is_valid_pdfs(args)
if bad_pdfs != []:
for pdf in bad_pdfs:
print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf
return 2
return 1
for pdf in args:
print_info(pdf)
@ -87,4 +82,3 @@ def main(args=sys.argv, name=''):
if __name__ == '__main__':
sys.exit(main())

View File

@ -0,0 +1,107 @@
from __future__ import with_statement
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Merge PDF files into a single PDF document.
'''
import os, sys
from optparse import OptionGroup, Option
from calibre.ebooks.metadata.meta import metadata_from_formats
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.config import OptionParser
from calibre.utils.logging import Log
from calibre.constants import preferred_encoding
from calibre.customize.conversion import OptionRecommendation
from calibre.ebooks.pdf.verify import is_valid_pdfs
from pyPdf import PdfFileWriter, PdfFileReader
# Usage text of the merge sub-command; '%%name' is replaced with the
# sub-command name by option_parser().
USAGE = '%prog %%name ' + _('''
[options] file1.pdf file2.pdf ...
Metadata will be used from the first PDF specified.
Merges individual PDFs.
''')

# Command line options of the merge sub-command.
OPTIONS = set((
    OptionRecommendation(
        name='output',
        recommended_value='merged.pdf',
        level=OptionRecommendation.HIGH,
        long_switch='output',
        short_switch='o',
        help=_('Path to output file. By default a file is created in the current directory.'),
    ),
))
def print_help(parser, log):
    '''
    Pass the parser's formatted help text to ``log``, encoded with
    ``preferred_encoding`` (characters that cannot be encoded are replaced).
    '''
    text = parser.format_help()
    log(text.encode(preferred_encoding, 'replace'))
def option_parser(name):
    '''
    Return an OptionParser whose usage text is USAGE with the ``%%name``
    placeholder replaced by ``name``.
    '''
    return OptionParser(usage=USAGE.replace('%%name', name))
def option_recommendation_to_cli_option(add_option, rec):
    '''
    Build an optparse ``Option`` from the recommendation ``rec`` and register
    it by calling ``add_option`` with it.

    The short switch is optional; the recommended value becomes the default.
    '''
    spec = rec.option
    flags = []
    if spec.short_switch:
        flags.append('-' + spec.short_switch)
    flags.append('--' + spec.long_switch)
    settings = {
        'dest': spec.name,
        'help': spec.help,
        'choices': spec.choices,
        'default': rec.recommended_value,
    }
    add_option(Option(*flags, **settings))
def add_options(parser):
    '''
    Add a 'Merge Options:' group to ``parser`` holding one command line
    option for every entry of OPTIONS.
    '''
    merge_group = OptionGroup(parser, _('Merge Options:'), _('Options to control the transformation of pdf'))
    parser.add_option_group(merge_group)
    register = merge_group.add_option
    for recommendation in OPTIONS:
        option_recommendation_to_cli_option(register, recommendation)
def merge_files(in_paths, out_path, metadata=None):
    '''
    Concatenate the pages of all PDFs in ``in_paths``, in order, and write
    the result to ``out_path``.

    ``metadata`` supplies title and authors of the output; 'Unknown' is used
    for both when it is None.
    '''
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)

    # Every input is kept open until the output has been written (the pages
    # handed to the writer still belong to their readers) and is closed
    # afterwards, even when reading or writing fails.
    opened = []
    try:
        for pdf_path in in_paths:
            pdf_file = open(os.path.abspath(pdf_path), 'rb')
            opened.append(pdf_file)
            for page in PdfFileReader(pdf_file).pages:
                out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
    finally:
        for pdf_file in opened:
            pdf_file.close()
def main(args=sys.argv, name=''):
    '''
    Entry point of the merge sub-command.

    Needs at least two PDF files after the first positional argument; all of
    them are checked with is_valid_pdfs() before merging into ``opts.output``.
    Returns 0 on success and 1 on a usage or read error.
    '''
    log = Log()
    parser = option_parser(name)
    add_options(parser)

    opts, positional = parser.parse_args(args)
    # The first positional argument is skipped, as in the original.
    pdf_files = positional[1:]
    if len(pdf_files) < 2:
        print('Error: Two or more PDF files are required.\n')
        print_help(parser, log)
        return 1

    # Despite its name, is_valid_pdfs() returns the files that are NOT valid.
    unreadable = is_valid_pdfs(pdf_files)
    if unreadable != []:
        for bad_path in unreadable:
            print('Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % bad_path)
        return 1

    merge_files(pdf_files, opts.output, metadata_from_formats([pdf_files[0]]))
    return 0


if __name__ == '__main__':
    sys.exit(main())

View File

@ -0,0 +1,102 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Reverse content of PDF.
'''
import os, sys
from optparse import OptionGroup, Option
from calibre.ebooks.metadata.meta import metadata_from_formats
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.config import OptionParser
from calibre.utils.logging import Log
from calibre.constants import preferred_encoding
from calibre.customize.conversion import OptionRecommendation
from calibre.ebooks.pdf.verify import is_valid_pdf
from pyPdf import PdfFileWriter, PdfFileReader
# Usage text of the reverse sub-command; '%%name' is replaced with the
# sub-command name by option_parser().
USAGE = '%prog %%name ' + _('''
[options] file.pdf
Reverse PDF.
''')

# Command line options of the reverse sub-command.
OPTIONS = set((
    OptionRecommendation(
        name='output',
        recommended_value='reversed.pdf',
        level=OptionRecommendation.HIGH,
        long_switch='output',
        short_switch='o',
        help=_('Path to output file. By default a file is created in the current directory.'),
    ),
))
def print_help(parser, log):
    '''
    Pass the parser's formatted help text to ``log``, encoded with
    ``preferred_encoding`` (characters that cannot be encoded are replaced).
    '''
    text = parser.format_help()
    log(text.encode(preferred_encoding, 'replace'))
def option_parser(name):
    '''
    Return an OptionParser whose usage text is USAGE with the ``%%name``
    placeholder replaced by ``name``.
    '''
    return OptionParser(usage=USAGE.replace('%%name', name))
def option_recommendation_to_cli_option(add_option, rec):
    '''
    Build an optparse ``Option`` from the recommendation ``rec`` and register
    it by calling ``add_option`` with it.

    The short switch is optional; the recommended value becomes the default.
    '''
    spec = rec.option
    flags = []
    if spec.short_switch:
        flags.append('-' + spec.short_switch)
    flags.append('--' + spec.long_switch)
    settings = {
        'dest': spec.name,
        'help': spec.help,
        'choices': spec.choices,
        'default': rec.recommended_value,
    }
    add_option(Option(*flags, **settings))
def add_options(parser):
    '''
    Add a 'Reverse Options:' group to ``parser`` holding one command line
    option for every entry of OPTIONS.
    '''
    reverse_group = OptionGroup(parser, _('Reverse Options:'), _('Options to control the transformation of pdf'))
    parser.add_option_group(reverse_group)
    register = reverse_group.add_option
    for recommendation in OPTIONS:
        option_recommendation_to_cli_option(register, recommendation)
def reverse(pdf_path, out_path, metadata=None):
    '''
    Write the pages of ``pdf_path`` in reverse order to ``out_path``.

    ``metadata`` supplies title and authors of the output; 'Unknown' is used
    for both when it is None.
    '''
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)

    # The input is kept open until the output has been written (the pages
    # handed to the writer still belong to the reader) and is then closed,
    # even when reading or writing fails.
    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
        pdf = PdfFileReader(pdf_file)
        for page in reversed(pdf.pages):
            out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
def main(args=sys.argv, name=''):
    '''
    Entry point of the reverse sub-command.

    Parses ``args``, checks the first remaining argument with is_valid_pdf()
    and writes its reversed copy to ``opts.output``. Returns 0 on success and
    1 on a usage or read error.
    '''
    log = Log()
    parser = option_parser(name)
    add_options(parser)

    opts, positional = parser.parse_args(args)
    # The first positional argument is skipped, as in the original.
    pdf_files = positional[1:]
    if len(pdf_files) < 1:
        print('Error: A PDF file is required.\n')
        print_help(parser, log)
        return 1

    pdf_path = pdf_files[0]
    if not is_valid_pdf(pdf_path):
        print('Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf_path)
        return 1

    reverse(pdf_path, opts.output, metadata_from_formats([pdf_path]))
    return 0


if __name__ == '__main__':
    sys.exit(main())

View File

@ -1,35 +1,28 @@
'''
Split PDF file into multiple PDF documents.
'''
# -*- coding: utf-8 -*-
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Split PDF file into multiple PDF documents.
'''
import os, sys, re
from optparse import OptionGroup, Option
from calibre.ebooks.metadata.meta import metadata_from_formats
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.config import Config, StringConfig
from calibre.utils.config import OptionParser
from calibre.utils.logging import Log
from calibre.constants import preferred_encoding
from calibre.customize.conversion import OptionRecommendation
from calibre.ebooks.pdf.verify import is_valid_pdf
from pyPdf import PdfFileWriter, PdfFileReader
def config(defaults=None):
desc = _('Options to control the transformation of pdf')
if defaults is None:
c = Config('splitpdf', desc)
else:
c = StringConfig(defaults, desc)
c.add_opt('output', ['-o', '--output'], default='split.pdf',
help=_('Path to output file. By default a file is created in the current directory. \
The file name will be the base name for the output.'))
return c
def option_parser(name):
c = config()
return c.option_parser(usage=_('''\
USAGE = _('''
%prog %%name [options] file.pdf page_to_split_on ...
%prog %%name [options] file.pdf page_range_to_split_on ...
@ -40,7 +33,37 @@ def option_parser(name):
%prog %%name file.pdf 6-12 8 10 9-20
Split a PDF.
'''.replace('%%name', name)))
''')
OPTIONS = set([
OptionRecommendation(name='output', recommended_value='split.pdf',
level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
help=_('Path to output file. By default a file is created in the current directory.')),
])
def print_help(parser, log):
help = parser.format_help().encode(preferred_encoding, 'replace')
log(help)
def option_parser(name):
usage = USAGE.replace('%%name', name)
return OptionParser(usage=usage)
def option_recommendation_to_cli_option(add_option, rec):
opt = rec.option
switches = ['-'+opt.short_switch] if opt.short_switch else []
switches.append('--'+opt.long_switch)
attrs = dict(dest=opt.name, help=opt.help,
choices=opt.choices, default=rec.recommended_value)
add_option(Option(*switches, **attrs))
def add_options(parser):
group = OptionGroup(parser, _('Split Options:'), _('Options to control the transformation of pdf'))
parser.add_option_group(group)
add_option = group.add_option
for rec in OPTIONS:
option_recommendation_to_cli_option(add_option, rec)
def split_pdf(in_path, pages, page_ranges, out_name, metadata=None):
pdf = PdfFileReader(open(os.path.abspath(in_path), 'rb'))
@ -141,37 +164,29 @@ def clean_page_list(pdf_path, pages, page_ranges):
return pages, page_ranges
# Return True if the pdf is valid.
def valid_pdf(pdf_path):
try:
with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
pdf = PdfFileReader(pdf_file)
if pdf.isEncrypted or pdf.numPages <= 0:
raise Exception
except:
return False
return True
def main(args=sys.argv, name=''):
log = Log()
parser = option_parser(name)
add_options(parser)
opts, args = parser.parse_args(args)
pdf, pages, page_ranges, unknown = split_args(args[1:])
if pdf == '' and (pages == [] or page_ranges == []):
print 'Error: PDF and where to split is required.\n\n'
print parser.get_usage()
return 2
print 'Error: PDF and where to split is required.\n'
print_help(parser, log)
return 1
if unknown != []:
for arg in unknown:
print 'Error: Unknown argument `%s`' % arg
print parser.get_usage()
return 2
print_help(parser, log)
return 1
if not valid_pdf(pdf):
if not is_valid_pdf(pdf):
print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf
return 2
return 1
pages, page_ranges = clean_page_list(pdf, pages, page_ranges)
@ -183,4 +198,3 @@ def main(args=sys.argv, name=''):
if __name__ == '__main__':
sys.exit(main())

View File

@ -1,91 +0,0 @@
'''
Merge PDF files into a single PDF document.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os, sys
from calibre.ebooks.metadata.meta import metadata_from_formats
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.config import Config, StringConfig
from pyPdf import PdfFileWriter, PdfFileReader
def config(defaults=None):
    '''
    Return the configuration object of the merge command, with its
    ``output`` option (-o/--output, default 'merged.pdf') registered.

    A ``Config`` named 'mergepdf' is used when ``defaults`` is None,
    otherwise a ``StringConfig`` built from ``defaults``.
    '''
    desc = _('Options to control the transformation of pdf')
    conf = Config('mergepdf', desc) if defaults is None else StringConfig(defaults, desc)
    conf.add_opt('output', ['-o', '--output'], default='merged.pdf',
        help=_('Path to output file. By default a file is created in the current directory.'))
    return conf
def option_parser(name):
    '''
    Return the option parser of the merge command; ``name`` replaces the
    ``%%name`` placeholder of the usage text.
    '''
    make_parser = config().option_parser
    return make_parser(usage=_('''\
%prog %%name [options] file1.pdf file2.pdf ...
Merges individual PDFs. Metadata will be used from the first PDF specified.
'''.replace('%%name', name)))
def merge_files(in_paths, out_path, metadata=None):
    '''
    Concatenate the pages of all PDFs in ``in_paths``, in order, and write
    the result to ``out_path``.

    ``metadata`` supplies title and authors of the output; 'Unknown' is used
    for both when it is None.
    '''
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)

    # Every input is kept open until the output has been written (the pages
    # handed to the writer still belong to their readers) and is closed
    # afterwards, even when reading or writing fails.
    opened = []
    try:
        for pdf_path in in_paths:
            pdf_file = open(os.path.abspath(pdf_path), 'rb')
            opened.append(pdf_file)
            for page in PdfFileReader(pdf_file).pages:
                out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
    finally:
        for pdf_file in opened:
            pdf_file.close()
def verify_files(files):
    '''
    Return the entries of ``files`` that cannot be merged: files that cannot
    be opened or parsed by PdfFileReader, are encrypted, or have no pages.
    '''
    invalid = []
    for pdf_path in files:
        try:
            with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
                pdf = PdfFileReader(pdf_file)
                usable = not (pdf.isEncrypted or pdf.numPages <= 0)
        except Exception:
            # Any failure to open or parse the file marks it as invalid;
            # KeyboardInterrupt and SystemExit are deliberately not trapped.
            usable = False
        if not usable:
            invalid.append(pdf_path)
    return invalid
def main(args=sys.argv, name=''):
    '''
    Entry point of the merge command.

    Needs at least two PDF files after the first positional argument; all of
    them are checked with verify_files() before merging into ``opts.output``.
    Returns 0 on success and 2 on a usage or read error.
    '''
    parser = option_parser(name)
    opts, positional = parser.parse_args(args)
    # The first positional argument is skipped, as in the original.
    pdf_files = positional[1:]
    if len(pdf_files) < 2:
        print('Error: Two or more PDF files are required.\n\n')
        print(parser.get_usage())
        return 2

    unreadable = verify_files(pdf_files)
    if unreadable != []:
        for bad_path in unreadable:
            print('Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % bad_path)
        return 2

    merge_files(pdf_files, opts.output, metadata_from_formats([pdf_files[0]]))
    return 0


if __name__ == '__main__':
    sys.exit(main())

View File

@ -2,8 +2,8 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> \
2009, John Schember <john@nachtimwald.com>'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>, ' \
'2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import errno, os, sys, subprocess

View File

@ -1,88 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Reverse content of PDF.
'''
import os, sys
from calibre.ebooks.metadata.meta import metadata_from_formats
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.config import Config, StringConfig
from pyPdf import PdfFileWriter, PdfFileReader
def config(defaults=None):
    '''
    Return the configuration object of the reverse command, with its
    ``output`` option (-o/--output, default 'reversed.pdf') registered.

    A ``Config`` named 'reversepdf' is used when ``defaults`` is None,
    otherwise a ``StringConfig`` built from ``defaults``.
    '''
    desc = _('Options to control the transformation of pdf')
    conf = Config('reversepdf', desc) if defaults is None else StringConfig(defaults, desc)
    conf.add_opt('output', ['-o', '--output'], default='reversed.pdf',
        help=_('Path to output file. By default a file is created in the current directory.'))
    return conf
def option_parser(name):
    '''
    Return the option parser of the reverse command; ``name`` replaces the
    ``%%name`` placeholder of the usage text.
    '''
    make_parser = config().option_parser
    return make_parser(usage=_('''\
%prog %%name [options] file1.pdf
Reverse PDF.
'''.replace('%%name', name)))
def reverse(pdf_path, out_path, metadata=None):
    '''
    Write the pages of ``pdf_path`` in reverse order to ``out_path``.

    ``metadata`` supplies title and authors of the output; 'Unknown' is used
    for both when it is None.
    '''
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)

    # The input is kept open until the output has been written (the pages
    # handed to the writer still belong to the reader) and is then closed,
    # even when reading or writing fails.
    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
        pdf = PdfFileReader(pdf_file)
        for page in reversed(pdf.pages):
            out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
# Return True if the pdf is valid.
def valid_pdf(pdf_path):
    '''
    Return True when ``pdf_path`` can be opened and parsed by PdfFileReader,
    is not encrypted and has at least one page; False otherwise.
    '''
    try:
        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
            pdf = PdfFileReader(pdf_file)
            return not (pdf.isEncrypted or pdf.numPages <= 0)
    except Exception:
        # Any failure to open or parse the file means it is not usable;
        # KeyboardInterrupt and SystemExit are deliberately not trapped.
        return False
def main(args=sys.argv, name=''):
    '''
    Entry point of the reverse command.

    Parses ``args``, checks the first remaining argument with valid_pdf()
    and writes its reversed copy to ``opts.output``. Returns 0 on success and
    2 on a usage or read error.
    '''
    parser = option_parser(name)
    opts, positional = parser.parse_args(args)
    # The first positional argument is skipped, as in the original.
    pdf_files = positional[1:]
    if len(pdf_files) < 1:
        print('Error: A PDF file is required.\n\n')
        print(parser.get_usage())
        return 2

    pdf_path = pdf_files[0]
    if not valid_pdf(pdf_path):
        print('Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf_path)
        return 2

    reverse(pdf_path, opts.output, metadata_from_formats([pdf_path]))
    return 0


if __name__ == '__main__':
    sys.exit(main())

View File

@ -1,93 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, James Beal, james_@catbus.co.uk'
__docformat__ = 'restructuredtext en'
'crop a pdf file'
import os, sys, re
from calibre.utils.config import Config, StringConfig
from pyPdf import PdfFileWriter, PdfFileReader
def config(defaults=None):
    '''
    Return the configuration object of the trim command with its options
    registered: the output path, the four crop margins (each defaulting to
    10) and the optional ghostscript bounding box file.

    A ``Config`` named 'trimpdf' is used when ``defaults`` is None, otherwise
    a ``StringConfig`` built from ``defaults``.
    '''
    default_crop = 10
    desc = _('Options to control the transformation of pdf')
    conf = Config('trimpdf', desc) if defaults is None else StringConfig(defaults, desc)

    conf.add_opt('output', ['-o', '--output'], default='cropped.pdf',
        help=_('Path to output file. By default a file is created in the current directory.'))

    # Margins, relative to the lower left and upper right page corners.
    conf.add_opt('bottom_left_x', ['-x', '--leftx'], default=default_crop,
        help=_('Number of pixels to crop from the left most x (default is %d) ') % default_crop)
    conf.add_opt('bottom_left_y', ['-y', '--lefty'], default=default_crop,
        help=_('Number of pixels to crop from the left most y (default is %d) ') % default_crop)
    conf.add_opt('top_right_x', ['-v', '--rightx'], default=default_crop,
        help=_('Number of pixels to crop from the right most x (default is %d) ') % default_crop)
    conf.add_opt('top_right_y', ['-w', '--righty'], default=default_crop,
        help=_('Number of pixels to crop from the right most y (default is %d)') % default_crop)

    conf.add_opt('bounding', ['-b', '--bounding'],
        help=_('A file generated by ghostscript which allows each page to be individually cropped [gs -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox > bounding] '))
    return conf
def option_parser(name):
    '''
    Return the option parser of the trim command; ``name`` replaces the
    ``%%name`` placeholder of the usage text.
    '''
    make_parser = config().option_parser
    return make_parser(usage=_('''\
%prog %%name [options] file.pdf
Crops a pdf.
'''.replace('%%name', name)))
def main(args=sys.argv, name=''):
    '''
    Entry point of the trim command: crop every page of ``args[1]`` and write
    the result to ``opts.output``.

    With ``opts.bounding`` each page takes its media box from the next
    ``%%BoundingBox:`` line of that file; otherwise each page is shrunk
    relative to its bleed box by the four margin options.

    Returns 0 on success, 2 when the input cannot be read and 1 when the
    bounding file cannot be opened or has too few bounding boxes.
    '''
    parser = option_parser(name)
    opts, args = parser.parse_args(args)

    try:
        source = os.path.abspath(args[1])
        input_file = open(source, 'rb')
    except (IndexError, IOError, OSError):
        print('Unable to read input')
        return 2

    bounding = None
    bounding_regex = re.compile('%%BoundingBox: (?P<bottom_x>[0-9]+) (?P<bottom_y>[0-9]+) (?P<top_x>[0-9]+) (?P<top_y>[0-9]+)')
    try:
        try:
            input_pdf = PdfFileReader(input_file)
        except Exception:
            print('Unable to read input')
            return 2

        title = _('Unknown')
        author = _('Unknown')
        try:
            info = input_pdf.getDocumentInfo()
            if info.title:
                title = info.title
            if info.author:
                author = info.author
        except Exception:
            # Metadata is optional; keep whatever has been determined so far.
            pass

        if opts.bounding is not None:
            try:
                bounding = open(opts.bounding, 'r')
            except (IOError, OSError):
                print('Error opening %s' % opts.bounding)
                return 1

        output_pdf = PdfFileWriter(title=title, author=author)
        for page_number in range(input_pdf.getNumPages()):
            page = input_pdf.getPage(page_number)
            if bounding is not None:
                # Advance to the next bounding box line. Reaching the end of
                # the file means there are fewer boxes than pages; stop with
                # an error instead of reading empty lines forever.
                match = None
                while match is None:
                    line = bounding.readline()
                    if not line:
                        print('Error in bounding file %s' % opts.bounding)
                        return 1
                    match = bounding_regex.search(line)
                page.mediaBox.upperRight = (match.group('top_x'), match.group('top_y'))
                page.mediaBox.lowerLeft = (match.group('bottom_x'), match.group('bottom_y'))
            else:
                page.mediaBox.upperRight = (page.bleedBox.getUpperRight_x() - opts.top_right_x,
                                            page.bleedBox.getUpperRight_y() - opts.top_right_y)
                page.mediaBox.lowerLeft = (page.bleedBox.getLowerLeft_x() + opts.bottom_left_x,
                                           page.bleedBox.getLowerLeft_y() + opts.bottom_left_y)
            output_pdf.addPage(page)

        with open(opts.output, 'wb') as output_file:
            output_pdf.write(output_file)
    finally:
        # Release both inputs on every exit path, including the error returns.
        if bounding is not None:
            bounding.close()
        input_file.close()
    return 0


if __name__ == '__main__':
    sys.exit(main())

View File

@ -0,0 +1,37 @@
from __future__ import with_statement
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Verify PDF files.
'''
import os
from pyPdf import PdfFileWriter, PdfFileReader
def is_valid_pdf(pdf_path):
    '''
    Returns True if the pdf file is valid, i.e. it can be opened and
    PdfFileReader accepts it; False otherwise.
    '''
    try:
        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
            # Only constructing the reader matters; the object is not needed.
            PdfFileReader(pdf_file)
    except Exception:
        # Any failure to open or parse the file means it is not valid;
        # KeyboardInterrupt and SystemExit are deliberately not trapped.
        return False
    return True
def is_valid_pdfs(pdf_paths):
    '''
    Returns a list of invalid pdf files.

    Note that, despite the name, the result holds the paths for which
    is_valid_pdf() is False, in their original order.
    '''
    return [path for path in pdf_paths if not is_valid_pdf(path)]

View File

@ -39,7 +39,7 @@ entry_points = {
'calibre-fontconfig = calibre.utils.fontconfig:main',
'calibre-parallel = calibre.parallel:main',
'calibre-customize = calibre.customize.ui:main',
'pdfmanipulate = calibre.ebooks.pdf.manipulate:main',
'pdfmanipulate = calibre.ebooks.pdf.manipulate.cli:main',
'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
'calibre-smtp = calibre.utils.smtp:main',
],