mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
IGN:...
This commit is contained in:
commit
1d7e56c9d8
@ -1,3 +1,4 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
|
@ -1,69 +0,0 @@
|
||||
'''
|
||||
Command line interface to run pdf manipulation commands.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import string, sys
|
||||
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
from calibre.ebooks.pdf import info, merge, reverse, split, trim
|
||||
|
||||
COMMANDS = {
|
||||
'info' : info,
|
||||
'merge' : merge,
|
||||
'reverse' : reverse,
|
||||
'split' : split,
|
||||
'trim' : trim,
|
||||
}
|
||||
|
||||
def config(defaults=None):
    '''
    Build the configuration object for the pdf manipulation front end.

    Without ``defaults`` a ``Config`` named ``manipulatepdf`` is created,
    otherwise ``defaults`` is handed to ``StringConfig``.
    '''
    description = _('Options to control the transformation of pdf')
    if defaults is not None:
        return StringConfig(defaults, description)
    return Config('manipulatepdf', description)
|
||||
|
||||
def option_parser():
    '''
    Return the option parser of the top level command.

    The ``%%commands`` placeholder of the usage text is replaced with the
    sorted, comma separated keys of COMMANDS before the text is translated.
    '''
    c = config()
    command_names = string.join(sorted(COMMANDS.keys()), ', ')
    usage = '''\

%prog command ...

command can be one of the following:
[%%commands]

Use %prog command --help to get more information about a specific command

Manipulate a PDF.
'''.replace('%%commands', command_names)
    return c.option_parser(usage=_(usage))
|
||||
|
||||
def main(args=sys.argv):
    '''
    Run the sub-command named by ``args[1]``.

    Returns 2 when no command or an unknown command is given, otherwise the
    value returned by the selected command's own ``main``.
    '''
    parser = option_parser()

    if len(args) < 2:
        print('Error: No command sepecified.\n')
        print(parser.get_usage())
        return 2

    command = args[1].lower().strip()

    if command not in COMMANDS.keys():
        parser.parse_args(args)
        print('Unknown command %s.\n' % command)
        print(parser.get_usage())
        return 2

    # The sub-command receives the argument list without its own name.
    del args[1]
    return COMMANDS[command].main(args, command)
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
0
src/calibre/ebooks/pdf/manipulate/__init__.py
Normal file
0
src/calibre/ebooks/pdf/manipulate/__init__.py
Normal file
69
src/calibre/ebooks/pdf/manipulate/cli.py
Normal file
69
src/calibre/ebooks/pdf/manipulate/cli.py
Normal file
@ -0,0 +1,69 @@
|
||||
from __future__ import with_statement
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Command line interface to run pdf manipulation commands.
|
||||
'''
|
||||
|
||||
import string, sys
|
||||
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.utils.logging import Log
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
from calibre.ebooks.pdf.manipulate import crop, info, merge, reverse, split
|
||||
|
||||
COMMANDS = {
|
||||
'crop' : crop,
|
||||
'info' : info,
|
||||
'merge' : merge,
|
||||
'reverse' : reverse,
|
||||
'split' : split,
|
||||
}
|
||||
|
||||
# Usage text of the top level command. The literal text is translated first
# and the ``%%commands`` placeholder is filled in afterwards, so the
# translation lookup is done with the unmodified message.
USAGE = '%prog ' + _('''command ...

command can be one of the following:
[%%commands]

Use %prog command --help to get more information about a specific command

Manipulate a PDF.
''').replace('%%commands', ', '.join(sorted(COMMANDS)))
|
||||
|
||||
def print_help(parser, log):
    '''
    Pass the parser's formatted help text to ``log``.

    The text is encoded with ``preferred_encoding`` first; characters that
    cannot be encoded are replaced.
    '''
    help_text = parser.format_help()
    log(help_text.encode(preferred_encoding, 'replace'))
|
||||
|
||||
def option_parser():
    '''Return an ``OptionParser`` that uses the module level USAGE text.'''
    parser = OptionParser(usage=USAGE)
    return parser
|
||||
|
||||
def main(args=sys.argv):
    '''
    Dispatch to the pdf manipulation command named by ``args[1]``.

    :param args: Argument list, program name first. It is not modified.
    :return: 1 when the command is missing or unknown, otherwise whatever
             the selected command's ``main`` returns.
    '''
    log = Log()
    parser = option_parser()

    if len(args) < 2:
        print('Error: No command specified.\n')
        print_help(parser, log)
        return 1

    command = args[1].lower().strip()

    if command in COMMANDS:
        # Hand over a copy without the command name rather than deleting
        # from ``args`` in place: ``args`` defaults to sys.argv itself.
        return COMMANDS[command].main(args[:1] + args[2:], command)

    # Presumably run so that the parser can act on generic switches before
    # the unknown command is reported.
    parser.parse_args(args)
    print('Unknown command %s.\n' % command)
    print_help(parser, log)
    return 1
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
145
src/calibre/ebooks/pdf/manipulate/crop.py
Normal file
145
src/calibre/ebooks/pdf/manipulate/crop.py
Normal file
@ -0,0 +1,145 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, James Beal <james_@catbus.co.uk>, ' \
|
||||
'2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Crop a pdf file
|
||||
'''
|
||||
|
||||
import os, sys, re
|
||||
from optparse import OptionGroup, Option
|
||||
|
||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.utils.logging import Log
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
from calibre.ebooks.pdf.verify import is_valid_pdf
|
||||
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
DEFAULT_CROP = '10'
|
||||
|
||||
USAGE = '%prog %%name ' + _('''
|
||||
[options] file.pdf
|
||||
|
||||
Crop a PDF file.
|
||||
''')
|
||||
|
||||
# Options accepted by the crop command.
# The default value is substituted into the help texts after translation so
# that the lookup is done with the untranslated, unformatted message.
OPTIONS = set([
    OptionRecommendation(name='output', recommended_value='cropped.pdf',
        level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
        help=_('Path to output file. By default a file is created in the current directory.')),
    OptionRecommendation(name='bottom_left_x', recommended_value=DEFAULT_CROP,
        level=OptionRecommendation.LOW, long_switch='leftx', short_switch='x',
        help=_('Number of pixels to crop from the left most x (default is %s) ') % DEFAULT_CROP),
    OptionRecommendation(name='bottom_left_y', recommended_value=DEFAULT_CROP,
        level=OptionRecommendation.LOW, long_switch='lefty', short_switch='y',
        help=_('Number of pixels to crop from the left most y (default is %s) ') % DEFAULT_CROP),
    OptionRecommendation(name='top_right_x', recommended_value=DEFAULT_CROP,
        level=OptionRecommendation.LOW, long_switch='rightx', short_switch='v',
        help=_('Number of pixels to crop from the right most x (default is %s) ') % DEFAULT_CROP),
    # The switch must not contain a space, otherwise it cannot be typed as a
    # single command line word.
    OptionRecommendation(name='top_right_y', recommended_value=DEFAULT_CROP,
        level=OptionRecommendation.LOW, long_switch='righty', short_switch='w',
        help=_('Number of pixels to crop from the right most y (default is %s)') % DEFAULT_CROP),
    OptionRecommendation(name='bounding', recommended_value=None,
        level=OptionRecommendation.LOW, long_switch='bounding', short_switch='b',
        help=_('A file generated by ghostscript which allows each page to be individually cropped `gs -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox file.pdf 2> bounding`')),
])
|
||||
|
||||
def print_help(parser, log):
    '''
    Log the help text of ``parser``.

    The formatted help is encoded with ``preferred_encoding`` (unencodable
    characters replaced) and then given to ``log``.
    '''
    encoded = parser.format_help().encode(preferred_encoding, 'replace')
    log(encoded)
|
||||
|
||||
def option_parser(name):
    '''
    Return an ``OptionParser`` whose usage text is USAGE with the
    ``%%name`` placeholder replaced by ``name``.
    '''
    return OptionParser(usage=USAGE.replace('%%name', name))
|
||||
|
||||
def option_recommendation_to_cli_option(add_option, rec):
    '''
    Create an optparse ``Option`` from ``rec`` and pass it to ``add_option``.

    ``rec.option`` supplies the switches, destination name, help text and
    choices; ``rec.recommended_value`` becomes the default. The short switch
    is only used when it is set.
    '''
    option = rec.option
    flags = []
    if option.short_switch:
        flags.append('-' + option.short_switch)
    flags.append('--' + option.long_switch)
    cli_option = Option(*flags, dest=option.name, help=option.help,
                        choices=option.choices,
                        default=rec.recommended_value)
    add_option(cli_option)
|
||||
|
||||
def add_options(parser):
    '''
    Add a 'Crop Options:' group to ``parser`` holding one command line
    option for every entry of OPTIONS.
    '''
    crop_group = OptionGroup(parser, _('Crop Options:'),
                             _('Options to control the transformation of pdf'))
    parser.add_option_group(crop_group)

    for recommendation in OPTIONS:
        option_recommendation_to_cli_option(crop_group.add_option, recommendation)
|
||||
|
||||
def _read_bounding_lines(bounding_path):
    '''Return the lines of ``bounding_path`` that start with ``%%BoundingBox:``.'''
    try:
        with open(bounding_path, 'r') as bounding:
            return [line for line in bounding if line.startswith('%%BoundingBox:')]
    except (IOError, OSError):
        raise Exception('Error reading %s' % bounding_path)


def crop_pdf(pdf_path, opts, metadata=None):
    '''
    Write a cropped copy of the pdf at ``pdf_path`` to ``opts.output``.

    :param pdf_path: Path of the pdf to crop.
    :param opts: Parsed options. ``opts.bounding`` is either None or the
                 path of a bounding box file with one ``%%BoundingBox:``
                 line per page. Without it the media box of every page is
                 derived from its bleed box and the ``bottom_left_x``,
                 ``bottom_left_y``, ``top_right_x`` and ``top_right_y``
                 margins, which may be given as strings.
    :param metadata: Object providing ``title`` and ``authors`` for the
                     output; 'Unknown' is used for both when it is None.
    :raises Exception: If the bounding file cannot be read, its number of
                       bounding box lines differs from the page count, or
                       one of those lines cannot be parsed.
    '''
    if metadata is None:
        title = author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    bounding_regex = re.compile(r'%%BoundingBox: (?P<bottom_x>\d+) (?P<bottom_y>\d+) (?P<top_x>\d+) (?P<top_y>\d+)')

    # The input stays open until the output is written because the pages
    # added to the writer may still need the stream (assumption about pyPdf).
    with open(pdf_path, 'rb') as pdf_file:
        input_pdf = PdfFileReader(pdf_file)

        bounding_lines = []
        if opts.bounding is not None:
            bounding_lines = _read_bounding_lines(opts.bounding)
            if len(bounding_lines) != input_pdf.numPages:
                raise Exception('Error bounding file %s page count does not correspond to specified pdf' % opts.bounding)

        if not bounding_lines:
            # Option values come from the command line as strings; convert
            # them once so they can be used in arithmetic.
            left = int(opts.bottom_left_x)
            bottom = int(opts.bottom_left_y)
            right = int(opts.top_right_x)
            top = int(opts.top_right_y)

        output_pdf = PdfFileWriter(title=title, author=author)
        for index, page in enumerate(input_pdf.pages):
            if bounding_lines:
                mo = bounding_regex.search(bounding_lines[index])
                if mo is None:
                    raise Exception('Error in bounding file %s' % opts.bounding)
                page.mediaBox.upperRight = (mo.group('top_x'), mo.group('top_y'))
                page.mediaBox.lowerLeft = (mo.group('bottom_x'), mo.group('bottom_y'))
            else:
                page.mediaBox.upperRight = (page.bleedBox.getUpperRight_x() - right,
                                            page.bleedBox.getUpperRight_y() - top)
                page.mediaBox.lowerLeft = (page.bleedBox.getLowerLeft_x() + left,
                                           page.bleedBox.getLowerLeft_y() + bottom)
            output_pdf.addPage(page)

        with open(opts.output, 'wb') as output_file:
            output_pdf.write(output_file)
|
||||
|
||||
def main(args=sys.argv, name=''):
    '''
    Entry point of the crop command.

    Parses ``args``, checks the first positional argument with
    ``is_valid_pdf`` and crops it. Returns 1 when no pdf is given or the
    check fails, 0 otherwise.
    '''
    log = Log()
    parser = option_parser(name)
    add_options(parser)

    opts, positional = parser.parse_args(args)
    pdf_paths = positional[1:]

    if not pdf_paths:
        print('Error: A PDF file is required.\n')
        print_help(parser, log)
        return 1

    pdf_path = pdf_paths[0]
    if not is_valid_pdf(pdf_path):
        print('Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf_path)
        return 1

    crop_pdf(pdf_path, opts, metadata_from_formats([pdf_path]))
    return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
@ -1,34 +1,38 @@
|
||||
'''
|
||||
Merge PDF files into a single PDF document.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, re, sys, time
|
||||
'''
|
||||
Merge PDF files into a single PDF document.
|
||||
'''
|
||||
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
import os, re, sys, time
|
||||
from optparse import OptionGroup, Option
|
||||
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.utils.logging import Log
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
from calibre.ebooks.pdf.verify import is_valid_pdfs
|
||||
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
USAGE = '%prog %%name ' + _('''
|
||||
file.pdf ...
|
||||
|
||||
def config(defaults=None):
|
||||
desc = _('Options to control the transformation of pdf')
|
||||
if defaults is None:
|
||||
c = Config('manipulatepdf', desc)
|
||||
else:
|
||||
c = StringConfig(defaults, desc)
|
||||
return c
|
||||
Get info about a PDF.
|
||||
''')
|
||||
|
||||
def print_help(parser, log):
    '''
    Give the help text of ``parser`` to ``log``, encoded with
    ``preferred_encoding`` (unencodable characters are replaced).
    '''
    log(parser.format_help().encode(preferred_encoding, 'replace'))
|
||||
|
||||
def option_parser(name):
|
||||
c = config()
|
||||
return c.option_parser(usage=_('''\
|
||||
%prog %%name [options] file.pdf ...
|
||||
|
||||
Get info about a PDF.
|
||||
'''.replace('%%name', name)))
|
||||
usage = USAGE.replace('%%name', name)
|
||||
return OptionParser(usage=usage)
|
||||
|
||||
def print_info(pdf_path):
|
||||
with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
|
||||
@ -53,32 +57,23 @@ def print_info(pdf_path):
|
||||
print _('PDF Version: %s' % mo.group('version'))
|
||||
except: pass
|
||||
|
||||
def verify_files(files):
    '''
    Return the paths from ``files`` that cannot be opened as a pdf.

    :param files: Iterable of pdf paths.
    :return: List of the paths for which opening the file or creating a
             ``PdfFileReader`` raised an error, in input order.
    '''
    invalid = []

    for pdf_path in files:
        try:
            with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
                PdfFileReader(pdf_file)
        except Exception:
            # Any failure marks the file as invalid; KeyboardInterrupt and
            # SystemExit are deliberately not swallowed.
            invalid.append(pdf_path)
    return invalid
|
||||
|
||||
def main(args=sys.argv, name=''):
|
||||
log = Log()
|
||||
parser = option_parser(name)
|
||||
|
||||
opts, args = parser.parse_args(args)
|
||||
args = args[1:]
|
||||
|
||||
if len(args) < 1:
|
||||
print 'Error: No PDF sepecified.\n'
|
||||
print parser.get_usage()
|
||||
return 2
|
||||
print_help(parser, log)
|
||||
return 1
|
||||
|
||||
bad_pdfs = verify_files(args)
|
||||
bad_pdfs = is_valid_pdfs(args)
|
||||
if bad_pdfs != []:
|
||||
for pdf in bad_pdfs:
|
||||
print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf
|
||||
return 2
|
||||
return 1
|
||||
|
||||
for pdf in args:
|
||||
print_info(pdf)
|
||||
@ -87,4 +82,3 @@ def main(args=sys.argv, name=''):
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
107
src/calibre/ebooks/pdf/manipulate/merge.py
Normal file
107
src/calibre/ebooks/pdf/manipulate/merge.py
Normal file
@ -0,0 +1,107 @@
|
||||
from __future__ import with_statement
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Merge PDF files into a single PDF document.
|
||||
'''
|
||||
|
||||
import os, sys
|
||||
from optparse import OptionGroup, Option
|
||||
|
||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.utils.logging import Log
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
from calibre.ebooks.pdf.verify import is_valid_pdfs
|
||||
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
USAGE = '%prog %%name ' + _('''
|
||||
[options] file1.pdf file2.pdf ...
|
||||
|
||||
Metadata will be used from the first PDF specified.
|
||||
|
||||
Merges individual PDFs.
|
||||
''')
|
||||
|
||||
OPTIONS = set([
|
||||
OptionRecommendation(name='output', recommended_value='merged.pdf',
|
||||
level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
|
||||
help=_('Path to output file. By default a file is created in the current directory.')),
|
||||
])
|
||||
|
||||
def print_help(parser, log):
    '''
    Pass the formatted help of ``parser`` to ``log``.

    The text is encoded with ``preferred_encoding``, replacing characters
    that cannot be encoded.
    '''
    message = parser.format_help()
    message = message.encode(preferred_encoding, 'replace')
    log(message)
|
||||
|
||||
def option_parser(name):
    '''
    Return an ``OptionParser`` using USAGE with ``%%name`` replaced by
    ``name``.
    '''
    return OptionParser(usage=USAGE.replace('%%name', name))
|
||||
|
||||
def option_recommendation_to_cli_option(add_option, rec):
    '''
    Turn the option recommendation ``rec`` into an optparse ``Option`` and
    hand it to ``add_option``.

    Switches, destination, help and choices are taken from ``rec.option``;
    the default is ``rec.recommended_value``.
    '''
    source = rec.option
    long_flag = '--' + source.long_switch
    if source.short_switch:
        flags = ['-' + source.short_switch, long_flag]
    else:
        flags = [long_flag]
    add_option(Option(*flags, dest=source.name, help=source.help,
                      choices=source.choices, default=rec.recommended_value))
|
||||
|
||||
def add_options(parser):
    '''
    Add a 'Merge Options:' group to ``parser`` and fill it with one option
    per entry of OPTIONS.
    '''
    merge_group = OptionGroup(parser, _('Merge Options:'),
                              _('Options to control the transformation of pdf'))
    parser.add_option_group(merge_group)

    for recommendation in OPTIONS:
        option_recommendation_to_cli_option(merge_group.add_option, recommendation)
|
||||
|
||||
def merge_files(in_paths, out_path, metadata=None):
    '''
    Append the pages of every pdf in ``in_paths`` to one pdf at ``out_path``.

    :param in_paths: Paths of the pdfs to merge, in output order.
    :param out_path: Path the merged pdf is written to.
    :param metadata: Object providing ``title`` and ``authors`` for the
                     output; 'Unknown' is used for both when it is None.
    '''
    if metadata is None:
        title = author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)

    # The inputs are closed only after the output has been written, as the
    # added pages may still need their source streams (assumption about
    # pyPdf). The finally clause closes them on errors as well.
    in_files = []
    try:
        for pdf_path in in_paths:
            in_file = open(os.path.abspath(pdf_path), 'rb')
            in_files.append(in_file)
            for page in PdfFileReader(in_file).pages:
                out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
    finally:
        for in_file in in_files:
            in_file.close()
|
||||
|
||||
def main(args=sys.argv, name=''):
    '''
    Entry point of the merge command.

    Needs at least two pdf paths. Returns 1 when fewer are given or when
    ``is_valid_pdfs`` reports paths (they are printed as unreadable), and 0
    after merging.
    '''
    log = Log()
    parser = option_parser(name)
    add_options(parser)

    opts, positional = parser.parse_args(args)
    pdf_paths = positional[1:]

    if len(pdf_paths) < 2:
        print('Error: Two or more PDF files are required.\n')
        print_help(parser, log)
        return 1

    unreadable = is_valid_pdfs(pdf_paths)
    if unreadable != []:
        for pdf in unreadable:
            print('Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf)
        return 1

    # Metadata is taken from the first pdf only.
    merge_files(pdf_paths, opts.output, metadata_from_formats([pdf_paths[0]]))
    return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
102
src/calibre/ebooks/pdf/manipulate/reverse.py
Normal file
102
src/calibre/ebooks/pdf/manipulate/reverse.py
Normal file
@ -0,0 +1,102 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Reverse content of PDF.
|
||||
'''
|
||||
|
||||
import os, sys
|
||||
from optparse import OptionGroup, Option
|
||||
|
||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.utils.logging import Log
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
from calibre.ebooks.pdf.verify import is_valid_pdf
|
||||
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
USAGE = '%prog %%name ' + _('''
|
||||
[options] file.pdf
|
||||
|
||||
Reverse PDF.
|
||||
''')
|
||||
|
||||
OPTIONS = set([
|
||||
OptionRecommendation(name='output', recommended_value='reversed.pdf',
|
||||
level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
|
||||
help=_('Path to output file. By default a file is created in the current directory.')),
|
||||
])
|
||||
|
||||
def print_help(parser, log):
    '''
    Log the help text of ``parser`` after encoding it with
    ``preferred_encoding`` (unencodable characters are replaced).
    '''
    text = parser.format_help()
    log(text.encode(preferred_encoding, 'replace'))
|
||||
|
||||
def option_parser(name):
    '''
    Return an ``OptionParser`` whose usage is USAGE with ``%%name``
    replaced by ``name``.
    '''
    return OptionParser(usage=USAGE.replace('%%name', name))
|
||||
|
||||
def option_recommendation_to_cli_option(add_option, rec):
    '''
    Register the option described by ``rec`` through ``add_option``.

    An optparse ``Option`` is built from ``rec.option`` (switches, name,
    help, choices) with ``rec.recommended_value`` as default. The short
    switch is left out when it is not set.
    '''
    described = rec.option
    flags = ['--' + described.long_switch]
    if described.short_switch:
        flags.insert(0, '-' + described.short_switch)
    keywords = {
        'dest': described.name,
        'help': described.help,
        'choices': described.choices,
        'default': rec.recommended_value,
    }
    add_option(Option(*flags, **keywords))
|
||||
|
||||
def add_options(parser):
    '''
    Add a 'Reverse Options:' group to ``parser`` containing one option for
    each entry of OPTIONS.
    '''
    reverse_group = OptionGroup(parser, _('Reverse Options:'),
                                _('Options to control the transformation of pdf'))
    parser.add_option_group(reverse_group)

    for recommendation in OPTIONS:
        option_recommendation_to_cli_option(reverse_group.add_option, recommendation)
|
||||
|
||||
def reverse(pdf_path, out_path, metadata=None):
    '''
    Write the pages of the pdf at ``pdf_path`` to ``out_path`` in reverse
    order.

    :param pdf_path: Path of the pdf to read.
    :param out_path: Path the reversed pdf is written to.
    :param metadata: Object providing ``title`` and ``authors`` for the
                     output; 'Unknown' is used for both when it is None.
    '''
    if metadata is None:
        title = author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)

    # The input is kept open until the output is written, as the added pages
    # may still need the source stream (assumption about pyPdf); the with
    # block then closes it.
    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
        pdf = PdfFileReader(pdf_file)
        for page in reversed(pdf.pages):
            out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
|
||||
|
||||
def main(args=sys.argv, name=''):
    '''
    Entry point of the reverse command.

    Returns 1 when no pdf is given or ``is_valid_pdf`` rejects it, and 0
    after the reversed copy has been written to ``opts.output``.
    '''
    log = Log()
    parser = option_parser(name)
    add_options(parser)

    opts, positional = parser.parse_args(args)
    pdf_paths = positional[1:]

    if not pdf_paths:
        print('Error: A PDF file is required.\n')
        print_help(parser, log)
        return 1

    pdf_path = pdf_paths[0]
    if not is_valid_pdf(pdf_path):
        print('Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf_path)
        return 1

    reverse(pdf_path, opts.output, metadata_from_formats([pdf_path]))
    return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
@ -1,46 +1,69 @@
|
||||
'''
|
||||
Split PDF file into multiple PDF documents.
|
||||
'''
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Split PDF file into multiple PDF documents.
|
||||
'''
|
||||
|
||||
import os, sys, re
|
||||
from optparse import OptionGroup, Option
|
||||
|
||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.utils.logging import Log
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
from calibre.ebooks.pdf.verify import is_valid_pdf
|
||||
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
def config(defaults=None):
    '''
    Build the configuration object of the split command.

    A ``Config`` named ``splitpdf`` is used without ``defaults``, otherwise
    a ``StringConfig`` created from ``defaults``. An ``output`` option
    (``-o``/``--output``, default ``split.pdf``) is added to it.
    '''
    description = _('Options to control the transformation of pdf')
    if defaults is None:
        settings = Config('splitpdf', description)
    else:
        settings = StringConfig(defaults, description)
    output_help = _('Path to output file. By default a file is created in the current directory. \
The file name will be the base name for the output.')
    settings.add_opt('output', ['-o', '--output'], default='split.pdf',
                     help=output_help)
    return settings
|
||||
USAGE = _('''
|
||||
%prog %%name [options] file.pdf page_to_split_on ...
|
||||
%prog %%name [options] file.pdf page_range_to_split_on ...
|
||||
|
||||
Ex.
|
||||
|
||||
%prog %%name file.pdf 6
|
||||
%prog %%name file.pdf 6-12
|
||||
%prog %%name file.pdf 6-12 8 10 9-20
|
||||
|
||||
Split a PDF.
|
||||
''')
|
||||
|
||||
OPTIONS = set([
|
||||
OptionRecommendation(name='output', recommended_value='split.pdf',
|
||||
level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
|
||||
help=_('Path to output file. By default a file is created in the current directory.')),
|
||||
])
|
||||
|
||||
def print_help(parser, log):
    '''
    Encode the formatted help of ``parser`` with ``preferred_encoding``
    (replacing unencodable characters) and pass it to ``log``.
    '''
    formatted = parser.format_help()
    log(formatted.encode(preferred_encoding, 'replace'))
|
||||
|
||||
def option_parser(name):
|
||||
c = config()
|
||||
return c.option_parser(usage=_('''\
|
||||
usage = USAGE.replace('%%name', name)
|
||||
return OptionParser(usage=usage)
|
||||
|
||||
%prog %%name [options] file.pdf page_to_split_on ...
|
||||
%prog %%name [options] file.pdf page_range_to_split_on ...
|
||||
def option_recommendation_to_cli_option(add_option, rec):
    '''
    Build an optparse ``Option`` from ``rec`` and give it to ``add_option``.

    ``rec.option`` provides the switches, the destination name, the help
    text and the choices; ``rec.recommended_value`` is used as default.
    '''
    spec = rec.option
    flags = []
    if spec.short_switch:
        flags.append('-' + spec.short_switch)
    flags.append('--' + spec.long_switch)
    add_option(Option(*flags, dest=spec.name, help=spec.help,
                      choices=spec.choices, default=rec.recommended_value))
|
||||
|
||||
Ex.
|
||||
def add_options(parser):
|
||||
group = OptionGroup(parser, _('Split Options:'), _('Options to control the transformation of pdf'))
|
||||
parser.add_option_group(group)
|
||||
add_option = group.add_option
|
||||
|
||||
%prog %%name file.pdf 6
|
||||
%prog %%name file.pdf 6-12
|
||||
%prog %%name file.pdf 6-12 8 10 9-20
|
||||
|
||||
Split a PDF.
|
||||
'''.replace('%%name', name)))
|
||||
for rec in OPTIONS:
|
||||
option_recommendation_to_cli_option(add_option, rec)
|
||||
|
||||
def split_pdf(in_path, pages, page_ranges, out_name, metadata=None):
|
||||
pdf = PdfFileReader(open(os.path.abspath(in_path), 'rb'))
|
||||
@ -141,37 +164,29 @@ def clean_page_list(pdf_path, pages, page_ranges):
|
||||
|
||||
return pages, page_ranges
|
||||
|
||||
def valid_pdf(pdf_path):
    '''
    Return True if the pdf is valid.

    The file at ``pdf_path`` is valid when it can be opened and read by
    ``PdfFileReader``, is not encrypted and has at least one page. Any
    error while checking gives False.
    '''
    try:
        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
            pdf = PdfFileReader(pdf_file)
            return not (pdf.isEncrypted or pdf.numPages <= 0)
    except Exception:
        # KeyboardInterrupt and SystemExit are deliberately not swallowed.
        return False
|
||||
|
||||
def main(args=sys.argv, name=''):
|
||||
log = Log()
|
||||
parser = option_parser(name)
|
||||
add_options(parser)
|
||||
|
||||
opts, args = parser.parse_args(args)
|
||||
|
||||
pdf, pages, page_ranges, unknown = split_args(args[1:])
|
||||
|
||||
if pdf == '' and (pages == [] or page_ranges == []):
|
||||
print 'Error: PDF and where to split is required.\n\n'
|
||||
print parser.get_usage()
|
||||
return 2
|
||||
print 'Error: PDF and where to split is required.\n'
|
||||
print_help(parser, log)
|
||||
return 1
|
||||
|
||||
if unknown != []:
|
||||
for arg in unknown:
|
||||
print 'Error: Unknown argument `%s`' % arg
|
||||
print parser.get_usage()
|
||||
return 2
|
||||
print_help(parser, log)
|
||||
return 1
|
||||
|
||||
if not valid_pdf(pdf):
|
||||
if not is_valid_pdf(pdf):
|
||||
print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf
|
||||
return 2
|
||||
return 1
|
||||
|
||||
pages, page_ranges = clean_page_list(pdf, pages, page_ranges)
|
||||
|
||||
@ -183,4 +198,3 @@ def main(args=sys.argv, name=''):
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
@ -1,91 +0,0 @@
|
||||
'''
|
||||
Merge PDF files into a single PDF document.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, sys
|
||||
|
||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
def config(defaults=None):
    '''
    Build the configuration object of the merge command.

    A ``Config`` named ``mergepdf`` is used without ``defaults``, otherwise
    a ``StringConfig`` created from ``defaults``. An ``output`` option
    (``-o``/``--output``, default ``merged.pdf``) is added to it.
    '''
    description = _('Options to control the transformation of pdf')
    if defaults is None:
        settings = Config('mergepdf', description)
    else:
        settings = StringConfig(defaults, description)
    output_help = _('Path to output file. By default a file is created in the current directory.')
    settings.add_opt('output', ['-o', '--output'], default='merged.pdf',
                     help=output_help)
    return settings
|
||||
|
||||
def option_parser(name):
    '''
    Return the option parser of the merge command.

    ``%%name`` in the usage text is replaced by ``name`` before the text is
    translated.
    '''
    c = config()
    usage = '''\
%prog %%name [options] file1.pdf file2.pdf ...

Merges individual PDFs. Metadata will be used from the first PDF specified.
'''.replace('%%name', name)
    return c.option_parser(usage=_(usage))
|
||||
|
||||
def merge_files(in_paths, out_path, metadata=None):
    '''
    Append the pages of every pdf in ``in_paths`` to one pdf at ``out_path``.

    :param in_paths: Paths of the pdfs to merge, in output order.
    :param out_path: Path the merged pdf is written to.
    :param metadata: Object providing ``title`` and ``authors`` for the
                     output; 'Unknown' is used for both when it is None.
    '''
    if metadata is None:
        title = author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)

    # Inputs are closed only after the output is written, as the added pages
    # may still need their source streams (assumption about pyPdf).
    opened = []
    try:
        for pdf_path in in_paths:
            source = open(os.path.abspath(pdf_path), 'rb')
            opened.append(source)
            for page in PdfFileReader(source).pages:
                out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
    finally:
        for source in opened:
            source.close()
|
||||
|
||||
def verify_files(files):
    '''
    Return the paths from ``files`` that are not usable pdfs.

    A file is reported when it cannot be opened or read by
    ``PdfFileReader``, is encrypted or has no pages.

    :param files: Iterable of pdf paths.
    :return: List of the rejected paths, in input order.
    '''
    invalid = []

    for pdf_path in files:
        try:
            with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
                pdf = PdfFileReader(pdf_file)
                usable = not (pdf.isEncrypted or pdf.numPages <= 0)
        except Exception:
            # KeyboardInterrupt and SystemExit are deliberately not swallowed.
            usable = False
        if not usable:
            invalid.append(pdf_path)
    return invalid
|
||||
|
||||
def main(args=sys.argv, name=''):
    '''
    Entry point of the merge command.

    Returns 2 when fewer than two pdfs are given or ``verify_files`` reports
    any of them, and 0 after the merged pdf has been written.
    '''
    parser = option_parser(name)
    opts, positional = parser.parse_args(args)
    pdf_paths = positional[1:]

    if len(pdf_paths) < 2:
        print('Error: Two or more PDF files are required.\n\n')
        print(parser.get_usage())
        return 2

    unreadable = verify_files(pdf_paths)
    if unreadable != []:
        for pdf in unreadable:
            print('Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf)
        return 2

    # Metadata is taken from the first pdf only.
    merge_files(pdf_paths, opts.output, metadata_from_formats([pdf_paths[0]]))
    return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
@ -2,8 +2,8 @@
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> \
|
||||
2009, John Schember <john@nachtimwald.com>'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>, ' \
|
||||
'2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import errno, os, sys, subprocess
|
||||
|
@ -1,88 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Reverse content of PDF.
|
||||
'''
|
||||
|
||||
import os, sys
|
||||
|
||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
def config(defaults=None):
    '''
    Build the configuration object of the reverse command.

    A ``Config`` named ``reversepdf`` is used without ``defaults``,
    otherwise a ``StringConfig`` created from ``defaults``. An ``output``
    option (``-o``/``--output``, default ``reversed.pdf``) is added to it.
    '''
    description = _('Options to control the transformation of pdf')
    if defaults is None:
        settings = Config('reversepdf', description)
    else:
        settings = StringConfig(defaults, description)
    output_help = _('Path to output file. By default a file is created in the current directory.')
    settings.add_opt('output', ['-o', '--output'], default='reversed.pdf',
                     help=output_help)
    return settings
|
||||
|
||||
def option_parser(name):
    '''
    Return the option parser of the reverse command.

    ``%%name`` in the usage text is replaced by ``name`` before the text is
    translated.
    '''
    c = config()
    usage = '''\
%prog %%name [options] file1.pdf

Reverse PDF.
'''.replace('%%name', name)
    return c.option_parser(usage=_(usage))
|
||||
|
||||
def reverse(pdf_path, out_path, metadata=None):
    '''
    Write the pages of the pdf at ``pdf_path`` to ``out_path`` in reverse
    order.

    :param pdf_path: Path of the pdf to read.
    :param out_path: Path the reversed pdf is written to.
    :param metadata: Object providing ``title`` and ``authors`` for the
                     output; 'Unknown' is used for both when it is None.
    '''
    if metadata is None:
        title = author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)

    # The input stays open until the output is written, as the added pages
    # may still need the source stream (assumption about pyPdf).
    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
        for page in reversed(PdfFileReader(pdf_file).pages):
            out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
|
||||
|
||||
def valid_pdf(pdf_path):
    '''
    Return True if the pdf is valid.

    The file at ``pdf_path`` is valid when it can be opened and read by
    ``PdfFileReader``, is not encrypted and has at least one page. Any
    error while checking gives False.
    '''
    try:
        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
            pdf = PdfFileReader(pdf_file)
            return not (pdf.isEncrypted or pdf.numPages <= 0)
    except Exception:
        # KeyboardInterrupt and SystemExit are deliberately not swallowed.
        return False
|
||||
|
||||
|
||||
def main(args=sys.argv, name=''):
    '''
    Entry point of the reverse command.

    Returns 2 when no pdf is given or ``valid_pdf`` rejects it, and 0 after
    the reversed copy has been written to ``opts.output``.
    '''
    parser = option_parser(name)
    opts, positional = parser.parse_args(args)
    pdf_paths = positional[1:]

    if not pdf_paths:
        print('Error: A PDF file is required.\n\n')
        print(parser.get_usage())
        return 2

    pdf_path = pdf_paths[0]
    if not valid_pdf(pdf_path):
        print('Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf_path)
        return 2

    reverse(pdf_path, opts.output, metadata_from_formats([pdf_path]))
    return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
@ -1,93 +0,0 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, James Beal, james_@catbus.co.uk'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'crop a pdf file'
|
||||
|
||||
import os, sys, re
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
def config(defaults=None):
    '''
    Create the configuration object holding the options of the trim command.

    :param defaults: When None a ``Config('trimpdf', ...)`` is created,
        otherwise a ``StringConfig`` built from ``defaults``.
    :return: The configuration object with the output, crop margin and
        bounding file options added.
    '''
    default_crop = 10
    desc = _('Options to control the transformation of pdf')
    cfg = Config('trimpdf', desc) if defaults is None else StringConfig(defaults, desc)

    output_help = _('Path to output file. By default a file is created in the current directory.')
    cfg.add_opt('output', ['-o', '--output'], default='cropped.pdf', help=output_help)

    # The four crop margins share the same default value.
    left_x_help = _('Number of pixels to crop from the left most x (default is %d) ') % default_crop
    cfg.add_opt('bottom_left_x', ['-x', '--leftx'], default=default_crop, help=left_x_help)

    left_y_help = _('Number of pixels to crop from the left most y (default is %d) ') % default_crop
    cfg.add_opt('bottom_left_y', ['-y', '--lefty'], default=default_crop, help=left_y_help)

    right_x_help = _('Number of pixels to crop from the right most x (default is %d) ') % default_crop
    cfg.add_opt('top_right_x', ['-v', '--rightx'], default=default_crop, help=right_x_help)

    right_y_help = _('Number of pixels to crop from the right most y (default is %d)') % default_crop
    cfg.add_opt('top_right_y', ['-w', '--righty'], default=default_crop, help=right_y_help)

    bounding_help = _('A file generated by ghostscript which allows each page to be individually cropped [gs -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox > bounding] ')
    cfg.add_opt('bounding', ['-b', '--bounding'], help=bounding_help)

    return cfg
|
||||
|
||||
|
||||
def option_parser(name):
    '''
    Build the command line option parser for the trim (crop) command.

    :param name: Command name substituted for ``%%name`` in the usage text.
    :return: Whatever ``config().option_parser`` returns for that usage text.
    '''
    c = config()
    # Translate the constant template first and only then substitute the
    # command name. Looking up the already-substituted text would use a
    # message id that contains ``name`` and so never matches a catalog entry.
    usage = _('''\
%prog %%name [options] file.pdf

Crops a pdf.
''')
    return c.option_parser(usage=usage.replace('%%name', name))
|
||||
|
||||
def _next_bounding_box(bounding, bounding_regex):
    # Return the next regex match found in the open bounding file, or None
    # once the end of the file is reached without a further match.
    while True:
        line = bounding.readline()
        if not line:
            return None
        match = bounding_regex.search(line)
        if match is not None:
            return match


def main(args=sys.argv, name=''):
    '''
    Command line entry point: crop every page of a PDF file.

    Without ``--bounding`` each page's media box is set to its bleed box
    shrunk by the four margin options. With ``--bounding`` the media box of
    each page is taken from successive ``%%BoundingBox`` lines of that file.

    :param args: Argument list; ``args[1]`` after option parsing is the
        input PDF.
    :param name: Command name passed on to ``option_parser``.
    :return: 0 on success, 2 when the input cannot be read, 1 when the
        bounding file cannot be opened or has too few bounding boxes.
    '''
    parser = option_parser(name)
    opts, args = parser.parse_args(args)

    input_file = None
    bounding = None
    try:
        try:
            # A missing positional argument (IndexError) is reported the
            # same way as an unreadable file, as before.
            source = os.path.abspath(args[1])
            input_file = open(source, 'rb')
            input_pdf = PdfFileReader(input_file)
        except Exception:
            print('Unable to read input')
            return 2

        title = _('Unknown')
        author = _('Unknown')
        try:
            info = input_pdf.getDocumentInfo()
            if info.title:
                title = info.title
            if info.author:
                author = info.author
        except Exception:
            # Metadata is best effort only; keep the 'Unknown' defaults.
            pass

        if opts.bounding is not None:
            try:
                bounding = open(opts.bounding, 'r')
            except Exception:
                print('Error opening %s' % opts.bounding)
                return 1
            bounding_regex = re.compile('%%BoundingBox: (?P<bottom_x>[0-9]+) (?P<bottom_y>[0-9]+) (?P<top_x>[0-9]+) (?P<top_y>[0-9]+)')

        output_pdf = PdfFileWriter(title=title, author=author)
        for page_number in range(input_pdf.getNumPages()):
            page = input_pdf.getPage(page_number)
            if bounding is not None:
                match = _next_bounding_box(bounding, bounding_regex)
                if match is None:
                    # Previously this case looped forever, because
                    # readline() keeps returning '' at end of file.
                    print('Error: no bounding box found in %s for page %d' % (opts.bounding, page_number + 1))
                    return 1
                page.mediaBox.upperRight = (match.group('top_x'), match.group('top_y'))
                page.mediaBox.lowerLeft = (match.group('bottom_x'), match.group('bottom_y'))
            else:
                page.mediaBox.upperRight = (page.bleedBox.getUpperRight_x() - opts.top_right_x,
                                            page.bleedBox.getUpperRight_y() - opts.top_right_y)
                page.mediaBox.lowerLeft = (page.bleedBox.getLowerLeft_x() + opts.bottom_left_x,
                                           page.bleedBox.getLowerLeft_y() + opts.bottom_left_y)
            output_pdf.addPage(page)

        # The input handle is still open here; it is only closed in the
        # finally clause, after the output has been written.
        with open(opts.output, 'wb') as output_file:
            output_pdf.write(output_file)
    finally:
        # Close whatever was opened, on success and on every error path.
        if bounding is not None:
            bounding.close()
        if input_file is not None:
            input_file.close()

    return 0
|
||||
|
||||
if __name__ == '__main__':
    # Run the command and hand its return code to the shell.
    exit_status = main()
    sys.exit(exit_status)
|
37
src/calibre/ebooks/pdf/verify.py
Normal file
37
src/calibre/ebooks/pdf/verify.py
Normal file
@ -0,0 +1,37 @@
|
||||
from __future__ import with_statement
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Verify PDF files.
|
||||
'''
|
||||
|
||||
import os
|
||||
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
def is_valid_pdf(pdf_path):
    '''
    Returns True if the pdf file is valid.

    A file counts as valid when it can be opened and PdfFileReader can be
    constructed from it without raising an error.

    :param pdf_path: Path of the file to check.
    :return: True if the file could be read, False otherwise.
    '''

    try:
        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
            # Only the attempt to parse matters; the reader is not kept.
            PdfFileReader(pdf_file)
    except Exception:
        # Exception (rather than a bare except) lets
        # KeyboardInterrupt/SystemExit propagate.
        return False
    return True
|
||||
|
||||
def is_valid_pdfs(pdf_paths):
    '''
    Returns a list of invalid pdf files.

    The paths for which ``is_valid_pdf`` returns False are collected in the
    order they were given; an empty list means every file passed.
    '''

    return [candidate for candidate in pdf_paths if not is_valid_pdf(candidate)]
|
@ -39,7 +39,7 @@ entry_points = {
|
||||
'calibre-fontconfig = calibre.utils.fontconfig:main',
|
||||
'calibre-parallel = calibre.parallel:main',
|
||||
'calibre-customize = calibre.customize.ui:main',
|
||||
'pdfmanipulate = calibre.ebooks.pdf.manipulate:main',
|
||||
'pdfmanipulate = calibre.ebooks.pdf.manipulate.cli:main',
|
||||
'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
|
||||
'calibre-smtp = calibre.utils.smtp:main',
|
||||
],
|
||||
|
Loading…
x
Reference in New Issue
Block a user