New pdf manipulate commands. Remove old option parser from pdf metadata.

This commit is contained in:
John Schember 2009-04-04 10:37:00 -04:00
parent 697eabe9ae
commit 08971e8316
11 changed files with 274 additions and 54 deletions

View File

@ -7,7 +7,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import sys, os, re, StringIO
from calibre.ebooks.metadata import MetaInformation, authors_to_string, get_parser
from calibre.ebooks.metadata import MetaInformation, authors_to_string
from calibre.ptempfile import TemporaryDirectory
from pyPdf import PdfFileReader, PdfFileWriter
import Image
@ -96,40 +96,3 @@ def get_cover(stream):
traceback.print_exc()
return data.getvalue()
def option_parser():
    '''
    Build the command line parser for the PDF metadata tool.

    Starts from the parser returned by ``get_parser('pdf')``, removes the
    ``--category`` and ``--comment`` switches and adds a ``--get-cover``
    flag that defaults to False.
    '''
    parser = get_parser('pdf')
    for unsupported in ('--category', '--comment'):
        parser.remove_option(unsupported)
    cover_help = _('Extract the cover')
    parser.add_option('--get-cover', action='store_true', default=False,
                      help=cover_help)
    return parser
def main(args=sys.argv):
    '''
    Command line entry point: print, and optionally update, PDF metadata.

    :param args: Argument list in ``sys.argv`` form. After option parsing,
                 ``args[1]`` is used as the path of the PDF file.
    :return: 0 once processing has completed.
    '''
    p = option_parser()
    opts, args = p.parse_args(args)
    # Opened read/write ('r+b') because the same stream is handed to
    # set_metadata() when a new title or author list was requested.
    with open(os.path.abspath(os.path.expanduser(args[1])), 'r+b') as stream:
        mi = get_metadata(stream, extract_cover=opts.get_cover)
        changed = False
        if opts.title:
            mi.title = opts.title
            changed = True
        if opts.authors:
            # Authors are given as one comma separated string.
            mi.authors = opts.authors.split(',')
            changed = True
        if changed:
            set_metadata(stream, mi)
        # Read the metadata again from the stream so the printed values
        # reflect any update made above.
        print unicode(get_metadata(stream, extract_cover=False)).encode('utf-8')
    # cover_data[1] is written out as the cover image when it is not None;
    # the file name is derived from the input's base name, so the cover
    # lands in the current working directory.
    if mi.cover_data[1] is not None:
        cpath = os.path.splitext(os.path.basename(args[1]))[0] + '_cover.jpg'
        with open(cpath, 'wb') as f:
            f.write(mi.cover_data[1])
        print 'Cover saved to', f.name
    return 0
# When run directly as a script, main()'s return value becomes the process
# exit status.
if __name__ == '__main__':
    sys.exit(main())

View File

@ -15,10 +15,13 @@ from calibre.utils.config import OptionParser
from calibre.utils.logging import Log
from calibre.constants import preferred_encoding
from calibre.customize.conversion import OptionRecommendation
from calibre.ebooks.pdf.manipulate import crop, info, merge, reverse, split
from calibre.ebooks.pdf.manipulate import crop, decrypt, encrypt, \
info, merge, reverse, split
COMMANDS = {
'crop' : crop,
'decrypt' : decrypt,
'encrypt' : encrypt,
'info' : info,
'merge' : merge,
'reverse' : reverse,

View File

@ -25,7 +25,7 @@ from pyPdf import PdfFileWriter, PdfFileReader
DEFAULT_CROP = '10'
USAGE = '%prog %%name ' + _('''
USAGE = '\n%prog %%name ' + _('''\
[options] file.pdf
Crop a PDF file.
@ -132,7 +132,11 @@ def main(args=sys.argv, name=''):
return 1
if not is_valid_pdf(args[0]):
print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % args[0]
print 'Error: Could not read file `%s`.' % args[0]
return 1
if is_encrypted(args[0]):
print 'Error: file `%s` is encrypted.' % args[0]
return 1
mi = metadata_from_formats([args[0]])

View File

@ -0,0 +1,115 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Decrypt content of PDF.
'''
import os, sys
from optparse import OptionGroup, Option
from calibre.ebooks.metadata.meta import metadata_from_formats
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.config import OptionParser
from calibre.utils.logging import Log
from calibre.constants import preferred_encoding
from calibre.customize.conversion import OptionRecommendation
from calibre.ebooks.pdf.verify import is_valid_pdf, is_encrypted
from pyPdf import PdfFileWriter, PdfFileReader
# Usage text of the decrypt command. ``%%name`` is a placeholder that
# option_parser() replaces with the command name it is given.
USAGE = '\n%prog %%name ' + _('''\
[options] file.pdf password
Decrypt a PDF.
''')
# Option recommendations of the decrypt command; add_options() turns each
# entry into a command line option. The only entry is the output path
# (-o/--output), which defaults to 'decrypted.pdf'.
OPTIONS = set([
    OptionRecommendation(name='output', recommended_value='decrypted.pdf',
        level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
        help=_('Path to output file. By default a file is created in the current directory.')),
])
class DecryptionError(Exception):
    '''
    Raised when a PDF could not be decrypted with the supplied password.

    The human readable message is stored in ``value``.
    '''

    def __init__(self, pdf_path):
        '''
        :param pdf_path: Path of the PDF that could not be decrypted.
        '''
        # Format with the path that was actually passed in; wrapping it in a
        # tuple keeps the formatting correct for any argument type.
        self.value = 'Unable to decrypt file `%s`.' % (pdf_path,)
        Exception.__init__(self, self.value)

    def __str__(self):
        return repr(self.value)
def print_help(parser, log):
    '''
    Send the parser's formatted help text to ``log``.

    The text is encoded with ``preferred_encoding``; characters that cannot
    be encoded are replaced.
    '''
    help_text = parser.format_help()
    log(help_text.encode(preferred_encoding, 'replace'))
def option_parser(name):
    '''
    Create the option parser of the decrypt command.

    :param name: Command name substituted for the ``%%name`` placeholder
                 in USAGE.
    :return: An ``OptionParser`` using the resulting usage text.
    '''
    return OptionParser(usage=USAGE.replace('%%name', name))
def option_recommendation_to_cli_option(add_option, rec):
    '''
    Register one option recommendation as an optparse option.

    :param add_option: Callable that receives the constructed ``Option``.
    :param rec: Recommendation whose ``option`` attribute supplies the
                switches, name, help and choices, and whose
                ``recommended_value`` becomes the default.
    '''
    option = rec.option
    flags = []
    # The short switch is optional, the long switch is always present.
    if option.short_switch:
        flags.append('-' + option.short_switch)
    flags.append('--' + option.long_switch)
    settings = {
        'dest': option.name,
        'help': option.help,
        'choices': option.choices,
        'default': rec.recommended_value,
    }
    cli_option = Option(*flags, **settings)
    add_option(cli_option)
def add_options(parser):
    '''
    Add the decrypt option group to ``parser``.

    Every recommendation in OPTIONS is registered in that group.
    '''
    title = _('Decrypt Options:')
    description = _('Options to control the transformation of pdf')
    decrypt_group = OptionGroup(parser, title, description)
    parser.add_option_group(decrypt_group)
    register = decrypt_group.add_option
    for recommendation in OPTIONS:
        option_recommendation_to_cli_option(register, recommendation)
def decrypt(pdf_path, out_path, password):
    '''
    Write a decrypted copy of an encrypted PDF.

    :param pdf_path: Path of the encrypted PDF to read.
    :param out_path: Path the decrypted PDF is written to.
    :param password: Password used to unlock the PDF; converted with ``str``.
    :raises DecryptionError: If ``PdfFileReader.decrypt`` returns 0.
    '''
    # Close the input deterministically. It stays open until the output has
    # been written because the reader may still need the file while the
    # pages are serialised (NOTE(review): assumed from pyPdf reading lazily).
    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
        pdf = PdfFileReader(pdf_file)

        if pdf.decrypt(str(password)) == 0:
            raise DecryptionError(pdf_path)

        # documentInfo can be None when the PDF carries no info dictionary,
        # so fall back to 'Unknown' instead of failing on attribute access.
        info = pdf.documentInfo
        title = info.title if info is not None and info.title else _('Unknown')
        author = info.author if info is not None and info.author else _('Unknown')
        out_pdf = PdfFileWriter(title=title, author=author)

        for page in pdf.pages:
            out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
def main(args=sys.argv, name=''):
log = Log()
parser = option_parser(name)
add_options(parser)
opts, args = parser.parse_args(args)
args = args[1:]
if len(args) < 2:
print 'Error: A PDF file and decryption password is required.\n'
print_help(parser, log)
return 1
if not is_valid_pdf(args[0]):
print 'Error: Could not read file `%s`.' % args[0]
return 1
if not is_encrypted(args[0]):
print 'Error: file `%s` is not encrypted.' % args[0]
return 1
try:
decrypt(args[0], opts.output, args[1])
except DecryptionError, e:
print e.value
return 1
return 0
# When run directly as a script, main()'s return value becomes the process
# exit status.
if __name__ == '__main__':
    sys.exit(main())

View File

@ -0,0 +1,105 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Encrypt a PDF.
'''
import os, sys
from optparse import OptionGroup, Option
from calibre.utils.config import OptionParser
from calibre.utils.logging import Log
from calibre.constants import preferred_encoding
from calibre.customize.conversion import OptionRecommendation
from calibre.ebooks.pdf.verify import is_valid_pdf, is_encrypted
from pyPdf import PdfFileWriter, PdfFileReader
# Usage text of the encrypt command. ``%%name`` is a placeholder that
# option_parser() replaces with the command name it is given.
USAGE = '\n%prog %%name ' + _('''\
[options] file.pdf password
Encrypt a PDF.
''')
# Option recommendations of the encrypt command; add_options() turns each
# entry into a command line option. The only entry is the output path
# (-o/--output), which defaults to 'encrypted.pdf'.
OPTIONS = set([
    OptionRecommendation(name='output', recommended_value='encrypted.pdf',
        level=OptionRecommendation.HIGH, long_switch='output', short_switch='o',
        help=_('Path to output file. By default a file is created in the current directory.')),
])
def print_help(parser, log):
    '''
    Log the parser's help text.

    The formatted help is encoded with ``preferred_encoding`` (unencodable
    characters are replaced) before it is passed to ``log``.
    '''
    encoded_help = parser.format_help().encode(preferred_encoding, 'replace')
    return_value_ignored = log(encoded_help)
    del return_value_ignored
def option_parser(name):
    '''
    Create the option parser of the encrypt command.

    :param name: Command name substituted for the ``%%name`` placeholder
                 in USAGE.
    :return: An ``OptionParser`` using the resulting usage text.
    '''
    usage_text = USAGE.replace('%%name', name)
    parser = OptionParser(usage=usage_text)
    return parser
def option_recommendation_to_cli_option(add_option, rec):
    '''
    Convert an option recommendation into an optparse ``Option`` and hand it
    to ``add_option``.

    :param add_option: Callable that receives the constructed ``Option``.
    :param rec: Recommendation; ``rec.option`` provides switches, name, help
                and choices, ``rec.recommended_value`` the default.
    '''
    spec = rec.option
    long_flag = '--' + spec.long_switch
    if spec.short_switch:
        # Short form first, then the mandatory long form.
        flags = ['-' + spec.short_switch, long_flag]
    else:
        flags = [long_flag]
    keywords = {
        'dest': spec.name,
        'help': spec.help,
        'choices': spec.choices,
        'default': rec.recommended_value,
    }
    add_option(Option(*flags, **keywords))
def add_options(parser):
    '''
    Add the encrypt option group to ``parser``.

    Every recommendation in OPTIONS is registered in that group.
    '''
    title = _('Encrypt Options:')
    description = _('Options to control the transformation of pdf')
    encrypt_group = OptionGroup(parser, title, description)
    parser.add_option_group(encrypt_group)
    register = encrypt_group.add_option
    for recommendation in OPTIONS:
        option_recommendation_to_cli_option(register, recommendation)
def encrypt(pdf_path, out_path, password, metadata=None):
    '''
    Write a password protected copy of a PDF.

    :param pdf_path: Path of the PDF to read.
    :param out_path: Path the encrypted PDF is written to.
    :param password: Password to protect the PDF with; converted with ``str``.
    :param metadata: Optional object providing ``title`` and ``authors`` for
                     the output. When None, both title and author are set to
                     'Unknown'.
    '''
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        # This module does not import authors_to_string at file level, so
        # bring it into scope here.
        from calibre.ebooks.metadata import authors_to_string
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)

    # Close the input deterministically. It stays open until the output has
    # been written because the reader may still need the file while the
    # pages are serialised (NOTE(review): assumed from pyPdf reading lazily).
    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
        pdf = PdfFileReader(pdf_file)
        for page in pdf.pages:
            out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.encrypt(str(password))
            out_pdf.write(out_file)
def main(args=sys.argv, name=''):
log = Log()
parser = option_parser(name)
add_options(parser)
opts, args = parser.parse_args(args)
args = args[1:]
if len(args) < 2:
print 'Error: A PDF file and decryption password is required.\n'
print_help(parser, log)
return 1
if not is_valid_pdf(args[0]):
print 'Error: Could not read file `%s`.' % args[0]
return 1
if is_encrypted(args[0]):
print 'Error: file `%s` is already encrypted.' % args[0]
return 1
mi = metadata_from_formats([args[0]])
encrypt(args[0], opts.output, args[1], mi)
return 0
# When run directly as a script, main()'s return value becomes the process
# exit status.
if __name__ == '__main__':
    sys.exit(main())

View File

@ -16,11 +16,11 @@ from calibre.utils.config import OptionParser
from calibre.utils.logging import Log
from calibre.constants import preferred_encoding
from calibre.customize.conversion import OptionRecommendation
from calibre.ebooks.pdf.verify import is_valid_pdfs
from calibre.ebooks.pdf.verify import is_valid_pdfs, is_encrypted
from pyPdf import PdfFileWriter, PdfFileReader
USAGE = '%prog %%name ' + _('''
USAGE = '\n%prog %%name ' + _('''\
file.pdf ...
Get info about a PDF.
@ -72,9 +72,17 @@ def main(args=sys.argv, name=''):
bad_pdfs = is_valid_pdfs(args)
if bad_pdfs != []:
for pdf in bad_pdfs:
print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf
print 'Error: Could not read file `%s`.' % pdf
return 1
enc = False
for pdf in args:
if is_encrypted(pdf):
enc = True
print 'Error: file `%s` is encrypted. Please decrypt first.' % pdf
if enc:
return 1
for pdf in args:
print_info(pdf)

View File

@ -22,7 +22,7 @@ from calibre.ebooks.pdf.verify import is_valid_pdfs
from pyPdf import PdfFileWriter, PdfFileReader
USAGE = '%prog %%name ' + _('''
USAGE = '\n%prog %%name ' + _('''\
[options] file1.pdf file2.pdf ...
Metadata will be used from the first PDF specified.
@ -94,9 +94,17 @@ def main(args=sys.argv, name=''):
bad_pdfs = is_valid_pdfs(args)
if bad_pdfs != []:
for pdf in bad_pdfs:
print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf
print 'Error: Could not read file `%s`.' % pdf
return 1
enc = False
for pdf in args:
if is_encrypted(pdf):
enc = True
print 'Error: file `%s` is encrypted.' % pdf
if enc:
return 1
mi = metadata_from_formats([args[0]])
merge_files(args, opts.output, mi)

View File

@ -22,10 +22,10 @@ from calibre.ebooks.pdf.verify import is_valid_pdf
from pyPdf import PdfFileWriter, PdfFileReader
USAGE = '%prog %%name ' + _('''
USAGE = '\n%prog %%name ' + _('''\
[options] file.pdf
Reverse PDF.
Reverse a PDF.
''')
OPTIONS = set([
@ -89,7 +89,11 @@ def main(args=sys.argv, name=''):
return 1
if not is_valid_pdf(args[0]):
print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % args[0]
print 'Error: Could not read file `%s`.' % args[0]
return 1
if is_encrypted(args[0]):
print 'Error: file `%s` is encrypted.' % args[0]
return 1
mi = metadata_from_formats([args[0]])

View File

@ -185,7 +185,11 @@ def main(args=sys.argv, name=''):
return 1
if not is_valid_pdf(pdf):
print 'Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf
print 'Error: Could not read file `%s`.' % pdf
return 1
if is_encrypted(args[0]):
print 'Error: file `%s` is encrypted.' % args[0]
return 1
pages, page_ranges = clean_page_list(pdf, pages, page_ranges)

View File

@ -35,3 +35,10 @@ def is_valid_pdfs(pdf_paths):
if not is_valid_pdf(pdf_path):
invalid.append(pdf_path)
return invalid
def is_encrypted(pdf_path):
    '''
    Report whether the PDF at ``pdf_path`` is flagged as encrypted.

    :param pdf_path: Path of the PDF file to inspect.
    :return: True if the reader's ``isEncrypted`` is truthy, otherwise False.
    '''
    absolute_path = os.path.abspath(pdf_path)
    with open(absolute_path, 'rb') as pdf_file:
        reader = PdfFileReader(pdf_file)
        return True if reader.isEncrypted else False

View File

@ -10,7 +10,6 @@ import os
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.markdown import markdown
from calibre.ebooks.metadata.opf import OPFCreator
from calibre.ebooks.metadata import MetaInformation
#from calibre.ebooks.metadata.meta import metadata_from_formats
class TXTInput(InputFormatPlugin):
@ -32,7 +31,7 @@ class TXTInput(InputFormatPlugin):
index.write(html.encode('utf-8'))
#mi = metadata_from_formats([stream.name])
mi = MetaInformation(_('Unknown'), _('Unknown'))
mi = None
opf = OPFCreator(os.getcwd(), mi)
opf.create_manifest([('index.html', None)])
opf.create_spine(['index.html'])