diff --git a/src/calibre/ebooks/metadata/pdf.py b/src/calibre/ebooks/metadata/pdf.py
index ad59351248..8ff652c01b 100644
--- a/src/calibre/ebooks/metadata/pdf.py
+++ b/src/calibre/ebooks/metadata/pdf.py
@@ -1,16 +1,37 @@
-__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal '
 '''Read meta information from PDF files'''
 
-import sys, os, re
+from __future__ import with_statement
+
+__license__ = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal '
+
+import sys, os, re, StringIO
 
 from calibre.ebooks.metadata import MetaInformation, authors_to_string, get_parser
-from pyPdf import PdfFileReader
+from calibre.ptempfile import TemporaryDirectory
+from pyPdf import PdfFileReader, PdfFileWriter
+import Image
+try:
+    from calibre.utils.PythonMagickWand import \
+        NewMagickWand, MagickReadImage, MagickSetImageFormat, MagickWriteImage
+    _imagemagick_loaded = True
+except:
+    _imagemagick_loaded = False
 
-def get_metadata(stream):
+def get_metadata(stream, extract_cover=True):
     """ Return metadata as a L{MetaInfo} object """
     mi = MetaInformation(_('Unknown'), [_('Unknown')])
     stream.seek(0)
+
+    if extract_cover and _imagemagick_loaded:
+        try:
+            cdata = get_cover(stream)
+            if cdata is not None:
+                mi.cover_data = ('jpg', cdata)
+        except:
+            import traceback
+            traceback.print_exc()
+
     try:
         info = PdfFileReader(stream).getDocumentInfo()
         if info.title:
@@ -45,27 +66,68 @@ def set_metadata(stream, mi):
     stream.write(raw)
     stream.seek(0)
 
+def get_cover(stream):
+    try:
+        pdf = PdfFileReader(stream)
+        output = PdfFileWriter()
+
+        if len(pdf.pages) >= 1:
+            output.addPage(pdf.getPage(0))
+
+        with TemporaryDirectory('_pdfmeta') as tdir:
+            cover_path = os.path.join(tdir, 'cover.pdf')
+
+            outputStream = file(cover_path, "wb")
+            output.write(outputStream)
+            outputStream.close()
+
+            wand = NewMagickWand()
+            MagickReadImage(wand, cover_path)
+            MagickSetImageFormat(wand, 'JPEG')
+            MagickWriteImage(wand, '%s.jpg' % cover_path)
+
+            img = Image.open('%s.jpg' % cover_path)
+
+            data = StringIO.StringIO()
+            img.save(data, 'JPEG')
+            return data.getvalue()
+    except:
+        import traceback
+        traceback.print_exc()
+
 def option_parser():
     p = get_parser('pdf')
     p.remove_option('--category')
     p.remove_option('--comment')
+    p.add_option('--get-cover', default=False, action='store_true',
+        help=_('Extract the cover'))
     return p
 
 def main(args=sys.argv):
-    #p = option_parser()
-    #opts, args = p.parse_args(args)
-    if len(args) != 2:
-        print >>sys.stderr, _('Usage: pdf-meta file.pdf')
-        print >>sys.stderr, _('No filename specified.')
-        return 1
-
-    stream = open(os.path.abspath(os.path.expanduser(args[1])), 'r+b')
-    #mi = MetaInformation(opts.title, opts.authors)
-    #if mi.title or mi.authors:
-    #    set_metadata(stream, mi)
-    print unicode(get_metadata(stream)).encode('utf-8')
-
+    p = option_parser()
+    opts, args = p.parse_args(args)
+
+    with open(os.path.abspath(os.path.expanduser(args[1])), 'r+b') as stream:
+        mi = get_metadata(stream, extract_cover=opts.get_cover)
+        changed = False
+        if opts.title:
+            mi.title = opts.title
+            changed = True
+        if opts.authors:
+            mi.authors = opts.authors.split(',')
+            changed = True
+
+        if changed:
+            set_metadata(stream, mi)
+        print unicode(get_metadata(stream, extract_cover=False)).encode('utf-8')
+
+    if mi.cover_data[1] is not None:
+        cpath = os.path.splitext(os.path.basename(args[1]))[0] + '_cover.jpg'
+        with open(cpath, 'wb') as f:
+            f.write(mi.cover_data[1])
+            print 'Cover saved to', f.name
+
     return 0
 
 if __name__ == '__main__':
-    sys.exit(main())
\ No newline at end of file
+    sys.exit(main())