Fix bug 1059: Use the first page of a PDF as its cover image

This commit is contained in:
John Schember 2009-02-13 07:38:32 -05:00
parent 2f3562ca20
commit 86ad16de68

View File

@ -1,16 +1,37 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Read meta information from PDF files'''
import sys, os, re
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import sys, os, re, StringIO
from calibre.ebooks.metadata import MetaInformation, authors_to_string, get_parser
from pyPdf import PdfFileReader
from calibre.ptempfile import TemporaryDirectory
from pyPdf import PdfFileReader, PdfFileWriter
import Image
try:
from calibre.utils.PythonMagickWand import \
NewMagickWand, MagickReadImage, MagickSetImageFormat, MagickWriteImage
_imagemagick_loaded = True
except:
_imagemagick_loaded = False
def get_metadata(stream):
def get_metadata(stream, extract_cover=True):
""" Return metadata as a L{MetaInfo} object """
mi = MetaInformation(_('Unknown'), [_('Unknown')])
stream.seek(0)
if extract_cover and _imagemagick_loaded:
try:
cdata = get_cover(stream)
if cdata is not None:
mi.cover_data = ('jpg', cdata)
except:
import traceback
traceback.print_exc()
try:
info = PdfFileReader(stream).getDocumentInfo()
if info.title:
@ -45,27 +66,68 @@ def set_metadata(stream, mi):
stream.write(raw)
stream.seek(0)
def get_cover(stream):
    """Render the first page of a PDF as JPEG data.

    The first page is copied into a single-page PDF inside a temporary
    directory, converted to a JPEG file with ImageMagick, and then re-read
    with PIL so the encoded image can be returned in memory.

    :param stream: file-like object handed to ``PdfFileReader``.
    :return: the JPEG data as a string, or ``None`` when the document has
        no pages or any step fails. Failures are reported with a traceback
        on stderr and never propagate to the caller.
    """
    try:
        pdf = PdfFileReader(stream)
        if len(pdf.pages) < 1:
            # Nothing to render; skip the temp files and the converter.
            return None

        output = PdfFileWriter()
        output.addPage(pdf.getPage(0))

        with TemporaryDirectory('_pdfmeta') as tdir:
            cover_path = os.path.join(tdir, 'cover.pdf')
            jpeg_path = '%s.jpg' % cover_path

            # The context manager closes the handle even if the write fails,
            # and guarantees the data is flushed before ImageMagick reads it.
            with open(cover_path, 'wb') as cover_file:
                output.write(cover_file)

            wand = NewMagickWand()
            MagickReadImage(wand, cover_path)
            MagickSetImageFormat(wand, 'JPEG')
            MagickWriteImage(wand, jpeg_path)

            # Load the converted file before the temporary directory is
            # removed, and re-encode it into an in-memory buffer.
            img = Image.open(jpeg_path)
            data = StringIO.StringIO()
            img.save(data, 'JPEG')
            return data.getvalue()
    except Exception:
        # Cover extraction is best effort: report the problem and fall
        # through, but let KeyboardInterrupt/SystemExit propagate.
        import traceback
        traceback.print_exc()
    return None
def option_parser():
    """Build the command-line option parser for the PDF metadata tool.

    Starts from the parser returned by ``get_parser('pdf')``, removes the
    ``--category`` and ``--comment`` options and adds a ``--get-cover``
    switch (off by default).

    :return: the configured parser.
    """
    parser = get_parser('pdf')
    for unsupported in ('--category', '--comment'):
        parser.remove_option(unsupported)
    parser.add_option(
        '--get-cover',
        default=False,
        action='store_true',
        help=_('Extract the cover'),
    )
    return parser
def main(args=sys.argv):
#p = option_parser()
#opts, args = p.parse_args(args)
if len(args) != 2:
print >>sys.stderr, _('Usage: pdf-meta file.pdf')
print >>sys.stderr, _('No filename specified.')
return 1
stream = open(os.path.abspath(os.path.expanduser(args[1])), 'r+b')
#mi = MetaInformation(opts.title, opts.authors)
#if mi.title or mi.authors:
# set_metadata(stream, mi)
print unicode(get_metadata(stream)).encode('utf-8')
p = option_parser()
opts, args = p.parse_args(args)
with open(os.path.abspath(os.path.expanduser(args[1])), 'r+b') as stream:
mi = get_metadata(stream, extract_cover=opts.get_cover)
changed = False
if opts.title:
mi.title = opts.title
changed = True
if opts.authors:
mi.authors = opts.authors.split(',')
changed = True
if changed:
set_metadata(stream, mi)
print unicode(get_metadata(stream, extract_cover=False)).encode('utf-8')
if mi.cover_data[1] is not None:
cpath = os.path.splitext(os.path.basename(args[1]))[0] + '_cover.jpg'
with open(cpath, 'wb') as f:
f.write(mi.cover_data[1])
print 'Cover saved to', f.name
return 0
# Run the command-line tool when executed as a script and use the return
# value of main() as the process exit status.
if __name__ == '__main__':
    sys.exit(main())