Use FileWrapper instead of StringIO for bug 2112 fix.

This commit is contained in:
John Schember 2009-04-17 22:29:00 -04:00
parent a2064499e8
commit 70e1336a90
2 changed files with 58 additions and 41 deletions

View File

@ -246,6 +246,23 @@ class CurrentDir(object):
os.chdir(self.cwd) os.chdir(self.cwd)
class FileWrapper(object):
    '''
    Thin proxy that exposes only the ``read``, ``seek`` and ``tell``
    methods of an underlying stream. Used primarily with pyPdf.

    The three bound methods are copied from ``stream`` onto the wrapper
    when it is constructed. When the wrapper is used as a context manager,
    leaving the ``with`` block replaces each of them with ``None``, so the
    wrapper no longer references the stream's methods.

    The wrapper never closes the stream itself; closing remains the
    caller's responsibility.
    NOTE(review): presumably the point is that any object still holding
    the wrapper (e.g. a pyPdf reader) stops keeping the real stream alive,
    letting the caller close/release it -- confirm against bug 2112.

    :param stream: any object providing ``read``, ``seek`` and ``tell``.
                   ``AttributeError`` propagates if one of them is missing.
    '''

    # Single source of truth for the proxied method names, shared by
    # __init__ and __exit__ so the two can never get out of step.
    _PROXIED = ('read', 'seek', 'tell')

    def __init__(self, stream):
        for name in self._PROXIED:
            setattr(self, name, getattr(stream, name))

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # Drop the references to the stream's bound methods. Nothing is
        # returned (i.e. None), so an exception raised inside the ``with``
        # block is not suppressed.
        for name in self._PROXIED:
            setattr(self, name, None)
def detect_ncpus(): def detect_ncpus():
"""Detects the number of effective CPUs in the system""" """Detects the number of effective CPUs in the system"""
try: try:

View File

@ -4,8 +4,9 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Read meta information from PDF files''' '''Read meta information from PDF files'''
import sys, os, StringIO import sys, os, cStringIO
from calibre import FileWrapper
from calibre.ebooks.metadata import MetaInformation, authors_to_string from calibre.ebooks.metadata import MetaInformation, authors_to_string
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from pyPdf import PdfFileReader, PdfFileWriter from pyPdf import PdfFileReader, PdfFileWriter
@ -21,7 +22,6 @@ def get_metadata(stream, extract_cover=True):
""" Return metadata as a L{MetaInfo} object """ """ Return metadata as a L{MetaInfo} object """
mi = MetaInformation(_('Unknown'), [_('Unknown')]) mi = MetaInformation(_('Unknown'), [_('Unknown')])
stream.seek(0) stream.seek(0)
stream = StringIO.StringIO(stream.read())
if extract_cover and _imagemagick_loaded: if extract_cover and _imagemagick_loaded:
try: try:
@ -33,18 +33,19 @@ def get_metadata(stream, extract_cover=True):
traceback.print_exc() traceback.print_exc()
try: try:
info = PdfFileReader(stream).getDocumentInfo() with FileWrapper(stream) as stream:
if info.title: info = PdfFileReader(stream).getDocumentInfo()
mi.title = info.title if info.title:
if info.author: mi.title = info.title
src = info.author.split('&') if info.author:
authors = [] src = info.author.split('&')
for au in src: authors = []
authors += au.split(',') for au in src:
mi.authors = authors authors += au.split(',')
mi.author = info.author mi.authors = authors
if info.subject: mi.author = info.author
mi.category = info.subject if info.subject:
mi.category = info.subject
except Exception, err: except Exception, err:
msg = u'Couldn\'t read metadata from pdf: %s with error %s'%(mi.title, unicode(err)) msg = u'Couldn\'t read metadata from pdf: %s with error %s'%(mi.title, unicode(err))
print >>sys.stderr, msg.encode('utf8') print >>sys.stderr, msg.encode('utf8')
@ -52,17 +53,17 @@ def get_metadata(stream, extract_cover=True):
def set_metadata(stream, mi): def set_metadata(stream, mi):
stream.seek(0) stream.seek(0)
# Use a StringIO object for the pdf because we will want to over # Use a cStringIO object for the pdf because we will want to over
# write it later and if we are working on the stream directly it # write it later and if we are working on the stream directly it
# could cause some issues. # could cause some issues.
raw = StringIO.StringIO(stream.read()) raw = cStringIO.StringIO(stream.read())
orig_pdf = PdfFileReader(raw) orig_pdf = PdfFileReader(raw)
title = mi.title if mi.title else orig_pdf.documentInfo.title title = mi.title if mi.title else orig_pdf.documentInfo.title
author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author
out_pdf = PdfFileWriter(title=title, author=author) out_pdf = PdfFileWriter(title=title, author=author)
for page in orig_pdf.pages: for page in orig_pdf.pages:
out_pdf.addPage(page) out_pdf.addPage(page)
out_str = StringIO.StringIO() out_str = cStringIO.StringIO()
out_pdf.write(out_str) out_pdf.write(out_str)
stream.seek(0) stream.seek(0)
stream.truncate() stream.truncate()
@ -72,33 +73,32 @@ def set_metadata(stream, mi):
def get_cover(stream): def get_cover(stream):
stream.seek(0) stream.seek(0)
if not isinstance(stream, StringIO.StringIO):
stream = StringIO.StringIO(stream.read())
data = StringIO.StringIO() data = cStringIO.StringIO()
try: try:
pdf = PdfFileReader(stream) with FileWrapper(stream) as stream:
output = PdfFileWriter() pdf = PdfFileReader(stream)
output = PdfFileWriter()
if len(pdf.pages) >= 1:
output.addPage(pdf.getPage(0)) if len(pdf.pages) >= 1:
output.addPage(pdf.getPage(0))
with TemporaryDirectory('_pdfmeta') as tdir:
cover_path = os.path.join(tdir, 'cover.pdf') with TemporaryDirectory('_pdfmeta') as tdir:
cover_path = os.path.join(tdir, 'cover.pdf')
outputStream = file(cover_path, "wb")
output.write(outputStream) outputStream = file(cover_path, "wb")
outputStream.close() output.write(outputStream)
outputStream.close()
wand = NewMagickWand()
MagickReadImage(wand, cover_path) wand = NewMagickWand()
MagickSetImageFormat(wand, 'JPEG') MagickReadImage(wand, cover_path)
MagickWriteImage(wand, '%s.jpg' % cover_path) MagickSetImageFormat(wand, 'JPEG')
MagickWriteImage(wand, '%s.jpg' % cover_path)
img = Image.open('%s.jpg' % cover_path)
img = Image.open('%s.jpg' % cover_path)
img.save(data, 'JPEG')
img.save(data, 'JPEG')
except: except:
import traceback import traceback
traceback.print_exc() traceback.print_exc()