From 70e1336a90b4bdf7f7a388734d6a58710f1f8b62 Mon Sep 17 00:00:00 2001 From: John Schember Date: Fri, 17 Apr 2009 22:29:00 -0400 Subject: [PATCH] Use FileWrapper instead of StringIO for bug 2112 fix. --- src/calibre/__init__.py | 17 +++++++ src/calibre/ebooks/metadata/pdf.py | 82 +++++++++++++++--------------- 2 files changed, 58 insertions(+), 41 deletions(-) diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 807ce1def5..6299bb8782 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -246,6 +246,23 @@ class CurrentDir(object): os.chdir(self.cwd) +class FileWrapper(object): + ''' + Used primarily with pyPdf to ensure the stream is properly closed. + ''' + + def __init__(self, stream): + for x in ('read', 'seek', 'tell'): + setattr(self, x, getattr(stream, x)) + + def __exit__(self, *args): + for x in ('read', 'seek', 'tell'): + setattr(self, x, None) + + def __enter__(self): + return self + + def detect_ncpus(): """Detects the number of effective CPUs in the system""" try: diff --git a/src/calibre/ebooks/metadata/pdf.py b/src/calibre/ebooks/metadata/pdf.py index 9946d831af..4dc98509e2 100644 --- a/src/calibre/ebooks/metadata/pdf.py +++ b/src/calibre/ebooks/metadata/pdf.py @@ -4,8 +4,9 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' '''Read meta information from PDF files''' -import sys, os, StringIO +import sys, os, cStringIO +from calibre import FileWrapper from calibre.ebooks.metadata import MetaInformation, authors_to_string from calibre.ptempfile import TemporaryDirectory from pyPdf import PdfFileReader, PdfFileWriter @@ -21,7 +22,6 @@ def get_metadata(stream, extract_cover=True): """ Return metadata as a L{MetaInfo} object """ mi = MetaInformation(_('Unknown'), [_('Unknown')]) stream.seek(0) - stream = StringIO.StringIO(stream.read()) if extract_cover and _imagemagick_loaded: try: @@ -33,18 +33,19 @@ def get_metadata(stream, extract_cover=True): traceback.print_exc() try: - info = PdfFileReader(stream).getDocumentInfo() - if info.title: - mi.title = info.title - if info.author: - src = info.author.split('&') - authors = [] - for au in src: - authors += au.split(',') - mi.authors = authors - mi.author = info.author - if info.subject: - mi.category = info.subject + with FileWrapper(stream) as stream: + info = PdfFileReader(stream).getDocumentInfo() + if info.title: + mi.title = info.title + if info.author: + src = info.author.split('&') + authors = [] + for au in src: + authors += au.split(',') + mi.authors = authors + mi.author = info.author + if info.subject: + mi.category = info.subject except Exception, err: msg = u'Couldn\'t read metadata from pdf: %s with error %s'%(mi.title, unicode(err)) print >>sys.stderr, msg.encode('utf8') @@ -52,17 +53,17 @@ def get_metadata(stream, extract_cover=True): def set_metadata(stream, mi): stream.seek(0) - # Use a StringIO object for the pdf because we will want to over + # Use a cStringIO object for the pdf because we will want to over # write it later and if we are working on the stream directly it # could cause some issues. - raw = StringIO.StringIO(stream.read()) + raw = cStringIO.StringIO(stream.read()) orig_pdf = PdfFileReader(raw) title = mi.title if mi.title else orig_pdf.documentInfo.title author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author out_pdf = PdfFileWriter(title=title, author=author) for page in orig_pdf.pages: out_pdf.addPage(page) - out_str = StringIO.StringIO() + out_str = cStringIO.StringIO() out_pdf.write(out_str) stream.seek(0) stream.truncate() @@ -72,33 +73,32 @@ def set_metadata(stream, mi): def get_cover(stream): stream.seek(0) - if not isinstance(stream, StringIO.StringIO): - stream = StringIO.StringIO(stream.read()) - data = StringIO.StringIO() + data = cStringIO.StringIO() try: - pdf = PdfFileReader(stream) - output = PdfFileWriter() - - if len(pdf.pages) >= 1: - output.addPage(pdf.getPage(0)) - - with TemporaryDirectory('_pdfmeta') as tdir: - cover_path = os.path.join(tdir, 'cover.pdf') - - outputStream = file(cover_path, "wb") - output.write(outputStream) - outputStream.close() - - wand = NewMagickWand() - MagickReadImage(wand, cover_path) - MagickSetImageFormat(wand, 'JPEG') - MagickWriteImage(wand, '%s.jpg' % cover_path) - - img = Image.open('%s.jpg' % cover_path) - - img.save(data, 'JPEG') + with FileWrapper(stream) as stream: + pdf = PdfFileReader(stream) + output = PdfFileWriter() + + if len(pdf.pages) >= 1: + output.addPage(pdf.getPage(0)) + + with TemporaryDirectory('_pdfmeta') as tdir: + cover_path = os.path.join(tdir, 'cover.pdf') + + outputStream = file(cover_path, "wb") + output.write(outputStream) + outputStream.close() + + wand = NewMagickWand() + MagickReadImage(wand, cover_path) + MagickSetImageFormat(wand, 'JPEG') + MagickWriteImage(wand, '%s.jpg' % cover_path) + + img = Image.open('%s.jpg' % cover_path) + + img.save(data, 'JPEG') except: import traceback traceback.print_exc()