From 70e1336a90b4bdf7f7a388734d6a58710f1f8b62 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Fri, 17 Apr 2009 22:29:00 -0400
Subject: [PATCH] Use FileWrapper instead of StringIO for bug 2112 fix.

---
 src/calibre/__init__.py            | 17 +++++++
 src/calibre/ebooks/metadata/pdf.py | 82 +++++++++++++++---------------
 2 files changed, 58 insertions(+), 41 deletions(-)
diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index 807ce1def5..6299bb8782 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -246,6 +246,23 @@ class CurrentDir(object):
         os.chdir(self.cwd)
 
 
+class FileWrapper(object):
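+    '''Context manager exposing only a stream's read, seek and tell (used
+    primarily with pyPdf). On exit these attributes are reset to None, so the
+    wrapper stops referencing the stream; the stream itself is not closed.'''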
+
+    def __init__(self, stream):
+        for x in ('read', 'seek', 'tell'):
+            setattr(self, x, getattr(stream, x))
+
+    def __exit__(self, *args):
+        for x in ('read', 'seek', 'tell'):
+            setattr(self, x, None)
+
+    def __enter__(self):
+        return self
+
+
 def detect_ncpus():
     """Detects the number of effective CPUs in the system"""
     try:
diff --git a/src/calibre/ebooks/metadata/pdf.py b/src/calibre/ebooks/metadata/pdf.py
index 9946d831af..4dc98509e2 100644
--- a/src/calibre/ebooks/metadata/pdf.py
+++ b/src/calibre/ebooks/metadata/pdf.py
@@ -4,8 +4,9 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''Read meta information from PDF files'''
 
-import sys, os, StringIO
+import sys, os, cStringIO
 
+from calibre import FileWrapper
 from calibre.ebooks.metadata import MetaInformation, authors_to_string
 from calibre.ptempfile import TemporaryDirectory
 from pyPdf import PdfFileReader, PdfFileWriter
@@ -21,7 +22,6 @@ def get_metadata(stream, extract_cover=True):
     """ Return metadata as a L{MetaInfo} object """
     mi = MetaInformation(_('Unknown'), [_('Unknown')])
     stream.seek(0)
-    stream = StringIO.StringIO(stream.read())
 
     if extract_cover and _imagemagick_loaded:
         try:
@@ -33,18 +33,19 @@ def get_metadata(stream, extract_cover=True):
             traceback.print_exc()
 
     try:
-        info = PdfFileReader(stream).getDocumentInfo()
-        if info.title:
-            mi.title = info.title
-        if info.author:
-            src = info.author.split('&')
-            authors = []
-            for au in src:
-                authors += au.split(',')
-            mi.authors = authors
-            mi.author = info.author
-        if info.subject:
-            mi.category = info.subject
+        with FileWrapper(stream) as stream:
+            info = PdfFileReader(stream).getDocumentInfo()
+            if info.title:
+                mi.title = info.title
+            if info.author:
+                src = info.author.split('&')
+                authors = []
+                for au in src:
+                    authors += au.split(',')
+                mi.authors = authors
+                mi.author = info.author
+            if info.subject:
+                mi.category = info.subject
     except Exception, err:
         msg = u'Couldn\'t read metadata from pdf: %s with error %s'%(mi.title, unicode(err))
         print >>sys.stderr, msg.encode('utf8')
@@ -52,17 +53,17 @@ def get_metadata(stream, extract_cover=True):
 
 def set_metadata(stream, mi):
     stream.seek(0)
-    # Use a StringIO object for the pdf because we will want to over
+    # Use a cStringIO object for the pdf because we will want to over
     # write it later and if we are working on the stream directly it
     # could cause some issues.
-    raw = StringIO.StringIO(stream.read())
+    raw = cStringIO.StringIO(stream.read())
     orig_pdf = PdfFileReader(raw)
     title = mi.title if mi.title else orig_pdf.documentInfo.title
     author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author
     out_pdf = PdfFileWriter(title=title, author=author)
     for page in orig_pdf.pages:
         out_pdf.addPage(page)
-    out_str = StringIO.StringIO()
+    out_str = cStringIO.StringIO()
     out_pdf.write(out_str)
     stream.seek(0)
     stream.truncate()
@@ -72,33 +73,32 @@ def set_metadata(stream, mi):
 
 def get_cover(stream):
     stream.seek(0)
-    if not isinstance(stream, StringIO.StringIO):
-        stream = StringIO.StringIO(stream.read())
     
-    data = StringIO.StringIO()
+    data = cStringIO.StringIO()
 
     try:
-        pdf = PdfFileReader(stream)
-        output = PdfFileWriter()
-
-        if len(pdf.pages) >= 1:
-            output.addPage(pdf.getPage(0))
-
-        with TemporaryDirectory('_pdfmeta') as tdir:
-            cover_path = os.path.join(tdir, 'cover.pdf')
-
-            outputStream = file(cover_path, "wb")
-            output.write(outputStream)
-            outputStream.close()
-
-            wand = NewMagickWand()
-            MagickReadImage(wand, cover_path)
-            MagickSetImageFormat(wand, 'JPEG')
-            MagickWriteImage(wand, '%s.jpg' % cover_path)
-
-            img = Image.open('%s.jpg' % cover_path)
-
-            img.save(data, 'JPEG')
+        with FileWrapper(stream) as stream:
+            pdf = PdfFileReader(stream)
+            output = PdfFileWriter()
+    
+            if len(pdf.pages) >= 1:
+                output.addPage(pdf.getPage(0))
+    
+            with TemporaryDirectory('_pdfmeta') as tdir:
+                cover_path = os.path.join(tdir, 'cover.pdf')
+    
+                outputStream = file(cover_path, "wb")
+                output.write(outputStream)
+                outputStream.close()
+    
+                wand = NewMagickWand()
+                MagickReadImage(wand, cover_path)
+                MagickSetImageFormat(wand, 'JPEG')
+                MagickWriteImage(wand, '%s.jpg' % cover_path)
+    
+                img = Image.open('%s.jpg' % cover_path)
+    
+                img.save(data, 'JPEG')
     except:
         import traceback
         traceback.print_exc()