Use FileWrapper instead of StringIO for bug 2112 fix.

2025-07-09 03:04:10 -04:00 · 2009-04-17 22:29:00 -04:00 · 2009-04-17 22:29:00 -04:00 · 70e1336a90
commit 70e1336a90
parent a2064499e8
2 changed files with 58 additions and 41 deletions
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -246,6 +246,23 @@ class CurrentDir(object):
        os.chdir(self.cwd)


+class FileWrapper(object):
+    '''
+    Used primarily with pyPdf to ensure the stream is properly closed.
+    '''
+
+    def __init__(self, stream):
+        for x in ('read', 'seek', 'tell'):
+            setattr(self, x, getattr(stream, x))
+
+    def __exit__(self, *args):
+        for x in ('read', 'seek', 'tell'):
+            setattr(self, x, None)
+
+    def __enter__(self):
+        return self
+
+
 def detect_ncpus():
    """Detects the number of effective CPUs in the system"""
    try:
--- a/src/calibre/ebooks/metadata/pdf.py
+++ b/src/calibre/ebooks/metadata/pdf.py
@@ -4,8 +4,9 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''Read meta information from PDF files'''

-import sys, os, StringIO
+import sys, os, cStringIO

+from calibre import FileWrapper
 from calibre.ebooks.metadata import MetaInformation, authors_to_string
 from calibre.ptempfile import TemporaryDirectory
 from pyPdf import PdfFileReader, PdfFileWriter
@@ -21,7 +22,6 @@ def get_metadata(stream, extract_cover=True):
    """ Return metadata as a L{MetaInfo} object """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)
-    stream = StringIO.StringIO(stream.read())

    if extract_cover and _imagemagick_loaded:
        try:
@@ -33,18 +33,19 @@ def get_metadata(stream, extract_cover=True):
            traceback.print_exc()

    try:
-        info = PdfFileReader(stream).getDocumentInfo()
-        if info.title:
-            mi.title = info.title
-        if info.author:
-            src = info.author.split('&')
-            authors = []
-            for au in src:
-                authors += au.split(',')
-            mi.authors = authors
-            mi.author = info.author
-        if info.subject:
-            mi.category = info.subject
+        with FileWrapper(stream) as stream:
+            info = PdfFileReader(stream).getDocumentInfo()
+            if info.title:
+                mi.title = info.title
+            if info.author:
+                src = info.author.split('&')
+                authors = []
+                for au in src:
+                    authors += au.split(',')
+                mi.authors = authors
+                mi.author = info.author
+            if info.subject:
+                mi.category = info.subject
    except Exception, err:
        msg = u'Couldn\'t read metadata from pdf: %s with error %s'%(mi.title, unicode(err))
        print >>sys.stderr, msg.encode('utf8')
@@ -52,17 +53,17 @@ def get_metadata(stream, extract_cover=True):

 def set_metadata(stream, mi):
    stream.seek(0)
-    # Use a StringIO object for the pdf because we will want to over
+    # Use a cStringIO object for the pdf because we will want to over
    # write it later and if we are working on the stream directly it
    # could cause some issues.
-    raw = StringIO.StringIO(stream.read())
+    raw = cStringIO.StringIO(stream.read())
    orig_pdf = PdfFileReader(raw)
    title = mi.title if mi.title else orig_pdf.documentInfo.title
    author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author
    out_pdf = PdfFileWriter(title=title, author=author)
    for page in orig_pdf.pages:
        out_pdf.addPage(page)
-    out_str = StringIO.StringIO()
+    out_str = cStringIO.StringIO()
    out_pdf.write(out_str)
    stream.seek(0)
    stream.truncate()
@@ -72,33 +73,32 @@ def set_metadata(stream, mi):

 def get_cover(stream):
    stream.seek(0)
-    if not isinstance(stream, StringIO.StringIO):
-        stream = StringIO.StringIO(stream.read())
    
-    data = StringIO.StringIO()
+    data = cStringIO.StringIO()

    try:
-        pdf = PdfFileReader(stream)
-        output = PdfFileWriter()
-
-        if len(pdf.pages) >= 1:
-            output.addPage(pdf.getPage(0))
-
-        with TemporaryDirectory('_pdfmeta') as tdir:
-            cover_path = os.path.join(tdir, 'cover.pdf')
-
-            outputStream = file(cover_path, "wb")
-            output.write(outputStream)
-            outputStream.close()
-
-            wand = NewMagickWand()
-            MagickReadImage(wand, cover_path)
-            MagickSetImageFormat(wand, 'JPEG')
-            MagickWriteImage(wand, '%s.jpg' % cover_path)
-
-            img = Image.open('%s.jpg' % cover_path)
-
-            img.save(data, 'JPEG')
+        with FileWrapper(stream) as stream:
+            pdf = PdfFileReader(stream)
+            output = PdfFileWriter()
+    
+            if len(pdf.pages) >= 1:
+                output.addPage(pdf.getPage(0))
+    
+            with TemporaryDirectory('_pdfmeta') as tdir:
+                cover_path = os.path.join(tdir, 'cover.pdf')
+    
+                outputStream = file(cover_path, "wb")
+                output.write(outputStream)
+                outputStream.close()
+    
+                wand = NewMagickWand()
+                MagickReadImage(wand, cover_path)
+                MagickSetImageFormat(wand, 'JPEG')
+                MagickWriteImage(wand, '%s.jpg' % cover_path)
+    
+                img = Image.open('%s.jpg' % cover_path)
+    
+                img.save(data, 'JPEG')
    except:
        import traceback
        traceback.print_exc()