Use FileWrapper instead of StringIO for bug 2112 fix.

2025-07-09 03:04:10 -04:00 · 2009-04-17 22:29:00 -04:00 · 2009-04-17 22:29:00 -04:00 · 70e1336a90
commit 70e1336a90
parent a2064499e8
2 changed files with 58 additions and 41 deletions
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -246,6 +246,23 @@ class CurrentDir(object):
        os.chdir(self.cwd)
 class FileWrapper(object):
    '''
    Context manager exposing only the ``read``, ``seek`` and ``tell``
    methods of an underlying stream. Used primarily with pyPdf.

    On leaving the ``with`` block the three attributes are reset to None,
    so the wrapper no longer holds references to the stream's methods.
    The wrapper itself never calls ``close()`` on the stream.
    '''
    def __init__(self, stream):
        # Bind the stream's bound methods directly onto this instance.
        self.read = stream.read
        self.seek = stream.seek
        self.tell = stream.tell
    def __enter__(self):
        return self
    def __exit__(self, *args):
        # Drop the references; returning None lets any exception propagate.
        self.read = self.seek = self.tell = None
 def detect_ncpus():
    """Detects the number of effective CPUs in the system"""
    try:
--- a/src/calibre/ebooks/metadata/pdf.py
+++ b/src/calibre/ebooks/metadata/pdf.py
@@ -4,8 +4,9 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''Read meta information from PDF files'''
-import sys, os, StringIO
+import sys, os, cStringIO
 from calibre import FileWrapper
 from calibre.ebooks.metadata import MetaInformation, authors_to_string
 from calibre.ptempfile import TemporaryDirectory
 from pyPdf import PdfFileReader, PdfFileWriter
@@ -21,7 +22,6 @@ def get_metadata(stream, extract_cover=True):
    """ Return metadata as a L{MetaInfo} object """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)
    stream = StringIO.StringIO(stream.read())
    if extract_cover and _imagemagick_loaded:
        try:
@@ -33,18 +33,19 @@ def get_metadata(stream, extract_cover=True):
            traceback.print_exc()
    try:
-        info = PdfFileReader(stream).getDocumentInfo()
+        with FileWrapper(stream) as stream:
-        if info.title:
+            info = PdfFileReader(stream).getDocumentInfo()
-            mi.title = info.title
+            if info.title:
-        if info.author:
+                mi.title = info.title
-            src = info.author.split('&')
+            if info.author:
-            authors = []
+                src = info.author.split('&')
-            for au in src:
+                authors = []
-                authors += au.split(',')
+                for au in src:
-            mi.authors = authors
+                    authors += au.split(',')
-            mi.author = info.author
+                mi.authors = authors
-        if info.subject:
+                mi.author = info.author
-            mi.category = info.subject
+            if info.subject:
                mi.category = info.subject
    except Exception, err:
        msg = u'Couldn\'t read metadata from pdf: %s with error %s'%(mi.title, unicode(err))
        print >>sys.stderr, msg.encode('utf8')
@@ -52,17 +53,17 @@ def get_metadata(stream, extract_cover=True):
 def set_metadata(stream, mi):
    stream.seek(0)
-    # Use a StringIO object for the pdf because we will want to over
+    # Use a cStringIO object for the pdf because we will want to over
    # write it later and if we are working on the stream directly it
    # could cause some issues.
-    raw = StringIO.StringIO(stream.read())
+    raw = cStringIO.StringIO(stream.read())
    orig_pdf = PdfFileReader(raw)
    title = mi.title if mi.title else orig_pdf.documentInfo.title
    author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author
    out_pdf = PdfFileWriter(title=title, author=author)
    for page in orig_pdf.pages:
        out_pdf.addPage(page)
-    out_str = StringIO.StringIO()
+    out_str = cStringIO.StringIO()
    out_pdf.write(out_str)
    stream.seek(0)
    stream.truncate()
@@ -72,33 +73,32 @@ def set_metadata(stream, mi):
 def get_cover(stream):
    stream.seek(0)
    if not isinstance(stream, StringIO.StringIO):
        stream = StringIO.StringIO(stream.read())
-    data = StringIO.StringIO()
+    data = cStringIO.StringIO()
    try:
-        pdf = PdfFileReader(stream)
+        with FileWrapper(stream) as stream:
-        output = PdfFileWriter()
+            pdf = PdfFileReader(stream)
-
+            output = PdfFileWriter()
-        if len(pdf.pages) >= 1:
+    
-            output.addPage(pdf.getPage(0))
+            if len(pdf.pages) >= 1:
-
+                output.addPage(pdf.getPage(0))
-        with TemporaryDirectory('_pdfmeta') as tdir:
+    
-            cover_path = os.path.join(tdir, 'cover.pdf')
+            with TemporaryDirectory('_pdfmeta') as tdir:
-
+                cover_path = os.path.join(tdir, 'cover.pdf')
-            outputStream = file(cover_path, "wb")
+    
-            output.write(outputStream)
+                outputStream = file(cover_path, "wb")
-            outputStream.close()
+                output.write(outputStream)
-
+                outputStream.close()
-            wand = NewMagickWand()
+    
-            MagickReadImage(wand, cover_path)
+                wand = NewMagickWand()
-            MagickSetImageFormat(wand, 'JPEG')
+                MagickReadImage(wand, cover_path)
-            MagickWriteImage(wand, '%s.jpg' % cover_path)
+                MagickSetImageFormat(wand, 'JPEG')
-
+                MagickWriteImage(wand, '%s.jpg' % cover_path)
-            img = Image.open('%s.jpg' % cover_path)
+    
-
+                img = Image.open('%s.jpg' % cover_path)
-            img.save(data, 'JPEG')
+    
                img.save(data, 'JPEG')
    except:
        import traceback
        traceback.print_exc()