MOBI Input: Don't crash on corrupted JPG images

Use ImageMagick instead of PIL, because ImageMagick handles corrupted JPG
files better. Fixes #1404590 [calibre misbehaves when bad jpeg in mobi file](https://bugs.launchpad.net/calibre/+bug/1404590)
This commit is contained in:
Kovid Goyal 2014-12-21 09:55:07 +05:30
parent 71c11a7de1
commit 1f44713c4b
2 changed files with 15 additions and 29 deletions

View File

@ -9,7 +9,7 @@ __copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net and ' \
'Marshall T. Vandegrift <llasram@gmail.com>' 'Marshall T. Vandegrift <llasram@gmail.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, cStringIO import os
from struct import pack, unpack from struct import pack, unpack
from cStringIO import StringIO from cStringIO import StringIO
@ -467,14 +467,9 @@ def get_metadata(stream):
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.mobi.reader.headers import MetadataHeader from calibre.ebooks.mobi.reader.headers import MetadataHeader
from calibre.ebooks.mobi.reader.mobi6 import MobiReader from calibre.ebooks.mobi.reader.mobi6 import MobiReader
from calibre.utils.magick.draw import save_cover_data_to
from calibre import CurrentDir from calibre import CurrentDir
try:
from PIL import Image as PILImage
PILImage
except ImportError:
import Image as PILImage
stream.seek(0) stream.seek(0)
try: try:
raw = stream.read(3) raw = stream.read(3)
@ -520,13 +515,9 @@ def get_metadata(stream):
data = mh.section_data(mh.first_image_index) data = mh.section_data(mh.first_image_index)
except: except:
data = '' data = ''
buf = cStringIO.StringIO(data) if data and what(None, data) in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}:
try: try:
im = PILImage.open(buf) mi.cover_data = ('jpg', save_cover_data_to(data, 'cover.jpg', return_data=True))
except: except Exception:
log.exception('Failed to read MOBI cover') log.exception('Failed to read MOBI cover')
else:
obuf = cStringIO.StringIO()
im.convert('RGB').save(obuf, format='JPEG')
mi.cover_data = ('jpg', obuf.getvalue())
return mi return mi

View File

@ -8,12 +8,6 @@ __docformat__ = 'restructuredtext en'
import shutil, os, re, struct, textwrap, cStringIO import shutil, os, re, struct, textwrap, cStringIO
try:
from PIL import Image as PILImage
PILImage
except ImportError:
import Image as PILImage
from lxml import html, etree from lxml import html, etree
from calibre import (xml_entity_to_unicode, entity_to_unicode) from calibre import (xml_entity_to_unicode, entity_to_unicode)
@ -27,6 +21,8 @@ from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPFCreator, OPF from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.mobi.reader.headers import BookHeader from calibre.ebooks.mobi.reader.headers import BookHeader
from calibre.utils.magick.draw import save_cover_data_to
from calibre.utils.imghdr import what
class TopazError(ValueError): class TopazError(ValueError):
pass pass
@ -851,16 +847,15 @@ class MobiReader(object):
# This record is a known non image type, not need to try to # This record is a known non image type, not need to try to
# load the image # load the image
continue continue
buf = cStringIO.StringIO(data)
try:
im = PILImage.open(buf)
im = im.convert('RGB')
except IOError:
continue
path = os.path.join(output_dir, '%05d.jpg' % image_index) path = os.path.join(output_dir, '%05d.jpg' % image_index)
try:
if what(None, data) not in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}:
continue
save_cover_data_to(data, path, minify_to=(10000, 10000))
except Exception:
continue
self.image_names.append(os.path.basename(path)) self.image_names.append(os.path.basename(path))
im.save(open(path, 'wb'), format='JPEG')
def test_mbp_regex(): def test_mbp_regex():
for raw, m in { for raw, m in {