From 1f44713c4be2690bf612211fd6a1f932cd698ff9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 21 Dec 2014 09:55:07 +0530 Subject: [PATCH] MOBI Input: Don't crash on corrupted JPG images Use ImageMagick instead of PIL, which handles corrupted JPG files better. Fixes #1404590 [calibre misbehaves when bad jpeg in mobi file](https://bugs.launchpad.net/calibre/+bug/1404590) --- src/calibre/ebooks/metadata/mobi.py | 23 +++++++---------------- src/calibre/ebooks/mobi/reader/mobi6.py | 21 ++++++++------------- 2 files changed, 15 insertions(+), 29 deletions(-) diff --git a/src/calibre/ebooks/metadata/mobi.py b/src/calibre/ebooks/metadata/mobi.py index 956c0cef3e..112fd80acb 100644 --- a/src/calibre/ebooks/metadata/mobi.py +++ b/src/calibre/ebooks/metadata/mobi.py @@ -9,7 +9,7 @@ __copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net and ' \ 'Marshall T. Vandegrift ' __docformat__ = 'restructuredtext en' -import os, cStringIO +import os from struct import pack, unpack from cStringIO import StringIO @@ -467,14 +467,9 @@ def get_metadata(stream): from calibre.ptempfile import TemporaryDirectory from calibre.ebooks.mobi.reader.headers import MetadataHeader from calibre.ebooks.mobi.reader.mobi6 import MobiReader + from calibre.utils.magick.draw import save_cover_data_to from calibre import CurrentDir - try: - from PIL import Image as PILImage - PILImage - except ImportError: - import Image as PILImage - stream.seek(0) try: raw = stream.read(3) @@ -520,13 +515,9 @@ def get_metadata(stream): data = mh.section_data(mh.first_image_index) except: data = '' - buf = cStringIO.StringIO(data) - try: - im = PILImage.open(buf) - except: - log.exception('Failed to read MOBI cover') - else: - obuf = cStringIO.StringIO() - im.convert('RGB').save(obuf, format='JPEG') - mi.cover_data = ('jpg', obuf.getvalue()) + if data and what(None, data) in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}: + try: + mi.cover_data = ('jpg', save_cover_data_to(data, 'cover.jpg', return_data=True)) + except 
Exception: + log.exception('Failed to read MOBI cover') return mi diff --git a/src/calibre/ebooks/mobi/reader/mobi6.py b/src/calibre/ebooks/mobi/reader/mobi6.py index 9c9ffe1d3a..ba735944c3 100644 --- a/src/calibre/ebooks/mobi/reader/mobi6.py +++ b/src/calibre/ebooks/mobi/reader/mobi6.py @@ -8,12 +8,6 @@ __docformat__ = 'restructuredtext en' import shutil, os, re, struct, textwrap, cStringIO -try: - from PIL import Image as PILImage - PILImage -except ImportError: - import Image as PILImage - from lxml import html, etree from calibre import (xml_entity_to_unicode, entity_to_unicode) @@ -27,6 +21,8 @@ from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata.opf2 import OPFCreator, OPF from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.mobi.reader.headers import BookHeader +from calibre.utils.magick.draw import save_cover_data_to +from calibre.utils.imghdr import what class TopazError(ValueError): pass @@ -851,16 +847,15 @@ class MobiReader(object): # This record is a known non image type, not need to try to # load the image continue - buf = cStringIO.StringIO(data) - try: - im = PILImage.open(buf) - im = im.convert('RGB') - except IOError: - continue path = os.path.join(output_dir, '%05d.jpg' % image_index) + try: + if what(None, data) not in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}: + continue + save_cover_data_to(data, path, minify_to=(10000, 10000)) + except Exception: + continue self.image_names.append(os.path.basename(path)) - im.save(open(path, 'wb'), format='JPEG') def test_mbp_regex(): for raw, m in {