Do not use ImageMagick in the news download subsystem

2025-07-09 03:04:10 -04:00 · 2016-05-04 14:54:48 +05:30 · 2016-05-04 14:54:48 +05:30 · db47669c10
commit db47669c10
parent 57d53a8a3b
3 changed files with 77 additions and 67 deletions
--- a/src/calibre/utils/img.py
+++ b/src/calibre/utils/img.py
@ -23,23 +23,37 @@ def get_exe_path(name):
    return os.path.join(base, name)
 def image_from_data(data):
    ''' Return *data* as a QImage. A QImage argument is handed back
    unchanged; anything else is passed to QImage.loadFromData(). Raises
    ValueError when Qt reports that the data could not be loaded. '''
    if not isinstance(data, QImage):
        decoded = QImage()
        if decoded.loadFromData(data):
            return decoded
        raise ValueError('Not a valid image')
    # Already a QImage, nothing to decode
    return data
 def image_to_data(img, compression_quality=95, fmt='JPEG'):
    ''' Serialize the QImage *img* into an in-memory buffer using the image
    format *fmt* (case insensitive) and return the buffer contents.
    *compression_quality* is forwarded to QImage.save() as its quality
    argument. An image with an alpha channel that is exported as JPEG/JPG
    is first painted onto a white RGB32 canvas. Raises ValueError if
    QImage.save() reports failure. '''
    fmt = fmt.upper()
    needs_flattening = img.hasAlphaChannel() and fmt in ('JPEG', 'JPG')
    if needs_flattening:
        # Blend the transparent image over an opaque white background
        flattened = QImage(img.size(), QImage.Format_RGB32)
        flattened.fill(Qt.white)
        painter = QPainter(flattened)
        painter.drawImage(0, 0, img)
        painter.end()
        img = flattened
    encoded = QByteArray()
    sink = QBuffer(encoded)
    sink.open(QBuffer.WriteOnly)
    if img.save(sink, fmt, quality=compression_quality):
        return encoded.data()
    raise ValueError('Failed to export image as ' + fmt)
 def scale_image(data, width=60, height=80, compression_quality=70, as_png=False, preserve_aspect_ratio=True):
    ''' Scale an image, returning it as either JPEG or PNG data (bytestring).
    Transparency is alpha blended with white when converting to JPEG. Is thread
    safe and does not require a QApplication. '''
    # We use Qt instead of ImageMagick here because ImageMagick seems to use
    # some kind of memory pool, causing memory consumption to sky rocket.
-    if isinstance(data, QImage):
+    img = image_from_data(data)
        img = data
    else:
        img = QImage()
        if not img.loadFromData(data):
            raise ValueError('Could not load image for thumbnail generation')
    if preserve_aspect_ratio:
        scaled, nwidth, nheight = fit_image(img.width(), img.height(), width, height)
        if scaled:
@ -47,20 +61,9 @@ def scale_image(data, width=60, height=80, compression_quality=70, as_png=False,
    else:
        if img.width() != width or img.height() != height:
            img = img.scaled(width, height, Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
    if not as_png and img.hasAlphaChannel():
        nimg = QImage(img.size(), QImage.Format_RGB32)
        nimg.fill(Qt.white)
        p = QPainter(nimg)
        p.drawImage(0, 0, img)
        p.end()
        img = nimg
    ba = QByteArray()
    buf = QBuffer(ba)
    buf.open(QBuffer.WriteOnly)
    fmt = 'PNG' if as_png else 'JPEG'
-    if not img.save(buf, fmt, quality=compression_quality):
+    w, h = img.width(), img.height()
-        raise ValueError('Failed to export thumbnail image to: ' + fmt)
+    return w, h, image_to_data(img, compression_quality=compression_quality, fmt=fmt)
    return img.width(), img.height(), ba.data()
 def run_optimizer(file_path, cmd, as_filter=False, input_data=None):
--- a/src/calibre/web/fetch/simple.py
+++ b/src/calibre/web/fetch/simple.py
@ -12,16 +12,16 @@ from urllib import url2pathname, quote
 from httplib import responses
 from base64 import b64decode
-from calibre import browser, relpath, unicode_path, fit_image
+from calibre import browser, relpath, unicode_path
 from calibre.constants import filesystem_encoding, iswindows
 from calibre.utils.filenames import ascii_filename
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.utils.config import OptionParser
 from calibre.utils.logging import Log
-from calibre.utils.magick import Image
+from calibre.utils.img import image_from_data, image_to_data
 from calibre.utils.magick.draw import identify_data, thumbnail
 from calibre.utils.imghdr import what
 from calibre.web.fetch.utils import rescale_image
 class AbortArticle(Exception):
    ''' Marker exception that adds nothing to Exception. Presumably raised
    to abandon the article currently being processed -- confirm against the
    raise/except sites, which are not visible here. '''
@ -349,41 +349,7 @@ class RecursiveFetcher(object):
                        ns.replaceWith(src.replace(m.group(1), stylepath))
    def rescale_image(self, data):
-        orig_w, orig_h, ifmt = identify_data(data)
+        return rescale_image(data, self.scale_news_images, self.compress_news_images_max_size, self.compress_news_images_auto_size)
        orig_data = data  # save it in case compression fails
        if self.scale_news_images is not None:
            wmax, hmax = self.scale_news_images
            scale, new_w, new_h = fit_image(orig_w, orig_h, wmax, hmax)
            if scale:
                data = thumbnail(data, new_w, new_h, compression_quality=95)[-1]
                orig_w = new_w
                orig_h = new_h
        if self.compress_news_images_max_size is None:
            if self.compress_news_images_auto_size is None:  # not compressing
                return data
            else:
                maxsizeb = (orig_w * orig_h)/self.compress_news_images_auto_size
        else:
            maxsizeb = self.compress_news_images_max_size * 1024
        scaled_data = data  # save it in case compression fails
        if len(scaled_data) <= maxsizeb:  # no compression required
            return scaled_data
        img = Image()
        quality = 95
        img.load(data)
        while len(data) >= maxsizeb and quality >= 5:
            quality -= 5
            img.set_compression_quality(quality)
            data = img.export('jpg')
        if len(data) >= len(scaled_data):  # compression failed
            return orig_data if len(orig_data) <= len(scaled_data) else scaled_data
        if len(data) >= len(orig_data):  # no improvement
            return orig_data
        return data
    def process_images(self, soup, baseurl):
        diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
@ -430,19 +396,16 @@ class RecursiveFetcher(object):
                tag['src'] = imgpath
            else:
                try:
                    # Ensure image is valid
                    img = image_from_data(data)
                    if itype not in {'png', 'jpg', 'jpeg'}:
-                        itype = 'png' if itype == 'gif' else 'jpg'
+                        itype = 'png' if itype == 'gif' else 'jpeg'
-                        im = Image()
+                        data = image_to_data(img, fmt=itype)
                        im.load(data)
                        data = im.export(itype)
                    if self.compress_news_images and itype in {'jpg','jpeg'}:
                        try:
-                            data = self.rescale_image(data)
+                            data = self.rescale_image(img)
-                        except:
+                        except Exception:
                            self.log.exception('failed to compress image '+iurl)
                            identify_data(data)
                    else:
                        identify_data(data)
                    # Moon+ apparently cannot handle .jpeg files
                    if itype == 'jpeg':
                        itype = 'jpg'
@ -452,7 +415,7 @@ class RecursiveFetcher(object):
                    with open(imgpath, 'wb') as x:
                        x.write(data)
                    tag['src'] = imgpath
-                except:
+                except Exception:
                    traceback.print_exc()
                    continue
--- a/src/calibre/web/fetch/utils.py
+++ b/src/calibre/web/fetch/utils.py
@ -0,0 +1,44 @@
 #!/usr/bin/env python2
 # vim:fileencoding=utf-8
 # License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 from calibre.utils.img import image_from_data, scale_image, image_to_data
 def rescale_image(data, scale_news_images, compress_news_images_max_size, compress_news_images_auto_size):
    ''' Shrink a news image: optionally scale it down to a bounding box and
    then re-encode it as JPEG at decreasing quality until it fits a size
    budget.

    :param data: The encoded image. An already decoded QImage is accepted
        as well; it is encoded with image_to_data() first so that the size
        comparisons below and the return value always deal with encoded
        data.
    :param scale_news_images: ``None`` for no scaling, otherwise a
        ``(max_width, max_height)`` pair. Scaling is only performed when
        the image exceeds one of the two limits.
    :param compress_news_images_max_size: ``None``, or the size budget in
        units of 1024 bytes.
    :param compress_news_images_auto_size: Only consulted when
        *compress_news_images_max_size* is ``None``. ``None`` disables
        compression, otherwise the budget in bytes is
        ``width * height / compress_news_images_auto_size``.
    :return: The encoded image data. This is the recompressed data only if
        it is smaller than both the scaled and the original data,
        otherwise the scaled or the original data is returned.
    :raises ValueError: Propagated from image_from_data() / image_to_data()
        when the image cannot be loaded or exported.
    '''
    img = image_from_data(data)
    if img is data:
        # image_from_data() returns a QImage argument untouched. Encode it
        # once here, otherwise len() below fails and a QImage could be
        # returned to a caller that expects encoded data.
        data = image_to_data(img)
    orig_data = data  # save it in case compression fails
    orig_w, orig_h = img.width(), img.height()
    if scale_news_images is not None:
        wmax, hmax = scale_news_images
        if wmax < orig_w or hmax < orig_h:
            orig_w, orig_h, data = scale_image(img, wmax, hmax, compression_quality=95)
    if compress_news_images_max_size is None:
        if compress_news_images_auto_size is None:  # not compressing
            return data
        maxsizeb = (orig_w * orig_h)/compress_news_images_auto_size
    else:
        maxsizeb = compress_news_images_max_size * 1024
    if len(data) <= maxsizeb:  # no compression required
        return data
    scaled_data = data  # save it in case compression fails
    # Every pass re-encodes the same scaled image, only the quality changes,
    # so decode it once instead of once per iteration.
    scaled_img = image_from_data(scaled_data)
    quality = 90
    while len(data) >= maxsizeb and quality >= 5:
        data = image_to_data(scaled_img, compression_quality=quality)
        quality -= 5
    if len(data) >= len(scaled_data):  # compression failed
        return orig_data if len(orig_data) <= len(scaled_data) else scaled_data
    if len(data) >= len(orig_data):  # no improvement
        return orig_data
    return data
 if __name__ == '__main__':
    # Manual test driver: read an image from stdin, rescale it to fit
    # 768x1024 with an auto size factor of 8 and write the result to stdout
    import sys
    sys.stdout.write(rescale_image(sys.stdin.read(), (768, 1024), None, 8))