Do not use ImageMagick in the news download subsystem

This commit is contained in:
Kovid Goyal 2016-05-04 14:54:48 +05:30
parent 57d53a8a3b
commit db47669c10
3 changed files with 77 additions and 67 deletions

View File

@ -23,23 +23,37 @@ def get_exe_path(name):
return os.path.join(base, name) return os.path.join(base, name)
def image_from_data(data): def image_from_data(data):
if isinstance(data, QImage):
return data
i = QImage() i = QImage()
if not i.loadFromData(data): if not i.loadFromData(data):
raise ValueError('Not a valid image') raise ValueError('Not a valid image')
return i return i
def image_to_data(img, compression_quality=95, fmt='JPEG'):
    ''' Serialize an image to a bytestring in the requested format.

    :param img: The image to export. It must provide ``hasAlphaChannel()``,
        ``size()`` and ``save()`` (a QImage in the visible callers).
    :param compression_quality: Passed to ``save()`` as ``quality``.
    :param fmt: Output format name; it is upper-cased before use.
    :return: The encoded image data.
    :raises ValueError: If ``save()`` reports failure.
    '''
    fmt = fmt.upper()
    if img.hasAlphaChannel() and fmt in ('JPEG', 'JPG'):
        # The image has an alpha channel but the target is JPEG, so paint it
        # over an opaque white RGB32 canvas and export that instead.
        flattened = QImage(img.size(), QImage.Format_RGB32)
        flattened.fill(Qt.white)
        painter = QPainter(flattened)
        painter.drawImage(0, 0, img)
        painter.end()
        img = flattened
    # Encode into an in-memory, write-only buffer backed by a byte array.
    out = QByteArray()
    sink = QBuffer(out)
    sink.open(QBuffer.WriteOnly)
    saved = img.save(sink, fmt, quality=compression_quality)
    if not saved:
        raise ValueError('Failed to export image as ' + fmt)
    return out.data()
def scale_image(data, width=60, height=80, compression_quality=70, as_png=False, preserve_aspect_ratio=True): def scale_image(data, width=60, height=80, compression_quality=70, as_png=False, preserve_aspect_ratio=True):
''' Scale an image, returning it as either JPEG or PNG data (bytestring). ''' Scale an image, returning it as either JPEG or PNG data (bytestring).
Transparency is alpha blended with white when converting to JPEG. Is thread Transparency is alpha blended with white when converting to JPEG. Is thread
safe and does not require a QApplication. ''' safe and does not require a QApplication. '''
# We use Qt instead of ImageMagick here because ImageMagick seems to use # We use Qt instead of ImageMagick here because ImageMagick seems to use
# some kind of memory pool, causing memory consumption to sky rocket. # some kind of memory pool, causing memory consumption to sky rocket.
if isinstance(data, QImage): img = image_from_data(data)
img = data
else:
img = QImage()
if not img.loadFromData(data):
raise ValueError('Could not load image for thumbnail generation')
if preserve_aspect_ratio: if preserve_aspect_ratio:
scaled, nwidth, nheight = fit_image(img.width(), img.height(), width, height) scaled, nwidth, nheight = fit_image(img.width(), img.height(), width, height)
if scaled: if scaled:
@ -47,20 +61,9 @@ def scale_image(data, width=60, height=80, compression_quality=70, as_png=False,
else: else:
if img.width() != width or img.height() != height: if img.width() != width or img.height() != height:
img = img.scaled(width, height, Qt.IgnoreAspectRatio, Qt.SmoothTransformation) img = img.scaled(width, height, Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
if not as_png and img.hasAlphaChannel():
nimg = QImage(img.size(), QImage.Format_RGB32)
nimg.fill(Qt.white)
p = QPainter(nimg)
p.drawImage(0, 0, img)
p.end()
img = nimg
ba = QByteArray()
buf = QBuffer(ba)
buf.open(QBuffer.WriteOnly)
fmt = 'PNG' if as_png else 'JPEG' fmt = 'PNG' if as_png else 'JPEG'
if not img.save(buf, fmt, quality=compression_quality): w, h = img.width(), img.height()
raise ValueError('Failed to export thumbnail image to: ' + fmt) return w, h, image_to_data(img, compression_quality=compression_quality, fmt=fmt)
return img.width(), img.height(), ba.data()
def run_optimizer(file_path, cmd, as_filter=False, input_data=None): def run_optimizer(file_path, cmd, as_filter=False, input_data=None):

View File

@ -12,16 +12,16 @@ from urllib import url2pathname, quote
from httplib import responses from httplib import responses
from base64 import b64decode from base64 import b64decode
from calibre import browser, relpath, unicode_path, fit_image from calibre import browser, relpath, unicode_path
from calibre.constants import filesystem_encoding, iswindows from calibre.constants import filesystem_encoding, iswindows
from calibre.utils.filenames import ascii_filename from calibre.utils.filenames import ascii_filename
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
from calibre.utils.logging import Log from calibre.utils.logging import Log
from calibre.utils.magick import Image from calibre.utils.img import image_from_data, image_to_data
from calibre.utils.magick.draw import identify_data, thumbnail
from calibre.utils.imghdr import what from calibre.utils.imghdr import what
from calibre.web.fetch.utils import rescale_image
class AbortArticle(Exception): class AbortArticle(Exception):
pass pass
@ -349,41 +349,7 @@ class RecursiveFetcher(object):
ns.replaceWith(src.replace(m.group(1), stylepath)) ns.replaceWith(src.replace(m.group(1), stylepath))
def rescale_image(self, data): def rescale_image(self, data):
orig_w, orig_h, ifmt = identify_data(data) return rescale_image(data, self.scale_news_images, self.compress_news_images_max_size, self.compress_news_images_auto_size)
orig_data = data # save it in case compression fails
if self.scale_news_images is not None:
wmax, hmax = self.scale_news_images
scale, new_w, new_h = fit_image(orig_w, orig_h, wmax, hmax)
if scale:
data = thumbnail(data, new_w, new_h, compression_quality=95)[-1]
orig_w = new_w
orig_h = new_h
if self.compress_news_images_max_size is None:
if self.compress_news_images_auto_size is None: # not compressing
return data
else:
maxsizeb = (orig_w * orig_h)/self.compress_news_images_auto_size
else:
maxsizeb = self.compress_news_images_max_size * 1024
scaled_data = data # save it in case compression fails
if len(scaled_data) <= maxsizeb: # no compression required
return scaled_data
img = Image()
quality = 95
img.load(data)
while len(data) >= maxsizeb and quality >= 5:
quality -= 5
img.set_compression_quality(quality)
data = img.export('jpg')
if len(data) >= len(scaled_data): # compression failed
return orig_data if len(orig_data) <= len(scaled_data) else scaled_data
if len(data) >= len(orig_data): # no improvement
return orig_data
return data
def process_images(self, soup, baseurl): def process_images(self, soup, baseurl):
diskpath = unicode_path(os.path.join(self.current_dir, 'images')) diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
@ -430,19 +396,16 @@ class RecursiveFetcher(object):
tag['src'] = imgpath tag['src'] = imgpath
else: else:
try: try:
# Ensure image is valid
img = image_from_data(data)
if itype not in {'png', 'jpg', 'jpeg'}: if itype not in {'png', 'jpg', 'jpeg'}:
itype = 'png' if itype == 'gif' else 'jpg' itype = 'png' if itype == 'gif' else 'jpeg'
im = Image() data = image_to_data(img, fmt=itype)
im.load(data)
data = im.export(itype)
if self.compress_news_images and itype in {'jpg','jpeg'}: if self.compress_news_images and itype in {'jpg','jpeg'}:
try: try:
data = self.rescale_image(data) data = self.rescale_image(img)
except: except Exception:
self.log.exception('failed to compress image '+iurl) self.log.exception('failed to compress image '+iurl)
identify_data(data)
else:
identify_data(data)
# Moon+ apparently cannot handle .jpeg files # Moon+ apparently cannot handle .jpeg files
if itype == 'jpeg': if itype == 'jpeg':
itype = 'jpg' itype = 'jpg'
@ -452,7 +415,7 @@ class RecursiveFetcher(object):
with open(imgpath, 'wb') as x: with open(imgpath, 'wb') as x:
x.write(data) x.write(data)
tag['src'] = imgpath tag['src'] = imgpath
except: except Exception:
traceback.print_exc() traceback.print_exc()
continue continue

View File

@ -0,0 +1,44 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import (unicode_literals, division, absolute_import,
print_function)
from calibre.utils.img import image_from_data, scale_image, image_to_data
def rescale_image(data, scale_news_images, compress_news_images_max_size, compress_news_images_auto_size):
    ''' Optionally downscale and then re-compress an image to fit a size budget.

    :param data: Encoded image data, or an already decoded image object that
        ``image_from_data()`` returns unchanged.
    :param scale_news_images: ``None`` to disable scaling, otherwise a
        ``(max_width, max_height)`` pair. The image is scaled only when it
        exceeds one of the two limits.
    :param compress_news_images_max_size: ``None``, or a size limit that is
        multiplied by 1024 to obtain the byte budget.
    :param compress_news_images_auto_size: Used only when the max size is
        ``None``. The byte budget is then ``width * height`` divided by this
        value. If both are ``None`` no compression is attempted.
    :return: Encoded image data: the smallest acceptable candidate among the
        original, the scaled and the re-compressed data.
    '''
    img = image_from_data(data)
    if img is data:
        # image_from_data() handed back the very object we passed in, so the
        # caller supplied a decoded image rather than encoded bytes. Serialize
        # it so that the len() comparisons and the return value below always
        # operate on encoded data.
        data = image_to_data(img)
    orig_data = data  # save it in case compression fails
    orig_w, orig_h = img.width(), img.height()

    if scale_news_images is not None:
        wmax, hmax = scale_news_images
        if wmax < orig_w or hmax < orig_h:
            # From here on the dimensions and data describe the scaled image.
            orig_w, orig_h, data = scale_image(img, wmax, hmax, compression_quality=95)

    if compress_news_images_max_size is None:
        if compress_news_images_auto_size is None:  # not compressing
            return data
        maxsizeb = (orig_w * orig_h)/compress_news_images_auto_size
    else:
        maxsizeb = compress_news_images_max_size * 1024

    if len(data) <= maxsizeb:  # no compression required
        return data

    scaled_data = data  # save it in case compression fails
    # Decode once; every quality step re-encodes this same image.
    scaled_img = image_from_data(scaled_data)
    quality = 90
    # Lower the quality in steps of 5 until the result fits the budget or the
    # lowest quality step (5) has been tried.
    while len(data) >= maxsizeb and quality >= 5:
        data = image_to_data(scaled_img, compression_quality=quality)
        quality -= 5

    if len(data) >= len(scaled_data):  # compression failed
        return orig_data if len(orig_data) <= len(scaled_data) else scaled_data

    if len(data) >= len(orig_data):  # no improvement
        return orig_data

    return data
if __name__ == '__main__':
    # Command line harness: read an image from stdin, rescale it against a
    # 768x1024 box with an auto-size divisor of 8 and no fixed maximum size,
    # then write the resulting data to stdout.
    import sys
    raw = sys.stdin.read()
    result = rescale_image(raw, (768, 1024), None, 8)
    sys.stdout.write(result)