#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2015, Kovid Goyal

from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
import os
from functools import partial
from threading import Thread
from Queue import Queue, Empty

from calibre import detect_ncpus, human_readable


class Worker(Thread):
    '''
    Daemon thread that pulls image names off ``queue`` until it is empty and
    compresses each one in place.

    The outcome for every name is stored in the shared ``results`` dict as
    ``(True, (size_before, size_after))`` on success or
    ``(False, error_text)`` on failure. The thread starts itself on
    construction.
    '''

    daemon = True

    def __init__(self, name, queue, results, container, jpeg_quality):
        Thread.__init__(self, name=name)
        self.queue, self.results, self.container = queue, results, container
        self.jpeg_quality = jpeg_quality
        self.start()

    def run(self):
        while True:
            try:
                name = self.queue.get_nowait()
            except Empty:
                break
            try:
                self.compress(name)
            except Exception:
                import traceback
                self.results[name] = (False, traceback.format_exc())
            finally:
                # Always mark the item done, otherwise queue.join() in
                # compress_images() would block forever after a failure.
                self.queue.task_done()

    def compress(self, name):
        '''
        Compress the image ``name`` and record the result.

        PNG images are always optimized losslessly. JPEG images are
        optimized losslessly when ``jpeg_quality`` is None and re-encoded at
        that quality otherwise.
        '''
        from calibre.utils.img import optimize_png, optimize_jpeg, encode_jpeg
        mt = self.container.mime_map[name]
        if 'png' in mt:
            func = optimize_png
        elif self.jpeg_quality is None:
            func = optimize_jpeg
        else:
            func = partial(encode_jpeg, quality=self.jpeg_quality)
        path = self.container.get_file_path_for_processing(name)
        before = os.path.getsize(path)
        # The optimizers return None on success and the output of the
        # external tool when it exits with a non-zero status.
        err = func(path)
        if err is not None:
            self.results[name] = (False, err)
            return
        after = os.path.getsize(path)
        self.results[name] = (True, (before, after))


def compress_images(container, report=None, names=None, jpeg_quality=None):
    '''
    Compress the PNG and JPEG images in ``container`` using one worker
    thread per CPU (never more threads than images).

    :param container: The book container; its ``manifest_type_map`` is used
        to find the images.
    :param report: Optional callable that is passed human readable progress
        messages. When None nothing is reported.
    :param names: Optional iterable of names; when given, only images whose
        names are in it are processed.
    :param jpeg_quality: When None JPEG images are optimized losslessly,
        otherwise they are re-encoded at this quality.
    :return: ``(changed, results)`` where ``changed`` is True if at least
        one image changed size and ``results`` maps every processed name to
        ``(True, (before, after))`` or ``(False, error_text)``.
    '''
    mt_map = container.manifest_type_map
    images = set()
    for mt in 'png jpg jpeg'.split():
        images |= set(mt_map.get('image/' + mt, ()))
    if names is not None:
        images &= set(names)
    results = {}
    queue = Queue()
    for name in images:
        queue.put(name)
    # Workers start themselves in __init__, so constructing them is enough.
    for i in xrange(min(detect_ncpus(), len(images))):
        Worker('CompressImage%d' % i, queue, results, container, jpeg_quality)
    queue.join()
    before_total = after_total = 0
    for name, (ok, res) in results.iteritems():
        if ok:
            before, after = res
            if before != after:
                before_total += before
                after_total += after
                if report:
                    report(_('{0} compressed from {1} to {2} bytes [{3:.1%}]').format(
                        name, human_readable(before), human_readable(after), after/before))
        elif report:
            # report is optional, so failures must not be reported
            # unconditionally (that raised TypeError with report=None).
            report(_('Failed to process {0} with error:').format(name))
            report(res)
    if report:
        if before_total > 0:
            report('')
            report(_('Total image filesize reduced from {0} to {1} [{2:.1%}]').format(
                human_readable(before_total), human_readable(after_total), after_total/before_total))
        else:
            report(_('Images are already fully optimized'))
    return before_total > 0, results

'''), + +'compress_images': _('''\ +

Losslessly compress images in the book, to reduce the filesize, without +affecting image quality.

+'''), + } def hfix(name, raw): @@ -203,6 +211,12 @@ def polish_one(ebook, opts, report, customization=None): changed = True report('') + if opts.compress_images: + rt(_('Losslessly compressing images')) + if compress_images(ebook, report)[0]: + changed = True + report('') + return changed @@ -265,6 +279,7 @@ def option_parser(): o('--remove-jacket', help=CLI_HELP['remove_jacket']) o('--smarten-punctuation', '-p', help=CLI_HELP['smarten_punctuation']) o('--remove-unused-css', '-u', help=CLI_HELP['remove_unused_css']) + o('--compress-images', '-i', help=CLI_HELP['compress_images']) o('--verbose', help=_('Produce more verbose output, useful for debugging.')) diff --git a/src/calibre/gui2/actions/polish.py b/src/calibre/gui2/actions/polish.py index 680c315852..f7245819fc 100644 --- a/src/calibre/gui2/actions/polish.py +++ b/src/calibre/gui2/actions/polish.py @@ -67,6 +67,7 @@ class Polish(QDialog): # {{{ 'jacket':_('

Book Jacket

%s')%HELP['jacket'], 'remove_jacket':_('

Remove Book Jacket

%s')%HELP['remove_jacket'], 'remove_unused_css':_('

Remove unused CSS rules

%s')%HELP['remove_unused_css'], + 'compress_images': _('

Losslessly compress images

from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
import os, subprocess, errno, shutil, tempfile
from threading import Thread

from PyQt5.Qt import QImage, QByteArray, QBuffer, Qt, QPainter

from calibre import fit_image, force_unicode
from calibre.constants import iswindows
from calibre.utils.filenames import atomic_rename


def get_exe_path(name):
    '''
    Return the path to the helper executable ``name``.

    The executable is looked up in the directory of ``PDFTOHTML``. When
    that has no directory component the bare name is returned (presumably so
    that it is resolved via PATH -- confirm). On windows ``-calibre.exe`` is
    appended to the name.
    '''
    from calibre.ebooks.pdf.pdftohtml import PDFTOHTML
    base = os.path.dirname(PDFTOHTML)
    if iswindows:
        name += '-calibre.exe'
    if not base:
        return name
    return os.path.join(base, name)


def run_optimizer(file_path, cmd, as_filter=False, input_data=None):
    '''
    Run the external optimizer ``cmd`` on ``file_path``, atomically replacing
    the file with the optimizer's output on success.

    :param file_path: Path to the image to optimize.
    :param cmd: The command line as a list. When ``as_filter`` is False it
        must contain the placeholders ``True`` and ``False``, which are
        replaced by the input and output file names respectively.
    :param as_filter: When True the image data is fed to the program on
        stdin and the result is read from its stdout.
    :param input_data: Optional file-like object to use as stdin data in
        filter mode; defaults to the contents of ``file_path``.
    :return: None on success, otherwise the (unicode) error output of the
        program, in which case ``file_path`` is left untouched.
    '''
    file_path = os.path.abspath(file_path)
    cwd = os.path.dirname(file_path)
    fd, outfile = tempfile.mkstemp(dir=cwd)
    outf = None
    try:
        if as_filter:
            outf = os.fdopen(fd, 'wb')
        else:
            os.close(fd)
        iname, oname = os.path.basename(file_path), os.path.basename(outfile)
        # Work on a copy so that the caller's list is not modified
        cmd = list(cmd)
        if not as_filter:
            cmd[cmd.index(True)] = iname
            cmd[cmd.index(False)] = oname
        if iswindows:
            # subprocess in python 2 cannot handle unicode strings that are not
            # encodeable in mbcs, so we fail here, where it is more explicit,
            # instead.
            cmd = [x.encode('mbcs') if isinstance(x, type('')) else x for x in cmd]
            if isinstance(cwd, type('')):
                cwd = cwd.encode('mbcs')
        stdin = subprocess.PIPE if as_filter else None
        stderr = subprocess.PIPE if as_filter else subprocess.STDOUT
        p = subprocess.Popen(cmd, cwd=cwd, stdout=subprocess.PIPE, stderr=stderr, stdin=stdin)
        stderr = p.stderr if as_filter else p.stdout
        if as_filter:
            src = input_data or open(file_path, 'rb')

            def copy(src, dest):
                try:
                    shutil.copyfileobj(src, dest)
                finally:
                    try:
                        src.close()
                    finally:
                        dest.close()

            inw = Thread(name='CopyInput', target=copy, args=(src, p.stdin))
            inw.daemon = True
            inw.start()
            # name must be passed by keyword, the first positional argument
            # of Thread is group, which has to be None
            outw = Thread(name='CopyOutput', target=copy, args=(p.stdout, outf))
            outw.daemon = True
            outw.start()
        raw = force_unicode(stderr.read())
        rc = p.wait()
        if as_filter:
            # Make sure all output has been written to outfile (and it has
            # been closed) before it is renamed or removed
            outw.join(60.0)
            inw.join(60.0)
        if rc != 0:
            return raw
        shutil.copystat(file_path, outfile)
        atomic_rename(outfile, file_path)
    finally:
        if outf is not None:
            # No-op if the copy thread already closed it; needed when we
            # fail before the copy thread is started
            outf.close()
        try:
            os.remove(outfile)
        except EnvironmentError as err:
            # After a successful rename outfile no longer exists
            if err.errno != errno.ENOENT:
                raise


def optimize_jpeg(file_path):
    ''' Losslessly optimize the JPEG at ``file_path`` in place using
    jpegtran. Returns None on success or the error output on failure. '''
    exe = get_exe_path('jpegtran')
    cmd = [exe] + '-copy none -optimize -progressive -maxmemory 100M -outfile'.split() + [False, True]
    return run_optimizer(file_path, cmd)


def optimize_png(file_path):
    ''' Losslessly optimize the PNG at ``file_path`` in place using optipng.
    Returns None on success or the error output on failure. '''
    exe = get_exe_path('optipng')
    cmd = [exe] + '-fix -clobber -strip all -o7 -out'.split() + [False, True]
    return run_optimizer(file_path, cmd)


def encode_jpeg(file_path, quality=80):
    '''
    Re-encode the image at ``file_path`` in place as a JPEG of the specified
    quality using cjpeg.

    :param quality: JPEG quality, clamped to the range 0-100.
    :return: None on success or the error output of cjpeg on failure.
    :raises ValueError: If the image cannot be loaded or converted to PPM.
    '''
    from calibre.srv.utils import ReadOnlyFileBuffer
    quality = max(0, min(100, int(quality)))
    exe = get_exe_path('cjpeg')
    cmd = [exe] + '-optimize -progressive -maxmemory 100M -quality'.split() + [str(quality)]
    img = QImage()
    if not img.load(file_path):
        raise ValueError('%s is not a valid image file' % file_path)
    ba = QByteArray()
    buf = QBuffer(ba)
    buf.open(QBuffer.WriteOnly)
    if not img.save(buf, 'PPM'):
        raise ValueError('Failed to export image to PPM')
    # data is a method and must be called to get the actual bytes
    return run_optimizer(file_path, cmd, as_filter=True, input_data=ReadOnlyFileBuffer(ba.data()))