Book polishing: Add tool to losslessly compress images in the book in order to reduce its filesize, without affecting image quality

2025-07-09 03:04:10 -04:00 · 2015-11-27 16:02:56 +05:30 · 2015-11-27 16:02:56 +05:30 · a7489de7cb
commit a7489de7cb
parent b47f7b8b45
5 changed files with 196 additions and 1 deletions
--- a/src/calibre/ebooks/oeb/polish/images.py
+++ b/src/calibre/ebooks/oeb/polish/images.py
@ -0,0 +1,87 @@
 #!/usr/bin/env python2
 # vim:fileencoding=utf-8
 # License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 import os
 from functools import partial
 from threading import Thread
 from Queue import Queue, Empty
 from calibre import detect_ncpus, human_readable
 class Worker(Thread):
    '''
    Daemon thread that drains a queue of image names and compresses each one.

    The thread starts itself as soon as it is constructed. For every name
    taken from the queue an entry is stored in the shared ``results`` dict:
    ``(True, (size_before, size_after))`` when compression ran without
    raising, or ``(False, traceback_text)`` when an exception occurred.
    The thread exits once the queue is empty.
    '''
    daemon = True
    def __init__(self, name, queue, results, container, jpeg_quality):
        Thread.__init__(self, name=name)
        self.queue = queue
        self.results = results
        self.container = container
        self.jpeg_quality = jpeg_quality
        self.start()
    def run(self):
        '''Process names until the queue reports that it is empty.'''
        while True:
            try:
                image_name = self.queue.get_nowait()
            except Empty:
                return
            try:
                self.compress(image_name)
            except Exception:
                import traceback
                self.results[image_name] = (False, traceback.format_exc())
            finally:
                # Always mark the item done so that queue.join() can return
                self.queue.task_done()
    def compress(self, name):
        '''
        Compress the image called ``name`` in place and record its size
        before and after. PNG images go through optimize_png; other images
        go through optimize_jpeg when no JPEG quality was given, otherwise
        they are re-encoded with encode_jpeg at that quality. The value
        returned by the optimizer function is not inspected here.
        '''
        from calibre.utils.img import optimize_png, optimize_jpeg, encode_jpeg
        if 'png' in self.container.mime_map[name]:
            optimizer = optimize_png
        elif self.jpeg_quality is not None:
            optimizer = partial(encode_jpeg, quality=self.jpeg_quality)
        else:
            optimizer = optimize_jpeg
        path = self.container.get_file_path_for_processing(name)
        size_before = os.path.getsize(path)
        optimizer(path)
        size_after = os.path.getsize(path)
        self.results[name] = (True, (size_before, size_after))
 def compress_images(container, report=None, names=None, jpeg_quality=None):
    '''
    Compress the PNG and JPEG images in ``container`` using a pool of
    Worker threads.

    :param container: The book container. Its ``manifest_type_map`` is used
        to find the images to process.
    :param report: Optional callable accepting a single string, used to
        report progress and errors. When None, nothing is reported.
    :param names: Optional iterable of names; when given, only images whose
        names are in it are processed.
    :param jpeg_quality: Passed to every Worker. When None, JPEG images are
        optimized with optimize_jpeg, otherwise they are re-encoded at this
        quality.
    :return: A tuple ``(changed, results)``. ``changed`` is True if the
        total size of the images whose size changed was non-zero before
        compression. ``results`` maps each image name to the entry stored
        by its Worker.
    '''
    mt_map = container.manifest_type_map
    images = set()
    for mt in 'png jpg jpeg'.split():
        images |= set(mt_map.get('image/' + mt, ()))
    if names is not None:
        images &= set(names)
    results = {}
    queue = Queue()
    for name in images:
        queue.put(name)
    # Workers start themselves when constructed, so no references need to be
    # kept; queue.join() below waits until every queued image is processed.
    for i in xrange(min(detect_ncpus(), len(images))):
        Worker('CompressImage%d' % i, queue, results, container, jpeg_quality)
    queue.join()
    before_total = after_total = 0
    for name, (ok, res) in results.iteritems():
        if ok:
            before, after = res
            if before != after:
                before_total += before
                after_total += after
                if report:
                    report(_('{0} compressed from {1} to {2} bytes [{3:.1%}]').format(
                        name, human_readable(before), human_readable(after), after/before))
        elif report:
            # report is optional, so failures must not assume it is callable
            report(_('Failed to process {0} with error:').format(name))
            report(res)
    if report:
        if before_total > 0:
            report('')
            report(_('Total image filesize reduced from {0} to {1} [{2:.1%}]').format(
                human_readable(before_total), human_readable(after_total), after_total/before_total))
        else:
            report(_('Images are already fully optimized'))
    return before_total > 0, results
--- a/src/calibre/ebooks/oeb/polish/main.py
+++ b/src/calibre/ebooks/oeb/polish/main.py
@ -14,6 +14,7 @@ from functools import partial
 from calibre.ebooks.oeb.polish.container import get_container
 from calibre.ebooks.oeb.polish.stats import StatsCollector
 from calibre.ebooks.oeb.polish.subset import subset_all_fonts
 from calibre.ebooks.oeb.polish.images import compress_images
 from calibre.ebooks.oeb.polish.embed import embed_all_fonts
 from calibre.ebooks.oeb.polish.cover import set_cover
 from calibre.ebooks.oeb.polish.replace import smarten_punctuation
@ -31,6 +32,7 @@ ALL_OPTS = {
    'remove_jacket':False,
    'smarten_punctuation':False,
    'remove_unused_css':False,
    'compress_images': False,
 }
 CUSTOMIZATION = {
@ -103,6 +105,12 @@ created from production templates can have a large number of extra CSS rules
 that dont match any actual content. These extra rules can slow down readers
 that need to parse them all.</p>
 '''),
 'compress_images': _('''\
 <p>Losslessly compress images in the book, to reduce the filesize, without
 affecting image quality.</p>
 '''),
 }
 def hfix(name, raw):
@ -203,6 +211,12 @@ def polish_one(ebook, opts, report, customization=None):
            changed = True
        report('')
    if opts.compress_images:
        rt(_('Losslessly compressing images'))
        if compress_images(ebook, report)[0]:
            changed = True
        report('')
    return changed
@ -265,6 +279,7 @@ def option_parser():
    o('--remove-jacket', help=CLI_HELP['remove_jacket'])
    o('--smarten-punctuation', '-p', help=CLI_HELP['smarten_punctuation'])
    o('--remove-unused-css', '-u', help=CLI_HELP['remove_unused_css'])
    o('--compress-images', '-i', help=CLI_HELP['compress_images'])
    o('--verbose', help=_('Produce more verbose output, useful for debugging.'))
--- a/src/calibre/gui2/actions/polish.py
+++ b/src/calibre/gui2/actions/polish.py
@ -67,6 +67,7 @@ class Polish(QDialog):  # {{{
            'jacket':_('<h3>Book Jacket</h3>%s')%HELP['jacket'],
            'remove_jacket':_('<h3>Remove Book Jacket</h3>%s')%HELP['remove_jacket'],
            'remove_unused_css':_('<h3>Remove unused CSS rules</h3>%s')%HELP['remove_unused_css'],
            'compress_images': _('<h3>Losslessly compress images</h3>%s') % HELP['compress_images'],
        }
        self.l = l = QGridLayout()
@ -85,6 +86,7 @@ class Polish(QDialog):  # {{{
            ('jacket', _('Add/Replace metadata as a "book &jacket" page')),
            ('remove_jacket', _('&Remove a previously inserted book jacket')),
            ('remove_unused_css', _('Remove &unused CSS rules from the book')),
            ('compress_images', _('Losslessly compress images')),
        ])
        prefs = gprefs.get('polishing_settings', {})
        for name, text in self.all_actions.iteritems():
--- a/src/calibre/utils/img.py
+++ b/src/calibre/utils/img.py
@ -4,10 +4,23 @@
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 import os, subprocess, errno, shutil, tempfile
 from threading import Thread
 from PyQt5.Qt import QImage, QByteArray, QBuffer, Qt, QPainter
-from calibre import fit_image
+from calibre import fit_image, force_unicode
 from calibre.constants import iswindows
 from calibre.utils.filenames import atomic_rename
 def get_exe_path(name):
    '''
    Return the path to the helper executable ``name``.

    The executable is looked up in the same directory as PDFTOHTML. On
    Windows ``-calibre.exe`` is appended to the name. If PDFTOHTML has no
    directory component, the bare (possibly suffixed) name is returned.
    '''
    from calibre.ebooks.pdf.pdftohtml import PDFTOHTML
    exe_name = (name + '-calibre.exe') if iswindows else name
    bindir = os.path.dirname(PDFTOHTML)
    if bindir:
        return os.path.join(bindir, exe_name)
    return exe_name
 def image_from_data(data):
    i = QImage()
@ -48,3 +61,80 @@ def scale_image(data, width=60, height=80, compression_quality=70, as_png=False,
    if not img.save(buf, fmt, quality=compression_quality):
        raise ValueError('Failed to export thumbnail image to: ' + fmt)
    return img.width(), img.height(), ba.data()
 def run_optimizer(file_path, cmd, as_filter=False, input_data=None):
    '''
    Run an external optimizer program on ``file_path`` and, if it succeeds,
    atomically replace the file with the program's output.

    :param file_path: Path to the file to optimize.
    :param cmd: The command line as a list. When ``as_filter`` is False it
        must contain the placeholders ``True`` and ``False``, which are
        replaced by the base names of the input file and of the temporary
        output file respectively. The list passed in is not modified.
    :param as_filter: If True, the program is fed its input on stdin and its
        stdout is written to the temporary output file.
    :param input_data: Optional file-like object to feed to the program
        instead of the contents of ``file_path`` (filter mode only).
    :return: None on success. If the program exits with a non-zero code,
        its error output as text; ``file_path`` is then left untouched.
    '''
    file_path = os.path.abspath(file_path)
    cwd = os.path.dirname(file_path)
    fd, outfile = tempfile.mkstemp(dir=cwd)
    outf = inw = outw = None
    try:
        if as_filter:
            outf = os.fdopen(fd, 'wb')
        else:
            os.close(fd)
        iname, oname = os.path.basename(file_path), os.path.basename(outfile)
        # Work on a copy so the caller's list is left untouched
        cmd = list(cmd)
        def repl(q, r):
            cmd[cmd.index(q)] = r
        if not as_filter:
            repl(True, iname), repl(False, oname)
        if iswindows:
            # subprocess in python 2 cannot handle unicode strings that are not
            # encodeable in mbcs, so we fail here, where it is more explicit,
            # instead.
            cmd = [x.encode('mbcs') if isinstance(x, type('')) else x for x in cmd]
            if isinstance(cwd, type('')):
                cwd = cwd.encode('mbcs')
        stdin = subprocess.PIPE if as_filter else None
        stderr = subprocess.PIPE if as_filter else subprocess.STDOUT
        p = subprocess.Popen(cmd, cwd=cwd, stdout=subprocess.PIPE, stderr=stderr, stdin=stdin)
        stderr = p.stderr if as_filter else p.stdout
        if as_filter:
            src = input_data or open(file_path, 'rb')
            def copy(src, dest):
                try:
                    shutil.copyfileobj(src, dest)
                finally:
                    src.close(), dest.close()
            inw = Thread(name='CopyInput', target=copy, args=(src, p.stdin))
            inw.daemon = True
            inw.start()
            # The thread name must be passed by keyword: the first positional
            # parameter of Thread is ``group``, which has to be None.
            outw = Thread(name='CopyOutput', target=copy, args=(p.stdout, outf))
            outw.daemon = True
            outw.start()
            # The output thread now owns outf and closes it when done
            outf = None
        raw = force_unicode(stderr.read())
        exit_code = p.wait()
        if as_filter:
            # Make sure all output has been written to (and the handle closed
            # on) the temporary file before it is renamed or removed.
            outw.join(60.0), inw.join(60.0)
        if exit_code != 0:
            return raw
        shutil.copystat(file_path, outfile)
        atomic_rename(outfile, file_path)
    finally:
        if outf is not None:
            # Failed before the output thread took over the file object
            outf.close()
        try:
            os.remove(outfile)
        except EnvironmentError as err:
            if err.errno != errno.ENOENT:
                raise
 def optimize_jpeg(file_path):
    '''
    Optimize the JPEG file at ``file_path`` in place by running the jpegtran
    program on it via run_optimizer. Returns whatever run_optimizer returns.
    '''
    cmd = [get_exe_path('jpegtran')]
    cmd.extend('-copy none -optimize -progressive -maxmemory 100M -outfile'.split())
    # Placeholders filled in by run_optimizer: False -> output, True -> input
    cmd.extend((False, True))
    return run_optimizer(file_path, cmd)
 def optimize_png(file_path):
    '''
    Optimize the PNG file at ``file_path`` in place by running the optipng
    program on it via run_optimizer. Returns whatever run_optimizer returns.
    '''
    cmd = [get_exe_path('optipng')]
    cmd.extend('-fix -clobber -strip all -o7 -out'.split())
    # Placeholders filled in by run_optimizer: False -> output, True -> input
    cmd.extend((False, True))
    return run_optimizer(file_path, cmd)
 def encode_jpeg(file_path, quality=80):
    '''
    Re-encode the image at ``file_path`` in place as a JPEG, by exporting it
    to PPM and piping that through the cjpeg program via run_optimizer.

    :param file_path: Path to an image file loadable by QImage.
    :param quality: JPEG quality; converted to an int and clamped to the
        range 0-100.
    :return: Whatever run_optimizer returns.
    :raises ValueError: If the image cannot be loaded or exported to PPM.
    '''
    from calibre.srv.utils import ReadOnlyFileBuffer
    quality = max(0, min(100, int(quality)))
    exe = get_exe_path('cjpeg')
    cmd = [exe] + '-optimize -progressive -maxmemory 100M -quality'.split() + [str(quality)]
    img = QImage()
    if not img.load(file_path):
        raise ValueError('%s is not a valid image file' % file_path)
    ba = QByteArray()
    buf = QBuffer(ba)
    buf.open(QBuffer.WriteOnly)
    if not img.save(buf, 'PPM'):
        raise ValueError('Failed to export image to PPM')
    # QByteArray.data is a method and must be called to get the actual bytes;
    # passing the bound method itself would hand the buffer no data at all.
    return run_optimizer(file_path, cmd, as_filter=True, input_data=ReadOnlyFileBuffer(ba.data()))
--- a/src/pyj/srv.pyj
+++ b/src/pyj/srv.pyj
@ -44,6 +44,7 @@ def load_book_list():
 def on_load():
    if window.calibre_entry_point == 'book list':
        print('calibre loaded at:', Date().toString())
        load_book_list()
 # We wait for all page elements to load, since this is a single page app