Book polishing: Add tool to losslessly compress images in the book in order to reduce its filesize, without affecting image quality

This commit is contained in:
Kovid Goyal 2015-11-27 16:02:56 +05:30
parent b47f7b8b45
commit a7489de7cb
5 changed files with 196 additions and 1 deletions

View File

@ -0,0 +1,87 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import (unicode_literals, division, absolute_import,
print_function)
import os
from functools import partial
from threading import Thread
from Queue import Queue, Empty
from calibre import detect_ncpus, human_readable
class Worker(Thread):
    """Daemon thread that compresses images pulled from a shared queue.

    The thread starts itself as soon as it is constructed. It keeps taking
    image names from ``queue`` until the queue is empty, and stores one
    entry per name in the shared ``results`` mapping:

    * ``(True, (size_before, size_after))`` when processing completed
    * ``(False, traceback_text)`` when processing raised an exception
    """

    daemon = True

    def __init__(self, name, queue, results, container, jpeg_quality):
        Thread.__init__(self, name=name)
        self.queue = queue
        self.results = results
        self.container = container
        self.jpeg_quality = jpeg_quality
        self.start()

    def run(self):
        """Drain the queue, recording a result for every name taken."""
        while True:
            try:
                image_name = self.queue.get_nowait()
            except Empty:
                # Nothing left to do, let the thread finish
                return
            try:
                self.compress(image_name)
            except Exception:
                import traceback
                self.results[image_name] = (False, traceback.format_exc())
            finally:
                # Always mark the item done so queue.join() cannot hang
                self.queue.task_done()

    def compress(self, name):
        """Optimize the image ``name`` in place and record its sizes.

        PNG images go through ``optimize_png``. Other images go through
        ``optimize_jpeg`` unless a JPEG quality was supplied, in which
        case they are re-encoded with ``encode_jpeg`` at that quality.
        """
        from calibre.utils.img import optimize_png, optimize_jpeg, encode_jpeg
        mime_type = self.container.mime_map[name]
        if 'png' in mime_type:
            optimizer = optimize_png
        elif self.jpeg_quality is not None:
            optimizer = partial(encode_jpeg, quality=self.jpeg_quality)
        else:
            optimizer = optimize_jpeg
        path = self.container.get_file_path_for_processing(name)
        size_before = os.path.getsize(path)
        optimizer(path)
        self.results[name] = (True, (size_before, os.path.getsize(path)))
def compress_images(container, report=None, names=None, jpeg_quality=None):
    """Compress the PNG and JPEG images in ``container`` using worker threads.

    :param container: The book container. Its ``manifest_type_map`` is used
        to find images and it is handed to every :class:`Worker`.
    :param report: Optional callable accepting a single line of text. When
        it is None (the default) nothing is reported.
    :param names: Optional iterable of names. When given, only images whose
        names are in it are processed.
    :param jpeg_quality: Passed through to every :class:`Worker`.
    :return: A tuple ``(changed, results)``. ``changed`` is True if at least
        one image changed size. ``results`` maps each processed name to
        ``(ok, payload)`` as recorded by the workers.
    """
    mt_map = container.manifest_type_map
    images = set()
    for mt in 'png jpg jpeg'.split():
        images |= set(mt_map.get('image/' + mt, ()))
    if names is not None:
        images &= set(names)
    results = {}
    queue = Queue()
    for name in images:
        queue.put(name)
    # Workers start themselves on construction, so no references are kept.
    # Never create more workers than there are images.
    for i in xrange(min(detect_ncpus(), len(images))):
        Worker('CompressImage%d' % i, queue, results, container, jpeg_quality)
    queue.join()
    before_total = after_total = 0
    for name, (ok, res) in results.iteritems():
        if ok:
            before, after = res
            if before != after:
                before_total += before
                after_total += after
                if report:
                    report(_('{0} compressed from {1} to {2} bytes [{3:.1%}]').format(
                        name, human_readable(before), human_readable(after), after/before))
        elif report:
            # report is optional: failures must not crash callers that did
            # not supply one, the details remain available in results
            report(_('Failed to process {0} with error:').format(name))
            report(res)
    if report:
        if before_total > 0:
            report('')
            report(_('Total image filesize reduced from {0} to {1} [{2:.1%}]').format(
                human_readable(before_total), human_readable(after_total), after_total/before_total))
        else:
            report(_('Images are already fully optimized'))
    return before_total > 0, results

View File

@ -14,6 +14,7 @@ from functools import partial
from calibre.ebooks.oeb.polish.container import get_container
from calibre.ebooks.oeb.polish.stats import StatsCollector
from calibre.ebooks.oeb.polish.subset import subset_all_fonts
from calibre.ebooks.oeb.polish.images import compress_images
from calibre.ebooks.oeb.polish.embed import embed_all_fonts
from calibre.ebooks.oeb.polish.cover import set_cover
from calibre.ebooks.oeb.polish.replace import smarten_punctuation
@ -31,6 +32,7 @@ ALL_OPTS = {
'remove_jacket':False,
'smarten_punctuation':False,
'remove_unused_css':False,
'compress_images': False,
}
CUSTOMIZATION = {
@ -103,6 +105,12 @@ created from production templates can have a large number of extra CSS rules
that dont match any actual content. These extra rules can slow down readers
that need to parse them all.</p>
'''),
'compress_images': _('''\
<p>Losslessly compress images in the book, to reduce the filesize, without
affecting image quality.</p>
'''),
}
def hfix(name, raw):
@ -203,6 +211,12 @@ def polish_one(ebook, opts, report, customization=None):
changed = True
report('')
if opts.compress_images:
rt(_('Losslessly compressing images'))
if compress_images(ebook, report)[0]:
changed = True
report('')
return changed
@ -265,6 +279,7 @@ def option_parser():
o('--remove-jacket', help=CLI_HELP['remove_jacket'])
o('--smarten-punctuation', '-p', help=CLI_HELP['smarten_punctuation'])
o('--remove-unused-css', '-u', help=CLI_HELP['remove_unused_css'])
o('--compress-images', '-i', help=CLI_HELP['compress_images'])
o('--verbose', help=_('Produce more verbose output, useful for debugging.'))

View File

@ -67,6 +67,7 @@ class Polish(QDialog): # {{{
'jacket':_('<h3>Book Jacket</h3>%s')%HELP['jacket'],
'remove_jacket':_('<h3>Remove Book Jacket</h3>%s')%HELP['remove_jacket'],
'remove_unused_css':_('<h3>Remove unused CSS rules</h3>%s')%HELP['remove_unused_css'],
'compress_images': _('<h3>Losslessly compress images</h3>%s') % HELP['compress_images'],
}
self.l = l = QGridLayout()
@ -85,6 +86,7 @@ class Polish(QDialog): # {{{
('jacket', _('Add/Replace metadata as a "book &jacket" page')),
('remove_jacket', _('&Remove a previously inserted book jacket')),
('remove_unused_css', _('Remove &unused CSS rules from the book')),
('compress_images', _('Losslessly compress images')),
])
prefs = gprefs.get('polishing_settings', {})
for name, text in self.all_actions.iteritems():

View File

@ -4,10 +4,23 @@
from __future__ import (unicode_literals, division, absolute_import,
print_function)
import os, subprocess, errno, shutil, tempfile
from threading import Thread
from PyQt5.Qt import QImage, QByteArray, QBuffer, Qt, QPainter
from calibre import fit_image
from calibre import fit_image, force_unicode
from calibre.constants import iswindows
from calibre.utils.filenames import atomic_rename
def get_exe_path(name):
    """Return the path to the helper executable called ``name``.

    The executable is looked up in the same directory as the pdftohtml
    binary. On Windows ``-calibre.exe`` is appended to the name. When
    pdftohtml has no directory component the bare executable name is
    returned.
    """
    from calibre.ebooks.pdf.pdftohtml import PDFTOHTML
    exe_name = (name + '-calibre.exe') if iswindows else name
    directory = os.path.dirname(PDFTOHTML)
    return os.path.join(directory, exe_name) if directory else exe_name
def image_from_data(data):
i = QImage()
@ -48,3 +61,80 @@ def scale_image(data, width=60, height=80, compression_quality=70, as_png=False,
if not img.save(buf, fmt, quality=compression_quality):
raise ValueError('Failed to export thumbnail image to: ' + fmt)
return img.width(), img.height(), ba.data()
def run_optimizer(file_path, cmd, as_filter=False, input_data=None):
    """Run an external optimizer on ``file_path`` and replace the file with
    the optimizer's output.

    The output is written to a temporary file created in the same directory
    as ``file_path``. On success that file is given the stat info of the
    original and atomically renamed over it.

    :param file_path: Path of the file to optimize.
    :param cmd: The command line as a list. When ``as_filter`` is False it
        must contain the placeholder ``True`` where the input file name goes
        and ``False`` where the output file name goes. The list passed in is
        not modified.
    :param as_filter: If True the program is run as a filter: the input is
        written to its stdin and its stdout is saved as the output.
    :param input_data: Optional file-like object used as the input in
        filter mode instead of the contents of ``file_path``.
    :return: None on success. If the program exits with a non-zero code,
        the text it wrote to stderr (filter mode) or to stdout and stderr
        combined (otherwise) is returned and ``file_path`` is left alone.
    """
    file_path = os.path.abspath(file_path)
    cwd = os.path.dirname(file_path)
    # Substitute placeholders in a copy, not in the caller's list
    cmd = list(cmd)
    fd, outfile = tempfile.mkstemp(dir=cwd)
    outf = inw = outw = None
    try:
        if as_filter:
            outf = os.fdopen(fd, 'wb')
        else:
            os.close(fd)
        iname, oname = os.path.basename(file_path), os.path.basename(outfile)

        def repl(q, r):
            cmd[cmd.index(q)] = r

        if not as_filter:
            repl(True, iname), repl(False, oname)
        if iswindows:
            # subprocess in python 2 cannot handle unicode strings that are not
            # encodeable in mbcs, so we fail here, where it is more explicit,
            # instead.
            cmd = [x.encode('mbcs') if isinstance(x, type('')) else x for x in cmd]
            if isinstance(cwd, type('')):
                cwd = cwd.encode('mbcs')
        stdin = subprocess.PIPE if as_filter else None
        stderr = subprocess.PIPE if as_filter else subprocess.STDOUT
        p = subprocess.Popen(cmd, cwd=cwd, stdout=subprocess.PIPE, stderr=stderr, stdin=stdin)
        stderr = p.stderr if as_filter else p.stdout
        if as_filter:
            src = input_data or open(file_path, 'rb')

            def copy(src, dest):
                try:
                    shutil.copyfileobj(src, dest)
                finally:
                    src.close(), dest.close()

            inw = Thread(name='CopyInput', target=copy, args=(src, p.stdin))
            inw.daemon = True
            inw.start()
            # name must be passed by keyword: the first positional argument
            # of Thread is group, which is required to be None
            outw = Thread(name='CopyOutput', target=copy, args=(p.stdout, outf))
            outw.daemon = True
            outw.start()
        raw = force_unicode(stderr.read())
        returncode = p.wait()
        if as_filter:
            # Wait for the copies to finish so the output file is completely
            # written and closed before it is renamed or removed
            outw.join(), inw.join()
        if returncode != 0:
            return raw
        shutil.copystat(file_path, outfile)
        atomic_rename(outfile, file_path)
    finally:
        if outf is not None and outw is None:
            # The output copy thread never took ownership of the file
            # object, so close it here to avoid leaking the descriptor
            outf.close()
        try:
            os.remove(outfile)
        except EnvironmentError as err:
            # After a successful rename the temporary file no longer exists
            if err.errno != errno.ENOENT:
                raise
def optimize_jpeg(file_path):
    """Optimize the JPEG file at ``file_path`` in place using jpegtran.

    Returns whatever ``run_optimizer`` returns for the command.
    """
    cmd = [get_exe_path('jpegtran')]
    cmd.extend('-copy none -optimize -progressive -maxmemory 100M -outfile'.split())
    # Placeholders filled in by run_optimizer: False is the output file
    # name, True is the input file name
    cmd.extend((False, True))
    return run_optimizer(file_path, cmd)
def optimize_png(file_path):
    """Optimize the PNG file at ``file_path`` in place using optipng.

    Returns whatever ``run_optimizer`` returns for the command.
    """
    cmd = [get_exe_path('optipng')]
    cmd.extend('-fix -clobber -strip all -o7 -out'.split())
    # Placeholders filled in by run_optimizer: False is the output file
    # name, True is the input file name
    cmd.extend((False, True))
    return run_optimizer(file_path, cmd)
def encode_jpeg(file_path, quality=80):
    """Re-encode the image at ``file_path`` as a JPEG using cjpeg, in place.

    The image is loaded with QImage, serialized as PPM and fed to cjpeg on
    its stdin via ``run_optimizer`` in filter mode.

    :param file_path: Path of the image to re-encode.
    :param quality: JPEG quality, converted to an int and clamped to the
        range 0-100.
    :return: Whatever ``run_optimizer`` returns for the command.
    :raises ValueError: If the image cannot be loaded or cannot be
        exported as PPM.
    """
    from calibre.srv.utils import ReadOnlyFileBuffer
    quality = max(0, min(100, int(quality)))
    exe = get_exe_path('cjpeg')
    cmd = [exe] + '-optimize -progressive -maxmemory 100M -quality'.split() + [str(quality)]
    img = QImage()
    if not img.load(file_path):
        raise ValueError('%s is not a valid image file' % file_path)
    ba = QByteArray()
    buf = QBuffer(ba)
    buf.open(QBuffer.WriteOnly)
    if not img.save(buf, 'PPM'):
        raise ValueError('Failed to export image to PPM')
    # QByteArray.data is a method: call it to get the PPM bytes, passing the
    # bound method itself would hand the buffer a callable instead of data
    return run_optimizer(file_path, cmd, as_filter=True, input_data=ReadOnlyFileBuffer(ba.data()))

View File

@ -44,6 +44,7 @@ def load_book_list():
def on_load():
if window.calibre_entry_point == 'book list':
print('calibre loaded at:', Date().toString())
load_book_list()
# We wait for all page elements to load, since this is a single page app