Book polishing: Add tool to losslessly compress images in the book in order to reduce its filesize, without affecting image quality

This commit is contained in:
Kovid Goyal 2015-11-27 16:02:56 +05:30
parent b47f7b8b45
commit a7489de7cb
5 changed files with 196 additions and 1 deletions

View File

@ -0,0 +1,87 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import (unicode_literals, division, absolute_import,
print_function)
import os
from functools import partial
from threading import Thread
from Queue import Queue, Empty
from calibre import detect_ncpus, human_readable
class Worker(Thread):
    """
    Thread that drains a shared queue of image names, compressing each image
    and storing the outcome for that name in the shared ``results`` mapping.

    The thread starts itself as soon as it is constructed.
    """

    daemon = True

    def __init__(self, name, queue, results, container, jpeg_quality):
        Thread.__init__(self, name=name)
        self.queue = queue
        self.results = results
        self.container = container
        self.jpeg_quality = jpeg_quality
        self.start()

    def run(self):
        """Process queued names until the queue is empty."""
        fetch = self.queue.get_nowait
        while True:
            try:
                image_name = fetch()
            except Empty:
                # Nothing left to do, let the thread finish
                return
            try:
                self.compress(image_name)
            except Exception:
                import traceback
                # Record the failure as (False, traceback text) instead of
                # letting it kill the thread
                self.results[image_name] = (False, traceback.format_exc())
            finally:
                # Always mark the item done so that queue.join() can return
                self.queue.task_done()

    def compress(self, name):
        """
        Compress the image ``name`` in place and store
        ``(True, (size_before, size_after))`` in ``self.results``.

        PNG images (mime type containing ``png``) go through optimize_png.
        Everything else goes through optimize_jpeg when no JPEG quality was
        requested, otherwise it is re-encoded with encode_jpeg at that quality.
        """
        from calibre.utils.img import optimize_png, optimize_jpeg, encode_jpeg
        mime_type = self.container.mime_map[name]
        if 'png' in mime_type:
            optimizer = optimize_png
        elif self.jpeg_quality is not None:
            optimizer = partial(encode_jpeg, quality=self.jpeg_quality)
        else:
            optimizer = optimize_jpeg
        image_path = self.container.get_file_path_for_processing(name)
        size_before = os.path.getsize(image_path)
        optimizer(image_path)
        size_after = os.path.getsize(image_path)
        self.results[name] = (True, (size_before, size_after))
def compress_images(container, report=None, names=None, jpeg_quality=None):
    """
    Compress the PNG and JPEG images in ``container`` using a pool of Worker
    threads.

    :param container: The book container. Its ``manifest_type_map`` is used to
        find images with the mime types image/png, image/jpg and image/jpeg.
    :param report: Optional callable taking a single string, used to report
        progress and errors. When it is None nothing is reported.
    :param names: Optional iterable of names. When given, only images whose
        names are in it are processed.
    :param jpeg_quality: Passed on to every Worker, see Worker.compress().
    :return: A tuple ``(changed, results)``. ``changed`` is True if the total
        size of the images whose size changed is greater than zero.
        ``results`` maps each processed image name to the tuple stored for it
        by the Worker: ``(True, (before, after))`` on success or
        ``(False, traceback_text)`` on failure.
    """
    mt_map = container.manifest_type_map
    images = set()
    for mt in 'png jpg jpeg'.split():
        images |= set(mt_map.get('image/' + mt, ()))
    if names is not None:
        images &= set(names)
    results = {}
    queue = Queue()
    for name in images:
        queue.put(name)
    # Workers start themselves on construction, never use more than there are images
    for i in xrange(min(detect_ncpus(), len(images))):
        Worker('CompressImage%d' % i, queue, results, container, jpeg_quality)
    queue.join()
    before_total = after_total = 0
    for name, (ok, res) in results.iteritems():
        if ok:
            before, after = res
            if before != after:
                before_total += before
                after_total += after
                if report:
                    report(_('{0} compressed from {1} to {2} bytes [{3:.1%}]').format(
                        name, human_readable(before), human_readable(after), after/before))
        elif report:
            # report is optional, so failures must not be reported
            # unconditionally (that raised a TypeError when report was None)
            report(_('Failed to process {0} with error:').format(name))
            report(res)
    if report:
        if before_total > 0:
            report('')
            report(_('Total image filesize reduced from {0} to {1} [{2:.1%}]').format(
                human_readable(before_total), human_readable(after_total), after_total/before_total))
        else:
            report(_('Images are already fully optimized'))
    return before_total > 0, results

View File

@ -14,6 +14,7 @@ from functools import partial
from calibre.ebooks.oeb.polish.container import get_container from calibre.ebooks.oeb.polish.container import get_container
from calibre.ebooks.oeb.polish.stats import StatsCollector from calibre.ebooks.oeb.polish.stats import StatsCollector
from calibre.ebooks.oeb.polish.subset import subset_all_fonts from calibre.ebooks.oeb.polish.subset import subset_all_fonts
from calibre.ebooks.oeb.polish.images import compress_images
from calibre.ebooks.oeb.polish.embed import embed_all_fonts from calibre.ebooks.oeb.polish.embed import embed_all_fonts
from calibre.ebooks.oeb.polish.cover import set_cover from calibre.ebooks.oeb.polish.cover import set_cover
from calibre.ebooks.oeb.polish.replace import smarten_punctuation from calibre.ebooks.oeb.polish.replace import smarten_punctuation
@ -31,6 +32,7 @@ ALL_OPTS = {
'remove_jacket':False, 'remove_jacket':False,
'smarten_punctuation':False, 'smarten_punctuation':False,
'remove_unused_css':False, 'remove_unused_css':False,
'compress_images': False,
} }
CUSTOMIZATION = { CUSTOMIZATION = {
@ -103,6 +105,12 @@ created from production templates can have a large number of extra CSS rules
that dont match any actual content. These extra rules can slow down readers that dont match any actual content. These extra rules can slow down readers
that need to parse them all.</p> that need to parse them all.</p>
'''), '''),
'compress_images': _('''\
<p>Losslessly compress images in the book, to reduce the filesize, without
affecting image quality.</p>
'''),
} }
def hfix(name, raw): def hfix(name, raw):
@ -203,6 +211,12 @@ def polish_one(ebook, opts, report, customization=None):
changed = True changed = True
report('') report('')
if opts.compress_images:
rt(_('Losslessly compressing images'))
if compress_images(ebook, report)[0]:
changed = True
report('')
return changed return changed
@ -265,6 +279,7 @@ def option_parser():
o('--remove-jacket', help=CLI_HELP['remove_jacket']) o('--remove-jacket', help=CLI_HELP['remove_jacket'])
o('--smarten-punctuation', '-p', help=CLI_HELP['smarten_punctuation']) o('--smarten-punctuation', '-p', help=CLI_HELP['smarten_punctuation'])
o('--remove-unused-css', '-u', help=CLI_HELP['remove_unused_css']) o('--remove-unused-css', '-u', help=CLI_HELP['remove_unused_css'])
o('--compress-images', '-i', help=CLI_HELP['compress_images'])
o('--verbose', help=_('Produce more verbose output, useful for debugging.')) o('--verbose', help=_('Produce more verbose output, useful for debugging.'))

View File

@ -67,6 +67,7 @@ class Polish(QDialog): # {{{
'jacket':_('<h3>Book Jacket</h3>%s')%HELP['jacket'], 'jacket':_('<h3>Book Jacket</h3>%s')%HELP['jacket'],
'remove_jacket':_('<h3>Remove Book Jacket</h3>%s')%HELP['remove_jacket'], 'remove_jacket':_('<h3>Remove Book Jacket</h3>%s')%HELP['remove_jacket'],
'remove_unused_css':_('<h3>Remove unused CSS rules</h3>%s')%HELP['remove_unused_css'], 'remove_unused_css':_('<h3>Remove unused CSS rules</h3>%s')%HELP['remove_unused_css'],
'compress_images': _('<h3>Losslessly compress images</h3>%s') % HELP['compress_images'],
} }
self.l = l = QGridLayout() self.l = l = QGridLayout()
@ -85,6 +86,7 @@ class Polish(QDialog): # {{{
('jacket', _('Add/Replace metadata as a "book &jacket" page')), ('jacket', _('Add/Replace metadata as a "book &jacket" page')),
('remove_jacket', _('&Remove a previously inserted book jacket')), ('remove_jacket', _('&Remove a previously inserted book jacket')),
('remove_unused_css', _('Remove &unused CSS rules from the book')), ('remove_unused_css', _('Remove &unused CSS rules from the book')),
('compress_images', _('Losslessly compress images')),
]) ])
prefs = gprefs.get('polishing_settings', {}) prefs = gprefs.get('polishing_settings', {})
for name, text in self.all_actions.iteritems(): for name, text in self.all_actions.iteritems():

View File

@ -4,10 +4,23 @@
from __future__ import (unicode_literals, division, absolute_import, from __future__ import (unicode_literals, division, absolute_import,
print_function) print_function)
import os, subprocess, errno, shutil, tempfile
from threading import Thread
from PyQt5.Qt import QImage, QByteArray, QBuffer, Qt, QPainter from PyQt5.Qt import QImage, QByteArray, QBuffer, Qt, QPainter
from calibre import fit_image from calibre import fit_image, force_unicode
from calibre.constants import iswindows
from calibre.utils.filenames import atomic_rename
def get_exe_path(name):
    """
    Return the command to use for running the helper program ``name``.

    The program is looked for in the same directory as the pdftohtml
    executable (PDFTOHTML). If PDFTOHTML has no directory component, the bare
    program name is returned. On windows ``-calibre.exe`` is appended to the
    program name.
    """
    from calibre.ebooks.pdf.pdftohtml import PDFTOHTML
    exe_name = (name + '-calibre.exe') if iswindows else name
    bin_dir = os.path.dirname(PDFTOHTML)
    if bin_dir:
        return os.path.join(bin_dir, exe_name)
    return exe_name
def image_from_data(data): def image_from_data(data):
i = QImage() i = QImage()
@ -48,3 +61,80 @@ def scale_image(data, width=60, height=80, compression_quality=70, as_png=False,
if not img.save(buf, fmt, quality=compression_quality): if not img.save(buf, fmt, quality=compression_quality):
raise ValueError('Failed to export thumbnail image to: ' + fmt) raise ValueError('Failed to export thumbnail image to: ' + fmt)
return img.width(), img.height(), ba.data() return img.width(), img.height(), ba.data()
def run_optimizer(file_path, cmd, as_filter=False, input_data=None):
    """
    Run the external program ``cmd`` on the image at ``file_path`` and, if the
    program exits successfully, atomically replace the image with its output.

    The output is first written to a temporary file created in the same
    directory as ``file_path``. The temporary file is always cleaned up.

    :param file_path: Path to the image file to process.
    :param cmd: The command line, as a list. When ``as_filter`` is False it
        must contain the placeholders ``True`` and ``False`` which are replaced
        by the names of the input and output files, respectively.
    :param as_filter: If True, the program is fed the input on its stdin and
        its stdout is saved as the output file.
    :param input_data: Optional file-like object used as the input when
        ``as_filter`` is True, instead of the contents of ``file_path``.
    :return: The diagnostic output of the program if it exited with a non-zero
        return code (``file_path`` is left untouched), otherwise None.
    """
    file_path = os.path.abspath(file_path)
    cwd = os.path.dirname(file_path)
    # Work on a copy so that substituting the placeholders does not modify the
    # list object owned by the caller
    cmd = list(cmd)
    fd, outfile = tempfile.mkstemp(dir=cwd)
    outf = None
    writer_started = False
    try:
        if as_filter:
            outf = os.fdopen(fd, 'wb')
        else:
            os.close(fd)
        iname, oname = os.path.basename(file_path), os.path.basename(outfile)

        def repl(q, r):
            cmd[cmd.index(q)] = r

        if not as_filter:
            repl(True, iname), repl(False, oname)
        if iswindows:
            # subprocess in python 2 cannot handle unicode strings that are not
            # encodeable in mbcs, so we fail here, where it is more explicit,
            # instead.
            cmd = [x.encode('mbcs') if isinstance(x, type('')) else x for x in cmd]
            if isinstance(cwd, type('')):
                cwd = cwd.encode('mbcs')
        stdin = subprocess.PIPE if as_filter else None
        stderr = subprocess.PIPE if as_filter else subprocess.STDOUT
        p = subprocess.Popen(cmd, cwd=cwd, stdout=subprocess.PIPE, stderr=stderr, stdin=stdin)
        # In filter mode stdout carries the image data, so diagnostics are read
        # from stderr. Otherwise stderr is merged into stdout.
        stderr = p.stderr if as_filter else p.stdout
        inw = outw = None
        if as_filter:
            src = input_data or open(file_path, 'rb')

            def copy(src, dest):
                try:
                    shutil.copyfileobj(src, dest)
                finally:
                    src.close(), dest.close()

            inw = Thread(name='CopyInput', target=copy, args=(src, p.stdin))
            inw.daemon = True
            inw.start()
            # The thread name must be passed by keyword, the first positional
            # argument of Thread() is group, which has to be None
            outw = Thread(name='CopyOutput', target=copy, args=(p.stdout, outf))
            outw.daemon = True
            outw.start()
            writer_started = True
        raw = force_unicode(stderr.read())
        rc = p.wait()
        if as_filter:
            # Wait for the copier threads so that the output file is completely
            # written and closed before it is renamed or removed
            outw.join(), inw.join()
        if rc != 0:
            return raw
        shutil.copystat(file_path, outfile)
        atomic_rename(outfile, file_path)
    finally:
        if outf is not None and not writer_started:
            # The writer thread never took ownership of the output file, so it
            # has to be closed here
            outf.close()
        try:
            os.remove(outfile)
        except EnvironmentError as err:
            # The file is gone if it was successfully renamed
            if err.errno != errno.ENOENT:
                raise
def optimize_jpeg(file_path):
    """
    Run jpegtran on the JPEG file at ``file_path`` via run_optimizer() and
    return whatever run_optimizer() returns.
    """
    jpegtran = get_exe_path('jpegtran')
    options = ['-copy', 'none', '-optimize', '-progressive', '-maxmemory', '100M', '-outfile']
    # False and True are placeholders that run_optimizer() replaces with the
    # output and input file names, respectively
    command = [jpegtran] + options + [False, True]
    return run_optimizer(file_path, command)
def optimize_png(file_path):
    """
    Run optipng on the PNG file at ``file_path`` via run_optimizer() and
    return whatever run_optimizer() returns.
    """
    optipng = get_exe_path('optipng')
    options = ['-fix', '-clobber', '-strip', 'all', '-o7', '-out']
    # False and True are placeholders that run_optimizer() replaces with the
    # output and input file names, respectively
    command = [optipng] + options + [False, True]
    return run_optimizer(file_path, command)
def encode_jpeg(file_path, quality=80):
    """
    Re-encode the image at ``file_path`` as a JPEG by converting it to PPM and
    piping that through cjpeg, using run_optimizer() in filter mode.

    :param file_path: Path to the image file, it must be loadable by QImage.
    :param quality: The JPEG quality, it is converted to an integer and
        clamped to the range 0-100.
    :return: Whatever run_optimizer() returns.
    :raises ValueError: If the image cannot be loaded or cannot be exported
        to PPM.
    """
    from calibre.srv.utils import ReadOnlyFileBuffer
    quality = max(0, min(100, int(quality)))
    exe = get_exe_path('cjpeg')
    cmd = [exe] + '-optimize -progressive -maxmemory 100M -quality'.split() + [str(quality)]
    img = QImage()
    if not img.load(file_path):
        raise ValueError('%s is not a valid image file' % file_path)
    ba = QByteArray()
    buf = QBuffer(ba)
    buf.open(QBuffer.WriteOnly)
    if not img.save(buf, 'PPM'):
        raise ValueError('Failed to export image to PPM')
    # data() must be called to get the actual bytes of the PPM image, passing
    # the bound method itself gives the buffer nothing to read from
    return run_optimizer(file_path, cmd, as_filter=True, input_data=ReadOnlyFileBuffer(ba.data()))

View File

@ -44,6 +44,7 @@ def load_book_list():
def on_load(): def on_load():
if window.calibre_entry_point == 'book list': if window.calibre_entry_point == 'book list':
print('calibre loaded at:', Date().toString())
load_book_list() load_book_list()
# We wait for all page elements to load, since this is a single page app # We wait for all page elements to load, since this is a single page app