diff --git a/src/calibre/ebooks/oeb/polish/main.py b/src/calibre/ebooks/oeb/polish/main.py new file mode 100644 index 0000000000..cba24c2a07 --- /dev/null +++ b/src/calibre/ebooks/oeb/polish/main.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import re +from collections import namedtuple +from functools import partial + +from calibre.ebooks.oeb.polish.container import get_container +from calibre.ebooks.oeb.polish.stats import StatsCollector +from calibre.ebooks.oeb.polish.subset import subset_all_fonts +from calibre.utils.logging import Log + +ALL_OPTS = { + 'subset': False, + 'opf': None, + 'cover': None, +} + +SUPPORTED = {'EPUB', 'AZW3'} + +# Help {{{ +HELP = {'about': _( +'''\ +

Polishing books is all about putting the shine of perfection onto +your carefully crafted ebooks.

+ +

Polishing tries to minimize the changes to the internal code of your ebook. +Unlike conversion, it does not flatten CSS, rename files, change font +sizes, adjust margins, etc. Every action performs only the minimum set of +changes needed for the desired effect.

+ +

You should use this tool as the last step in your ebook creation process.

+ +

Note that polishing only works on files in the %s formats.

+''')%_(' or ').join(SUPPORTED), + +'subset': _('''\ +

Subsetting fonts means reducing an embedded font to contain +only the characters used from that font in the book. This +greatly reduces the size of the font files (halving the font +file sizes is common).

+ +

For example, if the book uses a specific font for headers, +then subsetting will reduce that font to contain only the +characters present in the actual headers in the book. Or if the +book embeds the bold and italic versions of a font, but bold +and italic text is relatively rare, or absent altogether, then +the bold and italic fonts can either be reduced to only a few +characters or completely removed.

+ +

The only downside to subsetting fonts is that if, at a later +date you decide to add more text to your books, the newly added +text might not be covered by the subset font.

+'''),
+}
+
+def hfix(name, raw):
+    '''
+    Reflow a help text for display on the command line. The 'about' text is
+    returned unchanged. For every other entry, blank lines (paragraph breaks)
+    become a single newline and the remaining newlines become spaces.
+    '''
+    if name == 'about':
+        return raw
+    raw = raw.replace('\n\n', '__XX__')
+    raw = raw.replace('\n', ' ')
+    raw = raw.replace('__XX__', '\n')
+    return raw
+
+# Plain text versions of HELP, with anything that looks like a tag removed
+CLI_HELP = {x:hfix(x, re.sub('<.*?>', '', y)) for x, y in HELP.iteritems()}
+# }}}
+
+def polish(file_map, opts, log, report):
+    '''
+    Run the actions enabled in opts on every book in file_map.
+
+    :param file_map: Mapping of input book path to output book path
+    :param opts: Object with a subset attribute (callers build it from the
+                 keys of ALL_OPTS)
+    :param log: Logger, passed on to get_container()
+    :param report: Callable, called with one piece of report text at a time
+    '''
+    for inbook, outbook in file_map.iteritems():
+        report('Polishing: %s'%(inbook.rpartition('.')[-1].upper()))
+        ebook = get_container(inbook, log)
+
+        # Statistics are only gathered when an enabled action uses them
+        if opts.subset:
+            stats = StatsCollector(ebook)
+
+        if opts.subset:
+            report('\n### Subsetting embedded fonts')
+            subset_all_fonts(ebook, stats.font_stats, report)
+            report('')
+
+        ebook.commit(outbook)
+
+def gui_polish(data):
+    '''
+    Entry point for polish jobs started from the GUI. data maps option names
+    to values and must contain a 'files' list; every file in it is polished
+    in place. Options missing from data take their value from ALL_OPTS.
+    '''
+    files = data.pop('files')
+    file_map = {x:x for x in files}
+    opts = ALL_OPTS.copy()
+    opts.update(data)
+    # The fields must come from the merged options, not from data: data may
+    # lack keys of ALL_OPTS (for example 'cover') and the namedtuple
+    # constructor rejects keyword arguments that are not fields
+    O = namedtuple('Options', ' '.join(opts.iterkeys()))
+    opts = O(**opts)
+    log = Log(level=Log.DEBUG)
+    report = []
+    polish(file_map, opts, log, report.append)
+    log('\n', '-'*30, ' REPORT ', '-'*30)
+    for msg in report:
+        log(msg)
+
+def option_parser():
+    '''
+    Create the command line option parser. Every action is a boolean flag
+    that is off by default.
+    '''
+    from calibre.utils.config import OptionParser
+    USAGE = '%prog [options] input_file [output_file]\n\n' + re.sub(
+        r'<.*?>', '', CLI_HELP['about'])
+    parser = OptionParser(usage=USAGE)
+    o = partial(parser.add_option, default=False, action='store_true')
+    o('--subset-fonts', '-f', dest='subset', help=CLI_HELP['subset'])
+    o('--verbose', help=_('Produce more verbose output, useful for debugging.'))
+
+    return parser
+
+def cli_polish():
+    '''
+    Command line entry point: polish input_file into output_file, which
+    defaults to the input file name with _polished inserted before its
+    extension. Exits with status 1 when the arguments are invalid or no
+    action was requested.
+    '''
+    import os
+    parser = option_parser()
+    opts, args = parser.parse_args()
+    log = Log(level=Log.DEBUG if opts.verbose else Log.INFO)
+    if not args:
+        parser.print_help()
+        log.error(_('You must provide the input file to polish'))
+        raise SystemExit(1)
+    if len(args) > 2:
+        parser.print_help()
+        log.error(_('Unknown extra arguments'))
+        raise SystemExit(1)
+    if len(args) == 1:
+        inbook = args[0]
+        # splitext also copes with file names that have no extension and
+        # with dots in directory names
+        base, ext = os.path.splitext(inbook)
+        outbook = base + '_polished' + ext
+    else:
+        inbook, outbook = args
+
+    # Options the parser does not define (opf, cover) end up as None
+    popts = {k:getattr(opts, k, None) for k in ALL_OPTS}
+
+    O = namedtuple('Options', ' '.join(popts.iterkeys()))
+    popts = O(**popts)
+    report = []
+    # opf and cover are not counted as actions
+    something = any(getattr(popts, name) for name in ALL_OPTS
+                    if name not in {'opf', 'cover'})
+
+    if not something:
+        parser.print_help()
+        log.error(_('You must specify at least one action to perform'))
+        raise SystemExit(1)
+
+    polish({inbook:outbook}, popts, log, report.append)
+    log('\n', '-'*30, ' REPORT ', '-'*30)
+    for msg in report:
+        log(msg)
+
+    log('Output written to:', outbook)
+
+if __name__ == '__main__':
+    cli_polish()
+
diff --git a/src/calibre/ebooks/oeb/polish/subset.py b/src/calibre/ebooks/oeb/polish/subset.py
index 2799fc04f6..eb4f4c4b1f 100644
--- a/src/calibre/ebooks/oeb/polish/subset.py
+++ b/src/calibre/ebooks/oeb/polish/subset.py
@@ -74,8 +74,11 @@ def subset_all_fonts(container, font_stats, report):
             if remove_font_face_rules(container, sheet, remove):
                 style.text = sheet.cssText
                 container.dirty(name)
-    report('Reduced total font size to %.1f%% of original'%(
-        total_new/total_old*100))
+    if total_old > 0:
+        report('Reduced total font size to %.1f%% of original'%(
+            total_new/total_old*100))
+    else:
+        report('No embedded fonts found')
 
 if __name__ == '__main__':
     from calibre.ebooks.oeb.polish.container import get_container
diff --git a/src/calibre/gui2/actions/polish.py b/src/calibre/gui2/actions/polish.py
index 6e488371fe..90656a746d 100644
--- a/src/calibre/gui2/actions/polish.py
+++ b/src/calibre/gui2/actions/polish.py
@@ -7,67 +7,36 @@ __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal '
 __docformat__ = 'restructuredtext en'
 
+import os, weakref, shutil
+from collections import OrderedDict
+
 from PyQt4.Qt import (QDialog, QGridLayout, QIcon, QCheckBox, QLabel, QFrame,
         QApplication, QDialogButtonBox, Qt, QSize, QSpacerItem,
-        QSizePolicy)
+        QSizePolicy, QTimer)
 
-from 
calibre.gui2 import error_dialog +from calibre.gui2 import error_dialog, Dispatcher from calibre.gui2.actions import InterfaceAction +from calibre.gui2.convert.metadata import create_opf_file +from calibre.gui2.dialogs.progress import ProgressDialog +from calibre.ptempfile import PersistentTemporaryDirectory +from calibre.utils.config_base import tweaks -SUPPORTED = {'EPUB', 'AZW3'} class Polish(QDialog): def __init__(self, db, book_id_map, parent=None): + from calibre.ebooks.oeb.polish.main import HELP QDialog.__init__(self, parent) + self.db, self.book_id_map = weakref.ref(db), book_id_map self.setWindowIcon(QIcon(I('polish.png'))) self.setWindowTitle(ngettext( 'Polish book', _('Polish %d books')%len(book_id_map), len(book_id_map))) - # Help {{{ self.help_text = { - 'polish':_( - ''' -

About Polishing books

+ 'polish': _('

About Polishing books

%s')%HELP['about'], -

Polishing books is all about putting the shine of - perfection onto your carefully crafted ebooks.

- -

Polishing tries to minimize the changes to the internal code - of your ebook. Unlike conversion, it does not flatten CSS, - rename files, change font sizes, adjust margins, etc. Every - action to the left performs only the minimum set of changes - needed for the desired effect.

- -

You should use this tool as the last step in your ebook - creation process.

- -

Note that polishing only works on files in the - %s formats.

- ''')%_(' or ').join(SUPPORTED), - - 'subset':_( - ''' -

Subsetting fonts

- -

Subsetting fonts means reducing an embedded font to contain - only the characters used from that font in the book. This - greatly reduces the size of the font files (halving the font - file sizes is common).

- -

For example, if the book uses a specific font for headers, - then subsetting will reduce that font to contain only the - characters present in the actual headers in the book. Or if the - book embeds the bold and italic versions of a font, but bold - and italic text is relatively rare, or absent altogether, then - the bold and italic fonts can either be reduced to only a few - characters or completely removed.

- -

The only downside to subsetting fonts is that if, at a later - date you decide to add more text to your books, the newly added - text might not be covered by the subset font.

- '''), - } # }}} + 'subset':_('

Subsetting fonts

%s')%HELP['subset'],
+        }
 
         self.l = l = QGridLayout()
         self.setLayout(l)
@@ -76,10 +45,12 @@ class Polish(QDialog):
         l.addWidget(la, 0, 0, 1, 2)
 
         count = 0
-
-        for name, text in (
+        # Ordered map of action name -> checkbox label. Kept apart from
+        # self.actions, which accept() replaces with the chosen actions
+        self.all_actions = OrderedDict([
             ('subset', _('Subset all embedded fonts')),
-                ):
+        ])
+        for name, text in self.all_actions.iteritems():
             count += 1
             x = QCheckBox(text, self)
             l.addWidget(x, count, 0, 1, 1)
@@ -117,7 +88,7 @@ class Polish(QDialog):
     def accept(self):
         self.actions = ac = {}
         something = False
-        for action in ('subset',):
+        for action in self.all_actions:
             ac[action] = bool(getattr(self, 'opt_'+action).isChecked())
             if ac[action]:
                 something = True
@@ -125,8 +96,94 @@ class Polish(QDialog):
             return error_dialog(self, _('No actions selected'),
                 _('You must select at least one action, or click Cancel.'),
                 show=True)
+        self.queue_files()
         return super(Polish, self).accept()
 
+    def queue_files(self):
+        '''
+        Export every selected book into a temporary directory and record one
+        job per book in self.jobs. Up to five books are exported
+        synchronously; larger selections are exported one book per event loop
+        iteration behind a progress dialog that can be canceled.
+        '''
+        self.tdir = PersistentTemporaryDirectory('_queue_polish')
+        self.jobs = []
+        if len(self.book_id_map) <= 5:
+            for i, (book_id, formats) in enumerate(self.book_id_map.iteritems()):
+                self.do_book(i+1, book_id, formats)
+        else:
+            self.queue = [(i+1, id_) for i, id_ in enumerate(self.book_id_map)]
+            self.pd = ProgressDialog(_('Queueing books for polishing'),
+                                     max=len(self.queue), parent=self)
+            QTimer.singleShot(0, self.do_one)
+            self.pd.exec_()
+
+    def do_one(self):
+        '''
+        Export the next queued book, then reschedule itself until the queue
+        is empty. Canceling, or a failure while exporting, discards all the
+        jobs queued so far and closes the progress dialog.
+        '''
+        if not self.queue:
+            self.pd.accept()
+            return
+        if self.pd.canceled:
+            self.jobs = []
+            self.pd.reject()
+            return
+        # Take books from the front so that they are exported in the order of
+        # book_id_map and the progress value counts upwards
+        num, book_id = self.queue.pop(0)
+        try:
+            self.do_book(num, book_id, self.book_id_map[book_id])
+        except Exception:
+            # Do not hide the failure, and do not leave a partial batch
+            # behind for the caller to run
+            import traceback
+            traceback.print_exc()
+            self.jobs = []
+            self.pd.reject()
+        else:
+            self.pd.set_value(num)
+            QTimer.singleShot(0, self.do_one)
+
+    def do_book(self, num, book_id, formats):
+        '''
+        Export the metadata (as an OPF file), the cover (when one is copied)
+        and the given formats of book_id into its own sub-directory of
+        self.tdir, then append a (description, job data, book_id, directory)
+        tuple to self.jobs. num is the 1-based position of the book.
+        '''
+        base = os.path.join(self.tdir, unicode(book_id))
+        os.mkdir(base)
+        db = self.db()
+        opf = os.path.join(base, 'metadata.opf')
+        with open(opf, 'wb') as opf_file:
+            mi = create_opf_file(db, book_id, opf_file=opf_file)[0]
+        data = {'opf':opf, 'files':[]}
+        for action in self.all_actions:
+            data[action] = bool(getattr(self, 'opt_'+action).isChecked())
+        cover = os.path.join(base, 'cover.jpg')
+        if db.copy_cover_to(book_id, cover, index_is_id=True):
+            data['cover'] = cover
+        for fmt in formats:
+            ext = fmt.replace('ORIGINAL_', '').lower()
+            with open(os.path.join(base, '%s.%s'%(book_id, ext)), 'wb') as f:
+                db.copy_format_to(book_id, fmt, f, index_is_id=True)
+                data['files'].append(f.name)
+
+        # The keys passed here must match the %(nums)s placeholders used in
+        # the translatable strings
+        desc = ngettext(_('Polish %s')%mi.title,
+                        _('Polish book %(nums)s of %(tot)s (%(title)s)')%dict(
+                            nums=num, tot=len(self.book_id_map),
+                            title=mi.title), len(self.book_id_map))
+        if hasattr(self, 'pd'):
+            self.pd.set_msg(_('Queueing book %(nums)s of %(tot)s (%(title)s)')%dict(
+                            nums=num, tot=len(self.book_id_map), title=mi.title))
+
+        self.jobs.append((desc, data, book_id, base))
+
 class PolishAction(InterfaceAction):
 
     name = 'Polish Books'
@@ -142,6 +199,7 @@ class PolishAction(InterfaceAction):
         self.qaction.setEnabled(enabled)
 
     def get_books_for_polishing(self):
+        from calibre.ebooks.oeb.polish.main import SUPPORTED
         rows = [r.row() for r in
                 self.gui.library_view.selectionModel().selectedRows()]
         if not rows or len(rows) == 0:
@@ -154,14 +212,15 @@ class PolishAction(InterfaceAction):
         supported = set(SUPPORTED)
         for x in SUPPORTED:
             supported.add('ORIGINAL_'+x)
-        ans = {x:set( (db.formats(x, index_is_id=True) or '').split(',') )
-               .intersection(supported) for x in ans}
-        ans = {x:fmts for x, fmts in ans.iteritems() if fmts}
+        ans = [(x, set( (db.formats(x, index_is_id=True) or '').split(',') )
+               .intersection(supported)) for x in ans]
+        ans = [x for x in ans if x[1]]
         if not ans:
             error_dialog(self.gui, _('Cannot polish'),
                 _('Polishing is only supported for books in the %s'
                   ' formats. Convert to one of those formats before polishing.')
                          %_(' or ').join(sorted(SUPPORTED)), show=True)
+        ans = OrderedDict(ans)
         for fmts in ans.itervalues():
             for x in SUPPORTED:
                 if ('ORIGINAL_'+x) in fmts:
@@ -173,7 +232,40 @@ class PolishAction(InterfaceAction):
         if not book_id_map:
             return
         d = Polish(self.gui.library_view.model().db, book_id_map, parent=self.gui)
-        if d.exec_() == d.Accepted:
+        if d.exec_() == d.Accepted and d.jobs:
+            # Each job is the 4-tuple appended by Polish.do_book
+            for desc, data, book_id, base in reversed(d.jobs):
+                job = self.gui.job_manager.run_job(
+                    Dispatcher(self.book_polished), 'gui_polish', args=(data,),
+                    description=desc)
+                job.polish_args = (book_id, base, data['files'])
+
+    def book_polished(self, job):
+        '''
+        Called when a polish job is done. On success, add the polished files
+        back to the book as formats and then remove the temporary directory
+        the job worked in. A failed job is only reported.
+        '''
+        if job.failed:
+            self.gui.job_exception(job)
+            return
+        db = self.gui.current_db
+        book_id, base, files = job.polish_args
+        for path in files:
+            fmt = path.rpartition('.')[-1].upper()
+            if tweaks['save_original_format']:
+                db.save_original_format(book_id, fmt, notify=False)
+            with open(path, 'rb') as f:
+                db.add_format(book_id, fmt, f, index_is_id=True)
+        self.gui.status_bar.show_message(job.description + \
+                (' completed'), 2000)
+        try:
+            shutil.rmtree(base)
+            parent = os.path.dirname(base)
+            os.rmdir(parent)
+        except EnvironmentError:
+            # Cleanup is best effort. The parent directory is shared by all
+            # the jobs of a batch, so rmdir fails while other books remain
             pass
 
 if __name__ == '__main__':
diff --git a/src/calibre/gui2/convert/metadata.py b/src/calibre/gui2/convert/metadata.py
index 9af287f641..1d354a7881 100644
--- a/src/calibre/gui2/convert/metadata.py
+++ b/src/calibre/gui2/convert/metadata.py
@@ -6,7 +6,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal '
 __docformat__ = 'restructuredtext en'
 
-import os, uuid, re
+import os, re
 
 from PyQt4.Qt import QPixmap, SIGNAL
 
@@ -21,15 +21,15 @@ from calibre.utils.icu import sort_key
 from calibre.library.comments import comments_to_html
 from calibre.utils.config import tweaks
 
-def create_opf_file(db, book_id):
+def create_opf_file(db, book_id, opf_file=None):
     mi = db.get_metadata(book_id, index_is_id=True)
-    mi.application_id = uuid.uuid4()
     old_cover = mi.cover
     mi.cover = None
     mi.application_id = mi.uuid
     raw = metadata_to_opf(mi)
     mi.cover = old_cover
-    opf_file = PersistentTemporaryFile('.opf')
+    if opf_file is None:
+        opf_file = PersistentTemporaryFile('.opf')
     opf_file.write(raw)
     opf_file.close()
     return mi, opf_file
diff --git a/src/calibre/utils/ipc/worker.py b/src/calibre/utils/ipc/worker.py
index 7d13a76cf0..0cebdfee07 100644
--- a/src/calibre/utils/ipc/worker.py
+++ b/src/calibre/utils/ipc/worker.py
@@ -31,6 +31,9 @@ PARALLEL_FUNCS = {
     'gui_convert'     :
     ('calibre.gui2.convert.gui_conversion', 'gui_convert', 'notification'),
 
+    'gui_polish'     :
+    ('calibre.ebooks.oeb.polish.main', 'gui_polish', None),
+
     'gui_convert_override'     :
     ('calibre.gui2.convert.gui_conversion', 'gui_convert_override', 'notification'),
 