def get_imported_sheets(name, container, sheets, recursion_level=10, sheet=None):
    '''
    Return the names of all stylesheets that ``name`` pulls in through
    @import rules, either directly or via other imported sheets.

    :param name: Name of the file whose imports are wanted. It is also the
        base against which the hrefs of its own @import rules are resolved.
    :param container: Object providing ``href_to_name(href, base)``.
    :param sheets: Mapping of stylesheet name to parsed stylesheet. An import
        that does not resolve to a key of this mapping is ignored.
    :param recursion_level: How many further levels of nested imports to
        follow after the direct imports of ``name``.
    :param sheet: Parsed stylesheet to scan in place of ``sheets[name]``, for
        callers whose CSS is not itself an entry in ``sheets``.
    :return: A set of keys of ``sheets``; ``name`` itself is never included.
    '''
    def direct_imports(base, css):
        # Names (restricted to keys of ``sheets``) imported directly by ``css``
        found = set()
        for rule in css.cssRules.rulesOfType(CSSRule.IMPORT_RULE):
            if rule.href:
                iname = container.href_to_name(rule.href, base)
                if iname in sheets:
                    found.add(iname)
        return found

    # Test against None explicitly so that an explicitly supplied sheet is
    # always the one scanned, whatever its truth value.
    ans = direct_imports(name, sheets[name] if sheet is None else sheet)

    # Walk the import graph one level at a time. Every sheet is scanned at most
    # once, so sheets that import each other (or share a common import) do not
    # multiply the work the way a plain recursive walk does.
    frontier = set(ans)
    levels_left = recursion_level
    while frontier and levels_left > 0:
        levels_left -= 1
        discovered = set()
        for iname in frontier:
            discovered |= direct_imports(iname, sheets[iname])
        frontier = discovered - ans
        ans |= frontier

    # A cycle of imports can lead back to the starting file
    ans.discard(name)
    return ans
process_namespaces(sheet) sheet.namespaces['h'] = XHTML_NS style_rules = {name:tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) for name, sheet in sheets.iteritems()} - num_of_removed_rules = 0 + num_of_removed_rules = num_of_removed_classes = 0 pseudo_pat = re.compile(r':(first-letter|first-line|link|hover|visited|active|focus|before|after)', re.I) cache = {} @@ -141,9 +159,17 @@ def remove_unused_css(container, report): if mt not in OEB_DOCS: continue root = container.parsed(name) + used_classes = set() for style in root.xpath('//*[local-name()="style"]'): if style.get('type', 'text/css') == 'text/css' and style.text: sheet = container.parse_css(style.text) + if remove_unused_classes: + used_classes |= {icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)} + imports = get_imported_sheets(name, container, sheets, sheet=sheet) + for imported_sheet in imports: + style_rules[imported_sheet] = tuple(filter_used_rules(root, style_rules[imported_sheet], container.log, pseudo_pat, cache)) + if remove_unused_classes: + used_classes |= class_map[imported_sheet] ns = process_namespaces(sheet) sheet.namespaces['h'] = XHTML_NS rules = tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) @@ -160,6 +186,27 @@ def remove_unused_css(container, report): sname = container.href_to_name(link.get('href'), name) if sname in sheets: style_rules[sname] = tuple(filter_used_rules(root, style_rules[sname], container.log, pseudo_pat, cache)) + if remove_unused_classes: + used_classes |= class_map[sname] + + for iname in import_map[sname]: + style_rules[iname] = tuple(filter_used_rules(root, style_rules[iname], container.log, pseudo_pat, cache)) + if remove_unused_classes: + used_classes |= class_map[iname] + + if remove_unused_classes: + for elem in root.xpath('//*[@class]'): + original_classes, classes = elem.get('class', '').split(), [] + for x in original_classes: + if icu_lower(x) in used_classes: + classes.append(x) + if len(classes) != len(original_classes): + if classes: + 
# Textual match for ``.name`` tokens, used only when a selector cannot be
# parsed. It can over-report (for example inside a quoted attribute value);
# that only means fewer classes are treated as unused.
_CLASS_TOKEN_PAT = re.compile(r'\.(-?[_a-zA-Z][-_a-zA-Z0-9]*)')


def _classes_in_selector(selector, classes):
    '''
    Add to the set ``classes`` every ``class_name`` found on ``selector`` or on
    any object reachable from it through its ``selector``, ``subselector`` or
    ``parsed_tree`` attributes.
    '''
    for attr in ('selector', 'subselector', 'parsed_tree'):
        s = getattr(selector, attr, None)
        if s is not None:
            _classes_in_selector(s, classes)
    cn = getattr(selector, 'class_name', None)
    if cn is not None:
        classes.add(cn)


def classes_in_selector(text):
    '''
    Return the set of class names referenced by the selector ``text``.

    If ``text`` cannot be parsed, the class names are instead collected with a
    simple textual scan. Returning nothing in that case would make
    remove_unused_css() regard every class the selector mentions as unused and
    strip it from the HTML.
    '''
    classes = set()
    try:
        for selector in parse(text):
            _classes_in_selector(selector, classes)
    except SelectorSyntaxError:
        classes.update(m.group(1) for m in _CLASS_TOKEN_PAT.finditer(text))
    return classes


def classes_in_rule_list(css_rules):
    '''
    Return the set of class names used by the selectors of all style rules in
    ``css_rules``, descending into any rule that has its own ``cssRules``.
    '''
    classes = set()
    for rule in css_rules:
        if rule.type == rule.STYLE_RULE:
            classes |= classes_in_selector(rule.selectorText)
        elif hasattr(rule, 'cssRules'):
            classes |= classes_in_rule_list(rule.cssRules)
    return classes
def tweak_polish(container, actions, customization=None):
    '''
    Run polish_one() on ``container`` with the defaults from ALL_OPTS
    overridden by ``actions``.

    :param container: The book container handed on to polish_one().
    :param actions: Mapping of option name to value, applied on top of a copy
        of ALL_OPTS.
    :param customization: Passed through unchanged to polish_one().
    :return: ``(report, changed)`` where ``report`` is the list of messages
        polish_one() emitted and ``changed`` is its return value.
    '''
    merged = ALL_OPTS.copy()
    merged.update(actions)
    # polish_one() reads its options as attributes, hence the namedtuple
    Options = namedtuple('Options', ' '.join(ALL_OPTS.iterkeys()))
    messages = []
    changed = polish_one(
        container, Options(**merged), messages.append, customization=customization)
    return messages, changed
93551a43fe..efed019554 100644 --- a/src/calibre/gui2/tweak_book/boss.py +++ b/src/calibre/gui2/tweak_book/boss.py @@ -11,8 +11,7 @@ from functools import partial, wraps from PyQt4.Qt import ( QObject, QApplication, QDialog, QGridLayout, QLabel, QSize, Qt, - QDialogButtonBox, QIcon, QTimer, QPixmap, QTextBrowser, QVBoxLayout, - QInputDialog) + QDialogButtonBox, QIcon, QTimer, QPixmap, QInputDialog) from calibre import prints, isbytestring from calibre.ptempfile import PersistentTemporaryDirectory, TemporaryDirectory @@ -425,36 +424,22 @@ class Boss(QObject): self.edit_file(name, 'html') def polish(self, action, name, parent=None): + from calibre.gui2.tweak_book.polish import get_customization, show_report + customization = get_customization(action, name, parent or self.gui) + if customization is None: + return with BusyCursor(): self.add_savepoint(_('Before: %s') % name) try: - report, changed = tweak_polish(current_container(), {action:True}) + report, changed = tweak_polish(current_container(), {action:True}, customization=customization) except: self.rewind_savepoint() raise if changed: self.apply_container_update_to_gui() - from calibre.ebooks.markdown import markdown - report = markdown('# %s\n\n'%self.current_metadata.title + '\n\n'.join(report), output_format='html4') if not changed: self.rewind_savepoint() - d = QDialog(parent or self.gui) - d.l = QVBoxLayout() - d.setLayout(d.l) - d.e = QTextBrowser(d) - d.l.addWidget(d.e) - d.e.setHtml(report) - d.bb = QDialogButtonBox(QDialogButtonBox.Close) - if changed: - b = d.b = d.bb.addButton(_('See what &changed'), d.bb.AcceptRole) - b.setIcon(QIcon(I('diff.png'))), b.setAutoDefault(False) - b.clicked.connect(partial(self.show_current_diff, allow_revert=True)) - d.bb.button(d.bb.Close).setDefault(True) - d.l.addWidget(d.bb) - d.bb.rejected.connect(d.reject) - d.bb.accepted.connect(d.accept) - d.resize(600, 400) - d.exec_() + show_report(changed, self.current_metadata.title, report, parent or self.gui, 
class Abort(Exception):
    ''' Raised when a customization dialog is closed without being accepted. '''


def customize_remove_unused_css(name, parent, ans):
    '''
    Ask the user whether unused class attributes should be removed along with
    unused CSS rules.

    The check box starts out at the value stored in
    ``tprefs['remove_unused_classes']``. When the dialog is accepted, the
    chosen value is written both to ``ans['remove_unused_classes']`` and back
    to ``tprefs``.

    :param name: Not used by this function.
    :param parent: Parent widget for the dialog.
    :param ans: Dictionary that receives the user's choice.
    :raises Abort: If the dialog is not accepted.
    '''
    dialog = QDialog(parent)
    layout = QVBoxLayout()
    dialog.l = layout
    dialog.setLayout(layout)
    dialog.setWindowTitle(_('Remove unused CSS'))

    explanation = QLabel(_(
        'This will remove all CSS rules that do not match any actual content. You'
        ' can also have it automatically remove any class attributes from the HTML'
        ' that do not match any CSS rules, by using the check box below:'))
    dialog.la = explanation
    explanation.setWordWrap(True)
    layout.addWidget(explanation)

    checkbox = QCheckBox(_('Remove unused &class attributes'))
    dialog.c = checkbox
    checkbox.setChecked(tprefs['remove_unused_classes'])
    layout.addWidget(checkbox)

    buttons = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel)
    dialog.bb = buttons
    layout.addWidget(buttons)
    buttons.rejected.connect(dialog.reject)
    buttons.accepted.connect(dialog.accept)

    if dialog.exec_() != dialog.Accepted:
        raise Abort()
    remove_classes = checkbox.isChecked()
    ans['remove_unused_classes'] = remove_classes
    tprefs['remove_unused_classes'] = remove_classes


def get_customization(action, name, parent):
    '''
    Return the customization dictionary to use for ``action``.

    The result starts as a copy of CUSTOMIZATION. For the
    ``'remove_unused_css'`` action the user is asked to adjust it first.

    :return: The dictionary, or None if the user aborted the dialog.
    '''
    options = CUSTOMIZATION.copy()
    if action != 'remove_unused_css':
        return options
    try:
        customize_remove_unused_css(name, parent, options)
    except Abort:
        return None
    return options


def format_report(title, report):
    '''
    Render ``title`` as a top level heading followed by the entries of
    ``report`` (an iterable of strings, joined by blank lines), converting the
    resulting markdown text to HTML.
    '''
    from calibre.ebooks.markdown import markdown
    heading = '# %s\n\n' % title
    body = '\n\n'.join(report)
    return markdown(heading + body, output_format='html4')


def show_report(changed, title, report, parent, show_current_diff):
    '''
    Show the polishing report in a modal dialog.

    :param changed: If true, the dialog gets an extra button that calls
        ``show_current_diff(allow_revert=True)`` when clicked.
    :param title: Heading for the report, see format_report().
    :param report: The report entries, see format_report().
    :param parent: Parent widget for the dialog.
    :param show_current_diff: Callable used by the extra button.
    '''
    html = format_report(title, report)
    dialog = QDialog(parent)
    layout = QVBoxLayout()
    dialog.l = layout
    dialog.setLayout(layout)

    browser = QTextBrowser(dialog)
    dialog.e = browser
    layout.addWidget(browser)
    browser.setHtml(html)

    buttons = QDialogButtonBox(QDialogButtonBox.Close)
    dialog.bb = buttons
    if changed:
        diff_button = buttons.addButton(_('See what &changed'), buttons.AcceptRole)
        dialog.b = diff_button
        diff_button.setIcon(QIcon(I('diff.png')))
        diff_button.setAutoDefault(False)
        diff_button.clicked.connect(partial(show_current_diff, allow_revert=True))
    # NOTE(review): nesting here is reconstructed from whitespace-collapsed
    # text; Close is assumed to become the default button whether or not
    # anything changed -- confirm against the original file.
    buttons.button(buttons.Close).setDefault(True)
    layout.addWidget(buttons)
    buttons.rejected.connect(dialog.reject)
    buttons.accepted.connect(dialog.accept)
    dialog.resize(600, 400)
    dialog.exec_()