From 2644d1f4ec529c3176c23edfc6dda33044f7482a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 11 Aug 2021 10:09:13 +0530 Subject: [PATCH] Edit/Polish book: Remove unused CSS now also removes unreferenced stylesheets. Fixes #1939469 [Remove unused CSS leaves unreferenced stylesheets intact](https://bugs.launchpad.net/calibre/+bug/1939469) --- src/calibre/ebooks/oeb/polish/css.py | 58 ++++++++++++++++++++----- src/calibre/ebooks/oeb/polish/main.py | 4 +- src/calibre/gui2/tweak_book/__init__.py | 1 + src/calibre/gui2/tweak_book/polish.py | 11 ++++- 4 files changed, 59 insertions(+), 15 deletions(-) diff --git a/src/calibre/ebooks/oeb/polish/css.py b/src/calibre/ebooks/oeb/polish/css.py index d0ba2ca32f..5c8a17cd5e 100644 --- a/src/calibre/ebooks/oeb/polish/css.py +++ b/src/calibre/ebooks/oeb/polish/css.py @@ -23,23 +23,31 @@ from polyglot.builtins import iteritems, itervalues, unicode_type, filter from polyglot.functools import lru_cache +class SelectorStatus: + any_used: bool = False + any_unused: bool = False + + def mark_used_selectors(rules, log, select): - any_unused = False + ans = SelectorStatus() for rule in rules: for selector in rule.selectorList: if getattr(selector, 'calibre_used', False): + ans.any_used = True continue try: if select.has_matches(selector.selectorText): selector.calibre_used = True + ans.any_used = True else: - any_unused = True + ans.any_unused = True selector.calibre_used = False except SelectorError: # Cannot parse/execute this selector, be safe and assume it # matches something selector.calibre_used = True - return any_unused + ans.any_used = True + return ans def get_imported_sheets(name, container, sheets, recursion_level=10, sheet=None): @@ -115,14 +123,16 @@ def merge_identical_properties(sheet): def remove_unused_selectors_and_rules(rules_container, rules, removal_stats): - found_any = False + ans = SelectorStatus() for r in rules: removals = [] for i, sel in enumerate(r.selectorList): - if not getattr(sel, 'calibre_used', True): + if getattr(sel, 'calibre_used', True): + ans.any_used = True + else: removals.append(i) if removals: - found_any = True + ans.any_unused = True if len(removals) == len(r.selectorList): rules_container.remove(r) removal_stats['rules'] += 1 @@ -130,16 +140,24 @@ def remove_unused_selectors_and_rules(rules_container, rules, removal_stats): removal_stats['selectors'] += len(removals) for i in reversed(removals): del r.selectorList[i] - return found_any + return ans -def remove_unused_css(container, report=None, remove_unused_classes=False, merge_rules=False, merge_rules_with_identical_properties=False): +def remove_unused_css( + container, report=None, + remove_unused_classes=False, + merge_rules=False, + merge_rules_with_identical_properties=False, + remove_unreferenced_sheets=False, +): ''' Remove all unused CSS rules from the book. An unused CSS rule is one that does not match any actual content. :param report: An optional callable that takes a single argument. It is called with information about the operations being performed. :param remove_unused_classes: If True, class attributes in the HTML that do not match any CSS rules are also removed. :param merge_rules: If True, rules with identical selectors are merged. + :param merge_rules_with_identical_properties: If True, rules with identical properties are merged. + :param remove_unreferenced_sheets: If True, stylesheets that are not referenced by any content are removed ''' report = report or (lambda x:x) @@ -164,6 +182,7 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge container.dirty(name) num_rules_merged += num import_map = {name:get_imported_sheets(name, container, sheets) for name in sheets} + unreferenced_sheets = set(sheets) if remove_unused_classes: class_map = {name:{icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)} for name, sheet in iteritems(sheets)} style_rules = {name:tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) for name, sheet in iteritems(sheets)} @@ -194,11 +213,12 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge used_classes |= {icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)} imports = get_imported_sheets(name, container, sheets, sheet=sheet) for imported_sheet in imports: + unreferenced_sheets.discard(imported_sheet) mark_used_selectors(style_rules[imported_sheet], container.log, select) if remove_unused_classes: used_classes |= class_map[imported_sheet] rules = tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) - if mark_used_selectors(rules, container.log, select): + if mark_used_selectors(rules, container.log, select).any_unused: remove_unused_selectors_and_rules(sheet.cssRules, rules, removal_stats) style.text = force_unicode(sheet.cssText, 'utf-8') pretty_script_or_style(container, style) @@ -211,8 +231,10 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge mark_used_selectors(style_rules[sname], container.log, select) if remove_unused_classes: used_classes |= class_map[sname] + unreferenced_sheets.discard(sname) for iname in import_map[sname]: + unreferenced_sheets.discard(iname) mark_used_selectors(style_rules[iname], container.log, select) if remove_unused_classes: used_classes |= class_map[iname] @@ -232,11 +254,18 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge container.dirty(name) for name, sheet in iteritems(sheets): - any_found = remove_unused_selectors_and_rules(sheet.cssRules, style_rules[name], removal_stats) - if any_found: + if name in unreferenced_sheets: + continue + q = remove_unused_selectors_and_rules(sheet.cssRules, style_rules[name], removal_stats) + if q.any_unused: container.dirty(name) + num_sheets_removed = 0 + if remove_unreferenced_sheets and len(unreferenced_sheets): + num_sheets_removed += len(unreferenced_sheets) + for uname in unreferenced_sheets: + container.remove_item(uname) - num_changes = num_merged + num_of_removed_classes + num_rules_merged + removal_stats['rules'] + removal_stats['selectors'] + num_changes = num_merged + num_of_removed_classes + num_rules_merged + removal_stats['rules'] + removal_stats['selectors'] + num_sheets_removed if num_changes > 0: if removal_stats['rules']: report(ngettext('Removed one unused CSS style rule', 'Removed {} unused CSS style rules', @@ -253,6 +282,9 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge if num_rules_merged > 0: report(ngettext('Merged one CSS style rule with identical properties', 'Merged {} CSS style rules with identical properties', num_rules_merged).format(num_rules_merged)) + if num_sheets_removed: + report(ngettext('Removed one unreferenced stylesheet', 'Removed {} unreferenced stylesheets', + num_sheets_removed).format(num_sheets_removed)) if not removal_stats['rules']: report(_('No unused CSS style rules found')) if not removal_stats['selectors']: @@ -261,6 +293,8 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge report(_('No unused class attributes found')) if merge_rules and num_merged == 0: report(_('No style rules that could be merged found')) + if remove_unreferenced_sheets and num_sheets_removed == 0: + report(_('No unused stylesheets found')) return num_changes > 0 diff --git a/src/calibre/ebooks/oeb/polish/main.py b/src/calibre/ebooks/oeb/polish/main.py index 509a9c761a..746601ba69 100644 --- a/src/calibre/ebooks/oeb/polish/main.py +++ b/src/calibre/ebooks/oeb/polish/main.py @@ -44,6 +44,7 @@ CUSTOMIZATION = { 'remove_unused_classes': False, 'merge_identical_selectors': False, 'merge_rules_with_identical_properties': False, + 'remove_unreferenced_sheets': True, 'remove_ncx': True, } @@ -238,7 +239,8 @@ def polish_one(ebook, opts, report, customization=None): ebook, report, remove_unused_classes=customization['remove_unused_classes'], merge_rules=customization['merge_identical_selectors'], - merge_rules_with_identical_properties=customization['merge_rules_with_identical_properties'] + merge_rules_with_identical_properties=customization['merge_rules_with_identical_properties'], + remove_unreferenced_sheets=customization['remove_unreferenced_sheets'] ): changed = True report('') diff --git a/src/calibre/gui2/tweak_book/__init__.py b/src/calibre/gui2/tweak_book/__init__.py index 445e57199f..02839b60b6 100644 --- a/src/calibre/gui2/tweak_book/__init__.py +++ b/src/calibre/gui2/tweak_book/__init__.py @@ -56,6 +56,7 @@ d['remove_unused_classes'] = False d['merge_identical_selectors'] = False d['merge_identical_selectors'] = False d['merge_rules_with_identical_properties'] = False +d['remove_unreferenced_sheets'] = True d['global_book_toolbar'] = [ 'new-file', 'open-book', 'save-book', None, 'global-undo', 'global-redo', 'create-checkpoint', None, 'donate', 'user-manual'] d['global_tools_toolbar'] = [ diff --git a/src/calibre/gui2/tweak_book/polish.py b/src/calibre/gui2/tweak_book/polish.py index 0d71258cc6..8e70e615b9 100644 --- a/src/calibre/gui2/tweak_book/polish.py +++ b/src/calibre/gui2/tweak_book/polish.py @@ -47,20 +47,26 @@ def customize_remove_unused_css(name, parent, ans): l.addWidget(c) d.la2 = label('' + _( 'Remove all class attributes from the HTML that do not match any existing CSS rules')) - d.m = m = QCheckBox(_('Merge CSS rules with identical selectors')) + d.m = m = QCheckBox(_('Merge CSS rules with identical &selectors')) m.setChecked(tprefs['merge_identical_selectors']) l.addWidget(m) d.la3 = label('' + _( 'Merge CSS rules in the same stylesheet that have identical selectors.' ' Note that in rare cases merging can result in a change to the effective styling' ' of the book, so use with care.')) - d.p = p = QCheckBox(_('Merge CSS rules with identical properties')) + d.p = p = QCheckBox(_('Merge CSS rules with identical &properties')) p.setChecked(tprefs['merge_rules_with_identical_properties']) l.addWidget(p) d.la4 = label('' + _( 'Merge CSS rules in the same stylesheet that have identical properties.' ' Note that in rare cases merging can result in a change to the effective styling' ' of the book, so use with care.')) + d.p = p = QCheckBox(_('Remove &unreferenced style sheets')) + p.setChecked(tprefs['remove_unreferenced_sheets']) + l.addWidget(p) + d.la4 = label('' + _( + 'Remove stylesheets that are not referenced by any content.' + )) d.bb = QDialogButtonBox(QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel) d.l.addWidget(d.bb) @@ -70,6 +76,7 @@ def customize_remove_unused_css(name, parent, ans): ans['remove_unused_classes'] = tprefs['remove_unused_classes'] = c.isChecked() ans['merge_identical_selectors'] = tprefs['merge_identical_selectors'] = m.isChecked() ans['merge_rules_with_identical_properties'] = tprefs['merge_rules_with_identical_properties'] = p.isChecked() + ans['remove_unreferenced_sheets'] = tprefs['remove_unreferenced_sheets'] = p.isChecked() if ret != QDialog.DialogCode.Accepted: raise Abort()