Edit/Polish book: Remove unused CSS now also removes unreferenced stylesheets. Fixes #1939469 [Remove unused CSS leaves unreferenced stylesheets intact](https://bugs.launchpad.net/calibre/+bug/1939469)

This commit is contained in:
Kovid Goyal 2021-08-11 10:09:13 +05:30
parent e2a088a53b
commit 2644d1f4ec
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 59 additions and 15 deletions

View File

@ -23,23 +23,31 @@ from polyglot.builtins import iteritems, itervalues, unicode_type, filter
from polyglot.functools import lru_cache from polyglot.functools import lru_cache
class SelectorStatus:
    '''
    Summary of a pass over a set of CSS selectors.

    Both flags default to False at class level; code that fills in a status
    assigns to the attributes on an instance, which shadows the class-level
    default for that instance only.
    '''

    # True once at least one selector has been seen that is considered used
    any_used: bool = False
    # True once at least one selector has been seen that is considered unused
    any_unused: bool = False
def mark_used_selectors(rules, log, select):
    '''
    Tag every selector in ``rules`` with a ``calibre_used`` attribute.

    :param rules: Iterable of style rules, each exposing a ``selectorList``
        whose items have a ``selectorText`` attribute.
    :param log: Not used by this function; kept so existing callers that pass
        it positionally continue to work.
    :param select: Object with a ``has_matches(selector_text)`` method that
        reports whether the selector matches anything.
    :return: A :class:`SelectorStatus`. ``any_used`` is True if at least one
        selector was already marked used, matched, or could not be evaluated;
        ``any_unused`` is True if at least one selector was tested and did not
        match.
    '''
    ans = SelectorStatus()
    for rule in rules:
        for selector in rule.selectorList:
            # Selectors already marked as used by an earlier call are not
            # tested again; a selector marked unused earlier is re-tested.
            if getattr(selector, 'calibre_used', False):
                ans.any_used = True
                continue
            try:
                if select.has_matches(selector.selectorText):
                    selector.calibre_used = True
                    ans.any_used = True
                else:
                    ans.any_unused = True
                    selector.calibre_used = False
            except SelectorError:
                # Cannot parse/execute this selector, be safe and assume it
                # matches something
                selector.calibre_used = True
                ans.any_used = True
    return ans
def get_imported_sheets(name, container, sheets, recursion_level=10, sheet=None): def get_imported_sheets(name, container, sheets, recursion_level=10, sheet=None):
@ -115,14 +123,16 @@ def merge_identical_properties(sheet):
def remove_unused_selectors_and_rules(rules_container, rules, removal_stats): def remove_unused_selectors_and_rules(rules_container, rules, removal_stats):
found_any = False ans = SelectorStatus()
for r in rules: for r in rules:
removals = [] removals = []
for i, sel in enumerate(r.selectorList): for i, sel in enumerate(r.selectorList):
if not getattr(sel, 'calibre_used', True): if getattr(sel, 'calibre_used', True):
ans.any_used = True
else:
removals.append(i) removals.append(i)
if removals: if removals:
found_any = True ans.any_unused = True
if len(removals) == len(r.selectorList): if len(removals) == len(r.selectorList):
rules_container.remove(r) rules_container.remove(r)
removal_stats['rules'] += 1 removal_stats['rules'] += 1
@ -130,16 +140,24 @@ def remove_unused_selectors_and_rules(rules_container, rules, removal_stats):
removal_stats['selectors'] += len(removals) removal_stats['selectors'] += len(removals)
for i in reversed(removals): for i in reversed(removals):
del r.selectorList[i] del r.selectorList[i]
return found_any return ans
def remove_unused_css(container, report=None, remove_unused_classes=False, merge_rules=False, merge_rules_with_identical_properties=False): def remove_unused_css(
container, report=None,
remove_unused_classes=False,
merge_rules=False,
merge_rules_with_identical_properties=False,
remove_unreferenced_sheets=False,
):
''' '''
Remove all unused CSS rules from the book. An unused CSS rule is one that does not match any actual content. Remove all unused CSS rules from the book. An unused CSS rule is one that does not match any actual content.
:param report: An optional callable that takes a single argument. It is called with information about the operations being performed. :param report: An optional callable that takes a single argument. It is called with information about the operations being performed.
:param remove_unused_classes: If True, class attributes in the HTML that do not match any CSS rules are also removed. :param remove_unused_classes: If True, class attributes in the HTML that do not match any CSS rules are also removed.
:param merge_rules: If True, rules with identical selectors are merged. :param merge_rules: If True, rules with identical selectors are merged.
:param merge_rules_with_identical_properties: If True, rules with identical properties are merged.
:param remove_unreferenced_sheets: If True, stylesheets that are not referenced by any content are removed
''' '''
report = report or (lambda x:x) report = report or (lambda x:x)
@ -164,6 +182,7 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge
container.dirty(name) container.dirty(name)
num_rules_merged += num num_rules_merged += num
import_map = {name:get_imported_sheets(name, container, sheets) for name in sheets} import_map = {name:get_imported_sheets(name, container, sheets) for name in sheets}
unreferenced_sheets = set(sheets)
if remove_unused_classes: if remove_unused_classes:
class_map = {name:{icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)} for name, sheet in iteritems(sheets)} class_map = {name:{icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)} for name, sheet in iteritems(sheets)}
style_rules = {name:tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) for name, sheet in iteritems(sheets)} style_rules = {name:tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) for name, sheet in iteritems(sheets)}
@ -194,11 +213,12 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge
used_classes |= {icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)} used_classes |= {icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)}
imports = get_imported_sheets(name, container, sheets, sheet=sheet) imports = get_imported_sheets(name, container, sheets, sheet=sheet)
for imported_sheet in imports: for imported_sheet in imports:
unreferenced_sheets.discard(imported_sheet)
mark_used_selectors(style_rules[imported_sheet], container.log, select) mark_used_selectors(style_rules[imported_sheet], container.log, select)
if remove_unused_classes: if remove_unused_classes:
used_classes |= class_map[imported_sheet] used_classes |= class_map[imported_sheet]
rules = tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) rules = tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
if mark_used_selectors(rules, container.log, select): if mark_used_selectors(rules, container.log, select).any_unused:
remove_unused_selectors_and_rules(sheet.cssRules, rules, removal_stats) remove_unused_selectors_and_rules(sheet.cssRules, rules, removal_stats)
style.text = force_unicode(sheet.cssText, 'utf-8') style.text = force_unicode(sheet.cssText, 'utf-8')
pretty_script_or_style(container, style) pretty_script_or_style(container, style)
@ -211,8 +231,10 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge
mark_used_selectors(style_rules[sname], container.log, select) mark_used_selectors(style_rules[sname], container.log, select)
if remove_unused_classes: if remove_unused_classes:
used_classes |= class_map[sname] used_classes |= class_map[sname]
unreferenced_sheets.discard(sname)
for iname in import_map[sname]: for iname in import_map[sname]:
unreferenced_sheets.discard(iname)
mark_used_selectors(style_rules[iname], container.log, select) mark_used_selectors(style_rules[iname], container.log, select)
if remove_unused_classes: if remove_unused_classes:
used_classes |= class_map[iname] used_classes |= class_map[iname]
@ -232,11 +254,18 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge
container.dirty(name) container.dirty(name)
for name, sheet in iteritems(sheets): for name, sheet in iteritems(sheets):
any_found = remove_unused_selectors_and_rules(sheet.cssRules, style_rules[name], removal_stats) if name in unreferenced_sheets:
if any_found: continue
q = remove_unused_selectors_and_rules(sheet.cssRules, style_rules[name], removal_stats)
if q.any_unused:
container.dirty(name) container.dirty(name)
num_sheets_removed = 0
if remove_unreferenced_sheets and len(unreferenced_sheets):
num_sheets_removed += len(unreferenced_sheets)
for uname in unreferenced_sheets:
container.remove_item(uname)
num_changes = num_merged + num_of_removed_classes + num_rules_merged + removal_stats['rules'] + removal_stats['selectors'] num_changes = num_merged + num_of_removed_classes + num_rules_merged + removal_stats['rules'] + removal_stats['selectors'] + num_sheets_removed
if num_changes > 0: if num_changes > 0:
if removal_stats['rules']: if removal_stats['rules']:
report(ngettext('Removed one unused CSS style rule', 'Removed {} unused CSS style rules', report(ngettext('Removed one unused CSS style rule', 'Removed {} unused CSS style rules',
@ -253,6 +282,9 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge
if num_rules_merged > 0: if num_rules_merged > 0:
report(ngettext('Merged one CSS style rule with identical properties', 'Merged {} CSS style rules with identical properties', report(ngettext('Merged one CSS style rule with identical properties', 'Merged {} CSS style rules with identical properties',
num_rules_merged).format(num_rules_merged)) num_rules_merged).format(num_rules_merged))
if num_sheets_removed:
report(ngettext('Removed one unreferenced stylesheet', 'Removed {} unreferenced stylesheets',
num_sheets_removed).format(num_sheets_removed))
if not removal_stats['rules']: if not removal_stats['rules']:
report(_('No unused CSS style rules found')) report(_('No unused CSS style rules found'))
if not removal_stats['selectors']: if not removal_stats['selectors']:
@ -261,6 +293,8 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge
report(_('No unused class attributes found')) report(_('No unused class attributes found'))
if merge_rules and num_merged == 0: if merge_rules and num_merged == 0:
report(_('No style rules that could be merged found')) report(_('No style rules that could be merged found'))
if remove_unreferenced_sheets and num_sheets_removed == 0:
report(_('No unused stylesheets found'))
return num_changes > 0 return num_changes > 0

View File

@ -44,6 +44,7 @@ CUSTOMIZATION = {
'remove_unused_classes': False, 'remove_unused_classes': False,
'merge_identical_selectors': False, 'merge_identical_selectors': False,
'merge_rules_with_identical_properties': False, 'merge_rules_with_identical_properties': False,
'remove_unreferenced_sheets': True,
'remove_ncx': True, 'remove_ncx': True,
} }
@ -238,7 +239,8 @@ def polish_one(ebook, opts, report, customization=None):
ebook, report, ebook, report,
remove_unused_classes=customization['remove_unused_classes'], remove_unused_classes=customization['remove_unused_classes'],
merge_rules=customization['merge_identical_selectors'], merge_rules=customization['merge_identical_selectors'],
merge_rules_with_identical_properties=customization['merge_rules_with_identical_properties'] merge_rules_with_identical_properties=customization['merge_rules_with_identical_properties'],
remove_unreferenced_sheets=customization['remove_unreferenced_sheets']
): ):
changed = True changed = True
report('') report('')

View File

@ -56,6 +56,7 @@ d['remove_unused_classes'] = False
d['merge_identical_selectors'] = False d['merge_identical_selectors'] = False
d['merge_identical_selectors'] = False d['merge_identical_selectors'] = False
d['merge_rules_with_identical_properties'] = False d['merge_rules_with_identical_properties'] = False
d['remove_unreferenced_sheets'] = True
d['global_book_toolbar'] = [ d['global_book_toolbar'] = [
'new-file', 'open-book', 'save-book', None, 'global-undo', 'global-redo', 'create-checkpoint', None, 'donate', 'user-manual'] 'new-file', 'open-book', 'save-book', None, 'global-undo', 'global-redo', 'create-checkpoint', None, 'donate', 'user-manual']
d['global_tools_toolbar'] = [ d['global_tools_toolbar'] = [

View File

@ -47,20 +47,26 @@ def customize_remove_unused_css(name, parent, ans):
l.addWidget(c) l.addWidget(c)
d.la2 = label('<span style="font-size:small; font-style: italic">' + _( d.la2 = label('<span style="font-size:small; font-style: italic">' + _(
'Remove all class attributes from the HTML that do not match any existing CSS rules')) 'Remove all class attributes from the HTML that do not match any existing CSS rules'))
d.m = m = QCheckBox(_('Merge CSS rules with identical selectors')) d.m = m = QCheckBox(_('Merge CSS rules with identical &selectors'))
m.setChecked(tprefs['merge_identical_selectors']) m.setChecked(tprefs['merge_identical_selectors'])
l.addWidget(m) l.addWidget(m)
d.la3 = label('<span style="font-size:small; font-style: italic">' + _( d.la3 = label('<span style="font-size:small; font-style: italic">' + _(
'Merge CSS rules in the same stylesheet that have identical selectors.' 'Merge CSS rules in the same stylesheet that have identical selectors.'
' Note that in rare cases merging can result in a change to the effective styling' ' Note that in rare cases merging can result in a change to the effective styling'
' of the book, so use with care.')) ' of the book, so use with care.'))
d.p = p = QCheckBox(_('Merge CSS rules with identical properties')) d.p = p = QCheckBox(_('Merge CSS rules with identical &properties'))
p.setChecked(tprefs['merge_rules_with_identical_properties']) p.setChecked(tprefs['merge_rules_with_identical_properties'])
l.addWidget(p) l.addWidget(p)
d.la4 = label('<span style="font-size:small; font-style: italic">' + _( d.la4 = label('<span style="font-size:small; font-style: italic">' + _(
'Merge CSS rules in the same stylesheet that have identical properties.' 'Merge CSS rules in the same stylesheet that have identical properties.'
' Note that in rare cases merging can result in a change to the effective styling' ' Note that in rare cases merging can result in a change to the effective styling'
' of the book, so use with care.')) ' of the book, so use with care.'))
d.p = p = QCheckBox(_('Remove &unreferenced style sheets'))
p.setChecked(tprefs['remove_unreferenced_sheets'])
l.addWidget(p)
d.la4 = label('<span style="font-size:small; font-style: italic">' + _(
'Remove stylesheets that are not referenced by any content.'
))
d.bb = QDialogButtonBox(QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel) d.bb = QDialogButtonBox(QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel)
d.l.addWidget(d.bb) d.l.addWidget(d.bb)
@ -70,6 +76,7 @@ def customize_remove_unused_css(name, parent, ans):
ans['remove_unused_classes'] = tprefs['remove_unused_classes'] = c.isChecked() ans['remove_unused_classes'] = tprefs['remove_unused_classes'] = c.isChecked()
ans['merge_identical_selectors'] = tprefs['merge_identical_selectors'] = m.isChecked() ans['merge_identical_selectors'] = tprefs['merge_identical_selectors'] = m.isChecked()
ans['merge_rules_with_identical_properties'] = tprefs['merge_rules_with_identical_properties'] = p.isChecked() ans['merge_rules_with_identical_properties'] = tprefs['merge_rules_with_identical_properties'] = p.isChecked()
ans['remove_unreferenced_sheets'] = tprefs['remove_unreferenced_sheets'] = p.isChecked()
if ret != QDialog.DialogCode.Accepted: if ret != QDialog.DialogCode.Accepted:
raise Abort() raise Abort()