Edit Book: Remove unused CSS: Add an option to also remove unused class attributes from the HTML

Edit Book: Remove unused CSS: Handle @import rules when checking for unused CSS
This commit is contained in:
Kovid Goyal 2014-06-15 12:36:14 +05:30
parent 7396cb76d5
commit eb5889b92f
5 changed files with 176 additions and 32 deletions

View File

@ -9,8 +9,10 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
import re
from lxml import etree
from cssselect import HTMLTranslator
from cssutils.css import CSSRule
from cssselect import HTMLTranslator, parse
from cssselect.xpath import XPathExpr, is_safe_name
from cssselect.parser import SelectorSyntaxError
from calibre import force_unicode
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XPNSMAP, XHTML_NS
@ -118,8 +120,21 @@ def preserve_htmlns_prefix(sheet, prefix):
else:
sheet.namespaces[prefix] = XHTML_NS
def remove_unused_css(container, report):
from cssutils.css import CSSRule
def get_imported_sheets(name, container, sheets, recursion_level=10, sheet=None):
    '''
    Return the names of the stylesheets pulled in by ``@import`` rules.

    :param name: Name of the file the ``@import`` hrefs are resolved against
        (it is passed as the base to ``container.href_to_name``). Also the
        key used to look up the sheet in ``sheets`` when ``sheet`` is not
        supplied.
    :param container: Object providing ``href_to_name(href, base_name)``.
    :param sheets: Mapping of stylesheet name to parsed stylesheet. Imports
        that do not resolve to a key of this mapping are ignored.
    :param recursion_level: Number of additional levels of nested imports to
        follow. Every nested call decrements it, which bounds the recursion
        depth.
    :param sheet: Optional parsed stylesheet to scan instead of
        ``sheets[name]``.
    :return: A set of stylesheet names. ``name`` itself is never included.
    '''
    # Compare against None explicitly so that a caller supplied sheet is
    # always the one scanned, whatever its truth value. Falling back to
    # sheets[name] is only valid when no sheet was passed in.
    if sheet is None:
        sheet = sheets[name]
    ans = set()
    for rule in sheet.cssRules.rulesOfType(CSSRule.IMPORT_RULE):
        if rule.href:
            iname = container.href_to_name(rule.href, name)
            if iname in sheets:
                ans.add(iname)
    if recursion_level > 0:
        # Iterate over a snapshot, since ans grows while nested imports are
        # being merged into it.
        for imported_sheet in tuple(ans):
            ans |= get_imported_sheets(imported_sheet, container, sheets, recursion_level=recursion_level-1)
    # A sheet that (directly or indirectly) imports itself must not be
    # reported as one of its own imports.
    ans.discard(name)
    return ans
def remove_unused_css(container, report, remove_unused_classes=False):
def safe_parse(name):
try:
return container.parsed(name)
@ -127,13 +142,16 @@ def remove_unused_css(container, report):
pass
sheets = {name:safe_parse(name) for name, mt in container.mime_map.iteritems() if mt in OEB_STYLES}
sheets = {k:v for k, v in sheets.iteritems() if v is not None}
import_map = {name:get_imported_sheets(name, container, sheets) for name in sheets}
if remove_unused_classes:
class_map = {name:{icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)} for name, sheet in sheets.iteritems()}
sheet_namespace = {}
for sheet in sheets.itervalues():
sheet_namespace[sheet] = process_namespaces(sheet)
sheet.namespaces['h'] = XHTML_NS
style_rules = {name:tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) for name, sheet in sheets.iteritems()}
num_of_removed_rules = 0
num_of_removed_rules = num_of_removed_classes = 0
pseudo_pat = re.compile(r':(first-letter|first-line|link|hover|visited|active|focus|before|after)', re.I)
cache = {}
@ -141,9 +159,17 @@ def remove_unused_css(container, report):
if mt not in OEB_DOCS:
continue
root = container.parsed(name)
used_classes = set()
for style in root.xpath('//*[local-name()="style"]'):
if style.get('type', 'text/css') == 'text/css' and style.text:
sheet = container.parse_css(style.text)
if remove_unused_classes:
used_classes |= {icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)}
imports = get_imported_sheets(name, container, sheets, sheet=sheet)
for imported_sheet in imports:
style_rules[imported_sheet] = tuple(filter_used_rules(root, style_rules[imported_sheet], container.log, pseudo_pat, cache))
if remove_unused_classes:
used_classes |= class_map[imported_sheet]
ns = process_namespaces(sheet)
sheet.namespaces['h'] = XHTML_NS
rules = tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
@ -160,6 +186,27 @@ def remove_unused_css(container, report):
sname = container.href_to_name(link.get('href'), name)
if sname in sheets:
style_rules[sname] = tuple(filter_used_rules(root, style_rules[sname], container.log, pseudo_pat, cache))
if remove_unused_classes:
used_classes |= class_map[sname]
for iname in import_map[sname]:
style_rules[iname] = tuple(filter_used_rules(root, style_rules[iname], container.log, pseudo_pat, cache))
if remove_unused_classes:
used_classes |= class_map[iname]
if remove_unused_classes:
for elem in root.xpath('//*[@class]'):
original_classes, classes = elem.get('class', '').split(), []
for x in original_classes:
if icu_lower(x) in used_classes:
classes.append(x)
if len(classes) != len(original_classes):
if classes:
elem.set('class', ' '.join(classes))
else:
del elem.attrib['class']
num_of_removed_classes += len(original_classes) - len(classes)
container.dirty(name)
for name, sheet in sheets.iteritems():
preserve_htmlns_prefix(sheet, sheet_namespace[sheet])
@ -170,10 +217,17 @@ def remove_unused_css(container, report):
container.dirty(name)
if num_of_removed_rules > 0:
report(_('Removed %d unused CSS style rules') % num_of_removed_rules)
report(ngettext('Removed %d unused CSS style rule', 'Removed %d unused CSS style rules',
num_of_removed_rules) % num_of_removed_rules)
else:
report(_('No unused CSS style rules found'))
return num_of_removed_rules > 0
if remove_unused_classes:
if num_of_removed_classes > 0:
report(ngettext('Removed %d unused class from the HTML', 'Removed %d unused classes from the HTML',
num_of_removed_classes) % num_of_removed_classes)
else:
report(_('No unused class attributes found'))
return num_of_removed_rules + num_of_removed_classes > 0
def filter_declaration(style, properties):
changed = False
@ -251,3 +305,30 @@ def filter_css(container, properties, names=()):
return doc_changed
def _classes_in_selector(selector, classes):
for attr in ('selector', 'subselector', 'parsed_tree'):
s = getattr(selector, attr, None)
if s is not None:
_classes_in_selector(s, classes)
cn = getattr(selector, 'class_name', None)
if cn is not None:
classes.add(cn)
def classes_in_selector(text):
    '''
    Return the set of class names found in the selector text ``text``.

    The text is parsed with cssselect's ``parse`` and every resulting
    selector is walked with ``_classes_in_selector``. A SelectorSyntaxError
    is swallowed; whatever was collected before it was raised is returned.
    '''
    found = set()
    try:
        selectors = parse(text)
        for selector in selectors:
            _classes_in_selector(selector, found)
    except SelectorSyntaxError:
        pass
    return found
def classes_in_rule_list(css_rules):
    '''
    Return the set of class names used by the rules in ``css_rules``.

    Rules whose ``type`` equals their ``STYLE_RULE`` contribute the classes
    found in their ``selectorText``. Any other rule that has a ``cssRules``
    attribute is descended into recursively. All remaining rules are
    ignored.
    '''
    found = set()
    for rule in css_rules:
        if rule.type == rule.STYLE_RULE:
            found.update(classes_in_selector(rule.selectorText))
            continue
        if hasattr(rule, 'cssRules'):
            found.update(classes_in_rule_list(rule.cssRules))
    return found

View File

@ -33,6 +33,10 @@ ALL_OPTS = {
'remove_unused_css':False,
}
CUSTOMIZATION = {
'remove_unused_classes': False,
}
SUPPORTED = {'EPUB', 'AZW3'}
# Help {{{
@ -129,10 +133,11 @@ def update_metadata(ebook, new_opf):
stream.truncate()
stream.write(opf.render())
def polish_one(ebook, opts, report):
def polish_one(ebook, opts, report, customization=None):
rt = lambda x: report('\n### ' + x)
jacket = None
changed = False
customization = customization or CUSTOMIZATION.copy()
if opts.subset or opts.embed:
stats = StatsCollector(ebook, do_embed=opts.embed)
@ -194,7 +199,7 @@ def polish_one(ebook, opts, report):
if opts.remove_unused_css:
rt(_('Removing unused CSS rules'))
if remove_unused_css(ebook, report):
if remove_unused_css(ebook, report, remove_unused_classes=customization['remove_unused_classes']):
changed = True
report('')
@ -233,13 +238,13 @@ def gui_polish(data):
log(msg)
return '\n\n'.join(report)
def tweak_polish(container, actions):
def tweak_polish(container, actions, customization=None):
opts = ALL_OPTS.copy()
opts.update(actions)
O = namedtuple('Options', ' '.join(ALL_OPTS.iterkeys()))
opts = O(**opts)
report = []
changed = polish_one(container, opts, report.append)
changed = polish_one(container, opts, report.append, customization=customization)
return report, changed
def option_parser():

View File

@ -48,6 +48,7 @@ d['insert_tag_mru'] = ['p', 'div', 'li', 'h1', 'h2', 'h3', 'h4', 'em', 'strong',
d['spell_check_case_sensitive_sort'] = False
d['inline_spell_check'] = True
d['custom_themes'] = {}
d['remove_unused_classes'] = False
del d

View File

@ -11,8 +11,7 @@ from functools import partial, wraps
from PyQt4.Qt import (
QObject, QApplication, QDialog, QGridLayout, QLabel, QSize, Qt,
QDialogButtonBox, QIcon, QTimer, QPixmap, QTextBrowser, QVBoxLayout,
QInputDialog)
QDialogButtonBox, QIcon, QTimer, QPixmap, QInputDialog)
from calibre import prints, isbytestring
from calibre.ptempfile import PersistentTemporaryDirectory, TemporaryDirectory
@ -425,36 +424,22 @@ class Boss(QObject):
self.edit_file(name, 'html')
def polish(self, action, name, parent=None):
from calibre.gui2.tweak_book.polish import get_customization, show_report
customization = get_customization(action, name, parent or self.gui)
if customization is None:
return
with BusyCursor():
self.add_savepoint(_('Before: %s') % name)
try:
report, changed = tweak_polish(current_container(), {action:True})
report, changed = tweak_polish(current_container(), {action:True}, customization=customization)
except:
self.rewind_savepoint()
raise
if changed:
self.apply_container_update_to_gui()
from calibre.ebooks.markdown import markdown
report = markdown('# %s\n\n'%self.current_metadata.title + '\n\n'.join(report), output_format='html4')
if not changed:
self.rewind_savepoint()
d = QDialog(parent or self.gui)
d.l = QVBoxLayout()
d.setLayout(d.l)
d.e = QTextBrowser(d)
d.l.addWidget(d.e)
d.e.setHtml(report)
d.bb = QDialogButtonBox(QDialogButtonBox.Close)
if changed:
b = d.b = d.bb.addButton(_('See what &changed'), d.bb.AcceptRole)
b.setIcon(QIcon(I('diff.png'))), b.setAutoDefault(False)
b.clicked.connect(partial(self.show_current_diff, allow_revert=True))
d.bb.button(d.bb.Close).setDefault(True)
d.l.addWidget(d.bb)
d.bb.rejected.connect(d.reject)
d.bb.accepted.connect(d.accept)
d.resize(600, 400)
d.exec_()
show_report(changed, self.current_metadata.title, report, parent or self.gui, self.show_current_diff)
def manage_fonts(self):
self.commit_all_editors_to_container()

View File

@ -0,0 +1,72 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
from functools import partial
from PyQt4.Qt import (
QTextBrowser, QVBoxLayout, QDialog, QDialogButtonBox, QIcon, QLabel, QCheckBox)
from calibre.ebooks.oeb.polish.main import CUSTOMIZATION
from calibre.gui2.tweak_book import tprefs
class Abort(Exception):
    '''
    Raised by a customization dialog when it is not accepted.
    get_customization() catches it and returns None.
    '''
def customize_remove_unused_css(name, parent, ans):
    '''
    Show a modal dialog asking whether unused class attributes should be
    removed from the HTML in addition to the unused CSS rules.

    :param name: Not used by this function.
    :param parent: Parent widget for the dialog.
    :param ans: Dictionary that receives the choice under the key
        ``'remove_unused_classes'``. The same value is also stored in
        ``tprefs``, which supplies the initial state of the check box.
    :raises Abort: If the dialog is not accepted.
    '''
    d = QDialog(parent)
    d.l = layout = QVBoxLayout()
    d.setLayout(layout)
    d.setWindowTitle(_('Remove unused CSS'))

    d.la = label = QLabel(_(
        'This will remove all CSS rules that do not match any actual content. You'
        ' can also have it automatically remove any class attributes from the HTML'
        ' that do not match any CSS rules, by using the check box below:'))
    label.setWordWrap(True)
    layout.addWidget(label)

    d.c = checkbox = QCheckBox(_('Remove unused &class attributes'))
    checkbox.setChecked(tprefs['remove_unused_classes'])
    layout.addWidget(checkbox)

    d.bb = buttons = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel)
    layout.addWidget(buttons)
    buttons.rejected.connect(d.reject)
    buttons.accepted.connect(d.accept)

    if d.exec_() != d.Accepted:
        raise Abort()
    checked = checkbox.isChecked()
    ans['remove_unused_classes'] = checked
    tprefs['remove_unused_classes'] = checked
def get_customization(action, name, parent):
    '''
    Return the customization dictionary to use for the polish ``action``.

    Starts from a copy of CUSTOMIZATION. For the ``'remove_unused_css'``
    action the user is asked, via a dialog, to adjust it. Returns None if
    that dialog raises Abort.
    '''
    customization = CUSTOMIZATION.copy()
    try:
        if action == 'remove_unused_css':
            customize_remove_unused_css(name, parent, customization)
    except Abort:
        return None
    return customization
def format_report(title, report):
    '''
    Convert a polishing report to HTML.

    ``title`` becomes a top level markdown heading and the entries of
    ``report`` are joined with blank lines below it. The resulting markdown
    text is rendered with ``output_format='html4'``.
    '''
    from calibre.ebooks.markdown import markdown
    heading = '# %s\n\n' % title
    body = '\n\n'.join(report)
    return markdown(heading + body, output_format='html4')
def show_report(changed, title, report, parent, show_current_diff):
    '''
    Display a polishing report in a modal dialog.

    :param changed: If true, a "See what changed" button is added that calls
        ``show_current_diff(allow_revert=True)`` when clicked.
    :param title: Title passed to format_report().
    :param report: Report entries passed to format_report().
    :param parent: Parent widget for the dialog.
    :param show_current_diff: Callable invoked by the "See what changed"
        button.
    '''
    html = format_report(title, report)
    d = QDialog(parent)
    d.l = layout = QVBoxLayout()
    d.setLayout(layout)

    d.e = browser = QTextBrowser(d)
    layout.addWidget(browser)
    browser.setHtml(html)

    d.bb = buttons = QDialogButtonBox(QDialogButtonBox.Close)
    if changed:
        diff_button = d.b = buttons.addButton(_('See what &changed'), buttons.AcceptRole)
        diff_button.setIcon(QIcon(I('diff.png')))
        diff_button.setAutoDefault(False)
        diff_button.clicked.connect(partial(show_current_diff, allow_revert=True))
    buttons.button(buttons.Close).setDefault(True)
    layout.addWidget(buttons)
    buttons.rejected.connect(d.reject)
    buttons.accepted.connect(d.accept)

    d.resize(600, 400)
    d.exec_()