Edit book: Add a new tool to automatically remove all unused CSS rules.

Useful for books created from templates that can have large numbers of CSS rules that don't match any actual content. Available via Tools->Remove unused CSS. Book polishing: Add an option to automatically remove all unused CSS rules, works the same as the tool for Edit book, above.
2025-07-08 10:44:09 -04:00 · 2014-01-21 18:52:47 +05:30 · 2014-01-21 18:52:47 +05:30 · 7a307a2e24
commit 7a307a2e24
parent d29c209316
5 changed files with 182 additions and 2 deletions
--- a/manual/edit.rst
+++ b/manual/edit.rst
@ -335,6 +335,15 @@ Note that the algorithm can sometimes generate incorrect results, especially
 when single quotes at the start of contractions are involved. Accessed via
 :guilabel:`Tools->Smarten punctuation`.

+Removing unused CSS rules
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Remove all unused CSS rules from stylesheets and <style> tags. Some books
+created from production templates can have a large number of extra CSS rules
+that don't match any actual content. These extra rules can slow down readers
+that need to process them all. Accessed via :guilabel:`Tools->Remove unused CSS`.
+
+
 Fix HTML
 ^^^^^^^^^^^

--- a/src/calibre/ebooks/oeb/polish/css.py
+++ b/src/calibre/ebooks/oeb/polish/css.py
@ -0,0 +1,149 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
+
+import re
+
+from lxml import etree
+from cssselect import HTMLTranslator
+from cssselect.xpath import XPathExpr, is_safe_name
+
+from calibre import force_unicode
+from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XPNSMAP, XHTML_NS
+from calibre.ebooks.oeb.stylizer import MIN_SPACE_RE, is_non_whitespace, xpath_lower_case, fix_namespace
+from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style
+
+class NamespacedTranslator(HTMLTranslator):
+    'HTMLTranslator variant that emits element tests using the h: prefix'
+
+    def xpath_element(self, selector):
+        """Translate a type or universal selector.
+
+        A missing element name becomes ``*``. A name accepted by
+        is_safe_name() is lower-cased and given the h: prefix, which this
+        module uses for the XHTML namespace. Any other name is passed to
+        XPathExpr unchanged and add_name_test() is called on the result.
+        """
+        name = selector.element
+        if not name:
+            # Universal selector: match any element
+            return XPathExpr(element='*')
+        if is_safe_name(name):
+            # We use the h: prefix for the XHTML namespace
+            return XPathExpr(element='h:%s' % name.lower())
+        expr = XPathExpr(element=name)
+        expr.add_name_test()
+        return expr
+
+class CaseInsensitiveAttributesTranslator(NamespacedTranslator):
+    'Treat class and id CSS selectors case-insensitively'
+
+    def xpath_class(self, class_selector):
+        """Translate a class selector, comparing against the lower-cased
+        value of the class attribute."""
+        expr = self.xpath(class_selector.selector)
+        name = class_selector.class_name
+        if not is_non_whitespace(name):
+            # Condition '0' is always false, so nothing can match
+            expr.add_condition('0')
+            return expr
+        lowered_attr = xpath_lower_case('@class')
+        wanted = self.xpath_literal(' ' + name.lower() + ' ')
+        expr.add_condition(
+            "%s and contains(concat(' ', normalize-space(%s), ' '), %s)"
+            % ('@class', lowered_attr, wanted))
+        return expr
+
+    def xpath_hash(self, id_selector):
+        """Translate an ID selector, comparing against the lower-cased
+        value of the id attribute."""
+        expr = self.xpath(id_selector.selector)
+        wanted = id_selector.id.lower()
+        return self.xpath_attrib_equals(expr, xpath_lower_case('@id'), wanted)
+
+# Module-level translator callables, created once and shared by
+# build_selector(): css_to_xpath keeps class/id selectors as written,
+# ci_css_to_xpath lower-cases them before comparing.
+css_to_xpath = NamespacedTranslator().css_to_xpath
+ci_css_to_xpath = CaseInsensitiveAttributesTranslator().css_to_xpath
+
+def build_selector(text, case_sensitive=True):
+    '''
+    Compile the CSS selector `text` into an lxml XPath object.
+
+    :param text: The selector text to translate
+    :param case_sensitive: If False, class and id selectors are translated
+        with the case-insensitive translator
+    :return: The compiled XPath, or None if translating or compiling the
+        selector raised any exception
+    '''
+    if case_sensitive:
+        translate = css_to_xpath
+    else:
+        translate = ci_css_to_xpath
+    try:
+        expression = fix_namespace(translate(text))
+        return etree.XPath(expression, namespaces=XPNSMAP)
+    except Exception:
+        return None
+
+def is_rule_used(root, selector, log, pseudo_pat, cache):
+    '''
+    Return True if `selector` matches something in the document `root`, or
+    if that cannot be determined.
+
+    Text matching `pseudo_pat` is stripped from the selector and
+    MIN_SPACE_RE is applied before it is compiled. Compiled selectors are
+    stored in `cache`, keyed by (case_sensitive, selector text). The `log`
+    parameter is accepted but not used here.
+    '''
+    selector = MIN_SPACE_RE.sub(r'\1', pseudo_pat.sub('', selector))
+
+    # First try the selector as written. If that matches nothing, see if
+    # interpreting class and id selectors case-insensitively gives us
+    # matches. Strictly speaking, class and id selectors should be case
+    # sensitive for XHTML, but we err on the side of caution and not remove
+    # them, since case sensitivity depends on whether the html is rendered in
+    # quirks mode or not.
+    for case_sensitive in (True, False):
+        key = (case_sensitive, selector)
+        if key not in cache:
+            cache[key] = build_selector(selector, case_sensitive=case_sensitive)
+        matcher = cache[key]
+        try:
+            if matcher(root):
+                return True
+        except Exception:
+            # Covers evaluation errors and also a selector that could not
+            # be compiled (matcher is None): treat the rule as used
+            return True
+    return False
+
+def filter_used_rules(root, rules, log, pseudo_pat, cache):
+    '''
+    Yield every rule from `rules` none of whose selectors is reported as
+    used in `root` by is_rule_used(). Despite the name, the rules yielded
+    are the unused ones; used rules are filtered out.
+    '''
+    for rule in rules:
+        # any() stops at the first selector that is in use
+        in_use = any(
+            is_rule_used(root, sel.selectorText, log, pseudo_pat, cache)
+            for sel in rule.selectorList)
+        if not in_use:
+            yield rule
+
+def _imported_sheet_names(sheet, base_name, container, sheets, found=None):
+    '''
+    Return the set of names in `sheets` that `sheet` pulls in via @import,
+    following imports transitively. `base_name` is the name of the file
+    that contains `sheet`; import hrefs are resolved relative to it. The
+    `found` set doubles as a guard against circular imports.
+    '''
+    from cssutils.css import CSSRule
+    if found is None:
+        found = set()
+    for rule in sheet.cssRules.rulesOfType(CSSRule.IMPORT_RULE):
+        href = rule.href
+        if not href:
+            continue
+        iname = container.href_to_name(href, base_name)
+        if iname in sheets and iname not in found:
+            found.add(iname)
+            _imported_sheet_names(sheets[iname], iname, container, sheets, found)
+    return found
+
+def remove_unused_css(container, report):
+    '''
+    Remove style rules that match nothing, from all stylesheets in the
+    book and from <style> tags in its documents.
+
+    A rule in a <style> tag is tested against the document containing the
+    tag. A rule in a stylesheet is removed only if it is unused in every
+    document the sheet applies to. A sheet applies to a document when it is
+    referenced by a <link> in that document, or is reachable through
+    @import from such a sheet or from one of the document's <style> tags.
+    Without following @import, sheets that are only ever imported would
+    never be tested and would lose all their rules.
+
+    :param container: The book container. Modified files are marked dirty.
+    :param report: Callable that is passed a one line summary of the result
+    '''
+    from cssutils.css import CSSRule
+    sheets = {name:container.parsed(name) for name, mt in container.mime_map.iteritems() if mt in OEB_STYLES}
+    for sheet in sheets.itervalues():
+        sheet.namespaces['h'] = XHTML_NS
+    # For every sheet, the style rules not yet found to be used by any document
+    style_rules = {name:tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) for name, sheet in sheets.iteritems()}
+    import_map = {name:_imported_sheet_names(sheet, name, container, sheets) for name, sheet in sheets.iteritems()}
+
+    num_of_removed_rules = 0
+    # Pseudo classes/elements are stripped from selectors before matching
+    pseudo_pat = re.compile(r':(first-letter|first-line|link|hover|visited|active|focus|before|after)', re.I)
+    cache = {}
+
+    for name, mt in container.mime_map.iteritems():
+        if mt not in OEB_DOCS:
+            continue
+        root = container.parsed(name)
+        # Names of the external sheets that apply to this document
+        applicable = set()
+
+        for style in root.xpath('//*[local-name()="style"]'):
+            if style.get('type', 'text/css') == 'text/css' and style.text:
+                sheet = container.parse_css(style.text)
+                sheet.namespaces['h'] = XHTML_NS
+                applicable |= _imported_sheet_names(sheet, name, container, sheets)
+                rules = tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
+                unused_rules = tuple(filter_used_rules(root, rules, container.log, pseudo_pat, cache))
+                if unused_rules:
+                    num_of_removed_rules += len(unused_rules)
+                    for r in unused_rules:
+                        sheet.cssRules.remove(r)
+                    style.text = force_unicode(sheet.cssText, 'utf-8')
+                    pretty_script_or_style(container, style)
+                    container.dirty(name)
+
+        for link in root.xpath('//*[local-name()="link" and @href]'):
+            sname = container.href_to_name(link.get('href'), name)
+            if sname in sheets:
+                applicable.add(sname)
+                applicable |= import_map[sname]
+
+        # Keep only the rules that are still unused after this document
+        for sname in applicable:
+            style_rules[sname] = tuple(filter_used_rules(root, style_rules[sname], container.log, pseudo_pat, cache))
+
+    for name, sheet in sheets.iteritems():
+        unused_rules = style_rules[name]
+        if unused_rules:
+            num_of_removed_rules += len(unused_rules)
+            for r in unused_rules:
+                sheet.cssRules.remove(r)
+            container.dirty(name)
+
+    if num_of_removed_rules > 0:
+        report(_('Removed %d unused CSS style rules') % num_of_removed_rules)
+    else:
+        report(_('No unused CSS style rules found'))
--- a/src/calibre/ebooks/oeb/polish/main.py
+++ b/src/calibre/ebooks/oeb/polish/main.py
@ -19,6 +19,7 @@ from calibre.ebooks.oeb.polish.cover import set_cover
 from calibre.ebooks.oeb.polish.replace import smarten_punctuation
 from calibre.ebooks.oeb.polish.jacket import (
    replace_jacket, add_or_replace_jacket, find_existing_jacket, remove_jacket)
+from calibre.ebooks.oeb.polish.css import remove_unused_css
 from calibre.utils.logging import Log

 ALL_OPTS = {
@ -29,6 +30,7 @@ ALL_OPTS = {
    'jacket': False,
    'remove_jacket':False,
    'smarten_punctuation':False,
+    'remove_unused_css':False,
 }

 SUPPORTED = {'EPUB', 'AZW3'}
@ -90,6 +92,13 @@ typographically correct equivalents.</p>
 <p>Note that the algorithm can sometimes generate incorrect results, especially
 when single quotes at the start of contractions are involved.</p>
 '''),
+
+'remove_unused_css': _('''\
+<p>Remove all unused CSS rules from stylesheets and &lt;style&gt; tags. Some books
+created from production templates can have a large number of extra CSS rules
+that dont match any actual content. These extra rules can slow down readers
+that need to parse them all.</p>
+'''),
 }

 def hfix(name, raw):
@ -98,6 +107,7 @@ def hfix(name, raw):
    raw = raw.replace('\n\n', '__XX__')
    raw = raw.replace('\n', ' ')
    raw = raw.replace('__XX__', '\n')
+    raw = raw.replace('&lt;', '<').replace('&gt;', '>')
    return raw

 CLI_HELP = {x:hfix(x, re.sub('<.*?>', '', y)) for x, y in HELP.iteritems()}
@ -174,6 +184,11 @@ def polish_one(ebook, opts, report):
        subset_all_fonts(ebook, stats.font_stats, report)
        report('')

+    if opts.remove_unused_css:
+        rt(_('Removing unused CSS rules'))
+        remove_unused_css(ebook, report)
+        report('')
+

 def polish(file_map, opts, log, report):
    st = time.time()
@ -233,6 +248,7 @@ def option_parser():
    o('--jacket', '-j', help=CLI_HELP['jacket'])
    o('--remove-jacket', help=CLI_HELP['remove_jacket'])
    o('--smarten-punctuation', '-p', help=CLI_HELP['smarten_punctuation'])
+    o('--remove-unused-css', '-u', help=CLI_HELP['remove_unused_css'])

    o('--verbose', help=_('Produce more verbose output, useful for debugging.'))

--- a/src/calibre/gui2/actions/polish.py
+++ b/src/calibre/gui2/actions/polish.py
@ -59,13 +59,14 @@ class Polish(QDialog):  # {{{
                         ' formats are not capable of supporting all the'
                         ' metadata in calibre.</p><p>There is a separate option to'
                         ' update the cover.</p>'),
-            'do_cover': _('<p>Update the covers in the ebook files to match the'
+            'do_cover': _('<h3>Update cover</h3><p>Update the covers in the ebook files to match the'
                        ' current cover in the calibre library.</p>'
                        '<p>If the ebook file does not have'
                        ' an identifiable cover, a new cover is inserted.</p>'
                        ),
            'jacket':_('<h3>Book Jacket</h3>%s')%HELP['jacket'],
            'remove_jacket':_('<h3>Remove Book Jacket</h3>%s')%HELP['remove_jacket'],
+            'remove_unused_css':_('<h3>Remove unused CSS rules</h3>%s')%HELP['remove_unused_css'],
        }

        self.l = l = QGridLayout()
@ -83,6 +84,7 @@ class Polish(QDialog):  # {{{
            ('do_cover', _('Update the &cover in the book files')),
            ('jacket', _('Add metadata as a "book &jacket" page')),
            ('remove_jacket', _('&Remove a previously inserted book jacket')),
+            ('remove_unused_css', _('Remove &unused CSS rules from the book')),
        ])
        prefs = gprefs.get('polishing_settings', {})
        for name, text in self.all_actions.iteritems():
--- a/src/calibre/gui2/tweak_book/ui.py
+++ b/src/calibre/gui2/tweak_book/ui.py
@ -341,6 +341,9 @@ class Main(MainWindow):
        self.action_smarten_punctuation = reg(
            'smarten-punctuation.png', _('&Smarten punctuation'), partial(
                self.boss.polish, 'smarten_punctuation', _('Smarten punctuation')), 'smarten-punctuation', (), _('Smarten punctuation'))
+        self.action_remove_unused_css = reg(
+            'edit-clear.png', _('Remove &unused CSS rules'), partial(
+                self.boss.polish, 'remove_unused_css', _('Remove unused CSS rules')), 'remove-unused-css', (), _('Remove unused CSS rules'))

        # Preview actions
        group = _('Preview')
@ -440,6 +443,7 @@ class Main(MainWindow):
        e.addAction(self.action_embed_fonts)
        e.addAction(self.action_subset_fonts)
        e.addAction(self.action_smarten_punctuation)
+        e.addAction(self.action_remove_unused_css)
        e.addAction(self.action_fix_html_all)
        e.addAction(self.action_pretty_all)
        e.addAction(self.action_rationalize_folders)
@ -519,7 +523,7 @@ class Main(MainWindow):
        a(self.action_help)

        a = create(_('Polish book tool bar'), 'polish').addAction
-        for x in ('embed_fonts', 'subset_fonts', 'smarten_punctuation'):
+        for x in ('embed_fonts', 'subset_fonts', 'smarten_punctuation', 'remove_unused_css'):
            a(getattr(self, 'action_' + x))

    def create_docks(self):