Edit book: Add a new tool to automatically remove all unused CSS rules.

Useful for books created from templates that can have large numbers of CSS rules that dont match any actual content. Available via Tools->Remove unused CSS.

Book polishing: Add an option to automatically remove all unused CSS
rules, works the same as the tool for Edit book, above.
This commit is contained in:
Kovid Goyal 2014-01-21 18:52:47 +05:30
parent d29c209316
commit 7a307a2e24
5 changed files with 182 additions and 2 deletions

View File

@ -335,6 +335,15 @@ Note that the algorithm can sometimes generate incorrect results, especially
when single quotes at the start of contractions are involved. Accessed via
:guilabel:`Tools->Smarten punctuation`.
Removing unused CSS rules
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Remove all unused CSS rules from stylesheets and <style> tags. Some books
created from production templates can have a large number of extra CSS rules
that dont match any actual content. These extra rules can slow down readers
that need to process them all. Accessed via :guilabel:`Tools->Remove unused CSS`.
Fix HTML
^^^^^^^^^^^

View File

@ -0,0 +1,149 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
import re
from lxml import etree
from cssselect import HTMLTranslator
from cssselect.xpath import XPathExpr, is_safe_name
from calibre import force_unicode
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XPNSMAP, XHTML_NS
from calibre.ebooks.oeb.stylizer import MIN_SPACE_RE, is_non_whitespace, xpath_lower_case, fix_namespace
from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style
class NamespacedTranslator(HTMLTranslator):
def xpath_element(self, selector):
element = selector.element
if not element:
element = '*'
safe = True
else:
safe = is_safe_name(element)
if safe:
# We use the h: prefix for the XHTML namespace
element = 'h:%s' % element.lower()
xpath = XPathExpr(element=element)
if not safe:
xpath.add_name_test()
return xpath
class CaseInsensitiveAttributesTranslator(NamespacedTranslator):
'Treat class and id CSS selectors case-insensitively'
def xpath_class(self, class_selector):
"""Translate a class selector."""
x = self.xpath(class_selector.selector)
if is_non_whitespace(class_selector.class_name):
x.add_condition(
"%s and contains(concat(' ', normalize-space(%s), ' '), %s)"
% ('@class', xpath_lower_case('@class'), self.xpath_literal(
' '+class_selector.class_name.lower()+' ')))
else:
x.add_condition('0')
return x
def xpath_hash(self, id_selector):
"""Translate an ID selector."""
x = self.xpath(id_selector.selector)
return self.xpath_attrib_equals(x, xpath_lower_case('@id'),
(id_selector.id.lower()))
css_to_xpath = NamespacedTranslator().css_to_xpath
ci_css_to_xpath = CaseInsensitiveAttributesTranslator().css_to_xpath
def build_selector(text, case_sensitive=True):
func = css_to_xpath if case_sensitive else ci_css_to_xpath
try:
return etree.XPath(fix_namespace(func(text)), namespaces=XPNSMAP)
except Exception:
return None
def is_rule_used(root, selector, log, pseudo_pat, cache):
selector = pseudo_pat.sub('', selector)
selector = MIN_SPACE_RE.sub(r'\1', selector)
try:
xp = cache[(True, selector)]
except KeyError:
xp = cache[(True, selector)] = build_selector(selector)
try:
if xp(root):
return True
except Exception:
return True
# See if interpreting class and id selectors case-insensitively gives us
# matches. Strictly speaking, class and id selectors should be case
# sensitive for XHTML, but we err on the side of caution and not remove
# them, since case sensitivity depends on whether the html is rendered in
# quirks mode or not.
try:
xp = cache[(False, selector)]
except KeyError:
xp = cache[(False, selector)] = build_selector(selector, case_sensitive=False)
try:
return bool(xp(root))
except Exception:
return True
def filter_used_rules(root, rules, log, pseudo_pat, cache):
for rule in rules:
used = False
for selector in rule.selectorList:
text = selector.selectorText
if is_rule_used(root, text, log, pseudo_pat, cache):
used = True
break
if not used:
yield rule
def remove_unused_css(container, report):
from cssutils.css import CSSRule
sheets = {name:container.parsed(name) for name, mt in container.mime_map.iteritems() if mt in OEB_STYLES}
for sheet in sheets.itervalues():
sheet.namespaces['h'] = XHTML_NS
style_rules = {name:tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) for name, sheet in sheets.iteritems()}
num_of_removed_rules = 0
pseudo_pat = re.compile(r':(first-letter|first-line|link|hover|visited|active|focus|before|after)', re.I)
cache = {}
for name, mt in container.mime_map.iteritems():
if mt not in OEB_DOCS:
continue
root = container.parsed(name)
for style in root.xpath('//*[local-name()="style"]'):
if style.get('type', 'text/css') == 'text/css' and style.text:
sheet = container.parse_css(style.text)
sheet.namespaces['h'] = XHTML_NS
rules = tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
unused_rules = tuple(filter_used_rules(root, rules, container.log, pseudo_pat, cache))
if unused_rules:
num_of_removed_rules += len(unused_rules)
[sheet.cssRules.remove(r) for r in unused_rules]
style.text = force_unicode(sheet.cssText, 'utf-8')
pretty_script_or_style(container, style)
container.dirty(name)
for link in root.xpath('//*[local-name()="link" and @href]'):
sname = container.href_to_name(link.get('href'), name)
if sname in sheets:
style_rules[sname] = tuple(filter_used_rules(root, style_rules[sname], container.log, pseudo_pat, cache))
for name, sheet in sheets.iteritems():
unused_rules = style_rules[name]
if unused_rules:
num_of_removed_rules += len(unused_rules)
[sheet.cssRules.remove(r) for r in unused_rules]
container.dirty(name)
if num_of_removed_rules > 0:
report(_('Removed %d unused CSS style rules') % num_of_removed_rules)
else:
report(_('No unused CSS style rules found'))

View File

@ -19,6 +19,7 @@ from calibre.ebooks.oeb.polish.cover import set_cover
from calibre.ebooks.oeb.polish.replace import smarten_punctuation
from calibre.ebooks.oeb.polish.jacket import (
replace_jacket, add_or_replace_jacket, find_existing_jacket, remove_jacket)
from calibre.ebooks.oeb.polish.css import remove_unused_css
from calibre.utils.logging import Log
ALL_OPTS = {
@ -29,6 +30,7 @@ ALL_OPTS = {
'jacket': False,
'remove_jacket':False,
'smarten_punctuation':False,
'remove_unused_css':False,
}
SUPPORTED = {'EPUB', 'AZW3'}
@ -90,6 +92,13 @@ typographically correct equivalents.</p>
<p>Note that the algorithm can sometimes generate incorrect results, especially
when single quotes at the start of contractions are involved.</p>
'''),
'remove_unused_css': _('''\
<p>Remove all unused CSS rules from stylesheets and &lt;style&gt; tags. Some books
created from production templates can have a large number of extra CSS rules
that dont match any actual content. These extra rules can slow down readers
that need to parse them all.</p>
'''),
}
def hfix(name, raw):
@ -98,6 +107,7 @@ def hfix(name, raw):
raw = raw.replace('\n\n', '__XX__')
raw = raw.replace('\n', ' ')
raw = raw.replace('__XX__', '\n')
raw = raw.replace('&lt;', '<').replace('&gt;', '>')
return raw
CLI_HELP = {x:hfix(x, re.sub('<.*?>', '', y)) for x, y in HELP.iteritems()}
@ -174,6 +184,11 @@ def polish_one(ebook, opts, report):
subset_all_fonts(ebook, stats.font_stats, report)
report('')
if opts.remove_unused_css:
rt(_('Removing unused CSS rules'))
remove_unused_css(ebook, report)
report('')
def polish(file_map, opts, log, report):
st = time.time()
@ -233,6 +248,7 @@ def option_parser():
o('--jacket', '-j', help=CLI_HELP['jacket'])
o('--remove-jacket', help=CLI_HELP['remove_jacket'])
o('--smarten-punctuation', '-p', help=CLI_HELP['smarten_punctuation'])
o('--remove-unused-css', '-u', help=CLI_HELP['remove_unused_css'])
o('--verbose', help=_('Produce more verbose output, useful for debugging.'))

View File

@ -59,13 +59,14 @@ class Polish(QDialog): # {{{
' formats are not capable of supporting all the'
' metadata in calibre.</p><p>There is a separate option to'
' update the cover.</p>'),
'do_cover': _('<p>Update the covers in the ebook files to match the'
'do_cover': _('<h3>Update cover</h3><p>Update the covers in the ebook files to match the'
' current cover in the calibre library.</p>'
'<p>If the ebook file does not have'
' an identifiable cover, a new cover is inserted.</p>'
),
'jacket':_('<h3>Book Jacket</h3>%s')%HELP['jacket'],
'remove_jacket':_('<h3>Remove Book Jacket</h3>%s')%HELP['remove_jacket'],
'remove_unused_css':_('<h3>Remove unused CSS rules</h3>%s')%HELP['remove_unused_css'],
}
self.l = l = QGridLayout()
@ -83,6 +84,7 @@ class Polish(QDialog): # {{{
('do_cover', _('Update the &cover in the book files')),
('jacket', _('Add metadata as a "book &jacket" page')),
('remove_jacket', _('&Remove a previously inserted book jacket')),
('remove_unused_css', _('Remove &unused CSS rules from the book')),
])
prefs = gprefs.get('polishing_settings', {})
for name, text in self.all_actions.iteritems():

View File

@ -341,6 +341,9 @@ class Main(MainWindow):
self.action_smarten_punctuation = reg(
'smarten-punctuation.png', _('&Smarten punctuation'), partial(
self.boss.polish, 'smarten_punctuation', _('Smarten punctuation')), 'smarten-punctuation', (), _('Smarten punctuation'))
self.action_remove_unused_css = reg(
'edit-clear.png', _('Remove &unused CSS rules'), partial(
self.boss.polish, 'remove_unused_css', _('Remove unused CSS rules')), 'remove-unused-css', (), _('Remove unused CSS rules'))
# Preview actions
group = _('Preview')
@ -440,6 +443,7 @@ class Main(MainWindow):
e.addAction(self.action_embed_fonts)
e.addAction(self.action_subset_fonts)
e.addAction(self.action_smarten_punctuation)
e.addAction(self.action_remove_unused_css)
e.addAction(self.action_fix_html_all)
e.addAction(self.action_pretty_all)
e.addAction(self.action_rationalize_folders)
@ -519,7 +523,7 @@ class Main(MainWindow):
a(self.action_help)
a = create(_('Polish book tool bar'), 'polish').addAction
for x in ('embed_fonts', 'subset_fonts', 'smarten_punctuation'):
for x in ('embed_fonts', 'subset_fonts', 'smarten_punctuation', 'remove_unused_css'):
a(getattr(self, 'action_' + x))
def create_docks(self):