Get rid of cssselect from Edit Book

2025-08-11 09:13:57 -04:00 · 2015-02-22 09:11:03 +05:30 · 2015-02-22 09:11:03 +05:30 · 8f6f60bca2
commit 8f6f60bca2
parent 0c4e86dcd1
3 changed files with 21 additions and 143 deletions
--- a/src/calibre/ebooks/oeb/polish/css.py
+++ b/src/calibre/ebooks/oeb/polish/css.py
@@ -6,122 +6,26 @@ from __future__ import (unicode_literals, division, absolute_import,
 __license__ = 'GPL v3'
 __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
 import re
 from lxml import etree
 from cssutils.css import CSSRule
-from cssselect import HTMLTranslator, parse
+from css_selectors import parse, SelectorSyntaxError
 from cssselect.xpath import XPathExpr, is_safe_name
 from cssselect.parser import SelectorSyntaxError
 from calibre import force_unicode
-from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XPNSMAP, XHTML_NS
+from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS
 from calibre.ebooks.oeb.normalize_css import normalize_filter_css, normalizers
 from calibre.ebooks.oeb.stylizer import MIN_SPACE_RE, is_non_whitespace, xpath_lower_case, fix_namespace
 from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style
 from css_selectors import Select
 class NamespacedTranslator(HTMLTranslator):
-    def xpath_element(self, selector):
+def filter_used_rules(rules, log, select):
        element = selector.element
        if not element:
            element = '*'
            safe = True
        else:
            safe = is_safe_name(element)
            if safe:
                # We use the h: prefix for the XHTML namespace
                element = 'h:%s' % element.lower()
        xpath = XPathExpr(element=element)
        if not safe:
            xpath.add_name_test()
        return xpath
 class CaseInsensitiveAttributesTranslator(NamespacedTranslator):
    'Treat class and id CSS selectors case-insensitively'
    def xpath_class(self, class_selector):
        """Translate a class selector."""
        x = self.xpath(class_selector.selector)
        if is_non_whitespace(class_selector.class_name):
            x.add_condition(
                "%s and contains(concat(' ', normalize-space(%s), ' '), %s)"
                % ('@class', xpath_lower_case('@class'), self.xpath_literal(
                    ' '+class_selector.class_name.lower()+' ')))
        else:
            x.add_condition('0')
        return x
    def xpath_hash(self, id_selector):
        """Translate an ID selector."""
        x = self.xpath(id_selector.selector)
        return self.xpath_attrib_equals(x, xpath_lower_case('@id'),
                (id_selector.id.lower()))
 css_to_xpath = NamespacedTranslator().css_to_xpath
 ci_css_to_xpath = CaseInsensitiveAttributesTranslator().css_to_xpath
 def build_selector(text, case_sensitive=True):
    func = css_to_xpath if case_sensitive else ci_css_to_xpath
    try:
        return etree.XPath(fix_namespace(func(text)), namespaces=XPNSMAP)
    except Exception:
        return None
 PSEUDO_PAT = r':(first-letter|first-line|link|hover|visited|active|focus|before|after)'
 def is_rule_used(root, selector, log, pseudo_pat, cache):
    selector = pseudo_pat.sub('', selector)
    selector = MIN_SPACE_RE.sub(r'\1', selector)
    try:
        xp = cache[(True, selector)]
    except KeyError:
        xp = cache[(True, selector)] = build_selector(selector)
    try:
        if xp(root):
            return True
    except Exception:
        return True
    # See if interpreting class and id selectors case-insensitively gives us
    # matches. Strictly speaking, class and id selectors should be case
    # sensitive for XHTML, but we err on the side of caution and not remove
    # them, since case sensitivity depends on whether the html is rendered in
    # quirks mode or not.
    try:
        xp = cache[(False, selector)]
    except KeyError:
        xp = cache[(False, selector)] = build_selector(selector, case_sensitive=False)
    try:
        return bool(xp(root))
    except Exception:
        return True
 def filter_used_rules(root, rules, log, pseudo_pat, cache):
    for rule in rules:
        used = False
        for selector in rule.selectorList:
-            text = selector.selectorText
+            if select.has_matches(selector.selectorText):
            if is_rule_used(root, text, log, pseudo_pat, cache):
                used = True
                break
        if not used:
            yield rule
 def process_namespaces(sheet):
    # Find the namespace prefix (if any) for the XHTML namespace, so that we
    # can preserve it after processing
    for prefix in sheet.namespaces:
        if sheet.namespaces[prefix] == XHTML_NS:
            return prefix
 def preserve_htmlns_prefix(sheet, prefix):
    if prefix is None:
        while 'h' in sheet.namespaces:
            del sheet.namespaces['h']
    else:
        sheet.namespaces[prefix] = XHTML_NS
 def get_imported_sheets(name, container, sheets, recursion_level=10, sheet=None):
    ans = set()
    sheet = sheet or sheets[name]
@@ -155,20 +59,15 @@ def remove_unused_css(container, report=None, remove_unused_classes=False):
    import_map = {name:get_imported_sheets(name, container, sheets) for name in sheets}
    if remove_unused_classes:
        class_map = {name:{icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)} for name, sheet in sheets.iteritems()}
    sheet_namespace = {}
    for sheet in sheets.itervalues():
        sheet_namespace[sheet] = process_namespaces(sheet)
        sheet.namespaces['h'] = XHTML_NS
    style_rules = {name:tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) for name, sheet in sheets.iteritems()}
    num_of_removed_rules = num_of_removed_classes = 0
    pseudo_pat = re.compile(PSEUDO_PAT, re.I)
    cache = {}
    for name, mt in container.mime_map.iteritems():
        if mt not in OEB_DOCS:
            continue
        root = container.parsed(name)
        select = Select(root, ignore_inappropriate_pseudo_classes=True)
        used_classes = set()
        for style in root.xpath('//*[local-name()="style"]'):
            if style.get('type', 'text/css') == 'text/css' and style.text:
@@ -177,17 +76,14 @@ def remove_unused_css(container, report=None, remove_unused_classes=False):
                    used_classes |= {icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)}
                imports = get_imported_sheets(name, container, sheets, sheet=sheet)
                for imported_sheet in imports:
-                    style_rules[imported_sheet] = tuple(filter_used_rules(root, style_rules[imported_sheet], container.log, pseudo_pat, cache))
+                    style_rules[imported_sheet] = tuple(filter_used_rules(style_rules[imported_sheet], container.log, select))
                    if remove_unused_classes:
                        used_classes |= class_map[imported_sheet]
                ns = process_namespaces(sheet)
                sheet.namespaces['h'] = XHTML_NS
                rules = tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
-                unused_rules = tuple(filter_used_rules(root, rules, container.log, pseudo_pat, cache))
+                unused_rules = tuple(filter_used_rules(rules, container.log, select))
                if unused_rules:
                    num_of_removed_rules += len(unused_rules)
                    [sheet.cssRules.remove(r) for r in unused_rules]
                    preserve_htmlns_prefix(sheet, ns)
                    style.text = force_unicode(sheet.cssText, 'utf-8')
                    pretty_script_or_style(container, style)
                    container.dirty(name)
@@ -196,12 +92,12 @@ def remove_unused_css(container, report=None, remove_unused_classes=False):
            sname = container.href_to_name(link.get('href'), name)
            if sname not in sheets:
                continue
-            style_rules[sname] = tuple(filter_used_rules(root, style_rules[sname], container.log, pseudo_pat, cache))
+            style_rules[sname] = tuple(filter_used_rules(style_rules[sname], container.log, select))
            if remove_unused_classes:
                used_classes |= class_map[sname]
            for iname in import_map[sname]:
-                style_rules[iname] = tuple(filter_used_rules(root, style_rules[iname], container.log, pseudo_pat, cache))
+                style_rules[iname] = tuple(filter_used_rules(style_rules[iname], container.log, select))
                if remove_unused_classes:
                    used_classes |= class_map[iname]
@@ -220,7 +116,6 @@ def remove_unused_css(container, report=None, remove_unused_classes=False):
                    container.dirty(name)
    for name, sheet in sheets.iteritems():
        preserve_htmlns_prefix(sheet, sheet_namespace[sheet])
        unused_rules = style_rules[name]
        if unused_rules:
            num_of_removed_rules += len(unused_rules)
--- a/src/calibre/ebooks/oeb/polish/report.py
+++ b/src/calibre/ebooks/oeb/polish/report.py
@@ -6,17 +6,17 @@ from __future__ import (unicode_literals, division, absolute_import,
 __license__ = 'GPL v3'
 __copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
-import posixpath, os, time, types, re
+import posixpath, os, time, types
 from collections import namedtuple, defaultdict, Counter
 from itertools import chain
 from calibre import prepare_string_for_xml, force_unicode
 from calibre.ebooks.oeb.base import XPath, xml2text
 from calibre.ebooks.oeb.polish.container import OEB_DOCS, OEB_STYLES, OEB_FONTS
 from calibre.ebooks.oeb.polish.css import build_selector, PSEUDO_PAT, MIN_SPACE_RE
 from calibre.ebooks.oeb.polish.spell import get_all_words
 from calibre.utils.icu import numeric_sort_key, ord_string, safe_chr
 from calibre.utils.magick.draw import identify
 from css_selectors import Select, SelectorError
 File = namedtuple('File', 'name dir basename size category')
@@ -255,8 +255,6 @@ def css_data(container, book_locale, result_data, *args):
                        css_rules(name, parser.parse_stylesheet(force_unicode(style.text, 'utf-8')).rules, style.sourceline - 1))
    rule_map = defaultdict(lambda : defaultdict(list))
    pseudo_pat = re.compile(PSEUDO_PAT, re.I)
    cache = {}
    def rules_in_sheet(sheet):
        for rule in sheet:
@@ -285,28 +283,12 @@ def css_data(container, book_locale, result_data, *args):
                return '<%s %s>' % (tag, attribs)
            ans = tt_cache[elem] = '<%s>' % tag
-    def matches_for_selector(selector, root, class_map, rule):
+    def matches_for_selector(selector, select, class_map, rule):
        selector = pseudo_pat.sub('', selector)
        selector = MIN_SPACE_RE.sub(r'\1', selector)
        try:
            xp = cache[(True, selector)]
        except KeyError:
            xp = cache[(True, selector)] = build_selector(selector)
        try:
            matches = xp(root)
        except Exception:
            return ()
        if not matches:
            try:
                xp = cache[(False, selector)]
            except KeyError:
                xp = cache[(False, selector)] = build_selector(selector, case_sensitive=False)
            try:
                matches = xp(root)
            except Exception:
                return ()
        lsel = selector.lower()
        try:
            matches = tuple(select(selector))
        except SelectorError:
            return ()
        for elem in matches:
            for cls in elem.get('class', '').split():
                if '.' + cls.lower() in lsel:
@@ -322,9 +304,10 @@ def css_data(container, book_locale, result_data, *args):
        for elem in root.xpath('//*[@class]'):
            for cls in elem.get('class', '').split():
                cmap[cls][elem] = []
        select = Select(root, ignore_inappropriate_pseudo_classes=True)
        for sheet in chain(sheets_for_html(name, root), inline_sheets):
            for rule in rules_in_sheet(sheet):
-                rule_map[rule][name].extend(matches_for_selector(rule.selector, root, cmap, rule))
+                rule_map[rule][name].extend(matches_for_selector(rule.selector, select, cmap, rule))
        for cls, elem_map in cmap.iteritems():
            class_elements = class_map[cls][name]
            for elem, usage in elem_map.iteritems():
--- a/src/calibre/gui2/tweak_book/live_css.py
+++ b/src/calibre/gui2/tweak_book/live_css.py
@@ -8,7 +8,6 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
 import json
 from cssselect import parse
 from PyQt5.Qt import (
    QWidget, QTimer, QStackedLayout, QLabel, QScrollArea, QVBoxLayout,
    QPainter, Qt, QPalette, QRect, QSize, QSizePolicy, pyqtSignal,
@@ -18,6 +17,7 @@ from calibre.constants import iswindows
 from calibre.gui2.tweak_book import editors, actions, current_container, tprefs
 from calibre.gui2.tweak_book.editor.themes import get_theme, theme_color
 from calibre.gui2.tweak_book.editor.text import default_font_family
 from css_selectors import parse, SelectorError
 class Heading(QWidget):  # {{{
@@ -434,7 +434,7 @@ class LiveCSS(QWidget):
        if selector is not None:
            try:
                specificity = [0] + list(parse(selector)[0].specificity())
-            except (AttributeError, TypeError):
+            except (AttributeError, TypeError, SelectorError):
                specificity = [0, 0, 0, 0]
        else:  # style attribute
            specificity = [1, 0, 0, 0]