mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Get rid of cssselect from Edit Book
This commit is contained in:
parent
0c4e86dcd1
commit
8f6f60bca2
@ -6,122 +6,26 @@ from __future__ import (unicode_literals, division, absolute_import,
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import re
|
||||
|
||||
from lxml import etree
|
||||
from cssutils.css import CSSRule
|
||||
from cssselect import HTMLTranslator, parse
|
||||
from cssselect.xpath import XPathExpr, is_safe_name
|
||||
from cssselect.parser import SelectorSyntaxError
|
||||
from css_selectors import parse, SelectorSyntaxError
|
||||
|
||||
from calibre import force_unicode
|
||||
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XPNSMAP, XHTML_NS
|
||||
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS
|
||||
from calibre.ebooks.oeb.normalize_css import normalize_filter_css, normalizers
|
||||
from calibre.ebooks.oeb.stylizer import MIN_SPACE_RE, is_non_whitespace, xpath_lower_case, fix_namespace
|
||||
from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style
|
||||
from css_selectors import Select
|
||||
|
||||
class NamespacedTranslator(HTMLTranslator):
|
||||
|
||||
def xpath_element(self, selector):
|
||||
element = selector.element
|
||||
if not element:
|
||||
element = '*'
|
||||
safe = True
|
||||
else:
|
||||
safe = is_safe_name(element)
|
||||
if safe:
|
||||
# We use the h: prefix for the XHTML namespace
|
||||
element = 'h:%s' % element.lower()
|
||||
xpath = XPathExpr(element=element)
|
||||
if not safe:
|
||||
xpath.add_name_test()
|
||||
return xpath
|
||||
|
||||
class CaseInsensitiveAttributesTranslator(NamespacedTranslator):
|
||||
'Treat class and id CSS selectors case-insensitively'
|
||||
|
||||
def xpath_class(self, class_selector):
|
||||
"""Translate a class selector."""
|
||||
x = self.xpath(class_selector.selector)
|
||||
if is_non_whitespace(class_selector.class_name):
|
||||
x.add_condition(
|
||||
"%s and contains(concat(' ', normalize-space(%s), ' '), %s)"
|
||||
% ('@class', xpath_lower_case('@class'), self.xpath_literal(
|
||||
' '+class_selector.class_name.lower()+' ')))
|
||||
else:
|
||||
x.add_condition('0')
|
||||
return x
|
||||
|
||||
def xpath_hash(self, id_selector):
|
||||
"""Translate an ID selector."""
|
||||
x = self.xpath(id_selector.selector)
|
||||
return self.xpath_attrib_equals(x, xpath_lower_case('@id'),
|
||||
(id_selector.id.lower()))
|
||||
|
||||
css_to_xpath = NamespacedTranslator().css_to_xpath
|
||||
ci_css_to_xpath = CaseInsensitiveAttributesTranslator().css_to_xpath
|
||||
|
||||
def build_selector(text, case_sensitive=True):
|
||||
func = css_to_xpath if case_sensitive else ci_css_to_xpath
|
||||
try:
|
||||
return etree.XPath(fix_namespace(func(text)), namespaces=XPNSMAP)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
PSEUDO_PAT = r':(first-letter|first-line|link|hover|visited|active|focus|before|after)'
|
||||
|
||||
def is_rule_used(root, selector, log, pseudo_pat, cache):
|
||||
selector = pseudo_pat.sub('', selector)
|
||||
selector = MIN_SPACE_RE.sub(r'\1', selector)
|
||||
try:
|
||||
xp = cache[(True, selector)]
|
||||
except KeyError:
|
||||
xp = cache[(True, selector)] = build_selector(selector)
|
||||
try:
|
||||
if xp(root):
|
||||
return True
|
||||
except Exception:
|
||||
return True
|
||||
|
||||
# See if interpreting class and id selectors case-insensitively gives us
|
||||
# matches. Strictly speaking, class and id selectors should be case
|
||||
# sensitive for XHTML, but we err on the side of caution and not remove
|
||||
# them, since case sensitivity depends on whether the html is rendered in
|
||||
# quirks mode or not.
|
||||
try:
|
||||
xp = cache[(False, selector)]
|
||||
except KeyError:
|
||||
xp = cache[(False, selector)] = build_selector(selector, case_sensitive=False)
|
||||
try:
|
||||
return bool(xp(root))
|
||||
except Exception:
|
||||
return True
|
||||
|
||||
def filter_used_rules(root, rules, log, pseudo_pat, cache):
|
||||
def filter_used_rules(rules, log, select):
|
||||
for rule in rules:
|
||||
used = False
|
||||
for selector in rule.selectorList:
|
||||
text = selector.selectorText
|
||||
if is_rule_used(root, text, log, pseudo_pat, cache):
|
||||
if select.has_matches(selector.selectorText):
|
||||
used = True
|
||||
break
|
||||
if not used:
|
||||
yield rule
|
||||
|
||||
def process_namespaces(sheet):
|
||||
# Find the namespace prefix (if any) for the XHTML namespace, so that we
|
||||
# can preserve it after processing
|
||||
for prefix in sheet.namespaces:
|
||||
if sheet.namespaces[prefix] == XHTML_NS:
|
||||
return prefix
|
||||
|
||||
def preserve_htmlns_prefix(sheet, prefix):
|
||||
if prefix is None:
|
||||
while 'h' in sheet.namespaces:
|
||||
del sheet.namespaces['h']
|
||||
else:
|
||||
sheet.namespaces[prefix] = XHTML_NS
|
||||
|
||||
def get_imported_sheets(name, container, sheets, recursion_level=10, sheet=None):
|
||||
ans = set()
|
||||
sheet = sheet or sheets[name]
|
||||
@ -155,20 +59,15 @@ def remove_unused_css(container, report=None, remove_unused_classes=False):
|
||||
import_map = {name:get_imported_sheets(name, container, sheets) for name in sheets}
|
||||
if remove_unused_classes:
|
||||
class_map = {name:{icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)} for name, sheet in sheets.iteritems()}
|
||||
sheet_namespace = {}
|
||||
for sheet in sheets.itervalues():
|
||||
sheet_namespace[sheet] = process_namespaces(sheet)
|
||||
sheet.namespaces['h'] = XHTML_NS
|
||||
style_rules = {name:tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) for name, sheet in sheets.iteritems()}
|
||||
|
||||
num_of_removed_rules = num_of_removed_classes = 0
|
||||
pseudo_pat = re.compile(PSEUDO_PAT, re.I)
|
||||
cache = {}
|
||||
|
||||
for name, mt in container.mime_map.iteritems():
|
||||
if mt not in OEB_DOCS:
|
||||
continue
|
||||
root = container.parsed(name)
|
||||
select = Select(root, ignore_inappropriate_pseudo_classes=True)
|
||||
used_classes = set()
|
||||
for style in root.xpath('//*[local-name()="style"]'):
|
||||
if style.get('type', 'text/css') == 'text/css' and style.text:
|
||||
@ -177,17 +76,14 @@ def remove_unused_css(container, report=None, remove_unused_classes=False):
|
||||
used_classes |= {icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)}
|
||||
imports = get_imported_sheets(name, container, sheets, sheet=sheet)
|
||||
for imported_sheet in imports:
|
||||
style_rules[imported_sheet] = tuple(filter_used_rules(root, style_rules[imported_sheet], container.log, pseudo_pat, cache))
|
||||
style_rules[imported_sheet] = tuple(filter_used_rules(style_rules[imported_sheet], container.log, select))
|
||||
if remove_unused_classes:
|
||||
used_classes |= class_map[imported_sheet]
|
||||
ns = process_namespaces(sheet)
|
||||
sheet.namespaces['h'] = XHTML_NS
|
||||
rules = tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
|
||||
unused_rules = tuple(filter_used_rules(root, rules, container.log, pseudo_pat, cache))
|
||||
unused_rules = tuple(filter_used_rules(rules, container.log, select))
|
||||
if unused_rules:
|
||||
num_of_removed_rules += len(unused_rules)
|
||||
[sheet.cssRules.remove(r) for r in unused_rules]
|
||||
preserve_htmlns_prefix(sheet, ns)
|
||||
style.text = force_unicode(sheet.cssText, 'utf-8')
|
||||
pretty_script_or_style(container, style)
|
||||
container.dirty(name)
|
||||
@ -196,12 +92,12 @@ def remove_unused_css(container, report=None, remove_unused_classes=False):
|
||||
sname = container.href_to_name(link.get('href'), name)
|
||||
if sname not in sheets:
|
||||
continue
|
||||
style_rules[sname] = tuple(filter_used_rules(root, style_rules[sname], container.log, pseudo_pat, cache))
|
||||
style_rules[sname] = tuple(filter_used_rules(style_rules[sname], container.log, select))
|
||||
if remove_unused_classes:
|
||||
used_classes |= class_map[sname]
|
||||
|
||||
for iname in import_map[sname]:
|
||||
style_rules[iname] = tuple(filter_used_rules(root, style_rules[iname], container.log, pseudo_pat, cache))
|
||||
style_rules[iname] = tuple(filter_used_rules(style_rules[iname], container.log, select))
|
||||
if remove_unused_classes:
|
||||
used_classes |= class_map[iname]
|
||||
|
||||
@ -220,7 +116,6 @@ def remove_unused_css(container, report=None, remove_unused_classes=False):
|
||||
container.dirty(name)
|
||||
|
||||
for name, sheet in sheets.iteritems():
|
||||
preserve_htmlns_prefix(sheet, sheet_namespace[sheet])
|
||||
unused_rules = style_rules[name]
|
||||
if unused_rules:
|
||||
num_of_removed_rules += len(unused_rules)
|
||||
|
@ -6,17 +6,17 @@ from __future__ import (unicode_literals, division, absolute_import,
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import posixpath, os, time, types, re
|
||||
import posixpath, os, time, types
|
||||
from collections import namedtuple, defaultdict, Counter
|
||||
from itertools import chain
|
||||
|
||||
from calibre import prepare_string_for_xml, force_unicode
|
||||
from calibre.ebooks.oeb.base import XPath, xml2text
|
||||
from calibre.ebooks.oeb.polish.container import OEB_DOCS, OEB_STYLES, OEB_FONTS
|
||||
from calibre.ebooks.oeb.polish.css import build_selector, PSEUDO_PAT, MIN_SPACE_RE
|
||||
from calibre.ebooks.oeb.polish.spell import get_all_words
|
||||
from calibre.utils.icu import numeric_sort_key, ord_string, safe_chr
|
||||
from calibre.utils.magick.draw import identify
|
||||
from css_selectors import Select, SelectorError
|
||||
|
||||
File = namedtuple('File', 'name dir basename size category')
|
||||
|
||||
@ -255,8 +255,6 @@ def css_data(container, book_locale, result_data, *args):
|
||||
css_rules(name, parser.parse_stylesheet(force_unicode(style.text, 'utf-8')).rules, style.sourceline - 1))
|
||||
|
||||
rule_map = defaultdict(lambda : defaultdict(list))
|
||||
pseudo_pat = re.compile(PSEUDO_PAT, re.I)
|
||||
cache = {}
|
||||
|
||||
def rules_in_sheet(sheet):
|
||||
for rule in sheet:
|
||||
@ -285,28 +283,12 @@ def css_data(container, book_locale, result_data, *args):
|
||||
return '<%s %s>' % (tag, attribs)
|
||||
ans = tt_cache[elem] = '<%s>' % tag
|
||||
|
||||
def matches_for_selector(selector, root, class_map, rule):
|
||||
selector = pseudo_pat.sub('', selector)
|
||||
selector = MIN_SPACE_RE.sub(r'\1', selector)
|
||||
try:
|
||||
xp = cache[(True, selector)]
|
||||
except KeyError:
|
||||
xp = cache[(True, selector)] = build_selector(selector)
|
||||
|
||||
try:
|
||||
matches = xp(root)
|
||||
except Exception:
|
||||
return ()
|
||||
if not matches:
|
||||
try:
|
||||
xp = cache[(False, selector)]
|
||||
except KeyError:
|
||||
xp = cache[(False, selector)] = build_selector(selector, case_sensitive=False)
|
||||
try:
|
||||
matches = xp(root)
|
||||
except Exception:
|
||||
return ()
|
||||
def matches_for_selector(selector, select, class_map, rule):
|
||||
lsel = selector.lower()
|
||||
try:
|
||||
matches = tuple(select(selector))
|
||||
except SelectorError:
|
||||
return ()
|
||||
for elem in matches:
|
||||
for cls in elem.get('class', '').split():
|
||||
if '.' + cls.lower() in lsel:
|
||||
@ -322,9 +304,10 @@ def css_data(container, book_locale, result_data, *args):
|
||||
for elem in root.xpath('//*[@class]'):
|
||||
for cls in elem.get('class', '').split():
|
||||
cmap[cls][elem] = []
|
||||
select = Select(root, ignore_inappropriate_pseudo_classes=True)
|
||||
for sheet in chain(sheets_for_html(name, root), inline_sheets):
|
||||
for rule in rules_in_sheet(sheet):
|
||||
rule_map[rule][name].extend(matches_for_selector(rule.selector, root, cmap, rule))
|
||||
rule_map[rule][name].extend(matches_for_selector(rule.selector, select, cmap, rule))
|
||||
for cls, elem_map in cmap.iteritems():
|
||||
class_elements = class_map[cls][name]
|
||||
for elem, usage in elem_map.iteritems():
|
||||
|
@ -8,7 +8,6 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import json
|
||||
|
||||
from cssselect import parse
|
||||
from PyQt5.Qt import (
|
||||
QWidget, QTimer, QStackedLayout, QLabel, QScrollArea, QVBoxLayout,
|
||||
QPainter, Qt, QPalette, QRect, QSize, QSizePolicy, pyqtSignal,
|
||||
@ -18,6 +17,7 @@ from calibre.constants import iswindows
|
||||
from calibre.gui2.tweak_book import editors, actions, current_container, tprefs
|
||||
from calibre.gui2.tweak_book.editor.themes import get_theme, theme_color
|
||||
from calibre.gui2.tweak_book.editor.text import default_font_family
|
||||
from css_selectors import parse, SelectorError
|
||||
|
||||
class Heading(QWidget): # {{{
|
||||
|
||||
@ -434,7 +434,7 @@ class LiveCSS(QWidget):
|
||||
if selector is not None:
|
||||
try:
|
||||
specificity = [0] + list(parse(selector)[0].specificity())
|
||||
except (AttributeError, TypeError):
|
||||
except (AttributeError, TypeError, SelectorError):
|
||||
specificity = [0, 0, 0, 0]
|
||||
else: # style attribute
|
||||
specificity = [1, 0, 0, 0]
|
||||
|
Loading…
x
Reference in New Issue
Block a user