mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Get rid of cssselect from Edit Book
This commit is contained in:
parent
0c4e86dcd1
commit
8f6f60bca2
@ -6,122 +6,26 @@ from __future__ import (unicode_literals, division, absolute_import,
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from lxml import etree
|
|
||||||
from cssutils.css import CSSRule
|
from cssutils.css import CSSRule
|
||||||
from cssselect import HTMLTranslator, parse
|
from css_selectors import parse, SelectorSyntaxError
|
||||||
from cssselect.xpath import XPathExpr, is_safe_name
|
|
||||||
from cssselect.parser import SelectorSyntaxError
|
|
||||||
|
|
||||||
from calibre import force_unicode
|
from calibre import force_unicode
|
||||||
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XPNSMAP, XHTML_NS
|
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS
|
||||||
from calibre.ebooks.oeb.normalize_css import normalize_filter_css, normalizers
|
from calibre.ebooks.oeb.normalize_css import normalize_filter_css, normalizers
|
||||||
from calibre.ebooks.oeb.stylizer import MIN_SPACE_RE, is_non_whitespace, xpath_lower_case, fix_namespace
|
|
||||||
from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style
|
from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style
|
||||||
|
from css_selectors import Select
|
||||||
|
|
||||||
class NamespacedTranslator(HTMLTranslator):
|
|
||||||
|
|
||||||
def xpath_element(self, selector):
|
def filter_used_rules(rules, log, select):
|
||||||
element = selector.element
|
|
||||||
if not element:
|
|
||||||
element = '*'
|
|
||||||
safe = True
|
|
||||||
else:
|
|
||||||
safe = is_safe_name(element)
|
|
||||||
if safe:
|
|
||||||
# We use the h: prefix for the XHTML namespace
|
|
||||||
element = 'h:%s' % element.lower()
|
|
||||||
xpath = XPathExpr(element=element)
|
|
||||||
if not safe:
|
|
||||||
xpath.add_name_test()
|
|
||||||
return xpath
|
|
||||||
|
|
||||||
class CaseInsensitiveAttributesTranslator(NamespacedTranslator):
|
|
||||||
'Treat class and id CSS selectors case-insensitively'
|
|
||||||
|
|
||||||
def xpath_class(self, class_selector):
|
|
||||||
"""Translate a class selector."""
|
|
||||||
x = self.xpath(class_selector.selector)
|
|
||||||
if is_non_whitespace(class_selector.class_name):
|
|
||||||
x.add_condition(
|
|
||||||
"%s and contains(concat(' ', normalize-space(%s), ' '), %s)"
|
|
||||||
% ('@class', xpath_lower_case('@class'), self.xpath_literal(
|
|
||||||
' '+class_selector.class_name.lower()+' ')))
|
|
||||||
else:
|
|
||||||
x.add_condition('0')
|
|
||||||
return x
|
|
||||||
|
|
||||||
def xpath_hash(self, id_selector):
|
|
||||||
"""Translate an ID selector."""
|
|
||||||
x = self.xpath(id_selector.selector)
|
|
||||||
return self.xpath_attrib_equals(x, xpath_lower_case('@id'),
|
|
||||||
(id_selector.id.lower()))
|
|
||||||
|
|
||||||
css_to_xpath = NamespacedTranslator().css_to_xpath
|
|
||||||
ci_css_to_xpath = CaseInsensitiveAttributesTranslator().css_to_xpath
|
|
||||||
|
|
||||||
def build_selector(text, case_sensitive=True):
|
|
||||||
func = css_to_xpath if case_sensitive else ci_css_to_xpath
|
|
||||||
try:
|
|
||||||
return etree.XPath(fix_namespace(func(text)), namespaces=XPNSMAP)
|
|
||||||
except Exception:
|
|
||||||
return None
|
|
||||||
|
|
||||||
PSEUDO_PAT = r':(first-letter|first-line|link|hover|visited|active|focus|before|after)'
|
|
||||||
|
|
||||||
def is_rule_used(root, selector, log, pseudo_pat, cache):
|
|
||||||
selector = pseudo_pat.sub('', selector)
|
|
||||||
selector = MIN_SPACE_RE.sub(r'\1', selector)
|
|
||||||
try:
|
|
||||||
xp = cache[(True, selector)]
|
|
||||||
except KeyError:
|
|
||||||
xp = cache[(True, selector)] = build_selector(selector)
|
|
||||||
try:
|
|
||||||
if xp(root):
|
|
||||||
return True
|
|
||||||
except Exception:
|
|
||||||
return True
|
|
||||||
|
|
||||||
# See if interpreting class and id selectors case-insensitively gives us
|
|
||||||
# matches. Strictly speaking, class and id selectors should be case
|
|
||||||
# sensitive for XHTML, but we err on the side of caution and not remove
|
|
||||||
# them, since case sensitivity depends on whether the html is rendered in
|
|
||||||
# quirks mode or not.
|
|
||||||
try:
|
|
||||||
xp = cache[(False, selector)]
|
|
||||||
except KeyError:
|
|
||||||
xp = cache[(False, selector)] = build_selector(selector, case_sensitive=False)
|
|
||||||
try:
|
|
||||||
return bool(xp(root))
|
|
||||||
except Exception:
|
|
||||||
return True
|
|
||||||
|
|
||||||
def filter_used_rules(root, rules, log, pseudo_pat, cache):
|
|
||||||
for rule in rules:
|
for rule in rules:
|
||||||
used = False
|
used = False
|
||||||
for selector in rule.selectorList:
|
for selector in rule.selectorList:
|
||||||
text = selector.selectorText
|
if select.has_matches(selector.selectorText):
|
||||||
if is_rule_used(root, text, log, pseudo_pat, cache):
|
|
||||||
used = True
|
used = True
|
||||||
break
|
break
|
||||||
if not used:
|
if not used:
|
||||||
yield rule
|
yield rule
|
||||||
|
|
||||||
def process_namespaces(sheet):
|
|
||||||
# Find the namespace prefix (if any) for the XHTML namespace, so that we
|
|
||||||
# can preserve it after processing
|
|
||||||
for prefix in sheet.namespaces:
|
|
||||||
if sheet.namespaces[prefix] == XHTML_NS:
|
|
||||||
return prefix
|
|
||||||
|
|
||||||
def preserve_htmlns_prefix(sheet, prefix):
|
|
||||||
if prefix is None:
|
|
||||||
while 'h' in sheet.namespaces:
|
|
||||||
del sheet.namespaces['h']
|
|
||||||
else:
|
|
||||||
sheet.namespaces[prefix] = XHTML_NS
|
|
||||||
|
|
||||||
def get_imported_sheets(name, container, sheets, recursion_level=10, sheet=None):
|
def get_imported_sheets(name, container, sheets, recursion_level=10, sheet=None):
|
||||||
ans = set()
|
ans = set()
|
||||||
sheet = sheet or sheets[name]
|
sheet = sheet or sheets[name]
|
||||||
@ -155,20 +59,15 @@ def remove_unused_css(container, report=None, remove_unused_classes=False):
|
|||||||
import_map = {name:get_imported_sheets(name, container, sheets) for name in sheets}
|
import_map = {name:get_imported_sheets(name, container, sheets) for name in sheets}
|
||||||
if remove_unused_classes:
|
if remove_unused_classes:
|
||||||
class_map = {name:{icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)} for name, sheet in sheets.iteritems()}
|
class_map = {name:{icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)} for name, sheet in sheets.iteritems()}
|
||||||
sheet_namespace = {}
|
|
||||||
for sheet in sheets.itervalues():
|
|
||||||
sheet_namespace[sheet] = process_namespaces(sheet)
|
|
||||||
sheet.namespaces['h'] = XHTML_NS
|
|
||||||
style_rules = {name:tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) for name, sheet in sheets.iteritems()}
|
style_rules = {name:tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) for name, sheet in sheets.iteritems()}
|
||||||
|
|
||||||
num_of_removed_rules = num_of_removed_classes = 0
|
num_of_removed_rules = num_of_removed_classes = 0
|
||||||
pseudo_pat = re.compile(PSEUDO_PAT, re.I)
|
|
||||||
cache = {}
|
|
||||||
|
|
||||||
for name, mt in container.mime_map.iteritems():
|
for name, mt in container.mime_map.iteritems():
|
||||||
if mt not in OEB_DOCS:
|
if mt not in OEB_DOCS:
|
||||||
continue
|
continue
|
||||||
root = container.parsed(name)
|
root = container.parsed(name)
|
||||||
|
select = Select(root, ignore_inappropriate_pseudo_classes=True)
|
||||||
used_classes = set()
|
used_classes = set()
|
||||||
for style in root.xpath('//*[local-name()="style"]'):
|
for style in root.xpath('//*[local-name()="style"]'):
|
||||||
if style.get('type', 'text/css') == 'text/css' and style.text:
|
if style.get('type', 'text/css') == 'text/css' and style.text:
|
||||||
@ -177,17 +76,14 @@ def remove_unused_css(container, report=None, remove_unused_classes=False):
|
|||||||
used_classes |= {icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)}
|
used_classes |= {icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)}
|
||||||
imports = get_imported_sheets(name, container, sheets, sheet=sheet)
|
imports = get_imported_sheets(name, container, sheets, sheet=sheet)
|
||||||
for imported_sheet in imports:
|
for imported_sheet in imports:
|
||||||
style_rules[imported_sheet] = tuple(filter_used_rules(root, style_rules[imported_sheet], container.log, pseudo_pat, cache))
|
style_rules[imported_sheet] = tuple(filter_used_rules(style_rules[imported_sheet], container.log, select))
|
||||||
if remove_unused_classes:
|
if remove_unused_classes:
|
||||||
used_classes |= class_map[imported_sheet]
|
used_classes |= class_map[imported_sheet]
|
||||||
ns = process_namespaces(sheet)
|
|
||||||
sheet.namespaces['h'] = XHTML_NS
|
|
||||||
rules = tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
|
rules = tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
|
||||||
unused_rules = tuple(filter_used_rules(root, rules, container.log, pseudo_pat, cache))
|
unused_rules = tuple(filter_used_rules(rules, container.log, select))
|
||||||
if unused_rules:
|
if unused_rules:
|
||||||
num_of_removed_rules += len(unused_rules)
|
num_of_removed_rules += len(unused_rules)
|
||||||
[sheet.cssRules.remove(r) for r in unused_rules]
|
[sheet.cssRules.remove(r) for r in unused_rules]
|
||||||
preserve_htmlns_prefix(sheet, ns)
|
|
||||||
style.text = force_unicode(sheet.cssText, 'utf-8')
|
style.text = force_unicode(sheet.cssText, 'utf-8')
|
||||||
pretty_script_or_style(container, style)
|
pretty_script_or_style(container, style)
|
||||||
container.dirty(name)
|
container.dirty(name)
|
||||||
@ -196,12 +92,12 @@ def remove_unused_css(container, report=None, remove_unused_classes=False):
|
|||||||
sname = container.href_to_name(link.get('href'), name)
|
sname = container.href_to_name(link.get('href'), name)
|
||||||
if sname not in sheets:
|
if sname not in sheets:
|
||||||
continue
|
continue
|
||||||
style_rules[sname] = tuple(filter_used_rules(root, style_rules[sname], container.log, pseudo_pat, cache))
|
style_rules[sname] = tuple(filter_used_rules(style_rules[sname], container.log, select))
|
||||||
if remove_unused_classes:
|
if remove_unused_classes:
|
||||||
used_classes |= class_map[sname]
|
used_classes |= class_map[sname]
|
||||||
|
|
||||||
for iname in import_map[sname]:
|
for iname in import_map[sname]:
|
||||||
style_rules[iname] = tuple(filter_used_rules(root, style_rules[iname], container.log, pseudo_pat, cache))
|
style_rules[iname] = tuple(filter_used_rules(style_rules[iname], container.log, select))
|
||||||
if remove_unused_classes:
|
if remove_unused_classes:
|
||||||
used_classes |= class_map[iname]
|
used_classes |= class_map[iname]
|
||||||
|
|
||||||
@ -220,7 +116,6 @@ def remove_unused_css(container, report=None, remove_unused_classes=False):
|
|||||||
container.dirty(name)
|
container.dirty(name)
|
||||||
|
|
||||||
for name, sheet in sheets.iteritems():
|
for name, sheet in sheets.iteritems():
|
||||||
preserve_htmlns_prefix(sheet, sheet_namespace[sheet])
|
|
||||||
unused_rules = style_rules[name]
|
unused_rules = style_rules[name]
|
||||||
if unused_rules:
|
if unused_rules:
|
||||||
num_of_removed_rules += len(unused_rules)
|
num_of_removed_rules += len(unused_rules)
|
||||||
|
@ -6,17 +6,17 @@ from __future__ import (unicode_literals, division, absolute_import,
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
import posixpath, os, time, types, re
|
import posixpath, os, time, types
|
||||||
from collections import namedtuple, defaultdict, Counter
|
from collections import namedtuple, defaultdict, Counter
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
|
|
||||||
from calibre import prepare_string_for_xml, force_unicode
|
from calibre import prepare_string_for_xml, force_unicode
|
||||||
from calibre.ebooks.oeb.base import XPath, xml2text
|
from calibre.ebooks.oeb.base import XPath, xml2text
|
||||||
from calibre.ebooks.oeb.polish.container import OEB_DOCS, OEB_STYLES, OEB_FONTS
|
from calibre.ebooks.oeb.polish.container import OEB_DOCS, OEB_STYLES, OEB_FONTS
|
||||||
from calibre.ebooks.oeb.polish.css import build_selector, PSEUDO_PAT, MIN_SPACE_RE
|
|
||||||
from calibre.ebooks.oeb.polish.spell import get_all_words
|
from calibre.ebooks.oeb.polish.spell import get_all_words
|
||||||
from calibre.utils.icu import numeric_sort_key, ord_string, safe_chr
|
from calibre.utils.icu import numeric_sort_key, ord_string, safe_chr
|
||||||
from calibre.utils.magick.draw import identify
|
from calibre.utils.magick.draw import identify
|
||||||
|
from css_selectors import Select, SelectorError
|
||||||
|
|
||||||
File = namedtuple('File', 'name dir basename size category')
|
File = namedtuple('File', 'name dir basename size category')
|
||||||
|
|
||||||
@ -255,8 +255,6 @@ def css_data(container, book_locale, result_data, *args):
|
|||||||
css_rules(name, parser.parse_stylesheet(force_unicode(style.text, 'utf-8')).rules, style.sourceline - 1))
|
css_rules(name, parser.parse_stylesheet(force_unicode(style.text, 'utf-8')).rules, style.sourceline - 1))
|
||||||
|
|
||||||
rule_map = defaultdict(lambda : defaultdict(list))
|
rule_map = defaultdict(lambda : defaultdict(list))
|
||||||
pseudo_pat = re.compile(PSEUDO_PAT, re.I)
|
|
||||||
cache = {}
|
|
||||||
|
|
||||||
def rules_in_sheet(sheet):
|
def rules_in_sheet(sheet):
|
||||||
for rule in sheet:
|
for rule in sheet:
|
||||||
@ -285,28 +283,12 @@ def css_data(container, book_locale, result_data, *args):
|
|||||||
return '<%s %s>' % (tag, attribs)
|
return '<%s %s>' % (tag, attribs)
|
||||||
ans = tt_cache[elem] = '<%s>' % tag
|
ans = tt_cache[elem] = '<%s>' % tag
|
||||||
|
|
||||||
def matches_for_selector(selector, root, class_map, rule):
|
def matches_for_selector(selector, select, class_map, rule):
|
||||||
selector = pseudo_pat.sub('', selector)
|
|
||||||
selector = MIN_SPACE_RE.sub(r'\1', selector)
|
|
||||||
try:
|
|
||||||
xp = cache[(True, selector)]
|
|
||||||
except KeyError:
|
|
||||||
xp = cache[(True, selector)] = build_selector(selector)
|
|
||||||
|
|
||||||
try:
|
|
||||||
matches = xp(root)
|
|
||||||
except Exception:
|
|
||||||
return ()
|
|
||||||
if not matches:
|
|
||||||
try:
|
|
||||||
xp = cache[(False, selector)]
|
|
||||||
except KeyError:
|
|
||||||
xp = cache[(False, selector)] = build_selector(selector, case_sensitive=False)
|
|
||||||
try:
|
|
||||||
matches = xp(root)
|
|
||||||
except Exception:
|
|
||||||
return ()
|
|
||||||
lsel = selector.lower()
|
lsel = selector.lower()
|
||||||
|
try:
|
||||||
|
matches = tuple(select(selector))
|
||||||
|
except SelectorError:
|
||||||
|
return ()
|
||||||
for elem in matches:
|
for elem in matches:
|
||||||
for cls in elem.get('class', '').split():
|
for cls in elem.get('class', '').split():
|
||||||
if '.' + cls.lower() in lsel:
|
if '.' + cls.lower() in lsel:
|
||||||
@ -322,9 +304,10 @@ def css_data(container, book_locale, result_data, *args):
|
|||||||
for elem in root.xpath('//*[@class]'):
|
for elem in root.xpath('//*[@class]'):
|
||||||
for cls in elem.get('class', '').split():
|
for cls in elem.get('class', '').split():
|
||||||
cmap[cls][elem] = []
|
cmap[cls][elem] = []
|
||||||
|
select = Select(root, ignore_inappropriate_pseudo_classes=True)
|
||||||
for sheet in chain(sheets_for_html(name, root), inline_sheets):
|
for sheet in chain(sheets_for_html(name, root), inline_sheets):
|
||||||
for rule in rules_in_sheet(sheet):
|
for rule in rules_in_sheet(sheet):
|
||||||
rule_map[rule][name].extend(matches_for_selector(rule.selector, root, cmap, rule))
|
rule_map[rule][name].extend(matches_for_selector(rule.selector, select, cmap, rule))
|
||||||
for cls, elem_map in cmap.iteritems():
|
for cls, elem_map in cmap.iteritems():
|
||||||
class_elements = class_map[cls][name]
|
class_elements = class_map[cls][name]
|
||||||
for elem, usage in elem_map.iteritems():
|
for elem, usage in elem_map.iteritems():
|
||||||
|
@ -8,7 +8,6 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from cssselect import parse
|
|
||||||
from PyQt5.Qt import (
|
from PyQt5.Qt import (
|
||||||
QWidget, QTimer, QStackedLayout, QLabel, QScrollArea, QVBoxLayout,
|
QWidget, QTimer, QStackedLayout, QLabel, QScrollArea, QVBoxLayout,
|
||||||
QPainter, Qt, QPalette, QRect, QSize, QSizePolicy, pyqtSignal,
|
QPainter, Qt, QPalette, QRect, QSize, QSizePolicy, pyqtSignal,
|
||||||
@ -18,6 +17,7 @@ from calibre.constants import iswindows
|
|||||||
from calibre.gui2.tweak_book import editors, actions, current_container, tprefs
|
from calibre.gui2.tweak_book import editors, actions, current_container, tprefs
|
||||||
from calibre.gui2.tweak_book.editor.themes import get_theme, theme_color
|
from calibre.gui2.tweak_book.editor.themes import get_theme, theme_color
|
||||||
from calibre.gui2.tweak_book.editor.text import default_font_family
|
from calibre.gui2.tweak_book.editor.text import default_font_family
|
||||||
|
from css_selectors import parse, SelectorError
|
||||||
|
|
||||||
class Heading(QWidget): # {{{
|
class Heading(QWidget): # {{{
|
||||||
|
|
||||||
@ -434,7 +434,7 @@ class LiveCSS(QWidget):
|
|||||||
if selector is not None:
|
if selector is not None:
|
||||||
try:
|
try:
|
||||||
specificity = [0] + list(parse(selector)[0].specificity())
|
specificity = [0] + list(parse(selector)[0].specificity())
|
||||||
except (AttributeError, TypeError):
|
except (AttributeError, TypeError, SelectorError):
|
||||||
specificity = [0, 0, 0, 0]
|
specificity = [0, 0, 0, 0]
|
||||||
else: # style attribute
|
else: # style attribute
|
||||||
specificity = [1, 0, 0, 0]
|
specificity = [1, 0, 0, 0]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user