Switch to using lxml.cssselect instead of my home-rolled selector.

This commit is contained in:
Marshall T. Vandegrift 2008-12-19 08:57:52 -05:00
parent f6c1a684c3
commit 30c2325913
2 changed files with 40 additions and 81 deletions

View File

@ -392,22 +392,6 @@ spacer {
float: none ! important;
}
/* focusable content: anything w/ tabindex >=0 is focusable */
abbr:focus, acronym:focus, address:focus, applet:focus, b:focus,
base:focus, big:focus, blockquote:focus, br:focus, canvas:focus, caption:focus,
center:focus, cite:focus, code:focus, col:focus, colgroup:focus, dd:focus,
del:focus, dfn:focus, dir:focus, div:focus, dl:focus, dt:focus, em:focus,
fieldset:focus, font:focus, form:focus, h1:focus, h2:focus, h3:focus, h4:focus,
h5:focus, h6:focus, hr:focus, i:focus, img:focus, ins:focus,
kbd:focus, label:focus, legend:focus, li:focus, link:focus, menu:focus,
object:focus, ol:focus, p:focus, pre:focus, q:focus, s:focus, samp:focus,
small:focus, span:focus, strike:focus, strong:focus, sub:focus, sup:focus,
table:focus, tbody:focus, td:focus, tfoot:focus, th:focus, thead:focus,
tr:focus, tt:focus, u:focus, ul:focus, var:focus {
/* Don't specify the outline-color, we should always use initial value. */
outline: 1px dotted;
}
/* hidden elements */
area, base, basefont, head, meta, script, style, title,
noembed, param, link {

View File

@ -21,12 +21,14 @@ import cssutils
from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
CSSValueList, cssproperties
from lxml import etree
from lxml.cssselect import css_to_xpath, ExpressionError
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
from calibre.ebooks.oeb.base import barename, urlnormalize
from calibre.resources import html_css
HTML_CSS_STYLESHEET = cssutils.parseString(html_css)
XHTML_CSS_NAMESPACE = "@namespace url(%s);\n" % XHTML_NS
HTML_CSS_STYLESHEET = cssutils.parseString(html_css)
HTML_CSS_STYLESHEET.namespaces['h'] = XHTML_NS
INHERITED = set(['azimuth', 'border-collapse', 'border-spacing',
'caption-side', 'color', 'cursor', 'direction', 'elevation',
@ -97,6 +99,18 @@ XPNSMAP = {'h': XHTML_NS,}
def xpath(elem, expr):
return elem.xpath(expr, namespaces=XPNSMAP)
class CSSSelector(etree.XPath):
def __init__(self, css, namespaces=XPNSMAP):
path = css_to_xpath(css)
etree.XPath.__init__(self, path, namespaces=namespaces)
self.css = css
def __repr__(self):
return '<%s %s for %r>' % (
self.__class__.__name__,
hex(abs(id(self)))[2:],
self.css)
class Page(object):
def __init__(self, width, height, dpi, fbase, fsizes):
@ -132,6 +146,7 @@ class Stylizer(object):
and elem.get('type', CSS_MIME) in OEB_STYLES:
text = XHTML_CSS_NAMESPACE + elem.text
stylesheet = parser.parseString(text, href=cssname)
stylesheet.namespaces['h'] = XHTML_NS
stylesheets.append(stylesheet)
elif elem.tag == XHTML('link') and elem.get('href') \
and elem.get('rel', 'stylesheet') == 'stylesheet' \
@ -145,6 +160,7 @@ class Stylizer(object):
data = XHTML_CSS_NAMESPACE
data += oeb.manifest.hrefs[path].data
stylesheet = parser.parseString(data, href=path)
stylesheet.namespaces['h'] = XHTML_NS
self.STYLESHEETS[path] = stylesheet
stylesheets.append(stylesheet)
rules = []
@ -160,6 +176,16 @@ class Stylizer(object):
rules.sort()
self.rules = rules
self._styles = {}
for _, _, cssdict, text, _ in rules:
try:
selector = CSSSelector(text)
except ExpressionError:
continue
for elem in selector(tree):
self.style(elem)._update_cssdict(cssdict)
for elem in tree.xpath('//*[@style]'):
self.style(elem)._apply_style_tag()
def flatten_rule(self, rule, href, index):
results = []
@ -236,8 +262,9 @@ class Stylizer(object):
return style
def style(self, element):
try: return self._styles[element]
except: pass
try:
return self._styles[element]
except KeyError:
return Style(element, self)
def stylesheet(self, name, font_scale=None):
@ -259,69 +286,17 @@ class Style(object):
self._element = element
self._page = stylizer.page
self._stylizer = stylizer
self._style = self._assemble_style(element, stylizer)
self._style = {}
stylizer._styles[element] = self
def _assemble_style(self, element, stylizer):
result = {}
rules = stylizer.rules
for _, selector, style, _, _ in rules:
if self._selects_element(element, selector):
result.update(style)
try:
style = CSSStyleDeclaration(element.attrib['style'])
result.update(stylizer.flatten_style(style))
except KeyError:
pass
return result
def _update_cssdict(self, cssdict):
self._style.update(cssdict)
def _selects_element(self, element, selector):
def _selects_element(element, items, index):
if index == -1:
return True
item = items[index]
if item.type == 'universal':
pass
elif item.type == 'type-selector':
name1 = ("{%s}%s" % item.value).lower()
name2 = element.tag.lower()
if name1 != name2:
return False
elif item.type == 'id':
name1 = item.value[1:]
name2 = element.get('id', '')
if name1 != name2:
return False
elif item.type == 'class':
name = item.value[1:].lower()
classes = element.get('class', '').lower().split()
if name not in classes:
return False
elif item.type == 'child':
parent = element.getparent()
if parent is None:
return False
element = parent
elif item.type == 'descendant':
element = element.getparent()
while element is not None:
if _selects_element(element, items, index - 1):
return True
element = element.getparent()
return False
elif item.type == 'pseudo-class':
if item.value == ':first-child':
e = element.getprevious()
if e is not None:
return False
else:
return False
elif item.type == 'pseudo-element':
return False
else:
return False
return _selects_element(element, items, index - 1)
return _selects_element(element, selector, len(selector) - 1)
def _apply_style_tag(self):
attrib = self._element.attrib
if 'style' in attrib:
style = CSSStyleDeclaration(attrib['style'])
self._style.update(self._stylizer.flatten_style(style))
def _has_parent(self):
parent = self._element.getparent()