Finish up implementation of css_selectors

This commit is contained in:
Kovid Goyal 2015-02-20 16:29:07 +05:30
parent 085dd58217
commit 2d90b5695f
3 changed files with 630 additions and 23 deletions

View File

@ -14,7 +14,7 @@ import re
import operator import operator
import string import string
from css_selectors.errors import SelectorSyntaxError from css_selectors.errors import SelectorSyntaxError, ExpressionError
if sys.version_info[0] < 3: if sys.version_info[0] < 3:
_unicode = unicode _unicode = unicode
@ -159,6 +159,7 @@ class Function(object):
self.selector = selector self.selector = selector
self.name = ascii_lower(name) self.name = ascii_lower(name)
self.arguments = arguments self.arguments = arguments
self._parsed_arguments = None
def __repr__(self): def __repr__(self):
return '%s[%r:%s(%s)]' % ( return '%s[%r:%s(%s)]' % (
@ -168,6 +169,19 @@ class Function(object):
def argument_types(self):
    """Return the ``type`` of every argument token, in order."""
    kinds = []
    for tok in self.arguments:
        kinds.append(tok.type)
    return kinds
@property
def parsed_arguments(self):
    """The arguments of this function as parsed by ``parse_series``.

    The result is computed on first access and cached in
    ``_parsed_arguments``.

    Raises ExpressionError if ``parse_series`` rejects the arguments with a
    ValueError.
    """
    if self._parsed_arguments is None:
        try:
            self._parsed_arguments = parse_series(self.arguments)
        except ValueError:
            # Wrap in a 1-tuple so the %-formatting stays correct even if
            # arguments happens to be a tuple rather than a list.
            raise ExpressionError("Invalid series: '%r'" % (self.arguments,))
    return self._parsed_arguments
def parse_arguments(self):
    """Mark the arguments as parsed by setting ``arguments_parsed`` to True."""
    # arguments_parsed is not initialised by the constructor lines visible
    # here, so read it defensively instead of raising AttributeError on the
    # first call.
    if not getattr(self, 'arguments_parsed', False):
        self.arguments_parsed = True
def specificity(self): def specificity(self):
a, b, c = self.selector.specificity() a, b, c = self.selector.specificity()
b += 1 b += 1

View File

@ -9,6 +9,7 @@ __copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
import re, itertools import re, itertools
from collections import OrderedDict, defaultdict from collections import OrderedDict, defaultdict
from functools import wraps from functools import wraps
from itertools import chain
from lxml import etree from lxml import etree
@ -90,13 +91,16 @@ class Select(object):
Tags are returned in document order. Note that attribute and tag names are Tags are returned in document order. Note that attribute and tag names are
matched case-insensitively. Also namespaces are ignored (this is for matched case-insensitively. Also namespaces are ignored (this is for
performance of the common case). performance of the common case). The UI related selectors are not
implemented, such as :enabled, :disabled, :checked, :hover, etc. Similarly,
the non-element related selectors such as ::first-line, ::first-letter,
::before, etc. are not implemented.
WARNING: This class uses internal caches. You *must not* make any changes WARNING: This class uses internal caches. You *must not* make any changes
to the lxml tree. If you do make some changes, either create a new Select to the lxml tree. If you do make some changes, either create a new Select
object or call :meth:`invalidate_caches`. object or call :meth:`invalidate_caches`.
This class can be easily sub-classes to work with tree implementations This class can be easily sub-classed to work with tree implementations
other than lxml. Simply override the methods in the ``Tree Integration`` other than lxml. Simply override the methods in the ``Tree Integration``
block. block.
@ -135,6 +139,11 @@ class Select(object):
self._attrib_map = None self._attrib_map = None
self._attrib_space_map = None self._attrib_space_map = None
self._lang_map = None self._lang_map = None
self.map_tag_name = ascii_lower
if '{' in self.root.tag:
def map_tag_name(x):
return ascii_lower(x.rpartition('}')[2])
self.map_tag_name = map_tag_name
def __call__(self, selector): def __call__(self, selector):
'Return an iterator over all matching tags, in document order.' 'Return an iterator over all matching tags, in document order.'
@ -159,13 +168,8 @@ class Select(object):
def element_map(self):
    """Map each mapped tag name to the OrderedSet of tags carrying it.

    Built on first use from ``self.itertag()`` and cached in
    ``self._element_map``.
    """
    cached = self._element_map
    if cached is not None:
        return cached
    self._element_map = by_name = defaultdict(OrderedSet)
    for tag in self.itertag():
        by_name[self.map_tag_name(tag.tag)].add(tag)
    return by_name
@property @property
@ -251,6 +255,38 @@ class Select(object):
def iterclasstags(self):
    """Return the result of evaluating ``//*[@class]`` against the root."""
    find_with_class = get_compiled_xpath('//*[@class]')
    return find_with_class(self.root)
def sibling_count(self, child, before=True, same_type=False):
    """Return the number of element siblings before or after child.

    :param child: The element whose siblings are counted.
    :param before: Count preceding siblings if True, following ones otherwise.
    :param same_type: Only count siblings whose mapped tag name equals that
        of child.

    Raises ValueError if child has no parent.
    """
    parent = child.getparent()
    if parent is None:
        raise ValueError('Child has no parent')
    # The '*' filter restricts iteration to elements. parent.index() and
    # len(parent) also count comments and processing instructions, which
    # must not influence structural pseudo-classes.
    if same_type:
        siblings = OrderedSet(child.itersiblings('*', preceding=before))
        return len(self.element_map[self.map_tag_name(child.tag)] & siblings)
    return sum(1 for _ in child.itersiblings('*', preceding=before))
def all_sibling_count(self, child, same_type=False):
    """Return the number of element siblings of child.

    :param child: The element whose siblings are counted.
    :param same_type: Only count siblings whose mapped tag name equals that
        of child.

    Raises ValueError if child has no parent.
    """
    parent = child.getparent()
    if parent is None:
        raise ValueError('Child has no parent')
    # The '*' filter restricts iteration to elements; len(parent) would also
    # count comments and processing instructions.
    siblings = chain(child.itersiblings('*', preceding=False), child.itersiblings('*', preceding=True))
    if same_type:
        return len(self.element_map[self.map_tag_name(child.tag)] & OrderedSet(siblings))
    return sum(1 for _ in siblings)
def is_empty(self, elem):
    """Return True if elem has no text, no element children and no child
    node (such as a comment or processing instruction) with tail text."""
    if elem.text:
        return False
    # Comment/PI children carry following text in their tail
    if any(child.tail for child in elem):
        return False
    # Stop at the first element child instead of materializing all of them
    return not any(True for _ in elem.iterchildren('*'))
# }}} # }}}
# Combinators {{{ # Combinators {{{
@ -324,6 +360,13 @@ def select_class(cache, selector):
if elem in items: if elem in items:
yield elem yield elem
def select_negation(cache, selector):
    """Implement :not(): yield what the main selector matches, minus what
    the sub-selector matches."""
    rejected = frozenset(cache.iterparsedselector(selector.subselector))
    for candidate in cache.iterparsedselector(selector.selector):
        if candidate in rejected:
            continue
        yield candidate
# Attribute selectors {{{ # Attribute selectors {{{
def select_attrib(cache, selector): def select_attrib(cache, selector):
@ -381,17 +424,24 @@ def select_substringmatch(cache, attrib, value):
def select_function(cache, function):
    """Select with a functional pseudo-class."""
    key = function.name.replace('-', '_')
    try:
        handler = cache.dispatch_map[key]
    except KeyError:
        raise ExpressionError(
            "The pseudo-class :%s() is unknown" % function.name)
    if key != 'lang':
        # Every other handler is a per-element predicate
        for candidate in cache.iterparsedselector(function.selector):
            if handler(cache, function, candidate):
                yield candidate
        return
    # The lang handler yields the matching elements itself
    matched = frozenset(handler(cache, function))
    for candidate in cache.iterparsedselector(function.selector):
        if candidate in matched:
            yield candidate
def select_lang(cache, function): def select_lang(cache, function):
' Implement :lang() '
if function.argument_types() not in (['STRING'], ['IDENT']): if function.argument_types() not in (['STRING'], ['IDENT']):
raise ExpressionError("Expected a single string or ident for :lang(), got %r" % function.arguments) raise ExpressionError("Expected a single string or ident for :lang(), got %r" % function.arguments)
lang = function.arguments[0].value lang = function.arguments[0].value
@ -403,12 +453,118 @@ def select_lang(cache, function):
for elem in elem_set: for elem in elem_set:
yield elem yield elem
def select_nth_child(cache, function, elem):
    """Implement :nth-child().

    Return True if the 1-based position of elem among its siblings equals
    ``a*n + b`` for some integer ``n >= 0``, where ``(a, b)`` are the parsed
    arguments of function. Elements without a parent never match.
    """
    a, b = function.parsed_arguments
    try:
        num = cache.sibling_count(elem) + 1
    except ValueError:
        return False
    if a == 0:
        return num == b
    # Exact integer arithmetic: does not depend on true division being in
    # effect and cannot suffer float rounding.
    n, remainder = divmod(num - b, a)
    return remainder == 0 and n >= 0
def select_nth_last_child(cache, function, elem):
    """Implement :nth-last-child().

    Return True if the 1-based position of elem counted from its last
    sibling equals ``a*n + b`` for some integer ``n >= 0``, where ``(a, b)``
    are the parsed arguments of function. Elements without a parent never
    match.
    """
    a, b = function.parsed_arguments
    try:
        num = cache.sibling_count(elem, before=False) + 1
    except ValueError:
        return False
    if a == 0:
        return num == b
    # Exact integer arithmetic: does not depend on true division being in
    # effect and cannot suffer float rounding.
    n, remainder = divmod(num - b, a)
    return remainder == 0 and n >= 0
def select_nth_of_type(cache, function, elem):
    """Implement :nth-of-type().

    Return True if the 1-based position of elem among its siblings of the
    same type equals ``a*n + b`` for some integer ``n >= 0``, where
    ``(a, b)`` are the parsed arguments of function. Elements without a
    parent never match.
    """
    a, b = function.parsed_arguments
    try:
        num = cache.sibling_count(elem, same_type=True) + 1
    except ValueError:
        return False
    if a == 0:
        return num == b
    # Exact integer arithmetic: does not depend on true division being in
    # effect and cannot suffer float rounding.
    n, remainder = divmod(num - b, a)
    return remainder == 0 and n >= 0
def select_nth_last_of_type(cache, function, elem):
    """Implement :nth-last-of-type().

    Return True if the 1-based position of elem among its siblings of the
    same type, counted from the last one, equals ``a*n + b`` for some
    integer ``n >= 0``, where ``(a, b)`` are the parsed arguments of
    function. Elements without a parent never match.
    """
    a, b = function.parsed_arguments
    try:
        num = cache.sibling_count(elem, before=False, same_type=True) + 1
    except ValueError:
        return False
    if a == 0:
        return num == b
    # Exact integer arithmetic: does not depend on true division being in
    # effect and cannot suffer float rounding.
    n, remainder = divmod(num - b, a)
    return remainder == 0 and n >= 0
# }}}
# Pseudo elements {{{
def select_pseudo(cache, pseudo):
    """Select with a non-functional pseudo-class such as :first-child.

    Yields the items matched by ``pseudo.selector`` for which the handler
    found in ``cache.dispatch_map`` returns a true value. ``:root`` is
    handled directly by comparing against ``cache.root``.

    Raises ExpressionError if there is no handler for the pseudo-class.
    """
    if pseudo.ident == 'root':
        # The root only matches if the rest of the compound selector matches
        # it as well, so that e.g. li:root does not select an <html> root.
        root = cache.root
        for item in cache.iterparsedselector(pseudo.selector):
            if item is root:
                yield item
                break
        return
    # NOTE(review): dispatch_map appears to also hold handlers with other
    # call signatures (e.g. the functional pseudo-classes); those are not
    # filtered out here -- confirm whether that is intended.
    try:
        func = cache.dispatch_map[pseudo.ident.replace('-', '_')]
    except KeyError:
        raise ExpressionError(
            "The pseudo-class :%s is not supported" % pseudo.ident)
    for item in cache.iterparsedselector(pseudo.selector):
        if func(cache, item):
            yield item
def select_first_child(cache, elem):
    """Implement :first-child (no siblings before elem)."""
    try:
        preceding = cache.sibling_count(elem)
    except ValueError:
        # sibling_count signals a missing parent with ValueError
        return False
    return preceding == 0
def select_last_child(cache, elem):
    """Implement :last-child (no siblings after elem)."""
    try:
        following = cache.sibling_count(elem, before=False)
    except ValueError:
        # sibling_count signals a missing parent with ValueError
        return False
    return following == 0
def select_only_child(cache, elem):
    """Implement :only-child (elem has no siblings at all)."""
    try:
        others = cache.all_sibling_count(elem)
    except ValueError:
        # all_sibling_count signals a missing parent with ValueError
        return False
    return others == 0
def select_first_of_type(cache, elem):
    """Implement :first-of-type (no same-type siblings before elem)."""
    try:
        preceding = cache.sibling_count(elem, same_type=True)
    except ValueError:
        # sibling_count signals a missing parent with ValueError
        return False
    return preceding == 0
def select_last_of_type(cache, elem):
    """Implement :last-of-type (no same-type siblings after elem)."""
    try:
        following = cache.sibling_count(elem, before=False, same_type=True)
    except ValueError:
        # sibling_count signals a missing parent with ValueError
        return False
    return following == 0
def select_only_of_type(cache, elem):
    """Implement :only-of-type (elem has no same-type siblings)."""
    try:
        others = cache.all_sibling_count(elem, same_type=True)
    except ValueError:
        # all_sibling_count signals a missing parent with ValueError
        return False
    return others == 0
def select_empty(cache, elem):
    """Implement :empty by delegating to ``cache.is_empty``."""
    verdict = cache.is_empty(elem)
    return verdict
# }}} # }}}
default_dispatch_map = {name.partition('_')[2]:obj for name, obj in globals().items() if name.startswith('select_') and callable(obj)} default_dispatch_map = {name.partition('_')[2]:obj for name, obj in globals().items() if name.startswith('select_') and callable(obj)}
if __name__ == '__main__': if __name__ == '__main__':
from pprint import pprint from pprint import pprint
root = etree.fromstring('<body xmlns="xxx" xml:lang="en"><p id="p" class="one two" lang="fr"><a id="a"/></p></body>') root = etree.fromstring('<body xmlns="xxx" xml:lang="en"><p id="p" class="one two" lang="fr"><a id="a"/><b/><c/><d/></p></body>')
select = Select(root, trace=True) select = Select(root, trace=True)
pprint(list(select('p a'))) pprint(list(select('p *:root')))

View File

@ -6,14 +6,26 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
import unittest, sys, argparse import unittest, sys, argparse, json
from lxml import etree from lxml import etree, html
from css_selectors.errors import SelectorSyntaxError from css_selectors.errors import SelectorSyntaxError
from css_selectors.parse import tokenize, parse from css_selectors.parse import tokenize, parse
from css_selectors.select import Select from css_selectors.select import Select
def run_webkit_selector(page, selector):
    """Evaluate selector with ``document.querySelectorAll`` in the main frame
    of page and return the list of ``id`` attribute values of the matched
    nodes. A false result from the evaluation is treated as an empty list."""
    script = '''
var nodes = document.querySelectorAll(%s);
var ans = [];
var i = 0;
for (var i = 0; i < nodes.length; i++)
ans.push(nodes[i].getAttribute("id"));
JSON.stringify(ans);
''' % json.dumps(selector)
    serialized = page.mainFrame().evaluateJavaScript(script)
    return json.loads(serialized or '[]')
class TestCSSSelectors(unittest.TestCase): class TestCSSSelectors(unittest.TestCase):
# Test data {{{ # Test data {{{
@ -67,6 +79,318 @@ c"></li>
cde"><span id="foobar-span"></span></div> cde"><span id="foobar-span"></span></div>
</body></html> </body></html>
''' '''
HTML_SHAKESPEARE = '''
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" debug="true">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
</head>
<body>
<div id="test">
<div class="dialog">
<h2>As You Like It</h2>
<div id="playwright">
by William Shakespeare
</div>
<div class="dialog scene thirdClass" id="scene1">
<h3>ACT I, SCENE III. A room in the palace.</h3>
<div class="dialog">
<div class="direction">Enter CELIA and ROSALIND</div>
</div>
<div id="speech1" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.1">Why, cousin! why, Rosalind! Cupid have mercy! not a word?</div>
</div>
<div id="speech2" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.2">Not one to throw at a dog.</div>
</div>
<div id="speech3" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.3">No, thy words are too precious to be cast away upon</div>
<div id="scene1.3.4">curs; throw some of them at me; come, lame me with reasons.</div>
</div>
<div id="speech4" class="character">ROSALIND</div>
<div id="speech5" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.8">But is all this for your father?</div>
</div>
<div class="dialog">
<div id="scene1.3.5">Then there were two cousins laid up; when the one</div>
<div id="scene1.3.6">should be lamed with reasons and the other mad</div>
<div id="scene1.3.7">without any.</div>
</div>
<div id="speech6" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.9">No, some of it is for my child's father. O, how</div>
<div id="scene1.3.10">full of briers is this working-day world!</div>
</div>
<div id="speech7" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.11">They are but burs, cousin, thrown upon thee in</div>
<div id="scene1.3.12">holiday foolery: if we walk not in the trodden</div>
<div id="scene1.3.13">paths our very petticoats will catch them.</div>
</div>
<div id="speech8" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.14">I could shake them off my coat: these burs are in my heart.</div>
</div>
<div id="speech9" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.15">Hem them away.</div>
</div>
<div id="speech10" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.16">I would try, if I could cry 'hem' and have him.</div>
</div>
<div id="speech11" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.17">Come, come, wrestle with thy affections.</div>
</div>
<div id="speech12" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.18">O, they take the part of a better wrestler than myself!</div>
</div>
<div id="speech13" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.19">O, a good wish upon you! you will try in time, in</div>
<div id="scene1.3.20">despite of a fall. But, turning these jests out of</div>
<div id="scene1.3.21">service, let us talk in good earnest: is it</div>
<div id="scene1.3.22">possible, on such a sudden, you should fall into so</div>
<div id="scene1.3.23">strong a liking with old Sir Rowland's youngest son?</div>
</div>
<div id="speech14" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.24">The duke my father loved his father dearly.</div>
</div>
<div id="speech15" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.25">Doth it therefore ensue that you should love his son</div>
<div id="scene1.3.26">dearly? By this kind of chase, I should hate him,</div>
<div id="scene1.3.27">for my father hated his father dearly; yet I hate</div>
<div id="scene1.3.28">not Orlando.</div>
</div>
<div id="speech16" class="character">ROSALIND</div>
<div title="wtf" class="dialog">
<div id="scene1.3.29">No, faith, hate him not, for my sake.</div>
</div>
<div id="speech17" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.30">Why should I not? doth he not deserve well?</div>
</div>
<div id="speech18" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.31">Let me love him for that, and do you love him</div>
<div id="scene1.3.32">because I do. Look, here comes the duke.</div>
</div>
<div id="speech19" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.33">With his eyes full of anger.</div>
<div class="direction">Enter DUKE FREDERICK, with Lords</div>
</div>
<div id="speech20" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.34">Mistress, dispatch you with your safest haste</div>
<div id="scene1.3.35">And get you from our court.</div>
</div>
<div id="speech21" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.36">Me, uncle?</div>
</div>
<div id="speech22" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.37">You, cousin</div>
<div id="scene1.3.38">Within these ten days if that thou be'st found</div>
<div id="scene1.3.39">So near our public court as twenty miles,</div>
<div id="scene1.3.40">Thou diest for it.</div>
</div>
<div id="speech23" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.41"> I do beseech your grace,</div>
<div id="scene1.3.42">Let me the knowledge of my fault bear with me:</div>
<div id="scene1.3.43">If with myself I hold intelligence</div>
<div id="scene1.3.44">Or have acquaintance with mine own desires,</div>
<div id="scene1.3.45">If that I do not dream or be not frantic,--</div>
<div id="scene1.3.46">As I do trust I am not--then, dear uncle,</div>
<div id="scene1.3.47">Never so much as in a thought unborn</div>
<div id="scene1.3.48">Did I offend your highness.</div>
</div>
<div id="speech24" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.49">Thus do all traitors:</div>
<div id="scene1.3.50">If their purgation did consist in words,</div>
<div id="scene1.3.51">They are as innocent as grace itself:</div>
<div id="scene1.3.52">Let it suffice thee that I trust thee not.</div>
</div>
<div id="speech25" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.53">Yet your mistrust cannot make me a traitor:</div>
<div id="scene1.3.54">Tell me whereon the likelihood depends.</div>
</div>
<div id="speech26" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.55">Thou art thy father's daughter; there's enough.</div>
</div>
<div id="speech27" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.56">So was I when your highness took his dukedom;</div>
<div id="scene1.3.57">So was I when your highness banish'd him:</div>
<div id="scene1.3.58">Treason is not inherited, my lord;</div>
<div id="scene1.3.59">Or, if we did derive it from our friends,</div>
<div id="scene1.3.60">What's that to me? my father was no traitor:</div>
<div id="scene1.3.61">Then, good my liege, mistake me not so much</div>
<div id="scene1.3.62">To think my poverty is treacherous.</div>
</div>
<div id="speech28" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.63">Dear sovereign, hear me speak.</div>
</div>
<div id="speech29" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.64">Ay, Celia; we stay'd her for your sake,</div>
<div id="scene1.3.65">Else had she with her father ranged along.</div>
</div>
<div id="speech30" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.66">I did not then entreat to have her stay;</div>
<div id="scene1.3.67">It was your pleasure and your own remorse:</div>
<div id="scene1.3.68">I was too young that time to value her;</div>
<div id="scene1.3.69">But now I know her: if she be a traitor,</div>
<div id="scene1.3.70">Why so am I; we still have slept together,</div>
<div id="scene1.3.71">Rose at an instant, learn'd, play'd, eat together,</div>
<div id="scene1.3.72">And wheresoever we went, like Juno's swans,</div>
<div id="scene1.3.73">Still we went coupled and inseparable.</div>
</div>
<div id="speech31" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.74">She is too subtle for thee; and her smoothness,</div>
<div id="scene1.3.75">Her very silence and her patience</div>
<div id="scene1.3.76">Speak to the people, and they pity her.</div>
<div id="scene1.3.77">Thou art a fool: she robs thee of thy name;</div>
<div id="scene1.3.78">And thou wilt show more bright and seem more virtuous</div>
<div id="scene1.3.79">When she is gone. Then open not thy lips:</div>
<div id="scene1.3.80">Firm and irrevocable is my doom</div>
<div id="scene1.3.81">Which I have pass'd upon her; she is banish'd.</div>
</div>
<div id="speech32" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.82">Pronounce that sentence then on me, my liege:</div>
<div id="scene1.3.83">I cannot live out of her company.</div>
</div>
<div id="speech33" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.84">You are a fool. You, niece, provide yourself:</div>
<div id="scene1.3.85">If you outstay the time, upon mine honour,</div>
<div id="scene1.3.86">And in the greatness of my word, you die.</div>
<div class="direction">Exeunt DUKE FREDERICK and Lords</div>
</div>
<div id="speech34" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.87">O my poor Rosalind, whither wilt thou go?</div>
<div id="scene1.3.88">Wilt thou change fathers? I will give thee mine.</div>
<div id="scene1.3.89">I charge thee, be not thou more grieved than I am.</div>
</div>
<div id="speech35" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.90">I have more cause.</div>
</div>
<div id="speech36" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.91"> Thou hast not, cousin;</div>
<div id="scene1.3.92">Prithee be cheerful: know'st thou not, the duke</div>
<div id="scene1.3.93">Hath banish'd me, his daughter?</div>
</div>
<div id="speech37" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.94">That he hath not.</div>
</div>
<div id="speech38" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.95">No, hath not? Rosalind lacks then the love</div>
<div id="scene1.3.96">Which teacheth thee that thou and I am one:</div>
<div id="scene1.3.97">Shall we be sunder'd? shall we part, sweet girl?</div>
<div id="scene1.3.98">No: let my father seek another heir.</div>
<div id="scene1.3.99">Therefore devise with me how we may fly,</div>
<div id="scene1.3.100">Whither to go and what to bear with us;</div>
<div id="scene1.3.101">And do not seek to take your change upon you,</div>
<div id="scene1.3.102">To bear your griefs yourself and leave me out;</div>
<div id="scene1.3.103">For, by this heaven, now at our sorrows pale,</div>
<div id="scene1.3.104">Say what thou canst, I'll go along with thee.</div>
</div>
<div id="speech39" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.105">Why, whither shall we go?</div>
</div>
<div id="speech40" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.106">To seek my uncle in the forest of Arden.</div>
</div>
<div id="speech41" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.107">Alas, what danger will it be to us,</div>
<div id="scene1.3.108">Maids as we are, to travel forth so far!</div>
<div id="scene1.3.109">Beauty provoketh thieves sooner than gold.</div>
</div>
<div id="speech42" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.110">I'll put myself in poor and mean attire</div>
<div id="scene1.3.111">And with a kind of umber smirch my face;</div>
<div id="scene1.3.112">The like do you: so shall we pass along</div>
<div id="scene1.3.113">And never stir assailants.</div>
</div>
<div id="speech43" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.114">Were it not better,</div>
<div id="scene1.3.115">Because that I am more than common tall,</div>
<div id="scene1.3.116">That I did suit me all points like a man?</div>
<div id="scene1.3.117">A gallant curtle-axe upon my thigh,</div>
<div id="scene1.3.118">A boar-spear in my hand; and--in my heart</div>
<div id="scene1.3.119">Lie there what hidden woman's fear there will--</div>
<div id="scene1.3.120">We'll have a swashing and a martial outside,</div>
<div id="scene1.3.121">As many other mannish cowards have</div>
<div id="scene1.3.122">That do outface it with their semblances.</div>
</div>
<div id="speech44" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.123">What shall I call thee when thou art a man?</div>
</div>
<div id="speech45" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.124">I'll have no worse a name than Jove's own page;</div>
<div id="scene1.3.125">And therefore look you call me Ganymede.</div>
<div id="scene1.3.126">But what will you be call'd?</div>
</div>
<div id="speech46" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.127">Something that hath a reference to my state</div>
<div id="scene1.3.128">No longer Celia, but Aliena.</div>
</div>
<div id="speech47" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.129">But, cousin, what if we assay'd to steal</div>
<div id="scene1.3.130">The clownish fool out of your father's court?</div>
<div id="scene1.3.131">Would he not be a comfort to our travel?</div>
</div>
<div id="speech48" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.132">He'll go along o'er the wide world with me;</div>
<div id="scene1.3.133">Leave me alone to woo him. Let's away,</div>
<div id="scene1.3.134">And get our jewels and our wealth together,</div>
<div id="scene1.3.135">Devise the fittest time and safest way</div>
<div id="scene1.3.136">To hide us from pursuit that will be made</div>
<div id="scene1.3.137">After my flight. Now go we in content</div>
<div id="scene1.3.138">To liberty and not to banishment.</div>
<div class="direction">Exeunt</div>
</div>
</div>
</div>
</div>
</body>
</html>
'''
# }}} # }}}
ae = unittest.TestCase.assertEqual ae = unittest.TestCase.assertEqual
@ -337,18 +661,26 @@ cde"><span id="foobar-span"></span></div>
document = etree.fromstring(self.HTML_IDS) document = etree.fromstring(self.HTML_IDS)
select = Select(document) select = Select(document)
from PyQt5.Qt import QApplication, QWebPage
app = QApplication([])
w = QWebPage()
w.mainFrame().setHtml(self.HTML_IDS)
def select_ids(selector): def select_ids(selector):
for elem in select(selector): for elem in select(selector):
yield elem.get('id') or 'nil' yield elem.get('id')
def pcss(main, *selectors, **kwargs): def pcss(main, *selectors, **kwargs):
result = list(select_ids(main)) result = list(select_ids(main))
for selector in selectors: for selector in selectors:
self.ae(list(select_ids(selector)), result) self.ae(list(select_ids(selector)), result)
if not kwargs.get('skip_webkit'):
wk = set(run_webkit_selector(w, main))
self.ae(set(result), wk, 'WebKit did not match result for: %r. Result: %r WebKit: %r' % (main, set(result), wk))
return result return result
all_ids = pcss('*') all_ids = pcss('*')
self.ae(all_ids[:6], [ self.ae(all_ids[:6], [
'html', 'nil', 'link-href', 'link-nohref', 'nil', 'outer-div']) 'html', None, 'link-href', 'link-nohref', None, 'outer-div'])
self.ae(all_ids[-1:], ['foobar-span']) self.ae(all_ids[-1:], ['foobar-span'])
self.ae(pcss('div'), ['outer-div', 'li-div', 'foobar-div']) self.ae(pcss('div'), ['outer-div', 'li-div', 'foobar-div'])
self.ae(pcss('DIV'), [ self.ae(pcss('DIV'), [
@ -366,15 +698,120 @@ cde"><span id="foobar-span"></span></div>
self.ae(pcss('a[href^=""]'), []) self.ae(pcss('a[href^=""]'), [])
self.ae(pcss('a[href$="org"]'), ['nofollow-anchor']) self.ae(pcss('a[href$="org"]'), ['nofollow-anchor'])
self.ae(pcss('a[href$=""]'), []) self.ae(pcss('a[href$=""]'), [])
self.ae(pcss('div[foobar~="bc"]', 'div[foobar~="cde"]'), ['foobar-div']) self.ae(pcss('div[foobar~="bc"]', 'div[foobar~="cde"]', skip_webkit=True), ['foobar-div'])
self.ae(pcss('[foobar~="ab bc"]', '[foobar~=""]', '[foobar~=" \t"]'), []) self.ae(pcss('[foobar~="ab bc"]', '[foobar~=""]', '[foobar~=" \t"]'), [])
self.ae(pcss('div[foobar~="cd"]'), []) self.ae(pcss('div[foobar~="cd"]'), [])
self.ae(pcss('*[lang|="En"]', '[lang|="En-us"]'), ['second-li']) self.ae(pcss('*[lang|="En"]', '[lang|="En-us"]'), ['second-li'])
# Attribute values are case sensitive # Attribute values are case sensitive
self.ae(pcss('*[lang|="en"]', '[lang|="en-US"]'), []) self.ae(pcss('*[lang|="en"]', '[lang|="en-US"]', skip_webkit=True), [])
self.ae(pcss('*[lang|="e"]'), []) self.ae(pcss('*[lang|="e"]'), [])
self.ae(pcss(':lang("EN")', '*:lang(en-US)'), ['second-li', 'li-div']) self.ae(pcss(':lang("EN")', '*:lang(en-US)', skip_webkit=True), ['second-li', 'li-div'])
self.ae(pcss(':lang("e")'), []) self.ae(pcss(':lang("e")'), [])
self.ae(pcss('li:nth-child(1)', 'li:first-child'), ['first-li'])
self.ae(pcss('li:nth-child(3)'), ['third-li'])
self.ae(pcss('li:nth-child(10)'), [])
self.ae(pcss('li:nth-child(2n)', 'li:nth-child(even)', 'li:nth-child(2n+0)'), ['second-li', 'fourth-li', 'sixth-li'])
self.ae(pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)'), ['first-li', 'third-li', 'fifth-li', 'seventh-li'])
self.ae(pcss('li:nth-child(2n+4)'), ['fourth-li', 'sixth-li'])
self.ae(pcss('li:nth-child(3n+1)'), ['first-li', 'fourth-li', 'seventh-li'])
self.ae(pcss('li:nth-last-child(0)'), [])
self.ae(pcss('li:nth-last-child(1)', 'li:last-child'), ['seventh-li'])
self.ae(pcss('li:nth-last-child(2n)', 'li:nth-last-child(even)'), ['second-li', 'fourth-li', 'sixth-li'])
self.ae(pcss('li:nth-last-child(2n+2)'), ['second-li', 'fourth-li', 'sixth-li'])
self.ae(pcss('ol:first-of-type'), ['first-ol'])
self.ae(pcss('ol:nth-child(1)'), [])
self.ae(pcss('ol:nth-of-type(2)'), ['second-ol'])
self.ae(pcss('ol:nth-last-of-type(1)'), ['second-ol'])
self.ae(pcss('span:only-child'), ['foobar-span'])
self.ae(pcss('li div:only-child'), ['li-div'])
self.ae(pcss('div *:only-child'), ['li-div', 'foobar-span'])
self.ae(pcss('p *:only-of-type', skip_webkit=True), ['p-em', 'fieldset'])
self.ae(pcss('p:only-of-type', skip_webkit=True), ['paragraph'])
self.ae(pcss('a:empty', 'a:EMpty'), ['name-anchor'])
self.ae(pcss('li:empty'), ['third-li', 'fourth-li', 'fifth-li', 'sixth-li'])
self.ae(pcss(':root', 'html:root', 'li:root'), ['html'])
self.ae(pcss('* :root', 'p *:root'), [])
self.ae(pcss('.a', '.b', '*.a', 'ol.a'), ['first-ol'])
self.ae(pcss('.c', '*.c'), ['first-ol', 'third-li', 'fourth-li'])
self.ae(pcss('ol *.c', 'ol li.c', 'li ~ li.c', 'ol > li.c'), [
'third-li', 'fourth-li'])
self.ae(pcss('#first-li', 'li#first-li', '*#first-li'), ['first-li'])
self.ae(pcss('li div', 'li > div', 'div div'), ['li-div'])
self.ae(pcss('div > div'), [])
self.ae(pcss('div>.c', 'div > .c'), ['first-ol'])
self.ae(pcss('div + div'), ['foobar-div'])
self.ae(pcss('a ~ a'), ['tag-anchor', 'nofollow-anchor'])
self.ae(pcss('a[rel="tag"] ~ a'), ['nofollow-anchor'])
self.ae(pcss('ol#first-ol li:last-child'), ['seventh-li'])
self.ae(pcss('ol#first-ol *:last-child'), ['li-div', 'seventh-li'])
self.ae(pcss('#outer-div:first-child'), ['outer-div'])
self.ae(pcss('#outer-div :first-child'), [
'name-anchor', 'first-li', 'li-div', 'p-b',
'checkbox-fieldset-disabled', 'area-href'])
self.ae(pcss('a[href]'), ['tag-anchor', 'nofollow-anchor'])
self.ae(pcss(':not(*)'), [])
self.ae(pcss('a:not([href])'), ['name-anchor'])
self.ae(pcss('ol :Not(li[class])', skip_webkit=True), [
'first-li', 'second-li', 'li-div',
'fifth-li', 'sixth-li', 'seventh-li'])
self.ae(pcss(r'di\a0 v', r'div\['), [])
self.ae(pcss(r'[h\a0 ref]', r'[h\]ref]'), [])
del app
def test_select_shakespeare(self):
    """Check the number of matches of many selectors against the
    HTML_SHAKESPEARE document."""
    document = html.document_fromstring(self.HTML_SHAKESPEARE)
    select = Select(document)

    def count(selector):
        return sum(1 for _ in select(selector))

    # Data borrowed from http://mootools.net/slickspeed/
    # The total element count was changed from the original data.
    expected_counts = (
        ('*', 249),
        ('div:only-child', 22),  # ?
        ('div:nth-child(even)', 106),
        ('div:nth-child(2n)', 106),
        ('div:nth-child(odd)', 137),
        ('div:nth-child(2n+1)', 137),
        ('div:nth-child(n)', 243),
        ('div:last-child', 53),
        ('div:first-child', 51),
        ('div > div', 242),
        ('div + div', 190),
        ('div ~ div', 190),
        ('body', 1),
        ('body div', 243),
        ('div', 243),
        ('div div', 242),
        ('div div div', 241),
        ('div, div, div', 243),
        ('div, a, span', 243),
        ('.dialog', 51),
        ('div.dialog', 51),
        ('div .dialog', 51),
        ('div.character, div.dialog', 99),
        ('div.direction.dialog', 0),
        ('div.dialog.direction', 0),
        ('div.dialog.scene', 1),
        ('div.scene.scene', 1),
        ('div.scene .scene', 0),
        ('div.direction .dialog ', 0),
        ('div .dialog .direction', 4),
        ('div.dialog .dialog .direction', 4),
        ('#speech5', 1),
        ('div#speech5', 1),
        ('div #speech5', 1),
        ('div.scene div.dialog', 49),
        ('div#scene1 div.dialog div', 142),
        ('#scene1 #speech1', 1),
        ('div[class]', 103),
        ('div[class=dialog]', 50),
        ('div[class^=dia]', 51),
        ('div[class$=log]', 50),
        ('div[class*=sce]', 1),
        ('div[class|=dialog]', 50),  # ? Seems right
        ('div[class~=dialog]', 51),  # ? Seems right
    )
    # assertEqual (rather than a bare assert) still runs under -O and
    # reports the offending selector together with both counts.
    for selector, expected in expected_counts:
        self.ae(count(selector), expected, 'Wrong number of matches for: %r' % selector)
# }}} # }}}
# Run tests {{{ # Run tests {{{