Finish up implementation of css_selectors

This commit is contained in:
Kovid Goyal 2015-02-20 16:29:07 +05:30
parent 085dd58217
commit 2d90b5695f
3 changed files with 630 additions and 23 deletions

View File

@ -14,7 +14,7 @@ import re
import operator
import string
from css_selectors.errors import SelectorSyntaxError
from css_selectors.errors import SelectorSyntaxError, ExpressionError
if sys.version_info[0] < 3:
_unicode = unicode
@ -159,6 +159,7 @@ class Function(object):
self.selector = selector
self.name = ascii_lower(name)
self.arguments = arguments
self._parsed_arguments = None
def __repr__(self):
return '%s[%r:%s(%s)]' % (
@ -168,6 +169,19 @@ class Function(object):
def argument_types(self):
    ' Return the ``type`` attribute of every argument token, in order. '
    type_of = operator.attrgetter('type')
    return list(map(type_of, self.arguments))
@property
def parsed_arguments(self):
    ''' The result of ``parse_series(self.arguments)``, computed on first
    access and cached in ``self._parsed_arguments``.

    Raises ExpressionError if ``parse_series`` raises ValueError. '''
    cached = self._parsed_arguments
    if cached is not None:
        return cached
    try:
        cached = self._parsed_arguments = parse_series(self.arguments)
    except ValueError:
        raise ExpressionError("Invalid series: '%r'" % self.arguments)
    return cached
def parse_arguments(self):
    ''' Mark the arguments of this function as parsed.

    Sets ``self.arguments_parsed`` to True if it is not already truthy. '''
    # NOTE(review): the visible part of __init__ initialises
    # ``_parsed_arguments`` but not ``arguments_parsed``, so read the flag
    # defensively instead of risking an AttributeError on first use.
    if not getattr(self, 'arguments_parsed', False):
        self.arguments_parsed = True
def specificity(self):
a, b, c = self.selector.specificity()
b += 1

View File

@ -9,6 +9,7 @@ __copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
import re, itertools
from collections import OrderedDict, defaultdict
from functools import wraps
from itertools import chain
from lxml import etree
@ -90,13 +91,16 @@ class Select(object):
Tags are returned in document order. Note that attribute and tag names are
matched case-insensitively. Also namespaces are ignored (this is for
performance of the common case).
performance of the common case). The UI related selectors are not
implemented, such as :enabled, :disabled, :checked, :hover, etc. Similarly,
the non-element related selectors such as ::first-line, ::first-letter,
::before, etc. are not implemented.
WARNING: This class uses internal caches. You *must not* make any changes
to the lxml tree. If you do make some changes, either create a new Select
object or call :meth:`invalidate_caches`.
This class can be easily sub-classes to work with tree implementations
This class can be easily sub-classed to work with tree implementations
other than lxml. Simply override the methods in the ``Tree Integration``
block.
@ -135,6 +139,11 @@ class Select(object):
self._attrib_map = None
self._attrib_space_map = None
self._lang_map = None
self.map_tag_name = ascii_lower
if '{' in self.root.tag:
def map_tag_name(x):
return ascii_lower(x.rpartition('}')[2])
self.map_tag_name = map_tag_name
def __call__(self, selector):
'Return an iterator over all matching tags, in document order.'
@ -159,13 +168,8 @@ class Select(object):
def element_map(self):
if self._element_map is None:
self._element_map = em = defaultdict(OrderedSet)
map_tag_name = ascii_lower
if '{' in self.root.tag:
def map_tag_name(x):
return ascii_lower(x.rpartition('}')[2])
for tag in self.itertag():
em[map_tag_name(tag.tag)].add(tag)
em[self.map_tag_name(tag.tag)].add(tag)
return self._element_map
@property
@ -251,6 +255,38 @@ class Select(object):
def iterclasstags(self):
    ' Return the result of evaluating the XPath ``//*[@class]`` against the root. '
    find_with_class = get_compiled_xpath('//*[@class]')
    return find_with_class(self.root)
def sibling_count(self, child, before=True, same_type=False):
    ''' Return the number of sibling elements before or after child.

    :param child: The element whose siblings are counted.
    :param before: If True count preceding siblings, otherwise following ones.
    :param same_type: If True count only siblings that are in the same
        ``element_map`` bucket (same mapped tag name) as child.
    :raises ValueError: If child has no parent. '''
    parent = child.getparent()
    if parent is None:
        raise ValueError('Child has no parent')
    if same_type:
        siblings = OrderedSet(child.itersiblings(preceding=before))
        return len(self.element_map[self.map_tag_name(child.tag)] & siblings)
    # Restrict the walk to element nodes with the '*' tag filter (as is_empty
    # does with iterchildren): comments and processing instructions are not
    # elements and must not shift the position used by :first-child,
    # :last-child and :nth-child(). Using parent.index()/len(parent) would
    # count them.
    return sum(1 for _ in child.itersiblings('*', preceding=before))
def all_sibling_count(self, child, same_type=False):
    ''' Return the number of sibling elements of child.

    :param child: The element whose siblings are counted.
    :param same_type: If True count only siblings that are in the same
        ``element_map`` bucket (same mapped tag name) as child.
    :raises ValueError: If child has no parent. '''
    parent = child.getparent()
    if parent is None:
        raise ValueError('Child has no parent')
    if same_type:
        siblings = OrderedSet(chain(child.itersiblings(preceding=False), child.itersiblings(preceding=True)))
        return len(self.element_map[self.map_tag_name(child.tag)] & siblings)
    # Count only element children (the '*' tag filter) so that comments and
    # processing instructions next to child do not defeat :only-child;
    # len(parent) would include them. Subtract one for child itself.
    return sum(1 for _ in parent.iterchildren('*')) - 1
def is_empty(self, elem):
    ''' Return True if elem has no text, no element children and no child
    node (of any kind) that carries tail text. '''
    if elem.text:
        return False
    # Iterating elem visits every child node, so this also catches text that
    # follows a comment or processing instruction.
    if any(node.tail for node in elem):
        return False
    return next(elem.iterchildren('*'), None) is None
# }}}
# Combinators {{{
@ -324,6 +360,13 @@ def select_class(cache, selector):
if elem in items:
yield elem
def select_negation(cache, selector):
    ''' Implement :not()

    Yield every item matched by ``selector.selector`` that is not matched
    by ``selector.subselector``, in the order the former produces them. '''
    rejected = frozenset(cache.iterparsedselector(selector.subselector))
    candidates = cache.iterparsedselector(selector.selector)
    for candidate in candidates:
        if candidate in rejected:
            continue
        yield candidate
# Attribute selectors {{{
def select_attrib(cache, selector):
@ -381,17 +424,24 @@ def select_substringmatch(cache, attrib, value):
def select_function(cache, function):
"""Select with a functional pseudo-class."""
fname = function.name.replace('-', '_')
try:
func = cache.dispatch_map[function.name.replace('-', '_')]
func = cache.dispatch_map[fname]
except KeyError:
raise ExpressionError(
"The pseudo-class :%s() is unknown" % function.name)
items = frozenset(func(cache, function))
for item in cache.iterparsedselector(function.selector):
if item in items:
yield item
if fname == 'lang':
items = frozenset(func(cache, function))
for item in cache.iterparsedselector(function.selector):
if item in items:
yield item
else:
for item in cache.iterparsedselector(function.selector):
if func(cache, function, item):
yield item
def select_lang(cache, function):
' Implement :lang() '
if function.argument_types() not in (['STRING'], ['IDENT']):
raise ExpressionError("Expected a single string or ident for :lang(), got %r" % function.arguments)
lang = function.arguments[0].value
@ -403,12 +453,118 @@ def select_lang(cache, function):
for elem in elem_set:
yield elem
def select_nth_child(cache, function, elem):
    ''' Implement :nth-child()

    Return True if the 1-based position of elem among its siblings equals
    ``a*n + b`` for some integer ``n >= 0``, where ``(a, b)`` is
    ``function.parsed_arguments``. Return False if
    ``cache.sibling_count`` raises ValueError for elem. '''
    a, b = function.parsed_arguments
    try:
        num = cache.sibling_count(elem) + 1
    except ValueError:
        return False
    if a == 0:
        return num == b
    # Solve num == a*n + b in exact integer arithmetic. True division plus
    # float.is_integer() is inexact for large values and depends on
    # ``from __future__ import division`` under Python 2.
    offset = num - b
    return offset % a == 0 and offset // a >= 0
def select_nth_last_child(cache, function, elem):
    ''' Implement :nth-last-child()

    Return True if the 1-based position of elem counted from its last
    sibling equals ``a*n + b`` for some integer ``n >= 0``, where
    ``(a, b)`` is ``function.parsed_arguments``. Return False if
    ``cache.sibling_count`` raises ValueError for elem. '''
    a, b = function.parsed_arguments
    try:
        num = cache.sibling_count(elem, before=False) + 1
    except ValueError:
        return False
    if a == 0:
        return num == b
    # Solve num == a*n + b in exact integer arithmetic. True division plus
    # float.is_integer() is inexact for large values and depends on
    # ``from __future__ import division`` under Python 2.
    offset = num - b
    return offset % a == 0 and offset // a >= 0
def select_nth_of_type(cache, function, elem):
    ''' Implement :nth-of-type()

    Return True if the 1-based position of elem among its siblings of the
    same type equals ``a*n + b`` for some integer ``n >= 0``, where
    ``(a, b)`` is ``function.parsed_arguments``. Return False if
    ``cache.sibling_count`` raises ValueError for elem. '''
    a, b = function.parsed_arguments
    try:
        num = cache.sibling_count(elem, same_type=True) + 1
    except ValueError:
        return False
    if a == 0:
        return num == b
    # Solve num == a*n + b in exact integer arithmetic. True division plus
    # float.is_integer() is inexact for large values and depends on
    # ``from __future__ import division`` under Python 2.
    offset = num - b
    return offset % a == 0 and offset // a >= 0
def select_nth_last_of_type(cache, function, elem):
    ''' Implement :nth-last-of-type()

    Return True if the 1-based position of elem, counted from the last of
    its siblings of the same type, equals ``a*n + b`` for some integer
    ``n >= 0``, where ``(a, b)`` is ``function.parsed_arguments``. Return
    False if ``cache.sibling_count`` raises ValueError for elem. '''
    a, b = function.parsed_arguments
    try:
        num = cache.sibling_count(elem, before=False, same_type=True) + 1
    except ValueError:
        return False
    if a == 0:
        return num == b
    # Solve num == a*n + b in exact integer arithmetic. True division plus
    # float.is_integer() is inexact for large values and depends on
    # ``from __future__ import division`` under Python 2.
    offset = num - b
    return offset % a == 0 and offset // a >= 0
# }}}
# Pseudo elements {{{
def select_pseudo(cache, pseudo):
    ''' Select with a non-functional pseudo-class such as :root or :first-child.

    Yield the items matched by ``pseudo.selector`` that also satisfy the
    pseudo-class named by ``pseudo.ident``. For every ident other than
    ``root`` the predicate is looked up in ``cache.dispatch_map`` (with
    ``-`` replaced by ``_``) and called as ``func(cache, item)``.

    Raises ExpressionError if the pseudo-class has no handler. '''
    if pseudo.ident == 'root':
        # :root only narrows what the selector it is attached to matches, so
        # ``li:root`` must not yield the root when the root is not an <li>.
        root = cache.root
        for item in cache.iterparsedselector(pseudo.selector):
            if item is root:
                yield item
                return  # there is only one root
        return
    try:
        func = cache.dispatch_map[pseudo.ident.replace('-', '_')]
    except KeyError:
        raise ExpressionError(
            "The pseudo-class :%s is not supported" % pseudo.ident)
    # NOTE(review): the default dispatch map is built from every select_*
    # callable, so it presumably also holds handlers with other signatures
    # (e.g. the functional ones) - confirm those cannot be reached from here.
    for item in cache.iterparsedselector(pseudo.selector):
        if func(cache, item):
            yield item
def select_first_child(cache, elem):
    ' Implement :first-child: True if elem has no preceding siblings, False if it has no parent. '
    try:
        preceding = cache.sibling_count(elem)
    except ValueError:
        return False
    return preceding == 0
def select_last_child(cache, elem):
    ' Implement :last-child: True if elem has no following siblings, False if it has no parent. '
    try:
        following = cache.sibling_count(elem, before=False)
    except ValueError:
        return False
    return following == 0
def select_only_child(cache, elem):
    ' Implement :only-child: True if elem has no siblings at all, False if it has no parent. '
    try:
        others = cache.all_sibling_count(elem)
    except ValueError:
        return False
    return others == 0
def select_first_of_type(cache, elem):
    ' Implement :first-of-type: True if no preceding sibling has the same type, False if elem has no parent. '
    try:
        preceding = cache.sibling_count(elem, same_type=True)
    except ValueError:
        return False
    return preceding == 0
def select_last_of_type(cache, elem):
    ' Implement :last-of-type: True if no following sibling has the same type, False if elem has no parent. '
    try:
        following = cache.sibling_count(elem, before=False, same_type=True)
    except ValueError:
        return False
    return following == 0
def select_only_of_type(cache, elem):
    ' Implement :only-of-type: True if no sibling has the same type, False if elem has no parent. '
    try:
        others = cache.all_sibling_count(elem, same_type=True)
    except ValueError:
        return False
    return others == 0
def select_empty(cache, elem):
    ' Implement :empty by delegating to ``cache.is_empty``. '
    verdict = cache.is_empty(elem)
    return verdict
# }}}
# Map the part of each name after its first underscore (e.g. 'nth_child' for
# select_nth_child) to the module-level callable of that name, for every
# global whose name starts with 'select_'. Only names already bound when this
# statement runs are picked up.
default_dispatch_map = {name.partition('_')[2]:obj for name, obj in globals().items() if name.startswith('select_') and callable(obj)}
if __name__ == '__main__':
from pprint import pprint
root = etree.fromstring('<body xmlns="xxx" xml:lang="en"><p id="p" class="one two" lang="fr"><a id="a"/></p></body>')
root = etree.fromstring('<body xmlns="xxx" xml:lang="en"><p id="p" class="one two" lang="fr"><a id="a"/><b/><c/><d/></p></body>')
select = Select(root, trace=True)
pprint(list(select('p a')))
pprint(list(select('p *:root')))

View File

@ -6,14 +6,26 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
import unittest, sys, argparse
import unittest, sys, argparse, json
from lxml import etree
from lxml import etree, html
from css_selectors.errors import SelectorSyntaxError
from css_selectors.parse import tokenize, parse
from css_selectors.select import Select
def run_webkit_selector(page, selector):
    ''' Evaluate selector with ``document.querySelectorAll`` in the main frame
    of page and return the ``id`` attribute of every matched node as a list
    (JSON ``null`` becomes None).

    If the script evaluation returns a falsy value an empty list is
    returned. '''
    script = '''
var nodes = document.querySelectorAll(%s);
var ans = [];
var i = 0;
for (var i = 0; i < nodes.length; i++)
ans.push(nodes[i].getAttribute("id"));
JSON.stringify(ans);
''' % json.dumps(selector)
    raw = page.mainFrame().evaluateJavaScript(script)
    if not raw:
        raw = '[]'
    return json.loads(raw)
class TestCSSSelectors(unittest.TestCase):
# Test data {{{
@ -67,6 +79,318 @@ c"></li>
cde"><span id="foobar-span"></span></div>
</body></html>
'''
HTML_SHAKESPEARE = '''
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" debug="true">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
</head>
<body>
<div id="test">
<div class="dialog">
<h2>As You Like It</h2>
<div id="playwright">
by William Shakespeare
</div>
<div class="dialog scene thirdClass" id="scene1">
<h3>ACT I, SCENE III. A room in the palace.</h3>
<div class="dialog">
<div class="direction">Enter CELIA and ROSALIND</div>
</div>
<div id="speech1" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.1">Why, cousin! why, Rosalind! Cupid have mercy! not a word?</div>
</div>
<div id="speech2" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.2">Not one to throw at a dog.</div>
</div>
<div id="speech3" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.3">No, thy words are too precious to be cast away upon</div>
<div id="scene1.3.4">curs; throw some of them at me; come, lame me with reasons.</div>
</div>
<div id="speech4" class="character">ROSALIND</div>
<div id="speech5" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.8">But is all this for your father?</div>
</div>
<div class="dialog">
<div id="scene1.3.5">Then there were two cousins laid up; when the one</div>
<div id="scene1.3.6">should be lamed with reasons and the other mad</div>
<div id="scene1.3.7">without any.</div>
</div>
<div id="speech6" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.9">No, some of it is for my child's father. O, how</div>
<div id="scene1.3.10">full of briers is this working-day world!</div>
</div>
<div id="speech7" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.11">They are but burs, cousin, thrown upon thee in</div>
<div id="scene1.3.12">holiday foolery: if we walk not in the trodden</div>
<div id="scene1.3.13">paths our very petticoats will catch them.</div>
</div>
<div id="speech8" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.14">I could shake them off my coat: these burs are in my heart.</div>
</div>
<div id="speech9" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.15">Hem them away.</div>
</div>
<div id="speech10" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.16">I would try, if I could cry 'hem' and have him.</div>
</div>
<div id="speech11" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.17">Come, come, wrestle with thy affections.</div>
</div>
<div id="speech12" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.18">O, they take the part of a better wrestler than myself!</div>
</div>
<div id="speech13" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.19">O, a good wish upon you! you will try in time, in</div>
<div id="scene1.3.20">despite of a fall. But, turning these jests out of</div>
<div id="scene1.3.21">service, let us talk in good earnest: is it</div>
<div id="scene1.3.22">possible, on such a sudden, you should fall into so</div>
<div id="scene1.3.23">strong a liking with old Sir Rowland's youngest son?</div>
</div>
<div id="speech14" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.24">The duke my father loved his father dearly.</div>
</div>
<div id="speech15" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.25">Doth it therefore ensue that you should love his son</div>
<div id="scene1.3.26">dearly? By this kind of chase, I should hate him,</div>
<div id="scene1.3.27">for my father hated his father dearly; yet I hate</div>
<div id="scene1.3.28">not Orlando.</div>
</div>
<div id="speech16" class="character">ROSALIND</div>
<div title="wtf" class="dialog">
<div id="scene1.3.29">No, faith, hate him not, for my sake.</div>
</div>
<div id="speech17" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.30">Why should I not? doth he not deserve well?</div>
</div>
<div id="speech18" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.31">Let me love him for that, and do you love him</div>
<div id="scene1.3.32">because I do. Look, here comes the duke.</div>
</div>
<div id="speech19" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.33">With his eyes full of anger.</div>
<div class="direction">Enter DUKE FREDERICK, with Lords</div>
</div>
<div id="speech20" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.34">Mistress, dispatch you with your safest haste</div>
<div id="scene1.3.35">And get you from our court.</div>
</div>
<div id="speech21" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.36">Me, uncle?</div>
</div>
<div id="speech22" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.37">You, cousin</div>
<div id="scene1.3.38">Within these ten days if that thou be'st found</div>
<div id="scene1.3.39">So near our public court as twenty miles,</div>
<div id="scene1.3.40">Thou diest for it.</div>
</div>
<div id="speech23" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.41"> I do beseech your grace,</div>
<div id="scene1.3.42">Let me the knowledge of my fault bear with me:</div>
<div id="scene1.3.43">If with myself I hold intelligence</div>
<div id="scene1.3.44">Or have acquaintance with mine own desires,</div>
<div id="scene1.3.45">If that I do not dream or be not frantic,--</div>
<div id="scene1.3.46">As I do trust I am not--then, dear uncle,</div>
<div id="scene1.3.47">Never so much as in a thought unborn</div>
<div id="scene1.3.48">Did I offend your highness.</div>
</div>
<div id="speech24" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.49">Thus do all traitors:</div>
<div id="scene1.3.50">If their purgation did consist in words,</div>
<div id="scene1.3.51">They are as innocent as grace itself:</div>
<div id="scene1.3.52">Let it suffice thee that I trust thee not.</div>
</div>
<div id="speech25" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.53">Yet your mistrust cannot make me a traitor:</div>
<div id="scene1.3.54">Tell me whereon the likelihood depends.</div>
</div>
<div id="speech26" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.55">Thou art thy father's daughter; there's enough.</div>
</div>
<div id="speech27" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.56">So was I when your highness took his dukedom;</div>
<div id="scene1.3.57">So was I when your highness banish'd him:</div>
<div id="scene1.3.58">Treason is not inherited, my lord;</div>
<div id="scene1.3.59">Or, if we did derive it from our friends,</div>
<div id="scene1.3.60">What's that to me? my father was no traitor:</div>
<div id="scene1.3.61">Then, good my liege, mistake me not so much</div>
<div id="scene1.3.62">To think my poverty is treacherous.</div>
</div>
<div id="speech28" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.63">Dear sovereign, hear me speak.</div>
</div>
<div id="speech29" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.64">Ay, Celia; we stay'd her for your sake,</div>
<div id="scene1.3.65">Else had she with her father ranged along.</div>
</div>
<div id="speech30" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.66">I did not then entreat to have her stay;</div>
<div id="scene1.3.67">It was your pleasure and your own remorse:</div>
<div id="scene1.3.68">I was too young that time to value her;</div>
<div id="scene1.3.69">But now I know her: if she be a traitor,</div>
<div id="scene1.3.70">Why so am I; we still have slept together,</div>
<div id="scene1.3.71">Rose at an instant, learn'd, play'd, eat together,</div>
<div id="scene1.3.72">And wheresoever we went, like Juno's swans,</div>
<div id="scene1.3.73">Still we went coupled and inseparable.</div>
</div>
<div id="speech31" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.74">She is too subtle for thee; and her smoothness,</div>
<div id="scene1.3.75">Her very silence and her patience</div>
<div id="scene1.3.76">Speak to the people, and they pity her.</div>
<div id="scene1.3.77">Thou art a fool: she robs thee of thy name;</div>
<div id="scene1.3.78">And thou wilt show more bright and seem more virtuous</div>
<div id="scene1.3.79">When she is gone. Then open not thy lips:</div>
<div id="scene1.3.80">Firm and irrevocable is my doom</div>
<div id="scene1.3.81">Which I have pass'd upon her; she is banish'd.</div>
</div>
<div id="speech32" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.82">Pronounce that sentence then on me, my liege:</div>
<div id="scene1.3.83">I cannot live out of her company.</div>
</div>
<div id="speech33" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.84">You are a fool. You, niece, provide yourself:</div>
<div id="scene1.3.85">If you outstay the time, upon mine honour,</div>
<div id="scene1.3.86">And in the greatness of my word, you die.</div>
<div class="direction">Exeunt DUKE FREDERICK and Lords</div>
</div>
<div id="speech34" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.87">O my poor Rosalind, whither wilt thou go?</div>
<div id="scene1.3.88">Wilt thou change fathers? I will give thee mine.</div>
<div id="scene1.3.89">I charge thee, be not thou more grieved than I am.</div>
</div>
<div id="speech35" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.90">I have more cause.</div>
</div>
<div id="speech36" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.91"> Thou hast not, cousin;</div>
<div id="scene1.3.92">Prithee be cheerful: know'st thou not, the duke</div>
<div id="scene1.3.93">Hath banish'd me, his daughter?</div>
</div>
<div id="speech37" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.94">That he hath not.</div>
</div>
<div id="speech38" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.95">No, hath not? Rosalind lacks then the love</div>
<div id="scene1.3.96">Which teacheth thee that thou and I am one:</div>
<div id="scene1.3.97">Shall we be sunder'd? shall we part, sweet girl?</div>
<div id="scene1.3.98">No: let my father seek another heir.</div>
<div id="scene1.3.99">Therefore devise with me how we may fly,</div>
<div id="scene1.3.100">Whither to go and what to bear with us;</div>
<div id="scene1.3.101">And do not seek to take your change upon you,</div>
<div id="scene1.3.102">To bear your griefs yourself and leave me out;</div>
<div id="scene1.3.103">For, by this heaven, now at our sorrows pale,</div>
<div id="scene1.3.104">Say what thou canst, I'll go along with thee.</div>
</div>
<div id="speech39" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.105">Why, whither shall we go?</div>
</div>
<div id="speech40" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.106">To seek my uncle in the forest of Arden.</div>
</div>
<div id="speech41" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.107">Alas, what danger will it be to us,</div>
<div id="scene1.3.108">Maids as we are, to travel forth so far!</div>
<div id="scene1.3.109">Beauty provoketh thieves sooner than gold.</div>
</div>
<div id="speech42" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.110">I'll put myself in poor and mean attire</div>
<div id="scene1.3.111">And with a kind of umber smirch my face;</div>
<div id="scene1.3.112">The like do you: so shall we pass along</div>
<div id="scene1.3.113">And never stir assailants.</div>
</div>
<div id="speech43" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.114">Were it not better,</div>
<div id="scene1.3.115">Because that I am more than common tall,</div>
<div id="scene1.3.116">That I did suit me all points like a man?</div>
<div id="scene1.3.117">A gallant curtle-axe upon my thigh,</div>
<div id="scene1.3.118">A boar-spear in my hand; and--in my heart</div>
<div id="scene1.3.119">Lie there what hidden woman's fear there will--</div>
<div id="scene1.3.120">We'll have a swashing and a martial outside,</div>
<div id="scene1.3.121">As many other mannish cowards have</div>
<div id="scene1.3.122">That do outface it with their semblances.</div>
</div>
<div id="speech44" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.123">What shall I call thee when thou art a man?</div>
</div>
<div id="speech45" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.124">I'll have no worse a name than Jove's own page;</div>
<div id="scene1.3.125">And therefore look you call me Ganymede.</div>
<div id="scene1.3.126">But what will you be call'd?</div>
</div>
<div id="speech46" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.127">Something that hath a reference to my state</div>
<div id="scene1.3.128">No longer Celia, but Aliena.</div>
</div>
<div id="speech47" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.129">But, cousin, what if we assay'd to steal</div>
<div id="scene1.3.130">The clownish fool out of your father's court?</div>
<div id="scene1.3.131">Would he not be a comfort to our travel?</div>
</div>
<div id="speech48" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.132">He'll go along o'er the wide world with me;</div>
<div id="scene1.3.133">Leave me alone to woo him. Let's away,</div>
<div id="scene1.3.134">And get our jewels and our wealth together,</div>
<div id="scene1.3.135">Devise the fittest time and safest way</div>
<div id="scene1.3.136">To hide us from pursuit that will be made</div>
<div id="scene1.3.137">After my flight. Now go we in content</div>
<div id="scene1.3.138">To liberty and not to banishment.</div>
<div class="direction">Exeunt</div>
</div>
</div>
</div>
</div>
</body>
</html>
'''
# }}}
ae = unittest.TestCase.assertEqual
@ -337,18 +661,26 @@ cde"><span id="foobar-span"></span></div>
document = etree.fromstring(self.HTML_IDS)
select = Select(document)
from PyQt5.Qt import QApplication, QWebPage
app = QApplication([])
w = QWebPage()
w.mainFrame().setHtml(self.HTML_IDS)
def select_ids(selector):
for elem in select(selector):
yield elem.get('id') or 'nil'
yield elem.get('id')
def pcss(main, *selectors, **kwargs):
result = list(select_ids(main))
for selector in selectors:
self.ae(list(select_ids(selector)), result)
if not kwargs.get('skip_webkit'):
wk = set(run_webkit_selector(w, main))
self.ae(set(result), wk, 'WebKit did not match result for: %r. Result: %r WebKit: %r' % (main, set(result), wk))
return result
all_ids = pcss('*')
self.ae(all_ids[:6], [
'html', 'nil', 'link-href', 'link-nohref', 'nil', 'outer-div'])
'html', None, 'link-href', 'link-nohref', None, 'outer-div'])
self.ae(all_ids[-1:], ['foobar-span'])
self.ae(pcss('div'), ['outer-div', 'li-div', 'foobar-div'])
self.ae(pcss('DIV'), [
@ -366,15 +698,120 @@ cde"><span id="foobar-span"></span></div>
self.ae(pcss('a[href^=""]'), [])
self.ae(pcss('a[href$="org"]'), ['nofollow-anchor'])
self.ae(pcss('a[href$=""]'), [])
self.ae(pcss('div[foobar~="bc"]', 'div[foobar~="cde"]'), ['foobar-div'])
self.ae(pcss('div[foobar~="bc"]', 'div[foobar~="cde"]', skip_webkit=True), ['foobar-div'])
self.ae(pcss('[foobar~="ab bc"]', '[foobar~=""]', '[foobar~=" \t"]'), [])
self.ae(pcss('div[foobar~="cd"]'), [])
self.ae(pcss('*[lang|="En"]', '[lang|="En-us"]'), ['second-li'])
# Attribute values are case sensitive
self.ae(pcss('*[lang|="en"]', '[lang|="en-US"]'), [])
self.ae(pcss('*[lang|="en"]', '[lang|="en-US"]', skip_webkit=True), [])
self.ae(pcss('*[lang|="e"]'), [])
self.ae(pcss(':lang("EN")', '*:lang(en-US)'), ['second-li', 'li-div'])
self.ae(pcss(':lang("EN")', '*:lang(en-US)', skip_webkit=True), ['second-li', 'li-div'])
self.ae(pcss(':lang("e")'), [])
self.ae(pcss('li:nth-child(1)', 'li:first-child'), ['first-li'])
self.ae(pcss('li:nth-child(3)'), ['third-li'])
self.ae(pcss('li:nth-child(10)'), [])
self.ae(pcss('li:nth-child(2n)', 'li:nth-child(even)', 'li:nth-child(2n+0)'), ['second-li', 'fourth-li', 'sixth-li'])
self.ae(pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)'), ['first-li', 'third-li', 'fifth-li', 'seventh-li'])
self.ae(pcss('li:nth-child(2n+4)'), ['fourth-li', 'sixth-li'])
self.ae(pcss('li:nth-child(3n+1)'), ['first-li', 'fourth-li', 'seventh-li'])
self.ae(pcss('li:nth-last-child(0)'), [])
self.ae(pcss('li:nth-last-child(1)', 'li:last-child'), ['seventh-li'])
self.ae(pcss('li:nth-last-child(2n)', 'li:nth-last-child(even)'), ['second-li', 'fourth-li', 'sixth-li'])
self.ae(pcss('li:nth-last-child(2n+2)'), ['second-li', 'fourth-li', 'sixth-li'])
self.ae(pcss('ol:first-of-type'), ['first-ol'])
self.ae(pcss('ol:nth-child(1)'), [])
self.ae(pcss('ol:nth-of-type(2)'), ['second-ol'])
self.ae(pcss('ol:nth-last-of-type(1)'), ['second-ol'])
self.ae(pcss('span:only-child'), ['foobar-span'])
self.ae(pcss('li div:only-child'), ['li-div'])
self.ae(pcss('div *:only-child'), ['li-div', 'foobar-span'])
self.ae(pcss('p *:only-of-type', skip_webkit=True), ['p-em', 'fieldset'])
self.ae(pcss('p:only-of-type', skip_webkit=True), ['paragraph'])
self.ae(pcss('a:empty', 'a:EMpty'), ['name-anchor'])
self.ae(pcss('li:empty'), ['third-li', 'fourth-li', 'fifth-li', 'sixth-li'])
self.ae(pcss(':root', 'html:root', 'li:root'), ['html'])
self.ae(pcss('* :root', 'p *:root'), [])
self.ae(pcss('.a', '.b', '*.a', 'ol.a'), ['first-ol'])
self.ae(pcss('.c', '*.c'), ['first-ol', 'third-li', 'fourth-li'])
self.ae(pcss('ol *.c', 'ol li.c', 'li ~ li.c', 'ol > li.c'), [
'third-li', 'fourth-li'])
self.ae(pcss('#first-li', 'li#first-li', '*#first-li'), ['first-li'])
self.ae(pcss('li div', 'li > div', 'div div'), ['li-div'])
self.ae(pcss('div > div'), [])
self.ae(pcss('div>.c', 'div > .c'), ['first-ol'])
self.ae(pcss('div + div'), ['foobar-div'])
self.ae(pcss('a ~ a'), ['tag-anchor', 'nofollow-anchor'])
self.ae(pcss('a[rel="tag"] ~ a'), ['nofollow-anchor'])
self.ae(pcss('ol#first-ol li:last-child'), ['seventh-li'])
self.ae(pcss('ol#first-ol *:last-child'), ['li-div', 'seventh-li'])
self.ae(pcss('#outer-div:first-child'), ['outer-div'])
self.ae(pcss('#outer-div :first-child'), [
'name-anchor', 'first-li', 'li-div', 'p-b',
'checkbox-fieldset-disabled', 'area-href'])
self.ae(pcss('a[href]'), ['tag-anchor', 'nofollow-anchor'])
self.ae(pcss(':not(*)'), [])
self.ae(pcss('a:not([href])'), ['name-anchor'])
self.ae(pcss('ol :Not(li[class])', skip_webkit=True), [
'first-li', 'second-li', 'li-div',
'fifth-li', 'sixth-li', 'seventh-li'])
self.ae(pcss(r'di\a0 v', r'div\['), [])
self.ae(pcss(r'[h\a0 ref]', r'[h\]ref]'), [])
del app
def test_select_shakespeare(self):
    ''' Check the number of matches of many selectors against the
    HTML_SHAKESPEARE document. '''
    document = html.document_fromstring(self.HTML_SHAKESPEARE)
    select = Select(document)

    def count(selector):
        return sum(1 for r in select(selector))

    # Data borrowed from http://mootools.net/slickspeed/
    # The expected count for '*' was changed from the original data.
    expected_counts = (
        ('*', 249),
        ('div:only-child', 22),  # ?
        ('div:nth-child(even)', 106),
        ('div:nth-child(2n)', 106),
        ('div:nth-child(odd)', 137),
        ('div:nth-child(2n+1)', 137),
        ('div:nth-child(n)', 243),
        ('div:last-child', 53),
        ('div:first-child', 51),
        ('div > div', 242),
        ('div + div', 190),
        ('div ~ div', 190),
        ('body', 1),
        ('body div', 243),
        ('div', 243),
        ('div div', 242),
        ('div div div', 241),
        ('div, div, div', 243),
        ('div, a, span', 243),
        ('.dialog', 51),
        ('div.dialog', 51),
        ('div .dialog', 51),
        ('div.character, div.dialog', 99),
        ('div.direction.dialog', 0),
        ('div.dialog.direction', 0),
        ('div.dialog.scene', 1),
        ('div.scene.scene', 1),
        ('div.scene .scene', 0),
        ('div.direction .dialog ', 0),
        ('div .dialog .direction', 4),
        ('div.dialog .dialog .direction', 4),
        ('#speech5', 1),
        ('div#speech5', 1),
        ('div #speech5', 1),
        ('div.scene div.dialog', 49),
        ('div#scene1 div.dialog div', 142),
        ('#scene1 #speech1', 1),
        ('div[class]', 103),
        ('div[class=dialog]', 50),
        ('div[class^=dia]', 51),
        ('div[class$=log]', 50),
        ('div[class*=sce]', 1),
        ('div[class|=dialog]', 50),  # ? Seems right
        ('div[class~=dialog]', 51),  # ? Seems right
    )
    # Use the TestCase assertion rather than bare ``assert`` so the checks
    # survive ``python -O`` and a failure names the offending selector.
    for selector, expected in expected_counts:
        self.ae(count(selector), expected, 'Wrong number of matches for: %r' % selector)
# }}}
# Run tests {{{