diff --git a/src/css_selectors/errors.py b/src/css_selectors/errors.py
index 17f2d9095b..56d4f69c3c 100644
--- a/src/css_selectors/errors.py
+++ b/src/css_selectors/errors.py
@@ -10,7 +10,7 @@ class SelectorError(ValueError):
     """Common parent for SelectorSyntaxError and ExpressionError"""
 
 
-class SelectorSyntaxError(SelectorError, SyntaxError):
+class SelectorSyntaxError(SelectorError):
     """Parsing a selector that does not match the grammar."""
 
 
diff --git a/src/css_selectors/parse.py b/src/css_selectors/parse.py
index e12ae5f920..ce4ee8e348 100644
--- a/src/css_selectors/parse.py
+++ b/src/css_selectors/parse.py
@@ -30,6 +30,15 @@ def ascii_lower(string):
     """Lower-case, but only in the ASCII range."""
     return string.translate(utab if isinstance(string, _unicode) else tab)
 
+def urepr(x):
+    """Like repr(), but without the u prefix on unicode strings (also inside lists)."""
+    if isinstance(x, list):
+        return '[%s]' % ', '.join(map(urepr, x))
+    ans = repr(x)
+    if ans.startswith(("u'", 'u"')):
+        ans = ans[1:]
+    return ans
+
 # Parsed objects
 
 class Selector(object):
@@ -129,9 +138,9 @@ class FunctionalPseudoElement(object):
         self.arguments = arguments
 
     def __repr__(self):
-        return '%s[::%s(%r)]' % (
+        return '%s[::%s(%s)]' % (
             self.__class__.__name__, self.name,
-            [token.value for token in self.arguments])
+            urepr([token.value for token in self.arguments]))
 
     def argument_types(self):
         return [token.type for token in self.arguments]
@@ -153,9 +162,9 @@ class Function(object):
         self.arguments = arguments
 
     def __repr__(self):
-        return '%s[%r:%s(%r)]' % (
+        return '%s[%r:%s(%s)]' % (
             self.__class__.__name__, self.selector, self.name,
-            [token.value for token in self.arguments])
+            urepr([token.value for token in self.arguments]))
 
     def argument_types(self):
         return [token.type for token in self.arguments]
@@ -225,9 +234,9 @@ class Attrib(object):
             return '%s[%r[%s]]' % (
                 self.__class__.__name__, self.selector, attrib)
         else:
-            return '%s[%r[%s %s %r]]' % (
+            return '%s[%r[%s %s %s]]' % (
                 self.__class__.__name__, self.selector, attrib,
-                self.operator, self.value)
+                self.operator, urepr(self.value))
 
     def specificity(self):
         a, b, c = self.selector.specificity()
diff --git a/src/css_selectors/tests.py b/src/css_selectors/tests.py
new file mode 100644
index 0000000000..d6dd74f270
--- /dev/null
+++ b/src/css_selectors/tests.py
@@ -0,0 +1,326 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2015, Kovid Goyal '
+
+import unittest, sys, argparse
+
+from css_selectors.errors import SelectorSyntaxError
+from css_selectors.parse import tokenize, parse
+
+class TestCSSSelectors(unittest.TestCase):
+
+    ae = unittest.TestCase.assertEqual
+
+    def test_tokenizer(self):  # {{{
+        tokens = [
+            type('')(item) for item in tokenize(
+                r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)')]
+        self.ae(tokens, [
+            "",
+            "",
+            "' at 5>",
+            "",
+            # the no-break space is not whitespace in CSS
+            "",  # f\xa0
+            "",
+            "",
+            "",
+            "",
+            "",
+            "",
+            "",
+            "",
+            "",
+            "",
+            "",
+            "",
+        ])
+        # }}}
+
+    def test_parser(self):  # {{{
+        def repr_parse(css):
+            selectors = parse(css)
+            for selector in selectors:
+                assert selector.pseudo_element is None
+            return [repr(selector.parsed_tree).replace("(u'", "('")
+                    for selector in selectors]
+
+        def parse_many(first, *others):
+            result = repr_parse(first)
+            for other in others:
+                assert repr_parse(other) == result
+            return result
+
+        assert parse_many('*') == ['Element[*]']
+        assert parse_many('*|*') == ['Element[*]']
+        assert parse_many('*|foo') == ['Element[foo]']
+        assert parse_many('foo|*') == ['Element[foo|*]']
+        assert parse_many('foo|bar') == ['Element[foo|bar]']
+        # This will never match, but it is valid:
+        assert parse_many('#foo#bar') == ['Hash[Hash[Element[*]#foo]#bar]']
+        assert parse_many(
+            'div>.foo',
+            'div> .foo',
+            'div >.foo',
+            'div > .foo',
+            'div \n> \t \t .foo', 'div\r>\n\n\n.foo', 'div\f>\f.foo'
+        ) == ['CombinedSelector[Element[div] > Class[Element[*].foo]]']
+        assert parse_many('td.foo,.bar',
+            'td.foo, .bar',
+            'td.foo\t\r\n\f ,\t\r\n\f .bar'
+        ) == [
+            'Class[Element[td].foo]',
+            'Class[Element[*].bar]'
+        ]
+        assert parse_many('div, td.foo, div.bar span') == [
+            'Element[div]',
+            'Class[Element[td].foo]',
+            'CombinedSelector[Class[Element[div].bar] '
+            ' Element[span]]']
+        assert parse_many('div > p') == [
+            'CombinedSelector[Element[div] > Element[p]]']
+        assert parse_many('td:first') == [
+            'Pseudo[Element[td]:first]']
+        assert parse_many('td:first') == [
+            'Pseudo[Element[td]:first]']
+        assert parse_many('td :first') == [
+            'CombinedSelector[Element[td] '
+            ' Pseudo[Element[*]:first]]']
+        assert parse_many('td :first') == [
+            'CombinedSelector[Element[td] '
+            ' Pseudo[Element[*]:first]]']
+        assert parse_many('a[name]', 'a[ name\t]') == [
+            'Attrib[Element[a][name]]']
+        assert parse_many('a [name]') == [
+            'CombinedSelector[Element[a] Attrib[Element[*][name]]]']
+        self.ae(parse_many('a[rel="include"]', 'a[rel = include]'), [
+            "Attrib[Element[a][rel = 'include']]"])
+        assert parse_many("a[hreflang |= 'en']", "a[hreflang|=en]") == [
+            "Attrib[Element[a][hreflang |= 'en']]"]
+        self.ae(parse_many('div:nth-child(10)'), [
+            "Function[Element[div]:nth-child(['10'])]"])
+        assert parse_many(':nth-child(2n+2)') == [
+            "Function[Element[*]:nth-child(['2', 'n', '+2'])]"]
+        assert parse_many('div:nth-of-type(10)') == [
+            "Function[Element[div]:nth-of-type(['10'])]"]
+        assert parse_many('div div:nth-of-type(10) .aclass') == [
+            'CombinedSelector[CombinedSelector[Element[div] '
+            "Function[Element[div]:nth-of-type(['10'])]] "
+            ' Class[Element[*].aclass]]']
+        assert parse_many('label:only') == [
+            'Pseudo[Element[label]:only]']
+        assert parse_many('a:lang(fr)') == [
+            "Function[Element[a]:lang(['fr'])]"]
+        assert parse_many('div:contains("foo")') == [
+            "Function[Element[div]:contains(['foo'])]"]
+        assert parse_many('div#foobar') == [
+            'Hash[Element[div]#foobar]']
+        assert parse_many('div:not(div.foo)') == [
+            'Negation[Element[div]:not(Class[Element[div].foo])]']
+        assert parse_many('td ~ th') == [
+            'CombinedSelector[Element[td] ~ Element[th]]']
+        # }}}
+
+    def test_pseudo_elements(self):  # {{{
+        def parse_pseudo(css):
+            result = []
+            for selector in parse(css):
+                pseudo = selector.pseudo_element
+                pseudo = type('')(pseudo) if pseudo else pseudo
+                # No Symbol here
+                assert pseudo is None or isinstance(pseudo, type(''))
+                selector = repr(selector.parsed_tree).replace("(u'", "('")
+                result.append((selector, pseudo))
+            return result
+
+        def parse_one(css):
+            result = parse_pseudo(css)
+            assert len(result) == 1
+            return result[0]
+
+        assert parse_one('foo') == ('Element[foo]', None)
+        assert parse_one('*') == ('Element[*]', None)
+        assert parse_one(':empty') == ('Pseudo[Element[*]:empty]', None)
+
+        # Special cases for CSS 2.1 pseudo-elements
+        assert parse_one(':BEfore') == ('Element[*]', 'before')
+        assert parse_one(':aftER') == ('Element[*]', 'after')
+        assert parse_one(':First-Line') == ('Element[*]', 'first-line')
+        assert parse_one(':First-Letter') == ('Element[*]', 'first-letter')
+
+        assert parse_one('::befoRE') == ('Element[*]', 'before')
+        assert parse_one('::AFter') == ('Element[*]', 'after')
+        assert parse_one('::firsT-linE') == ('Element[*]', 'first-line')
+        assert parse_one('::firsT-letteR') == ('Element[*]', 'first-letter')
+
+        assert parse_one('::text-content') == ('Element[*]', 'text-content')
+        self.ae(parse_one('::attr(name)'), (
+            "Element[*]", "FunctionalPseudoElement[::attr(['name'])]"))
+
+        assert parse_one('::Selection') == ('Element[*]', 'selection')
+        assert parse_one('foo:after') == ('Element[foo]', 'after')
+        assert parse_one('foo::selection') == ('Element[foo]', 'selection')
+        assert parse_one('lorem#ipsum ~ a#b.c[href]:empty::selection') == (
+            'CombinedSelector[Hash[Element[lorem]#ipsum] ~ '
+            'Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]',
+            'selection')
+
+        assert parse_pseudo('foo:before, bar, baz:after') == [
+            ('Element[foo]', 'before'),
+            ('Element[bar]', None),
+            ('Element[baz]', 'after')]
+        # }}}
+
+    def test_specificity(self):  # {{{
+        def specificity(css):
+            selectors = parse(css)
+            assert len(selectors) == 1
+            return selectors[0].specificity()
+
+        assert specificity('*') == (0, 0, 0)
+        assert specificity(' foo') == (0, 0, 1)
+        assert specificity(':empty ') == (0, 1, 0)
+        assert specificity(':before') == (0, 0, 1)
+        assert specificity('*:before') == (0, 0, 1)
+        assert specificity(':nth-child(2)') == (0, 1, 0)
+        assert specificity('.bar') == (0, 1, 0)
+        assert specificity('[baz]') == (0, 1, 0)
+        assert specificity('[baz="4"]') == (0, 1, 0)
+        assert specificity('[baz^="4"]') == (0, 1, 0)
+        assert specificity('#lipsum') == (1, 0, 0)
+
+        assert specificity(':not(*)') == (0, 0, 0)
+        assert specificity(':not(foo)') == (0, 0, 1)
+        assert specificity(':not(.foo)') == (0, 1, 0)
+        assert specificity(':not([foo])') == (0, 1, 0)
+        assert specificity(':not(:empty)') == (0, 1, 0)
+        assert specificity(':not(#foo)') == (1, 0, 0)
+
+        assert specificity('foo:empty') == (0, 1, 1)
+        assert specificity('foo:before') == (0, 0, 2)
+        assert specificity('foo::before') == (0, 0, 2)
+        assert specificity('foo:empty::before') == (0, 1, 2)
+
+        assert specificity('#lorem + foo#ipsum:first-child > bar:first-line'
+                           ) == (2, 1, 3)
+        # }}}
+
+    def test_parse_errors(self):  # {{{
+        def get_error(css):
+            try:
+                parse(css)
+            except SelectorSyntaxError:
+                # Py2, Py3, ...
+                return str(sys.exc_info()[1]).replace("(u'", "('")
+
+        self.ae(get_error('attributes(href)/html/body/a'), (
+            "Expected selector, got "))
+        assert get_error('attributes(href)') == (
+            "Expected selector, got ")
+        assert get_error('html/body/a') == (
+            "Expected selector, got ")
+        assert get_error(' ') == (
+            "Expected selector, got ")
+        assert get_error('div, ') == (
+            "Expected selector, got ")
+        assert get_error(' , div') == (
+            "Expected selector, got ")
+        assert get_error('p, , div') == (
+            "Expected selector, got ")
+        assert get_error('div > ') == (
+            "Expected selector, got ")
+        assert get_error(' > div') == (
+            "Expected selector, got ' at 2>")
+        assert get_error('foo|#bar') == (
+            "Expected ident or '*', got ")
+        assert get_error('#.foo') == (
+            "Expected selector, got ")
+        assert get_error('.#foo') == (
+            "Expected ident, got ")
+        assert get_error(':#foo') == (
+            "Expected ident, got ")
+        assert get_error('[*]') == (
+            "Expected '|', got ")
+        assert get_error('[foo|]') == (
+            "Expected ident, got ")
+        assert get_error('[#]') == (
+            "Expected ident or '*', got ")
+        assert get_error('[foo=#]') == (
+            "Expected string or ident, got ")
+        assert get_error('[href]a') == (
+            "Expected selector, got ")
+        assert get_error('[rel=stylesheet]') is None
+        assert get_error('[rel:stylesheet]') == (
+            "Operator expected, got ")
+        assert get_error('[rel=stylesheet') == (
+            "Expected ']', got ")
+        assert get_error(':lang(fr)') is None
+        assert get_error(':lang(fr') == (
+            "Expected an argument, got ")
+        assert get_error(':contains("foo') == (
+            "Unclosed string at 10")
+        assert get_error('foo!') == (
+            "Expected selector, got ")
+
+        # Mis-placed pseudo-elements
+        assert get_error('a:before:empty') == (
+            "Got pseudo-element ::before not at the end of a selector")
+        assert get_error('li:before a') == (
+            "Got pseudo-element ::before not at the end of a selector")
+        assert get_error(':not(:before)') == (
+            "Got pseudo-element ::before inside :not() at 12")
+        assert get_error(':not(:not(a))') == (
+            "Got nested :not()")
+        # }}}
+
+# Run tests {{{
+def find_tests():
+    """Return a suite containing every test in TestCSSSelectors."""
+    return unittest.defaultTestLoader.loadTestsFromTestCase(TestCSSSelectors)
+
+def run_tests(find_tests=find_tests, for_build=False):
+    """Run the tests and exit with status 1 unless they all pass.
+
+    Unless for_build is true, an optional test name is read from the command
+    line; a name starting with '.' picks one test method of the suite.
+    """
+    if not for_build:
+        parser = argparse.ArgumentParser()
+        parser.add_argument('name', nargs='?', default=None,
+                            help='The name of the test to run')
+        args = parser.parse_args()
+    if not for_build and args.name and args.name.startswith('.'):
+        tests = find_tests()
+        q = args.name[1:]
+        if not q.startswith('test_'):
+            q = 'test_' + q
+        ans = None
+        try:
+            for test in tests:
+                if test._testMethodName == q:
+                    ans = test
+                    raise StopIteration()
+        except StopIteration:
+            pass
+        if ans is None:
+            print ('No test named %s found' % args.name)
+            raise SystemExit(1)
+        tests = ans
+    else:
+        tests = unittest.defaultTestLoader.loadTestsFromName(args.name) if not for_build and args.name else find_tests()
+    r = unittest.TextTestRunner
+    if for_build:
+        r = r(verbosity=0, buffer=True, failfast=True)
+    else:
+        r = r(verbosity=4)
+    result = r.run(tests)
+    if not result.wasSuccessful():
+        raise SystemExit(1)
+
+if __name__ == '__main__':
+    run_tests()
+# }}}