Mirror of https://github.com/kovidgoyal/calibre.git
Testing for the C tokenizer

commit 04b45413c6 (parent adac7e6d1e)
Deleted file (255 lines):

@@ -1,255 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

from tinycss.tests import BaseTest
from tinycss.tokenizer import tokenize_flat as tokenize, regroup


def jsonify(tokens):
    """Turn tokens into "JSON-compatible" data structures."""
    for token in tokens:
        if token.type == 'FUNCTION':
            yield (token.type, token.function_name,
                   list(jsonify(token.content)))
        elif token.is_container:
            yield token.type, list(jsonify(token.content))
        else:
            yield token.type, token.value


class TestTokenizer(BaseTest):

    def test_token_api(self):
        for css_source in [
            '(8, foo, [z])', '[8, foo, (z)]', '{8, foo, [z]}', 'func(8, foo, [z])'
        ]:
            tokens = list(regroup(tokenize(css_source)))
            self.ae(len(tokens), 1)
            self.ae(len(tokens[0].content), 7)

    def test_token_serialize_css(self):
        for css_source in [
            r'''p[example="\
foo(int x) {\
 this.x = x;\
}\
"]''',
            '"Lorem\\26Ipsum\ndolor" sit',
            '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }',
            'not([[lorem]]{ipsum (42)})',
            'a[b{d]e}',
            'a[b{"d',
        ]:
            for _regroup in (regroup, lambda x: x):
                tokens = _regroup(tokenize(css_source, ignore_comments=False))
                result = ''.join(token.as_css() for token in tokens)
                self.ae(result, css_source)

    def test_comments(self):
        for ignore_comments, expected_tokens in [
            (False, [
                ('COMMENT', '/* lorem */'),
                ('S', ' '),
                ('IDENT', 'ipsum'),
                ('[', [
                    ('IDENT', 'dolor'),
                    ('COMMENT', '/* sit */'),
                ]),
                ('BAD_COMMENT', '/* amet')
            ]),
            (True, [
                ('S', ' '),
                ('IDENT', 'ipsum'),
                ('[', [
                    ('IDENT', 'dolor'),
                ]),
            ]),
        ]:
            css_source = '/* lorem */ ipsum[dolor/* sit */]/* amet'
            tokens = regroup(tokenize(css_source, ignore_comments))
            result = list(jsonify(tokens))
            self.ae(result, expected_tokens)

    def test_token_grouping(self):
        for css_source, expected_tokens in [
            ('', []),
            (r'Lorem\26 "i\psum"4px', [
                ('IDENT', 'Lorem&'), ('STRING', 'ipsum'), ('DIMENSION', 4)]),

            ('not([[lorem]]{ipsum (42)})', [
                ('FUNCTION', 'not', [
                    ('[', [
                        ('[', [
                            ('IDENT', 'lorem'),
                        ]),
                    ]),
                    ('{', [
                        ('IDENT', 'ipsum'),
                        ('S', ' '),
                        ('(', [
                            ('INTEGER', 42),
                        ])
                    ])
                ])]),

            # Close everything at EOF, no error
            ('a[b{"d', [
                ('IDENT', 'a'),
                ('[', [
                    ('IDENT', 'b'),
                    ('{', [
                        ('STRING', 'd'),
                    ]),
                ]),
            ]),

            # Any remaining ), ] or } token is a nesting error
            ('a[b{d]e}', [
                ('IDENT', 'a'),
                ('[', [
                    ('IDENT', 'b'),
                    ('{', [
                        ('IDENT', 'd'),
                        (']', ']'),  # The error is visible here
                        ('IDENT', 'e'),
                    ]),
                ]),
            ]),
            # ref:
            ('a[b{d}e]', [
                ('IDENT', 'a'),
                ('[', [
                    ('IDENT', 'b'),
                    ('{', [
                        ('IDENT', 'd'),
                    ]),
                    ('IDENT', 'e'),
                ]),
            ]),
        ]:
            tokens = regroup(tokenize(css_source, ignore_comments=False))
            result = list(jsonify(tokens))
            self.ae(result, expected_tokens)

    def test_positions(self):
        """Test the reported line/column position of each token."""
        css = '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }'
        tokens = tokenize(css, ignore_comments=False)
        result = [(token.type, token.line, token.column) for token in tokens]
        self.ae(result, [
            ('COMMENT', 1, 1), ('S', 2, 9),
            ('IDENT', 3, 1), ('S', 3, 2), ('{', 3, 3),
            ('S', 3, 4), ('IDENT', 4, 5), (':', 4, 10),
            ('S', 4, 11), ('IDENT', 4, 12), (';', 4, 15), ('S', 4, 16),
            ('IDENT', 4, 17), (':', 4, 24), ('S', 4, 25), ('STRING', 4, 26),
            ('S', 5, 5), ('}', 5, 6)])

    def test_tokens(self):
        for css_source, expected_tokens in [
            ('', []),
            ('red -->',
             [('IDENT', 'red'), ('S', ' '), ('CDC', '-->')]),
            # Longest match rule: no CDC
            ('red-->',
             [('IDENT', 'red--'), ('DELIM', '>')]),

            (r'''p[example="\
foo(int x) {\
 this.x = x;\
}\
"]''', [
                ('IDENT', 'p'),
                ('[', '['),
                ('IDENT', 'example'),
                ('DELIM', '='),
                ('STRING', 'foo(int x) { this.x = x;}'),
                (']', ']')]),

            # Numbers are parsed
            ('42 .5 -4pX 1.25em 30%',
             [('INTEGER', 42), ('S', ' '),
              ('NUMBER', .5), ('S', ' '),
              # units are normalized to lower-case:
              ('DIMENSION', -4, 'px'), ('S', ' '),
              ('DIMENSION', 1.25, 'em'), ('S', ' '),
              ('PERCENTAGE', 30, '%')]),

            # URLs are extracted
            ('url(foo.png)', [('URI', 'foo.png')]),
            ('url("foo.png")', [('URI', 'foo.png')]),

            # Escaping

            (r'/* Comment with a \ backslash */',
             [('COMMENT', '/* Comment with a \ backslash */')]),  # Unchanged

            # backslash followed by a newline in a string: ignored
            ('"Lorem\\\nIpsum"', [('STRING', 'LoremIpsum')]),

            # backslash followed by a newline outside a string: stands for itself
            ('Lorem\\\nIpsum', [
                ('IDENT', 'Lorem'), ('DELIM', '\\'),
                ('S', '\n'), ('IDENT', 'Ipsum')]),

            # Cancel the meaning of special characters
            (r'"Lore\m Ipsum"', [('STRING', 'Lorem Ipsum')]),  # or not specal
            (r'"Lorem \49psum"', [('STRING', 'Lorem Ipsum')]),
            (r'"Lorem \49 psum"', [('STRING', 'Lorem Ipsum')]),
            (r'"Lorem\"Ipsum"', [('STRING', 'Lorem"Ipsum')]),
            (r'"Lorem\\Ipsum"', [('STRING', r'Lorem\Ipsum')]),
            (r'"Lorem\5c Ipsum"', [('STRING', r'Lorem\Ipsum')]),
            (r'Lorem\+Ipsum', [('IDENT', 'Lorem+Ipsum')]),
            (r'Lorem+Ipsum', [('IDENT', 'Lorem'), ('DELIM', '+'), ('IDENT', 'Ipsum')]),
            (r'url(foo\).png)', [('URI', 'foo).png')]),

            # Unicode and backslash escaping
            ('\\26 B', [('IDENT', '&B')]),
            ('\\&B', [('IDENT', '&B')]),
            ('@\\26\tB', [('ATKEYWORD', '@&B')]),
            ('@\\&B', [('ATKEYWORD', '@&B')]),
            ('#\\26\nB', [('HASH', '#&B')]),
            ('#\\&B', [('HASH', '#&B')]),
            ('\\26\r\nB(', [('FUNCTION', '&B(')]),
            ('\\&B(', [('FUNCTION', '&B(')]),
            (r'12.5\000026B', [('DIMENSION', 12.5, '&b')]),
            (r'12.5\0000263B', [('DIMENSION', 12.5, '&3b')]),  # max 6 digits
            (r'12.5\&B', [('DIMENSION', 12.5, '&b')]),
            (r'"\26 B"', [('STRING', '&B')]),
            (r"'\000026B'", [('STRING', '&B')]),
            (r'"\&B"', [('STRING', '&B')]),
            (r'url("\26 B")', [('URI', '&B')]),
            (r'url(\26 B)', [('URI', '&B')]),
            (r'url("\&B")', [('URI', '&B')]),
            (r'url(\&B)', [('URI', '&B')]),
            (r'Lorem\110000Ipsum', [('IDENT', 'Lorem\uFFFDIpsum')]),

            # Bad strings

            # String ends at EOF without closing: no error, parsed
            ('"Lorem\\26Ipsum', [('STRING', 'Lorem&Ipsum')]),
            # Unescaped newline: ends the string, error, unparsed
            ('"Lorem\\26Ipsum\n', [
                ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n')]),
            # Tokenization restarts after the newline, so the second " starts
            # a new string (which ends at EOF without errors, as above.)
            ('"Lorem\\26Ipsum\ndolor" sit', [
                ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n'),
                ('IDENT', 'dolor'), ('STRING', ' sit')]),
        ]:
            sources = [css_source]
            for css_source in sources:
                tokens = tokenize(css_source, ignore_comments=False)
                result = [
                    (token.type, token.value) + (
                        () if token.unit is None else (token.unit,))
                    for token in tokens
                ]
                self.ae(result, expected_tokens)
src/tinycss/tests/tokenizing.py (new file, 281 lines):

@@ -0,0 +1,281 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

from tinycss.tests import BaseTest
from tinycss.tokenizer import python_tokenize_flat, c_tokenize_flat, regroup


def jsonify(tokens):
    """Turn tokens into "JSON-compatible" data structures."""
    for token in tokens:
        if token.type == 'FUNCTION':
            yield (token.type, token.function_name,
                   list(jsonify(token.content)))
        elif token.is_container:
            yield token.type, list(jsonify(token.content))
        else:
            yield token.type, token.value


if c_tokenize_flat is None:
    tokenizers = (python_tokenize_flat,)
else:
    tokenizers = (python_tokenize_flat, c_tokenize_flat)


def token_api(self, tokenize):
    for css_source in [
        '(8, foo, [z])', '[8, foo, (z)]', '{8, foo, [z]}', 'func(8, foo, [z])'
    ]:
        tokens = list(regroup(tokenize(css_source)))
        self.ae(len(tokens), 1)
        self.ae(len(tokens[0].content), 7)


def token_serialize_css(self, tokenize):
    for css_source in [
        r'''p[example="\
foo(int x) {\
 this.x = x;\
}\
"]''',
        '"Lorem\\26Ipsum\ndolor" sit',
        '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }',
        'not([[lorem]]{ipsum (42)})',
        'a[b{d]e}',
        'a[b{"d',
    ]:
        for _regroup in (regroup, lambda x: x):
            tokens = _regroup(tokenize(css_source, ignore_comments=False))
            result = ''.join(token.as_css() for token in tokens)
            self.ae(result, css_source)


def comments(self, tokenize):
    for ignore_comments, expected_tokens in [
        (False, [
            ('COMMENT', '/* lorem */'),
            ('S', ' '),
            ('IDENT', 'ipsum'),
            ('[', [
                ('IDENT', 'dolor'),
                ('COMMENT', '/* sit */'),
            ]),
            ('BAD_COMMENT', '/* amet')
        ]),
        (True, [
            ('S', ' '),
            ('IDENT', 'ipsum'),
            ('[', [
                ('IDENT', 'dolor'),
            ]),
        ]),
    ]:
        css_source = '/* lorem */ ipsum[dolor/* sit */]/* amet'
        tokens = regroup(tokenize(css_source, ignore_comments))
        result = list(jsonify(tokens))
        self.ae(result, expected_tokens)


def token_grouping(self, tokenize):
    for css_source, expected_tokens in [
        ('', []),
        (r'Lorem\26 "i\psum"4px', [
            ('IDENT', 'Lorem&'), ('STRING', 'ipsum'), ('DIMENSION', 4)]),

        ('not([[lorem]]{ipsum (42)})', [
            ('FUNCTION', 'not', [
                ('[', [
                    ('[', [
                        ('IDENT', 'lorem'),
                    ]),
                ]),
                ('{', [
                    ('IDENT', 'ipsum'),
                    ('S', ' '),
                    ('(', [
                        ('INTEGER', 42),
                    ])
                ])
            ])]),

        # Close everything at EOF, no error
        ('a[b{"d', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('STRING', 'd'),
                ]),
            ]),
        ]),

        # Any remaining ), ] or } token is a nesting error
        ('a[b{d]e}', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('IDENT', 'd'),
                    (']', ']'),  # The error is visible here
                    ('IDENT', 'e'),
                ]),
            ]),
        ]),
        # ref:
        ('a[b{d}e]', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('IDENT', 'd'),
                ]),
                ('IDENT', 'e'),
            ]),
        ]),
    ]:
        tokens = regroup(tokenize(css_source, ignore_comments=False))
        result = list(jsonify(tokens))
        self.ae(result, expected_tokens)


def positions(self, tokenize):
    css = '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }'
    tokens = tokenize(css, ignore_comments=False)
    result = [(token.type, token.line, token.column) for token in tokens]
    self.ae(result, [
        ('COMMENT', 1, 1), ('S', 2, 9),
        ('IDENT', 3, 1), ('S', 3, 2), ('{', 3, 3),
        ('S', 3, 4), ('IDENT', 4, 5), (':', 4, 10),
        ('S', 4, 11), ('IDENT', 4, 12), (';', 4, 15), ('S', 4, 16),
        ('IDENT', 4, 17), (':', 4, 24), ('S', 4, 25), ('STRING', 4, 26),
        ('S', 5, 5), ('}', 5, 6)])


def tokens(self, tokenize):
    for css_source, expected_tokens in [
        ('', []),
        ('red -->',
         [('IDENT', 'red'), ('S', ' '), ('CDC', '-->')]),
        # Longest match rule: no CDC
        ('red-->',
         [('IDENT', 'red--'), ('DELIM', '>')]),

        (r'''p[example="\
foo(int x) {\
 this.x = x;\
}\
"]''', [
            ('IDENT', 'p'),
            ('[', '['),
            ('IDENT', 'example'),
            ('DELIM', '='),
            ('STRING', 'foo(int x) { this.x = x;}'),
            (']', ']')]),

        # Numbers are parsed
        ('42 .5 -4pX 1.25em 30%',
         [('INTEGER', 42), ('S', ' '),
          ('NUMBER', .5), ('S', ' '),
          # units are normalized to lower-case:
          ('DIMENSION', -4, 'px'), ('S', ' '),
          ('DIMENSION', 1.25, 'em'), ('S', ' '),
          ('PERCENTAGE', 30, '%')]),

        # URLs are extracted
        ('url(foo.png)', [('URI', 'foo.png')]),
        ('url("foo.png")', [('URI', 'foo.png')]),

        # Escaping

        (r'/* Comment with a \ backslash */',
         [('COMMENT', '/* Comment with a \ backslash */')]),  # Unchanged

        # backslash followed by a newline in a string: ignored
        ('"Lorem\\\nIpsum"', [('STRING', 'LoremIpsum')]),

        # backslash followed by a newline outside a string: stands for itself
        ('Lorem\\\nIpsum', [
            ('IDENT', 'Lorem'), ('DELIM', '\\'),
            ('S', '\n'), ('IDENT', 'Ipsum')]),

        # Cancel the meaning of special characters
        (r'"Lore\m Ipsum"', [('STRING', 'Lorem Ipsum')]),  # or not special
        (r'"Lorem \49psum"', [('STRING', 'Lorem Ipsum')]),
        (r'"Lorem \49 psum"', [('STRING', 'Lorem Ipsum')]),
        (r'"Lorem\"Ipsum"', [('STRING', 'Lorem"Ipsum')]),
        (r'"Lorem\\Ipsum"', [('STRING', r'Lorem\Ipsum')]),
        (r'"Lorem\5c Ipsum"', [('STRING', r'Lorem\Ipsum')]),
        (r'Lorem\+Ipsum', [('IDENT', 'Lorem+Ipsum')]),
        (r'Lorem+Ipsum', [('IDENT', 'Lorem'), ('DELIM', '+'), ('IDENT', 'Ipsum')]),
        (r'url(foo\).png)', [('URI', 'foo).png')]),

        # Unicode and backslash escaping
        ('\\26 B', [('IDENT', '&B')]),
        ('\\&B', [('IDENT', '&B')]),
        ('@\\26\tB', [('ATKEYWORD', '@&B')]),
        ('@\\&B', [('ATKEYWORD', '@&B')]),
        ('#\\26\nB', [('HASH', '#&B')]),
        ('#\\&B', [('HASH', '#&B')]),
        ('\\26\r\nB(', [('FUNCTION', '&B(')]),
        ('\\&B(', [('FUNCTION', '&B(')]),
        (r'12.5\000026B', [('DIMENSION', 12.5, '&b')]),
        (r'12.5\0000263B', [('DIMENSION', 12.5, '&3b')]),  # max 6 digits
        (r'12.5\&B', [('DIMENSION', 12.5, '&b')]),
        (r'"\26 B"', [('STRING', '&B')]),
        (r"'\000026B'", [('STRING', '&B')]),
        (r'"\&B"', [('STRING', '&B')]),
        (r'url("\26 B")', [('URI', '&B')]),
        (r'url(\26 B)', [('URI', '&B')]),
        (r'url("\&B")', [('URI', '&B')]),
        (r'url(\&B)', [('URI', '&B')]),
        (r'Lorem\110000Ipsum', [('IDENT', 'Lorem\uFFFDIpsum')]),

        # Bad strings

        # String ends at EOF without closing: no error, parsed
        ('"Lorem\\26Ipsum', [('STRING', 'Lorem&Ipsum')]),
        # Unescaped newline: ends the string, error, unparsed
        ('"Lorem\\26Ipsum\n', [
            ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n')]),
        # Tokenization restarts after the newline, so the second " starts
        # a new string (which ends at EOF without errors, as above.)
        ('"Lorem\\26Ipsum\ndolor" sit', [
            ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n'),
            ('IDENT', 'dolor'), ('STRING', ' sit')]),
    ]:
        sources = [css_source]
        for css_source in sources:
            tokens = tokenize(css_source, ignore_comments=False)
            result = [
                (token.type, token.value) + (
                    () if token.unit is None else (token.unit,))
                for token in tokens
            ]
            self.ae(result, expected_tokens)


class TestTokenizer(BaseTest):

    def run_test(self, func):
        for tokenize in tokenizers:
            func(self, tokenize)

    def test_token_api(self):
        self.run_test(token_api)

    def test_token_serialize_css(self):
        self.run_test(token_serialize_css)

    def test_comments(self):
        self.run_test(comments)

    def test_token_grouping(self):
        self.run_test(token_grouping)

    def test_positions(self):
        """Test the reported line/column position of each token."""
        self.run_test(positions)

    def test_tokens(self):
        self.run_test(tokens)
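The new layout turns each test into a module-level function that takes the tokenizer as a parameter, and run_test() fans every body out over all available implementations. The same fan-out pattern in a minimal, self-contained sketch (the names below are illustrative, not from the commit):

    # Illustrative sketch of the run_test() fan-out; not code from the commit.
    def shout(s):             # stand-in for python_tokenize_flat
        return s.upper()

    def shout_slowly(s):      # stand-in for c_tokenize_flat
        return ''.join(ch.upper() for ch in s)

    implementations = (shout, shout_slowly)

    def check_roundtrip(impl):      # a test body, parametrized by implementation
        assert impl('abc') == 'ABC'

    def run_test(func):             # run one body against every implementation
        for impl in implementations:
            func(impl)

    run_test(check_roundtrip)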
@@ -14,6 +14,7 @@
 typedef struct {
     PyObject_HEAD
     // Type-specific fields go here.
+    PyObject *is_container;
     PyObject *type;
     PyObject *_as_css;
     PyObject *value;
@@ -26,6 +27,7 @@ typedef struct {
 static void
 tokenizer_Token_dealloc(tokenizer_Token* self)
 {
+    Py_XDECREF(self->is_container); self->is_container = NULL;
     Py_XDECREF(self->type); self->type = NULL;
     Py_XDECREF(self->_as_css); self->_as_css = NULL;
     Py_XDECREF(self->value); self->value = NULL;
@@ -47,6 +49,7 @@ tokenizer_Token_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
         self->ob_type->tp_free((PyObject*)self); return NULL;
     }
     Py_INCREF(self->type); Py_INCREF(self->_as_css); Py_INCREF(self->value); Py_INCREF(self->unit); Py_INCREF(self->line); Py_INCREF(self->column);
+    self->is_container = Py_False; Py_INCREF(self->is_container);
 
     return (PyObject *)self;
 }
@@ -81,6 +84,7 @@ tokenizer_Token_as_css(tokenizer_Token *self, PyObject *args, PyObject *kwargs)
 }
 
 static PyMemberDef tokenizer_Token_members[] = {
+    {"is_container", T_OBJECT_EX, offsetof(tokenizer_Token, is_container), 0, "False unless this token is a container for other tokens"},
     {"type", T_OBJECT_EX, offsetof(tokenizer_Token, type), 0, "The token type"},
    {"_as_css", T_OBJECT_EX, offsetof(tokenizer_Token, _as_css), 0, "Internal variable, use as_css() method instead."},
    {"value", T_OBJECT_EX, offsetof(tokenizer_Token, value), 0, "The token value"},
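The is_container member added here is what the test helper jsonify() branches on. A sketch of reading it from Python, assuming tinycss is importable (the CSS string is arbitrary):

    from tinycss.tokenizer import python_tokenize_flat, regroup

    for tok in regroup(python_tokenize_flat('func(8, [z])', ignore_comments=True)):
        if tok.is_container:
            # FUNCTION, (, [ and { tokens carry their nested tokens in .content
            print(tok.type, [t.type for t in tok.content])
        else:
            print(tok.type, tok.value)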
@@ -217,7 +221,7 @@ static PyObject* clone_unicode(Py_UNICODE *x, Py_ssize_t sz) {
 #endif
     PyObject *ans = PyUnicode_FromUnicode(NULL, sz);
     if (ans == NULL) return PyErr_NoMemory();
-    memcpy(PyUnicode_AS_UNICODE(ans), x, sz);
+    memcpy(PyUnicode_AS_UNICODE(ans), x, sz * sizeof(Py_UNICODE));
     return ans;
 }
 
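The memcpy fix scales the copy length by sizeof(Py_UNICODE): the old call copied sz bytes where sz wide characters were meant, so the cloned string was truncated on any build where Py_UNICODE is wider than one byte. The same byte-versus-element pitfall, sketched in Python with ctypes (not code from the commit):

    import ctypes

    src = ctypes.create_unicode_buffer('abcdef')   # six wchar_t elements + NUL
    dst = ctypes.create_unicode_buffer(7)
    # Wrong: treats the element count as a byte count, copying only a prefix:
    #   ctypes.memmove(dst, src, 6)
    # Right: scale the element count by the element size, as the fix does:
    ctypes.memmove(dst, src, 6 * ctypes.sizeof(ctypes.c_wchar))
    print(dst.value)   # 'abcdef'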
@@ -237,8 +241,8 @@ tokenize_flat(PyObject *self, PyObject *args) {
         PyErr_SetString(PyExc_RuntimeError, "tokenizer module not initialized. You must call init() first."); return NULL;
     }
 
-    if (!PyArg_ParseTuple(args, "U|O", &py_source, &ic)) return NULL;
-    if (ic != NULL && PyObject_IsTrue(ic)) ignore_comments = 1;
+    if (!PyArg_ParseTuple(args, "UO", &py_source, &ic)) return NULL;
+    if (PyObject_IsTrue(ic)) ignore_comments = 1;
     source_len = PyUnicode_GET_SIZE(py_source);
     css_source = PyUnicode_AS_UNICODE(py_source);
 
@@ -300,8 +304,7 @@ tokenize_flat(PyObject *self, PyObject *args) {
             if (PyUnicode_GET_SIZE(css_value) > 0) {
                 value = clone_unicode(PyUnicode_AS_UNICODE(css_value), PyUnicode_GET_SIZE(css_value) - 1);
                 if (value == NULL) goto error;
-            }
-            else { value = css_value; Py_INCREF(value); }
+            } else { value = css_value; Py_INCREF(value); }
             if (value == NULL) goto error;
             TONUMBER(value);
             unit = PyUnicode_FromString("%");
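This branch strips the trailing '%' before numeric conversion, which is what produces ('PERCENTAGE', 30, '%') in test_tokens. A quick Python-side check of the same behaviour (a sketch, assuming tinycss is importable):

    from tinycss.tokenizer import python_tokenize_flat

    token = python_tokenize_flat('30%', ignore_comments=True)[0]
    assert (token.type, token.value, token.unit) == ('PERCENTAGE', 30, '%')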
@@ -331,7 +334,10 @@ tokenize_flat(PyObject *self, PyObject *args) {
                 item = clone_unicode(PyUnicode_AS_UNICODE(value) + 1, PyUnicode_GET_SIZE(value) - 2);
                 if (item == NULL) goto error;
                 Py_DECREF(value); value = item; item = NULL;
+                UNESCAPE(value, NEWLINE_UNESCAPE);
             }
+            UNESCAPE(value, SIMPLE_UNESCAPE);
+            UNESCAPE(value, UNICODE_UNESCAPE);
         } else
 
         if (type_ == STRING) {
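The three UNESCAPE passes match the escape rules the tests pin down: a backslash-newline inside a string is dropped, a simple escape cancels a special character, and \XXXXXX unicode escapes are decoded. The expected values below are taken directly from test_tokens (a sketch, assuming tinycss is importable):

    from tinycss.tokenizer import python_tokenize_flat

    for src, want in [
        ('"Lorem\\\nIpsum"', 'LoremIpsum'),    # NEWLINE_UNESCAPE
        (r'"Lorem\"Ipsum"', 'Lorem"Ipsum'),    # SIMPLE_UNESCAPE
        (r'"Lorem \49psum"', 'Lorem Ipsum'),   # UNICODE_UNESCAPE: \49 -> 'I'
    ]:
        token = python_tokenize_flat(src, ignore_comments=True)[0]
        assert (token.type, token.value) == ('STRING', want)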
@@ -394,7 +400,7 @@ error:
 
 static PyMethodDef tokenizer_methods[] = {
     {"tokenize_flat", tokenize_flat, METH_VARARGS,
-        "tokenize_flat()\n\n"
+        "tokenize_flat(css_source, ignore_comments)\n\n Convert CSS source into a flat list of tokens"
     },
 
     {"init", tokenize_init, METH_VARARGS,
@@ -14,7 +14,7 @@
 
 from __future__ import unicode_literals
 
-from . import token_data
+from tinycss import token_data
 
 
 def tokenize_flat(css_source, ignore_comments=True,
@@ -206,11 +206,10 @@ def tokenize_grouped(css_source, ignore_comments=True):
 # Optional Cython version of tokenize_flat
 # Make both versions available with explicit names for tests.
 python_tokenize_flat = tokenize_flat
 
 try:
-    from . import speedups
+    tok = token_data.load_c_tokenizer()
-except ImportError:
+except (ImportError, RuntimeError):
-    cython_tokenize_flat = None
+    c_tokenize_flat = None
 else:
-    cython_tokenize_flat = speedups.tokenize_flat
+    c_tokenize_flat = lambda s, ignore_comments=False: tok.tokenize_flat(s, ignore_comments)
-# Default to the Cython version if available
-tokenize_flat = cython_tokenize_flat
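After this change the module always exposes python_tokenize_flat, exposes c_tokenize_flat only when the C tokenizer loads (None otherwise), and no longer rebinds tokenize_flat to the compiled version behind the caller's back. Caller-side selection then looks roughly like this (a sketch):

    from tinycss.tokenizer import python_tokenize_flat, c_tokenize_flat

    css = '/* note */ a { color: red }'
    for tokenize in (python_tokenize_flat, c_tokenize_flat):
        if tokenize is None:
            continue   # the C tokenizer failed to build or load
        print([t.type for t in tokenize(css, ignore_comments=True)])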