diff --git a/src/tinycss/tests/tokenizer.py b/src/tinycss/tests/tokenizer.py
deleted file mode 100644
index 7005a3b837..0000000000
--- a/src/tinycss/tests/tokenizer.py
+++ /dev/null
@@ -1,255 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=utf-8
-from __future__ import (unicode_literals, division, absolute_import,
-                        print_function)
-
-__license__ = 'GPL v3'
-__copyright__ = '2014, Kovid Goyal '
-
-from tinycss.tests import BaseTest
-from tinycss.tokenizer import tokenize_flat as tokenize, regroup
-
-def jsonify(tokens):
-    """Turn tokens into "JSON-compatible" data structures."""
-    for token in tokens:
-        if token.type == 'FUNCTION':
-            yield (token.type, token.function_name,
-                   list(jsonify(token.content)))
-        elif token.is_container:
-            yield token.type, list(jsonify(token.content))
-        else:
-            yield token.type, token.value
-
-
-class TestTokenizer(BaseTest):
-
-    def test_token_api(self):
-        for css_source in [
-            '(8, foo, [z])', '[8, foo, (z)]', '{8, foo, [z]}', 'func(8, foo, [z])'
-        ]:
-            tokens = list(regroup(tokenize(css_source)))
-            self.ae(len(tokens), 1)
-            self.ae(len(tokens[0].content), 7)
-
-    def test_token_serialize_css(self):
-        for css_source in [
-r'''p[example="\
-foo(int x) {\
-    this.x = x;\
-}\
-"]''',
-            '"Lorem\\26Ipsum\ndolor" sit',
-            '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }',
-            'not([[lorem]]{ipsum (42)})',
-            'a[b{d]e}',
-            'a[b{"d',
-        ]:
-            for _regroup in (regroup, lambda x: x):
-                tokens = _regroup(tokenize(css_source, ignore_comments=False))
-                result = ''.join(token.as_css() for token in tokens)
-                self.ae(result, css_source)
-
-    def test_comments(self):
-        for ignore_comments, expected_tokens in [
-            (False, [
-                ('COMMENT', '/* lorem */'),
-                ('S', ' '),
-                ('IDENT', 'ipsum'),
-                ('[', [
-                    ('IDENT', 'dolor'),
-                    ('COMMENT', '/* sit */'),
-                ]),
-                ('BAD_COMMENT', '/* amet')
-            ]),
-            (True, [
-                ('S', ' '),
-                ('IDENT', 'ipsum'),
-                ('[', [
-                    ('IDENT', 'dolor'),
-                ]),
-            ]),
-        ]:
-            css_source = '/* lorem */ ipsum[dolor/* sit */]/* amet'
-            tokens = regroup(tokenize(css_source, ignore_comments))
-            result = list(jsonify(tokens))
-            self.ae(result, expected_tokens)
-
-    def test_token_grouping(self):
-        for css_source, expected_tokens in [
-            ('', []),
-            (r'Lorem\26 "i\psum"4px', [
-                ('IDENT', 'Lorem&'), ('STRING', 'ipsum'), ('DIMENSION', 4)]),
-
-            ('not([[lorem]]{ipsum (42)})', [
-                ('FUNCTION', 'not', [
-                    ('[', [
-                        ('[', [
-                            ('IDENT', 'lorem'),
-                        ]),
-                    ]),
-                    ('{', [
-                        ('IDENT', 'ipsum'),
-                        ('S', ' '),
-                        ('(', [
-                            ('INTEGER', 42),
-                        ])
-                    ])
-                ])]),
-
-            # Close everything at EOF, no error
-            ('a[b{"d', [
-                ('IDENT', 'a'),
-                ('[', [
-                    ('IDENT', 'b'),
-                    ('{', [
-                        ('STRING', 'd'),
-                    ]),
-                ]),
-            ]),
-
-            # Any remaining ), ] or } token is a nesting error
-            ('a[b{d]e}', [
-                ('IDENT', 'a'),
-                ('[', [
-                    ('IDENT', 'b'),
-                    ('{', [
-                        ('IDENT', 'd'),
-                        (']', ']'),  # The error is visible here
-                        ('IDENT', 'e'),
-                    ]),
-                ]),
-            ]),
-            # ref:
-            ('a[b{d}e]', [
-                ('IDENT', 'a'),
-                ('[', [
-                    ('IDENT', 'b'),
-                    ('{', [
-                        ('IDENT', 'd'),
-                    ]),
-                    ('IDENT', 'e'),
-                ]),
-            ]),
-        ]:
-            tokens = regroup(tokenize(css_source, ignore_comments=False))
-            result = list(jsonify(tokens))
-            self.ae(result, expected_tokens)
-
-    def test_positions(self):
-        """Test the reported line/column position of each token."""
-        css = '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }'
-        tokens = tokenize(css, ignore_comments=False)
-        result = [(token.type, token.line, token.column) for token in tokens]
-        self.ae(result, [
-            ('COMMENT', 1, 1), ('S', 2, 9),
-            ('IDENT', 3, 1), ('S', 3, 2), ('{', 3, 3),
-            ('S', 3, 4), ('IDENT', 4, 5), (':', 4, 10),
-            ('S', 4, 11), ('IDENT', 4, 12), (';', 4, 15), ('S', 4, 16),
-            ('IDENT', 4, 17), (':', 4, 24), ('S', 4, 25), ('STRING', 4, 26),
-            ('S', 5, 5), ('}', 5, 6)])
-
-    def test_tokens(self):
-        for css_source, expected_tokens in [
-            ('', []),
-            ('red -->',
-                [('IDENT', 'red'), ('S', ' '), ('CDC', '-->')]),
-            # Longest match rule: no CDC
-            ('red-->',
-                [('IDENT', 'red--'), ('DELIM', '>')]),
-
-            (r'''p[example="\
-foo(int x) {\
-    this.x = x;\
-}\
-"]''', [
-                ('IDENT', 'p'),
-                ('[', '['),
-                ('IDENT', 'example'),
-                ('DELIM', '='),
-                ('STRING', 'foo(int x) {    this.x = x;}'),
-                (']', ']')]),
-
-            # Numbers are parsed
-            ('42 .5 -4pX 1.25em 30%',
-                [('INTEGER', 42), ('S', ' '),
-                ('NUMBER', .5), ('S', ' '),
-                # units are normalized to lower-case:
-                ('DIMENSION', -4, 'px'), ('S', ' '),
-                ('DIMENSION', 1.25, 'em'), ('S', ' '),
-                ('PERCENTAGE', 30, '%')]),
-
-            # URLs are extracted
-            ('url(foo.png)', [('URI', 'foo.png')]),
-            ('url("foo.png")', [('URI', 'foo.png')]),
-
-            # Escaping
-
-            (r'/* Comment with a \ backslash */',
-                [('COMMENT', '/* Comment with a \ backslash */')]),  # Unchanged
-
-            # backslash followed by a newline in a string: ignored
-            ('"Lorem\\\nIpsum"', [('STRING', 'LoremIpsum')]),
-
-            # backslash followed by a newline outside a string: stands for itself
-            ('Lorem\\\nIpsum', [
-                ('IDENT', 'Lorem'), ('DELIM', '\\'),
-                ('S', '\n'), ('IDENT', 'Ipsum')]),
-
-            # Cancel the meaning of special characters
-            (r'"Lore\m Ipsum"', [('STRING', 'Lorem Ipsum')]),  # or not specal
-            (r'"Lorem \49psum"', [('STRING', 'Lorem Ipsum')]),
-            (r'"Lorem \49 psum"', [('STRING', 'Lorem Ipsum')]),
-            (r'"Lorem\"Ipsum"', [('STRING', 'Lorem"Ipsum')]),
-            (r'"Lorem\\Ipsum"', [('STRING', r'Lorem\Ipsum')]),
-            (r'"Lorem\5c Ipsum"', [('STRING', r'Lorem\Ipsum')]),
-            (r'Lorem\+Ipsum', [('IDENT', 'Lorem+Ipsum')]),
-            (r'Lorem+Ipsum', [('IDENT', 'Lorem'), ('DELIM', '+'), ('IDENT', 'Ipsum')]),
-            (r'url(foo\).png)', [('URI', 'foo).png')]),
-
-            # Unicode and backslash escaping
-            ('\\26 B', [('IDENT', '&B')]),
-            ('\\&B', [('IDENT', '&B')]),
-            ('@\\26\tB', [('ATKEYWORD', '@&B')]),
-            ('@\\&B', [('ATKEYWORD', '@&B')]),
-            ('#\\26\nB', [('HASH', '#&B')]),
-            ('#\\&B', [('HASH', '#&B')]),
-            ('\\26\r\nB(', [('FUNCTION', '&B(')]),
-            ('\\&B(', [('FUNCTION', '&B(')]),
-            (r'12.5\000026B', [('DIMENSION', 12.5, '&b')]),
-            (r'12.5\0000263B', [('DIMENSION', 12.5, '&3b')]),  # max 6 digits
-            (r'12.5\&B', [('DIMENSION', 12.5, '&b')]),
-            (r'"\26 B"', [('STRING', '&B')]),
-            (r"'\000026B'", [('STRING', '&B')]),
-            (r'"\&B"', [('STRING', '&B')]),
-            (r'url("\26 B")', [('URI', '&B')]),
-            (r'url(\26 B)', [('URI', '&B')]),
-            (r'url("\&B")', [('URI', '&B')]),
-            (r'url(\&B)', [('URI', '&B')]),
-            (r'Lorem\110000Ipsum', [('IDENT', 'Lorem\uFFFDIpsum')]),
-
-            # Bad strings
-
-            # String ends at EOF without closing: no error, parsed
-            ('"Lorem\\26Ipsum', [('STRING', 'Lorem&Ipsum')]),
-            # Unescaped newline: ends the string, error, unparsed
-            ('"Lorem\\26Ipsum\n', [
-                ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n')]),
-            # Tokenization restarts after the newline, so the second " starts
-            # a new string (which ends at EOF without errors, as above.)
- ('"Lorem\\26Ipsum\ndolor" sit', [ - ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n'), - ('IDENT', 'dolor'), ('STRING', ' sit')]), - - ]: - sources = [css_source] - for css_source in sources: - tokens = tokenize(css_source, ignore_comments=False) - result = [ - (token.type, token.value) + ( - () if token.unit is None else (token.unit,)) - for token in tokens - ] - self.ae(result, expected_tokens) - - - diff --git a/src/tinycss/tests/tokenizing.py b/src/tinycss/tests/tokenizing.py new file mode 100644 index 0000000000..004122cd06 --- /dev/null +++ b/src/tinycss/tests/tokenizing.py @@ -0,0 +1,281 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2014, Kovid Goyal ' + +from tinycss.tests import BaseTest +from tinycss.tokenizer import python_tokenize_flat, c_tokenize_flat, regroup + +def jsonify(tokens): + """Turn tokens into "JSON-compatible" data structures.""" + for token in tokens: + if token.type == 'FUNCTION': + yield (token.type, token.function_name, + list(jsonify(token.content))) + elif token.is_container: + yield token.type, list(jsonify(token.content)) + else: + yield token.type, token.value + +if c_tokenize_flat is None: + tokenizers = (python_tokenize_flat,) +else: + tokenizers = (python_tokenize_flat, c_tokenize_flat) + +def token_api(self, tokenize): + for css_source in [ + '(8, foo, [z])', '[8, foo, (z)]', '{8, foo, [z]}', 'func(8, foo, [z])' + ]: + tokens = list(regroup(tokenize(css_source))) + self.ae(len(tokens), 1) + self.ae(len(tokens[0].content), 7) + +def token_serialize_css(self, tokenize): + for tokenize in tokenizers: + for css_source in [ +r'''p[example="\ +foo(int x) {\ + this.x = x;\ +}\ +"]''', + '"Lorem\\26Ipsum\ndolor" sit', + '/* Lorem\nipsum */\fa {\n color: red;\tcontent: "dolor\\\fsit" }', + 'not([[lorem]]{ipsum (42)})', + 'a[b{d]e}', + 'a[b{"d', + ]: + for _regroup in (regroup, lambda x: x): + tokens = _regroup(tokenize(css_source, ignore_comments=False)) + result = ''.join(token.as_css() for token in tokens) + self.ae(result, css_source) + +def comments(self, tokenize): + for ignore_comments, expected_tokens in [ + (False, [ + ('COMMENT', '/* lorem */'), + ('S', ' '), + ('IDENT', 'ipsum'), + ('[', [ + ('IDENT', 'dolor'), + ('COMMENT', '/* sit */'), + ]), + ('BAD_COMMENT', '/* amet') + ]), + (True, [ + ('S', ' '), + ('IDENT', 'ipsum'), + ('[', [ + ('IDENT', 'dolor'), + ]), + ]), + ]: + css_source = '/* lorem */ ipsum[dolor/* sit */]/* amet' + tokens = regroup(tokenize(css_source, ignore_comments)) + result = list(jsonify(tokens)) + self.ae(result, expected_tokens) + +def token_grouping(self, tokenize): + for css_source, expected_tokens in [ + ('', []), + (r'Lorem\26 "i\psum"4px', [ + ('IDENT', 'Lorem&'), ('STRING', 'ipsum'), ('DIMENSION', 4)]), + + ('not([[lorem]]{ipsum (42)})', [ + ('FUNCTION', 'not', [ + ('[', [ + ('[', [ + ('IDENT', 'lorem'), + ]), + ]), + ('{', [ + ('IDENT', 'ipsum'), + ('S', ' '), + ('(', [ + ('INTEGER', 42), + ]) + ]) + ])]), + + # Close everything at EOF, no error + ('a[b{"d', [ + ('IDENT', 'a'), + ('[', [ + ('IDENT', 'b'), + ('{', [ + ('STRING', 'd'), + ]), + ]), + ]), + + # Any remaining ), ] or } token is a nesting error + ('a[b{d]e}', [ + ('IDENT', 'a'), + ('[', [ + ('IDENT', 'b'), + ('{', [ + ('IDENT', 'd'), + (']', ']'), # The error is visible here + ('IDENT', 'e'), + ]), + ]), + ]), + # ref: + ('a[b{d}e]', [ + ('IDENT', 'a'), + ('[', [ + ('IDENT', 'b'), + ('{', [ + ('IDENT', 'd'), + ]), + 
+                ('IDENT', 'e'),
+            ]),
+        ]),
+    ]:
+        tokens = regroup(tokenize(css_source, ignore_comments=False))
+        result = list(jsonify(tokens))
+        self.ae(result, expected_tokens)
+
+def positions(self, tokenize):
+    css = '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }'
+    tokens = tokenize(css, ignore_comments=False)
+    result = [(token.type, token.line, token.column) for token in tokens]
+    self.ae(result, [
+        ('COMMENT', 1, 1), ('S', 2, 9),
+        ('IDENT', 3, 1), ('S', 3, 2), ('{', 3, 3),
+        ('S', 3, 4), ('IDENT', 4, 5), (':', 4, 10),
+        ('S', 4, 11), ('IDENT', 4, 12), (';', 4, 15), ('S', 4, 16),
+        ('IDENT', 4, 17), (':', 4, 24), ('S', 4, 25), ('STRING', 4, 26),
+        ('S', 5, 5), ('}', 5, 6)])
+
+def tokens(self, tokenize):
+    for css_source, expected_tokens in [
+        ('', []),
+        ('red -->',
+            [('IDENT', 'red'), ('S', ' '), ('CDC', '-->')]),
+        # Longest match rule: no CDC
+        ('red-->',
+            [('IDENT', 'red--'), ('DELIM', '>')]),
+
+(r'''p[example="\
+foo(int x) {\
+    this.x = x;\
+}\
+"]''', [
+            ('IDENT', 'p'),
+            ('[', '['),
+            ('IDENT', 'example'),
+            ('DELIM', '='),
+            ('STRING', 'foo(int x) {    this.x = x;}'),
+            (']', ']')]),
+
+        # Numbers are parsed
+        ('42 .5 -4pX 1.25em 30%',
+            [('INTEGER', 42), ('S', ' '),
+            ('NUMBER', .5), ('S', ' '),
+            # units are normalized to lower-case:
+            ('DIMENSION', -4, 'px'), ('S', ' '),
+            ('DIMENSION', 1.25, 'em'), ('S', ' '),
+            ('PERCENTAGE', 30, '%')]),
+
+        # URLs are extracted
+        ('url(foo.png)', [('URI', 'foo.png')]),
+        ('url("foo.png")', [('URI', 'foo.png')]),
+
+        # Escaping
+
+        (r'/* Comment with a \ backslash */',
+            [('COMMENT', '/* Comment with a \ backslash */')]),  # Unchanged
+
+        # backslash followed by a newline in a string: ignored
+        ('"Lorem\\\nIpsum"', [('STRING', 'LoremIpsum')]),
+
+        # backslash followed by a newline outside a string: stands for itself
+        ('Lorem\\\nIpsum', [
+            ('IDENT', 'Lorem'), ('DELIM', '\\'),
+            ('S', '\n'), ('IDENT', 'Ipsum')]),
+
+        # Cancel the meaning of special characters
+        (r'"Lore\m Ipsum"', [('STRING', 'Lorem Ipsum')]),  # or not special
+        (r'"Lorem \49psum"', [('STRING', 'Lorem Ipsum')]),
+        (r'"Lorem \49 psum"', [('STRING', 'Lorem Ipsum')]),
+        (r'"Lorem\"Ipsum"', [('STRING', 'Lorem"Ipsum')]),
+        (r'"Lorem\\Ipsum"', [('STRING', r'Lorem\Ipsum')]),
+        (r'"Lorem\5c Ipsum"', [('STRING', r'Lorem\Ipsum')]),
+        (r'Lorem\+Ipsum', [('IDENT', 'Lorem+Ipsum')]),
+        (r'Lorem+Ipsum', [('IDENT', 'Lorem'), ('DELIM', '+'), ('IDENT', 'Ipsum')]),
+        (r'url(foo\).png)', [('URI', 'foo).png')]),
+
+        # Unicode and backslash escaping
+        ('\\26 B', [('IDENT', '&B')]),
+        ('\\&B', [('IDENT', '&B')]),
+        ('@\\26\tB', [('ATKEYWORD', '@&B')]),
+        ('@\\&B', [('ATKEYWORD', '@&B')]),
+        ('#\\26\nB', [('HASH', '#&B')]),
+        ('#\\&B', [('HASH', '#&B')]),
+        ('\\26\r\nB(', [('FUNCTION', '&B(')]),
+        ('\\&B(', [('FUNCTION', '&B(')]),
+        (r'12.5\000026B', [('DIMENSION', 12.5, '&b')]),
+        (r'12.5\0000263B', [('DIMENSION', 12.5, '&3b')]),  # max 6 digits
+        (r'12.5\&B', [('DIMENSION', 12.5, '&b')]),
+        (r'"\26 B"', [('STRING', '&B')]),
+        (r"'\000026B'", [('STRING', '&B')]),
+        (r'"\&B"', [('STRING', '&B')]),
+        (r'url("\26 B")', [('URI', '&B')]),
+        (r'url(\26 B)', [('URI', '&B')]),
+        (r'url("\&B")', [('URI', '&B')]),
+        (r'url(\&B)', [('URI', '&B')]),
+        (r'Lorem\110000Ipsum', [('IDENT', 'Lorem\uFFFDIpsum')]),
+
+        # Bad strings
+
+        # String ends at EOF without closing: no error, parsed
+        ('"Lorem\\26Ipsum', [('STRING', 'Lorem&Ipsum')]),
+        # Unescaped newline: ends the string, error, unparsed
+        ('"Lorem\\26Ipsum\n', [
+            ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n')]),
+        # Tokenization restarts after
+        # the newline, so the second " starts
+        # a new string (which ends at EOF without errors, as above.)
+        ('"Lorem\\26Ipsum\ndolor" sit', [
+            ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n'),
+            ('IDENT', 'dolor'), ('STRING', ' sit')]),
+
+    ]:
+        sources = [css_source]
+        for css_source in sources:
+            tokens = tokenize(css_source, ignore_comments=False)
+            result = [
+                (token.type, token.value) + (
+                    () if token.unit is None else (token.unit,))
+                for token in tokens
+            ]
+            self.ae(result, expected_tokens)
+
+
+class TestTokenizer(BaseTest):
+
+    def run_test(self, func):
+        # Run the test body once with every available tokenizer backend.
+        for tokenize in tokenizers:
+            func(self, tokenize)
+
+    def test_token_api(self):
+        self.run_test(token_api)
+
+    def test_token_serialize_css(self):
+        self.run_test(token_serialize_css)
+
+    def test_comments(self):
+        self.run_test(comments)
+
+    def test_token_grouping(self):
+        self.run_test(token_grouping)
+
+    def test_positions(self):
+        """Test the reported line/column position of each token."""
+        self.run_test(positions)
+
+    def test_tokens(self):
+        self.run_test(tokens)
+
diff --git a/src/tinycss/tokenizer.c b/src/tinycss/tokenizer.c
index 71eeb65e19..ac691b7d04 100644
--- a/src/tinycss/tokenizer.c
+++ b/src/tinycss/tokenizer.c
@@ -14,6 +14,7 @@
 typedef struct {
     PyObject_HEAD
     // Type-specific fields go here.
+    PyObject *is_container;
     PyObject *type;
     PyObject *_as_css;
     PyObject *value;
@@ -26,6 +27,7 @@
 static void
 tokenizer_Token_dealloc(tokenizer_Token* self)
 {
+    Py_XDECREF(self->is_container); self->is_container = NULL;
     Py_XDECREF(self->type); self->type = NULL;
     Py_XDECREF(self->_as_css); self->_as_css = NULL;
     Py_XDECREF(self->value); self->value = NULL;
@@ -47,6 +49,7 @@ tokenizer_Token_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
         self->ob_type->tp_free((PyObject*)self); return NULL;
     }
     Py_INCREF(self->type); Py_INCREF(self->_as_css); Py_INCREF(self->value); Py_INCREF(self->unit); Py_INCREF(self->line); Py_INCREF(self->column);
+    self->is_container = Py_False; Py_INCREF(self->is_container);
 
     return (PyObject *)self;
 }
@@ -81,6 +84,7 @@ tokenizer_Token_as_css(tokenizer_Token *self, PyObject *args, PyObject *kwargs)
 }
 
 static PyMemberDef tokenizer_Token_members[] = {
+    {"is_container", T_OBJECT_EX, offsetof(tokenizer_Token, is_container), 0, "False unless this token is a container for other tokens"},
     {"type", T_OBJECT_EX, offsetof(tokenizer_Token, type), 0, "The token type"},
     {"_as_css", T_OBJECT_EX, offsetof(tokenizer_Token, _as_css), 0, "Internal variable, use as_css() method instead."},
     {"value", T_OBJECT_EX, offsetof(tokenizer_Token, value), 0, "The token value"},
@@ -217,7 +221,7 @@
 static PyObject* clone_unicode(Py_UNICODE *x, Py_ssize_t sz) {
 #endif
     PyObject *ans = PyUnicode_FromUnicode(NULL, sz);
     if (ans == NULL) return PyErr_NoMemory();
-    memcpy(PyUnicode_AS_UNICODE(ans), x, sz);
+    memcpy(PyUnicode_AS_UNICODE(ans), x, sz * sizeof(Py_UNICODE));  // copy sz characters, not sz bytes
     return ans;
 }
@@ -237,8 +241,8 @@ tokenize_flat(PyObject *self, PyObject *args) {
         PyErr_SetString(PyExc_RuntimeError, "tokenizer module not initialized. You must call init() first.");
         return NULL;
     }
-    if (!PyArg_ParseTuple(args, "U|O", &py_source, &ic)) return NULL;
-    if (ic != NULL && PyObject_IsTrue(ic)) ignore_comments = 1;
+    if (!PyArg_ParseTuple(args, "UO", &py_source, &ic)) return NULL;
+    if (PyObject_IsTrue(ic)) ignore_comments = 1;
 
     source_len = PyUnicode_GET_SIZE(py_source);
     css_source = PyUnicode_AS_UNICODE(py_source);
@@ -300,8 +304,7 @@
         if (PyUnicode_GET_SIZE(css_value) > 0) {
             value = clone_unicode(PyUnicode_AS_UNICODE(css_value), PyUnicode_GET_SIZE(css_value) - 1);
             if (value == NULL) goto error;
-        }
-        else { value = css_value; Py_INCREF(value); }
+        } else { value = css_value; Py_INCREF(value); }
         if (value == NULL) goto error;
         TONUMBER(value);
         unit = PyUnicode_FromString("%");
@@ -331,7 +334,10 @@
             item = clone_unicode(PyUnicode_AS_UNICODE(value) + 1, PyUnicode_GET_SIZE(value) - 2);
             if (item == NULL) goto error;
             Py_DECREF(value); value = item; item = NULL;
+            UNESCAPE(value, NEWLINE_UNESCAPE);
         }
+        UNESCAPE(value, SIMPLE_UNESCAPE);
+        UNESCAPE(value, UNICODE_UNESCAPE);
     }
     else if (type_ == STRING) {
@@ -394,7 +400,7 @@ error:
 static PyMethodDef tokenizer_methods[] = {
     {"tokenize_flat", tokenize_flat, METH_VARARGS,
-        "tokenize_flat()\n\n"
+        "tokenize_flat(css_source, ignore_comments)\n\n Convert CSS source into a flat list of tokens"
     },
     {"init", tokenize_init, METH_VARARGS,
diff --git a/src/tinycss/tokenizer.py b/src/tinycss/tokenizer.py
index e5dd02891f..ec02ed22cf 100644
--- a/src/tinycss/tokenizer.py
+++ b/src/tinycss/tokenizer.py
@@ -14,7 +14,7 @@
 
 from __future__ import unicode_literals
 
-from . import token_data
+from tinycss import token_data
 
 
 def tokenize_flat(css_source, ignore_comments=True,
@@ -206,11 +206,10 @@
 # Optional Cython version of tokenize_flat
 # Make both versions available with explicit names for tests.
 python_tokenize_flat = tokenize_flat
+
 try:
-    from . import speedups
-except ImportError:
-    cython_tokenize_flat = None
+    tok = token_data.load_c_tokenizer()
+except (ImportError, RuntimeError):
+    c_tokenize_flat = None
 else:
-    cython_tokenize_flat = speedups.tokenize_flat
-    # Default to the Cython version if available
-    tokenize_flat = cython_tokenize_flat
+    c_tokenize_flat = lambda s, ignore_comments=False: tok.tokenize_flat(s, ignore_comments)
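
Usage sketch (not part of the patch): the new module layout exposes both
backends side by side, so they can be exercised by hand much as the tests
do. The sample CSS and the printing loop below are illustrative
assumptions, not code from this commit.

    #!/usr/bin/env python
    from tinycss.tokenizer import python_tokenize_flat, c_tokenize_flat, regroup

    css = 'a[href] { color: red } /* note */'
    # c_tokenize_flat is None when the compiled extension is unavailable.
    for tokenize in filter(None, (python_tokenize_flat, c_tokenize_flat)):
        # regroup() nests tokens between matching (), [] and {} pairs into
        # container tokens; as_css() serializes a token back to CSS text.
        for token in regroup(tokenize(css, ignore_comments=True)):
            print(token.type, token.as_css())

Both backends are expected to yield identical token streams, which is
exactly the property the rewritten tests assert by running every test body
once per entry in tokenizers.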
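
The is_container member added to the C Token mirrors the attribute that the
pure-Python tokens already carry (it is always False for flat tokens), so
tree walkers can dispatch on it without knowing which backend produced a
token, as jsonify() in the tests does. A minimal sketch of that pattern,
assuming the post-patch API; the dump() helper is hypothetical:

    from tinycss.tokenizer import python_tokenize_flat, regroup

    def dump(tokens, depth=0):
        for token in tokens:
            print('    ' * depth + token.type)
            if token.is_container:  # containers hold a nested token list
                dump(token.content, depth + 1)

    # 'a[b{d]e}' is the nesting-error case from token_grouping above:
    # the stray ']' survives as an error token inside the '{' group.
    dump(regroup(python_tokenize_flat('a[b{d]e}', ignore_comments=False)))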