Mirror of https://github.com/kovidgoyal/calibre.git
Testing for the C tokenizer

commit 04b45413c6 (parent adac7e6d1e)
Deleted file (255 lines):

@@ -1,255 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

from tinycss.tests import BaseTest
from tinycss.tokenizer import tokenize_flat as tokenize, regroup


def jsonify(tokens):
    """Turn tokens into "JSON-compatible" data structures."""
    for token in tokens:
        if token.type == 'FUNCTION':
            yield (token.type, token.function_name,
                   list(jsonify(token.content)))
        elif token.is_container:
            yield token.type, list(jsonify(token.content))
        else:
            yield token.type, token.value


class TestTokenizer(BaseTest):

    def test_token_api(self):
        for css_source in [
            '(8, foo, [z])', '[8, foo, (z)]', '{8, foo, [z]}', 'func(8, foo, [z])'
        ]:
            tokens = list(regroup(tokenize(css_source)))
            self.ae(len(tokens), 1)
            self.ae(len(tokens[0].content), 7)

    def test_token_serialize_css(self):
        for css_source in [
            r'''p[example="\
foo(int x) {\
 this.x = x;\
}\
"]''',
            '"Lorem\\26Ipsum\ndolor" sit',
            '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }',
            'not([[lorem]]{ipsum (42)})',
            'a[b{d]e}',
            'a[b{"d',
        ]:
            for _regroup in (regroup, lambda x: x):
                tokens = _regroup(tokenize(css_source, ignore_comments=False))
                result = ''.join(token.as_css() for token in tokens)
                self.ae(result, css_source)

    def test_comments(self):
        for ignore_comments, expected_tokens in [
            (False, [
                ('COMMENT', '/* lorem */'),
                ('S', ' '),
                ('IDENT', 'ipsum'),
                ('[', [
                    ('IDENT', 'dolor'),
                    ('COMMENT', '/* sit */'),
                ]),
                ('BAD_COMMENT', '/* amet')
            ]),
            (True, [
                ('S', ' '),
                ('IDENT', 'ipsum'),
                ('[', [
                    ('IDENT', 'dolor'),
                ]),
            ]),
        ]:
            css_source = '/* lorem */ ipsum[dolor/* sit */]/* amet'
            tokens = regroup(tokenize(css_source, ignore_comments))
            result = list(jsonify(tokens))
            self.ae(result, expected_tokens)

    def test_token_grouping(self):
        for css_source, expected_tokens in [
            ('', []),
            (r'Lorem\26 "i\psum"4px', [
                ('IDENT', 'Lorem&'), ('STRING', 'ipsum'), ('DIMENSION', 4)]),

            ('not([[lorem]]{ipsum (42)})', [
                ('FUNCTION', 'not', [
                    ('[', [
                        ('[', [
                            ('IDENT', 'lorem'),
                        ]),
                    ]),
                    ('{', [
                        ('IDENT', 'ipsum'),
                        ('S', ' '),
                        ('(', [
                            ('INTEGER', 42),
                        ])
                    ])
                ])]),

            # Close everything at EOF, no error
            ('a[b{"d', [
                ('IDENT', 'a'),
                ('[', [
                    ('IDENT', 'b'),
                    ('{', [
                        ('STRING', 'd'),
                    ]),
                ]),
            ]),

            # Any remaining ), ] or } token is a nesting error
            ('a[b{d]e}', [
                ('IDENT', 'a'),
                ('[', [
                    ('IDENT', 'b'),
                    ('{', [
                        ('IDENT', 'd'),
                        (']', ']'),  # The error is visible here
                        ('IDENT', 'e'),
                    ]),
                ]),
            ]),
            # ref:
            ('a[b{d}e]', [
                ('IDENT', 'a'),
                ('[', [
                    ('IDENT', 'b'),
                    ('{', [
                        ('IDENT', 'd'),
                    ]),
                    ('IDENT', 'e'),
                ]),
            ]),
        ]:
            tokens = regroup(tokenize(css_source, ignore_comments=False))
            result = list(jsonify(tokens))
            self.ae(result, expected_tokens)

    def test_positions(self):
        """Test the reported line/column position of each token."""
        css = '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }'
        tokens = tokenize(css, ignore_comments=False)
        result = [(token.type, token.line, token.column) for token in tokens]
        self.ae(result, [
            ('COMMENT', 1, 1), ('S', 2, 9),
            ('IDENT', 3, 1), ('S', 3, 2), ('{', 3, 3),
            ('S', 3, 4), ('IDENT', 4, 5), (':', 4, 10),
            ('S', 4, 11), ('IDENT', 4, 12), (';', 4, 15), ('S', 4, 16),
            ('IDENT', 4, 17), (':', 4, 24), ('S', 4, 25), ('STRING', 4, 26),
            ('S', 5, 5), ('}', 5, 6)])

    def test_tokens(self):
        for css_source, expected_tokens in [
            ('', []),
            ('red -->',
             [('IDENT', 'red'), ('S', ' '), ('CDC', '-->')]),
            # Longest match rule: no CDC
            ('red-->',
             [('IDENT', 'red--'), ('DELIM', '>')]),

            (r'''p[example="\
foo(int x) {\
 this.x = x;\
}\
"]''', [
                ('IDENT', 'p'),
                ('[', '['),
                ('IDENT', 'example'),
                ('DELIM', '='),
                ('STRING', 'foo(int x) { this.x = x;}'),
                (']', ']')]),

            # Numbers are parsed
            ('42 .5 -4pX 1.25em 30%',
             [('INTEGER', 42), ('S', ' '),
              ('NUMBER', .5), ('S', ' '),
              # units are normalized to lower-case:
              ('DIMENSION', -4, 'px'), ('S', ' '),
              ('DIMENSION', 1.25, 'em'), ('S', ' '),
              ('PERCENTAGE', 30, '%')]),

            # URLs are extracted
            ('url(foo.png)', [('URI', 'foo.png')]),
            ('url("foo.png")', [('URI', 'foo.png')]),

            # Escaping

            (r'/* Comment with a \ backslash */',
             [('COMMENT', '/* Comment with a \ backslash */')]),  # Unchanged

            # backslash followed by a newline in a string: ignored
            ('"Lorem\\\nIpsum"', [('STRING', 'LoremIpsum')]),

            # backslash followed by a newline outside a string: stands for itself
            ('Lorem\\\nIpsum', [
                ('IDENT', 'Lorem'), ('DELIM', '\\'),
                ('S', '\n'), ('IDENT', 'Ipsum')]),

            # Cancel the meaning of special characters
            (r'"Lore\m Ipsum"', [('STRING', 'Lorem Ipsum')]),  # or not specal
            (r'"Lorem \49psum"', [('STRING', 'Lorem Ipsum')]),
            (r'"Lorem \49 psum"', [('STRING', 'Lorem Ipsum')]),
            (r'"Lorem\"Ipsum"', [('STRING', 'Lorem"Ipsum')]),
            (r'"Lorem\\Ipsum"', [('STRING', r'Lorem\Ipsum')]),
            (r'"Lorem\5c Ipsum"', [('STRING', r'Lorem\Ipsum')]),
            (r'Lorem\+Ipsum', [('IDENT', 'Lorem+Ipsum')]),
            (r'Lorem+Ipsum', [('IDENT', 'Lorem'), ('DELIM', '+'), ('IDENT', 'Ipsum')]),
            (r'url(foo\).png)', [('URI', 'foo).png')]),

            # Unicode and backslash escaping
            ('\\26 B', [('IDENT', '&B')]),
            ('\\&B', [('IDENT', '&B')]),
            ('@\\26\tB', [('ATKEYWORD', '@&B')]),
            ('@\\&B', [('ATKEYWORD', '@&B')]),
            ('#\\26\nB', [('HASH', '#&B')]),
            ('#\\&B', [('HASH', '#&B')]),
            ('\\26\r\nB(', [('FUNCTION', '&B(')]),
            ('\\&B(', [('FUNCTION', '&B(')]),
            (r'12.5\000026B', [('DIMENSION', 12.5, '&b')]),
            (r'12.5\0000263B', [('DIMENSION', 12.5, '&3b')]),  # max 6 digits
            (r'12.5\&B', [('DIMENSION', 12.5, '&b')]),
            (r'"\26 B"', [('STRING', '&B')]),
            (r"'\000026B'", [('STRING', '&B')]),
            (r'"\&B"', [('STRING', '&B')]),
            (r'url("\26 B")', [('URI', '&B')]),
            (r'url(\26 B)', [('URI', '&B')]),
            (r'url("\&B")', [('URI', '&B')]),
            (r'url(\&B)', [('URI', '&B')]),
            (r'Lorem\110000Ipsum', [('IDENT', 'Lorem\uFFFDIpsum')]),

            # Bad strings

            # String ends at EOF without closing: no error, parsed
            ('"Lorem\\26Ipsum', [('STRING', 'Lorem&Ipsum')]),
            # Unescaped newline: ends the string, error, unparsed
            ('"Lorem\\26Ipsum\n', [
                ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n')]),
            # Tokenization restarts after the newline, so the second " starts
            # a new string (which ends at EOF without errors, as above.)
            ('"Lorem\\26Ipsum\ndolor" sit', [
                ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n'),
                ('IDENT', 'dolor'), ('STRING', ' sit')]),
        ]:
            sources = [css_source]
            for css_source in sources:
                tokens = tokenize(css_source, ignore_comments=False)
                result = [
                    (token.type, token.value) + (
                        () if token.unit is None else (token.unit,))
                    for token in tokens
                ]
                self.ae(result, expected_tokens)
src/tinycss/tests/tokenizing.py (new file, 281 lines):

@@ -0,0 +1,281 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

from tinycss.tests import BaseTest
from tinycss.tokenizer import python_tokenize_flat, c_tokenize_flat, regroup


def jsonify(tokens):
    """Turn tokens into "JSON-compatible" data structures."""
    for token in tokens:
        if token.type == 'FUNCTION':
            yield (token.type, token.function_name,
                   list(jsonify(token.content)))
        elif token.is_container:
            yield token.type, list(jsonify(token.content))
        else:
            yield token.type, token.value


if c_tokenize_flat is None:
    tokenizers = (python_tokenize_flat,)
else:
    tokenizers = (python_tokenize_flat, c_tokenize_flat)


def token_api(self, tokenize):
    for css_source in [
        '(8, foo, [z])', '[8, foo, (z)]', '{8, foo, [z]}', 'func(8, foo, [z])'
    ]:
        tokens = list(regroup(tokenize(css_source)))
        self.ae(len(tokens), 1)
        self.ae(len(tokens[0].content), 7)


def token_serialize_css(self, tokenize):
    for css_source in [
        r'''p[example="\
foo(int x) {\
 this.x = x;\
}\
"]''',
        '"Lorem\\26Ipsum\ndolor" sit',
        '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }',
        'not([[lorem]]{ipsum (42)})',
        'a[b{d]e}',
        'a[b{"d',
    ]:
        for _regroup in (regroup, lambda x: x):
            tokens = _regroup(tokenize(css_source, ignore_comments=False))
            result = ''.join(token.as_css() for token in tokens)
            self.ae(result, css_source)


def comments(self, tokenize):
    for ignore_comments, expected_tokens in [
        (False, [
            ('COMMENT', '/* lorem */'),
            ('S', ' '),
            ('IDENT', 'ipsum'),
            ('[', [
                ('IDENT', 'dolor'),
                ('COMMENT', '/* sit */'),
            ]),
            ('BAD_COMMENT', '/* amet')
        ]),
        (True, [
            ('S', ' '),
            ('IDENT', 'ipsum'),
            ('[', [
                ('IDENT', 'dolor'),
            ]),
        ]),
    ]:
        css_source = '/* lorem */ ipsum[dolor/* sit */]/* amet'
        tokens = regroup(tokenize(css_source, ignore_comments))
        result = list(jsonify(tokens))
        self.ae(result, expected_tokens)


def token_grouping(self, tokenize):
    for css_source, expected_tokens in [
        ('', []),
        (r'Lorem\26 "i\psum"4px', [
            ('IDENT', 'Lorem&'), ('STRING', 'ipsum'), ('DIMENSION', 4)]),

        ('not([[lorem]]{ipsum (42)})', [
            ('FUNCTION', 'not', [
                ('[', [
                    ('[', [
                        ('IDENT', 'lorem'),
                    ]),
                ]),
                ('{', [
                    ('IDENT', 'ipsum'),
                    ('S', ' '),
                    ('(', [
                        ('INTEGER', 42),
                    ])
                ])
            ])]),

        # Close everything at EOF, no error
        ('a[b{"d', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('STRING', 'd'),
                ]),
            ]),
        ]),

        # Any remaining ), ] or } token is a nesting error
        ('a[b{d]e}', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('IDENT', 'd'),
                    (']', ']'),  # The error is visible here
                    ('IDENT', 'e'),
                ]),
            ]),
        ]),
        # ref:
        ('a[b{d}e]', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('IDENT', 'd'),
                ]),
                ('IDENT', 'e'),
            ]),
        ]),
    ]:
        tokens = regroup(tokenize(css_source, ignore_comments=False))
        result = list(jsonify(tokens))
        self.ae(result, expected_tokens)


def positions(self, tokenize):
    css = '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }'
    tokens = tokenize(css, ignore_comments=False)
    result = [(token.type, token.line, token.column) for token in tokens]
    self.ae(result, [
        ('COMMENT', 1, 1), ('S', 2, 9),
        ('IDENT', 3, 1), ('S', 3, 2), ('{', 3, 3),
        ('S', 3, 4), ('IDENT', 4, 5), (':', 4, 10),
        ('S', 4, 11), ('IDENT', 4, 12), (';', 4, 15), ('S', 4, 16),
        ('IDENT', 4, 17), (':', 4, 24), ('S', 4, 25), ('STRING', 4, 26),
        ('S', 5, 5), ('}', 5, 6)])


def tokens(self, tokenize):
    for css_source, expected_tokens in [
        ('', []),
        ('red -->',
         [('IDENT', 'red'), ('S', ' '), ('CDC', '-->')]),
        # Longest match rule: no CDC
        ('red-->',
         [('IDENT', 'red--'), ('DELIM', '>')]),

        (r'''p[example="\
foo(int x) {\
 this.x = x;\
}\
"]''', [
            ('IDENT', 'p'),
            ('[', '['),
            ('IDENT', 'example'),
            ('DELIM', '='),
            ('STRING', 'foo(int x) { this.x = x;}'),
            (']', ']')]),

        # Numbers are parsed
        ('42 .5 -4pX 1.25em 30%',
         [('INTEGER', 42), ('S', ' '),
          ('NUMBER', .5), ('S', ' '),
          # units are normalized to lower-case:
          ('DIMENSION', -4, 'px'), ('S', ' '),
          ('DIMENSION', 1.25, 'em'), ('S', ' '),
          ('PERCENTAGE', 30, '%')]),

        # URLs are extracted
        ('url(foo.png)', [('URI', 'foo.png')]),
        ('url("foo.png")', [('URI', 'foo.png')]),

        # Escaping

        (r'/* Comment with a \ backslash */',
         [('COMMENT', '/* Comment with a \ backslash */')]),  # Unchanged

        # backslash followed by a newline in a string: ignored
        ('"Lorem\\\nIpsum"', [('STRING', 'LoremIpsum')]),

        # backslash followed by a newline outside a string: stands for itself
        ('Lorem\\\nIpsum', [
            ('IDENT', 'Lorem'), ('DELIM', '\\'),
            ('S', '\n'), ('IDENT', 'Ipsum')]),

        # Cancel the meaning of special characters
        (r'"Lore\m Ipsum"', [('STRING', 'Lorem Ipsum')]),  # or not special
        (r'"Lorem \49psum"', [('STRING', 'Lorem Ipsum')]),
        (r'"Lorem \49 psum"', [('STRING', 'Lorem Ipsum')]),
        (r'"Lorem\"Ipsum"', [('STRING', 'Lorem"Ipsum')]),
        (r'"Lorem\\Ipsum"', [('STRING', r'Lorem\Ipsum')]),
        (r'"Lorem\5c Ipsum"', [('STRING', r'Lorem\Ipsum')]),
        (r'Lorem\+Ipsum', [('IDENT', 'Lorem+Ipsum')]),
        (r'Lorem+Ipsum', [('IDENT', 'Lorem'), ('DELIM', '+'), ('IDENT', 'Ipsum')]),
        (r'url(foo\).png)', [('URI', 'foo).png')]),

        # Unicode and backslash escaping
        ('\\26 B', [('IDENT', '&B')]),
        ('\\&B', [('IDENT', '&B')]),
        ('@\\26\tB', [('ATKEYWORD', '@&B')]),
        ('@\\&B', [('ATKEYWORD', '@&B')]),
        ('#\\26\nB', [('HASH', '#&B')]),
        ('#\\&B', [('HASH', '#&B')]),
        ('\\26\r\nB(', [('FUNCTION', '&B(')]),
        ('\\&B(', [('FUNCTION', '&B(')]),
        (r'12.5\000026B', [('DIMENSION', 12.5, '&b')]),
        (r'12.5\0000263B', [('DIMENSION', 12.5, '&3b')]),  # max 6 digits
        (r'12.5\&B', [('DIMENSION', 12.5, '&b')]),
        (r'"\26 B"', [('STRING', '&B')]),
        (r"'\000026B'", [('STRING', '&B')]),
        (r'"\&B"', [('STRING', '&B')]),
        (r'url("\26 B")', [('URI', '&B')]),
        (r'url(\26 B)', [('URI', '&B')]),
        (r'url("\&B")', [('URI', '&B')]),
        (r'url(\&B)', [('URI', '&B')]),
        (r'Lorem\110000Ipsum', [('IDENT', 'Lorem\uFFFDIpsum')]),

        # Bad strings

        # String ends at EOF without closing: no error, parsed
        ('"Lorem\\26Ipsum', [('STRING', 'Lorem&Ipsum')]),
        # Unescaped newline: ends the string, error, unparsed
        ('"Lorem\\26Ipsum\n', [
            ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n')]),
        # Tokenization restarts after the newline, so the second " starts
        # a new string (which ends at EOF without errors, as above.)
        ('"Lorem\\26Ipsum\ndolor" sit', [
            ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n'),
            ('IDENT', 'dolor'), ('STRING', ' sit')]),
    ]:
        sources = [css_source]
        for css_source in sources:
            tokens = tokenize(css_source, ignore_comments=False)
            result = [
                (token.type, token.value) + (
                    () if token.unit is None else (token.unit,))
                for token in tokens
            ]
            self.ae(result, expected_tokens)


class TestTokenizer(BaseTest):

    def run_test(self, func):
        for tokenize in tokenizers:
            func(self, tokenize)

    def test_token_api(self):
        self.run_test(token_api)

    def test_token_serialize_css(self):
        self.run_test(token_serialize_css)

    def test_comments(self):
        self.run_test(comments)

    def test_token_grouping(self):
        self.run_test(token_grouping)

    def test_positions(self):
        """Test the reported line/column position of each token."""
        self.run_test(positions)

    def test_tokens(self):
        self.run_test(tokens)
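The new layout turns each test into a module-level function that takes the tokenizer as a parameter, and run_test() fans every body out over all available implementations. The same fan-out pattern in a minimal, self-contained sketch (the names below are illustrative, not from the commit):

    # Illustrative sketch of the run_test() fan-out; not code from the commit.
    def shout(s):             # stand-in for python_tokenize_flat
        return s.upper()

    def shout_slowly(s):      # stand-in for c_tokenize_flat
        return ''.join(ch.upper() for ch in s)

    implementations = (shout, shout_slowly)

    def check_roundtrip(impl):      # a test body, parametrized by implementation
        assert impl('abc') == 'ABC'

    def run_test(func):             # run one body against every implementation
        for impl in implementations:
            func(impl)

    run_test(check_roundtrip)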
@@ -14,6 +14,7 @@
 typedef struct {
     PyObject_HEAD
     // Type-specific fields go here.
+    PyObject *is_container;
     PyObject *type;
     PyObject *_as_css;
     PyObject *value;
@@ -26,6 +27,7 @@ typedef struct {
 static void
 tokenizer_Token_dealloc(tokenizer_Token* self)
 {
+    Py_XDECREF(self->is_container); self->is_container = NULL;
     Py_XDECREF(self->type); self->type = NULL;
     Py_XDECREF(self->_as_css); self->_as_css = NULL;
     Py_XDECREF(self->value); self->value = NULL;
@@ -47,6 +49,7 @@ tokenizer_Token_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
         self->ob_type->tp_free((PyObject*)self); return NULL;
     }
     Py_INCREF(self->type); Py_INCREF(self->_as_css); Py_INCREF(self->value); Py_INCREF(self->unit); Py_INCREF(self->line); Py_INCREF(self->column);
+    self->is_container = Py_False; Py_INCREF(self->is_container);
 
     return (PyObject *)self;
 }
@@ -81,6 +84,7 @@ tokenizer_Token_as_css(tokenizer_Token *self, PyObject *args, PyObject *kwargs)
 }
 
 static PyMemberDef tokenizer_Token_members[] = {
+    {"is_container", T_OBJECT_EX, offsetof(tokenizer_Token, is_container), 0, "False unless this token is a container for other tokens"},
     {"type", T_OBJECT_EX, offsetof(tokenizer_Token, type), 0, "The token type"},
    {"_as_css", T_OBJECT_EX, offsetof(tokenizer_Token, _as_css), 0, "Internal variable, use as_css() method instead."},
    {"value", T_OBJECT_EX, offsetof(tokenizer_Token, value), 0, "The token value"},
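The is_container member added here is what the test helper jsonify() branches on. A sketch of reading it from Python, assuming tinycss is importable (the CSS string is arbitrary):

    from tinycss.tokenizer import python_tokenize_flat, regroup

    for tok in regroup(python_tokenize_flat('func(8, [z])', ignore_comments=True)):
        if tok.is_container:
            # FUNCTION, (, [ and { tokens carry their nested tokens in .content
            print(tok.type, [t.type for t in tok.content])
        else:
            print(tok.type, tok.value)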
@@ -217,7 +221,7 @@ static PyObject* clone_unicode(Py_UNICODE *x, Py_ssize_t sz) {
 #endif
     PyObject *ans = PyUnicode_FromUnicode(NULL, sz);
     if (ans == NULL) return PyErr_NoMemory();
-    memcpy(PyUnicode_AS_UNICODE(ans), x, sz);
+    memcpy(PyUnicode_AS_UNICODE(ans), x, sz * sizeof(Py_UNICODE));
     return ans;
 }
 
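The memcpy fix scales the copy length by sizeof(Py_UNICODE): the old call copied sz bytes where sz wide characters were meant, so the cloned string was truncated on any build where Py_UNICODE is wider than one byte. The same byte-versus-element pitfall, sketched in Python with ctypes (not code from the commit):

    import ctypes

    src = ctypes.create_unicode_buffer('abcdef')   # six wchar_t elements + NUL
    dst = ctypes.create_unicode_buffer(7)
    # Wrong: treats the element count as a byte count, copying only a prefix:
    #   ctypes.memmove(dst, src, 6)
    # Right: scale the element count by the element size, as the fix does:
    ctypes.memmove(dst, src, 6 * ctypes.sizeof(ctypes.c_wchar))
    print(dst.value)   # 'abcdef'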
@@ -237,8 +241,8 @@ tokenize_flat(PyObject *self, PyObject *args) {
         PyErr_SetString(PyExc_RuntimeError, "tokenizer module not initialized. You must call init() first."); return NULL;
     }
 
-    if (!PyArg_ParseTuple(args, "U|O", &py_source, &ic)) return NULL;
-    if (ic != NULL && PyObject_IsTrue(ic)) ignore_comments = 1;
+    if (!PyArg_ParseTuple(args, "UO", &py_source, &ic)) return NULL;
+    if (PyObject_IsTrue(ic)) ignore_comments = 1;
     source_len = PyUnicode_GET_SIZE(py_source);
     css_source = PyUnicode_AS_UNICODE(py_source);
 
@@ -300,8 +304,7 @@ tokenize_flat(PyObject *self, PyObject *args) {
             if (PyUnicode_GET_SIZE(css_value) > 0) {
                 value = clone_unicode(PyUnicode_AS_UNICODE(css_value), PyUnicode_GET_SIZE(css_value) - 1);
                 if (value == NULL) goto error;
-            }
-            else { value = css_value; Py_INCREF(value); }
+            } else { value = css_value; Py_INCREF(value); }
             if (value == NULL) goto error;
             TONUMBER(value);
             unit = PyUnicode_FromString("%");
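This branch strips the trailing '%' before numeric conversion, which is what produces ('PERCENTAGE', 30, '%') in test_tokens. A quick Python-side check of the same behaviour (a sketch, assuming tinycss is importable):

    from tinycss.tokenizer import python_tokenize_flat

    token = python_tokenize_flat('30%', ignore_comments=True)[0]
    assert (token.type, token.value, token.unit) == ('PERCENTAGE', 30, '%')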
@@ -331,7 +334,10 @@ tokenize_flat(PyObject *self, PyObject *args) {
                 item = clone_unicode(PyUnicode_AS_UNICODE(value) + 1, PyUnicode_GET_SIZE(value) - 2);
                 if (item == NULL) goto error;
                 Py_DECREF(value); value = item; item = NULL;
+                UNESCAPE(value, NEWLINE_UNESCAPE);
             }
+            UNESCAPE(value, SIMPLE_UNESCAPE);
+            UNESCAPE(value, UNICODE_UNESCAPE);
         } else
 
         if (type_ == STRING) {
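The three UNESCAPE passes match the escape rules the tests pin down: a backslash-newline inside a string is dropped, a simple escape cancels a special character, and \XXXXXX unicode escapes are decoded. The expected values below are taken directly from test_tokens (a sketch, assuming tinycss is importable):

    from tinycss.tokenizer import python_tokenize_flat

    for src, want in [
        ('"Lorem\\\nIpsum"', 'LoremIpsum'),    # NEWLINE_UNESCAPE
        (r'"Lorem\"Ipsum"', 'Lorem"Ipsum'),    # SIMPLE_UNESCAPE
        (r'"Lorem \49psum"', 'Lorem Ipsum'),   # UNICODE_UNESCAPE: \49 -> 'I'
    ]:
        token = python_tokenize_flat(src, ignore_comments=True)[0]
        assert (token.type, token.value) == ('STRING', want)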
@@ -394,7 +400,7 @@ error:
 
 static PyMethodDef tokenizer_methods[] = {
     {"tokenize_flat", tokenize_flat, METH_VARARGS,
-        "tokenize_flat()\n\n"
+        "tokenize_flat(css_source, ignore_comments)\n\n Convert CSS source into a flat list of tokens"
     },
 
     {"init", tokenize_init, METH_VARARGS,
@@ -14,7 +14,7 @@
 
 from __future__ import unicode_literals
 
-from . import token_data
+from tinycss import token_data
 
 
 def tokenize_flat(css_source, ignore_comments=True,
@@ -206,11 +206,10 @@ def tokenize_grouped(css_source, ignore_comments=True):
 # Optional Cython version of tokenize_flat
 # Make both versions available with explicit names for tests.
 python_tokenize_flat = tokenize_flat
 
 try:
-    from . import speedups
+    tok = token_data.load_c_tokenizer()
-except ImportError:
+except (ImportError, RuntimeError):
-    cython_tokenize_flat = None
+    c_tokenize_flat = None
 else:
-    cython_tokenize_flat = speedups.tokenize_flat
+    c_tokenize_flat = lambda s, ignore_comments=False: tok.tokenize_flat(s, ignore_comments)
-# Default to the Cython version if available
-tokenize_flat = cython_tokenize_flat
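After this change the module always exposes python_tokenize_flat, exposes c_tokenize_flat only when the C tokenizer loads (None otherwise), and no longer rebinds tokenize_flat to the compiled version behind the caller's back. Caller-side selection then looks roughly like this (a sketch):

    from tinycss.tokenizer import python_tokenize_flat, c_tokenize_flat

    css = '/* note */ a { color: red }'
    for tokenize in (python_tokenize_flat, c_tokenize_flat):
        if tokenize is None:
            continue   # the C tokenizer failed to build or load
        print([t.type for t in tokenize(css, ignore_comments=True)])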