Testing for the C tokenizer

2025-08-30 23:00:21 -04:00 · 2014-05-22 15:23:25 +05:30 · 2014-05-22 15:23:25 +05:30 · 04b45413c6
commit 04b45413c6
parent adac7e6d1e
4 changed files with 299 additions and 268 deletions
--- a/src/tinycss/tests/tokenizer.py
+++ b/src/tinycss/tests/tokenizer.py
@ -1,255 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=utf-8
-from __future__ import (unicode_literals, division, absolute_import,
-                        print_function)
-
-__license__ = 'GPL v3'
-__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
-
-from tinycss.tests import BaseTest
-from tinycss.tokenizer import tokenize_flat as tokenize, regroup
-
-def jsonify(tokens):
-    """Turn tokens into "JSON-compatible" data structures."""
-    for token in tokens:
-        if token.type == 'FUNCTION':
-            yield (token.type, token.function_name,
-                   list(jsonify(token.content)))
-        elif token.is_container:
-            yield token.type, list(jsonify(token.content))
-        else:
-            yield token.type, token.value
-
-
-class TestTokenizer(BaseTest):
-
-    def test_token_api(self):
-        for css_source in [
-                '(8, foo, [z])', '[8, foo, (z)]', '{8, foo, [z]}', 'func(8, foo, [z])'
-        ]:
-            tokens = list(regroup(tokenize(css_source)))
-            self.ae(len(tokens), 1)
-            self.ae(len(tokens[0].content), 7)
-
-    def test_token_serialize_css(self):
-        for css_source in [
-r'''p[example="\
-foo(int x) {\
-    this.x = x;\
-}\
-"]''',
-            '"Lorem\\26Ipsum\ndolor" sit',
-            '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }',
-            'not([[lorem]]{ipsum (42)})',
-            'a[b{d]e}',
-            'a[b{"d',
-        ]:
-            for _regroup in (regroup, lambda x: x):
-                tokens = _regroup(tokenize(css_source, ignore_comments=False))
-                result = ''.join(token.as_css() for token in tokens)
-                self.ae(result, css_source)
-
-    def test_comments(self):
-        for ignore_comments, expected_tokens in [
-            (False, [
-                ('COMMENT', '/* lorem */'),
-                ('S', ' '),
-                ('IDENT', 'ipsum'),
-                ('[', [
-                    ('IDENT', 'dolor'),
-                    ('COMMENT', '/* sit */'),
-                ]),
-                ('BAD_COMMENT', '/* amet')
-            ]),
-            (True, [
-                ('S', ' '),
-                ('IDENT', 'ipsum'),
-                ('[', [
-                    ('IDENT', 'dolor'),
-                ]),
-            ]),
-        ]:
-            css_source = '/* lorem */ ipsum[dolor/* sit */]/* amet'
-            tokens = regroup(tokenize(css_source, ignore_comments))
-            result = list(jsonify(tokens))
-            self.ae(result, expected_tokens)
-
-    def test_token_grouping(self):
-        for css_source, expected_tokens in [
-            ('', []),
-            (r'Lorem\26 "i\psum"4px', [
-                ('IDENT', 'Lorem&'), ('STRING', 'ipsum'), ('DIMENSION', 4)]),
-
-            ('not([[lorem]]{ipsum (42)})', [
-                ('FUNCTION', 'not', [
-                    ('[', [
-                        ('[', [
-                            ('IDENT', 'lorem'),
-                        ]),
-                    ]),
-                    ('{', [
-                        ('IDENT', 'ipsum'),
-                        ('S', ' '),
-                        ('(', [
-                            ('INTEGER', 42),
-                        ])
-                    ])
-                ])]),
-
-            # Close everything at EOF, no error
-            ('a[b{"d', [
-                ('IDENT', 'a'),
-                ('[', [
-                    ('IDENT', 'b'),
-                    ('{', [
-                        ('STRING', 'd'),
-                    ]),
-                ]),
-            ]),
-
-            # Any remaining ), ] or } token is a nesting error
-            ('a[b{d]e}', [
-                ('IDENT', 'a'),
-                ('[', [
-                    ('IDENT', 'b'),
-                    ('{', [
-                        ('IDENT', 'd'),
-                        (']', ']'),  # The error is visible here
-                        ('IDENT', 'e'),
-                    ]),
-                ]),
-            ]),
-            # ref:
-            ('a[b{d}e]', [
-                ('IDENT', 'a'),
-                ('[', [
-                    ('IDENT', 'b'),
-                    ('{', [
-                        ('IDENT', 'd'),
-                    ]),
-                    ('IDENT', 'e'),
-                ]),
-            ]),
-        ]:
-            tokens = regroup(tokenize(css_source, ignore_comments=False))
-            result = list(jsonify(tokens))
-            self.ae(result, expected_tokens)
-
-    def test_positions(self):
-        """Test the reported line/column position of each token."""
-        css = '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }'
-        tokens = tokenize(css, ignore_comments=False)
-        result = [(token.type, token.line, token.column) for token in tokens]
-        self.ae(result, [
-            ('COMMENT', 1, 1), ('S', 2, 9),
-            ('IDENT', 3, 1), ('S', 3, 2), ('{', 3, 3),
-            ('S', 3, 4), ('IDENT', 4, 5), (':', 4, 10),
-            ('S', 4, 11), ('IDENT', 4, 12), (';', 4, 15), ('S', 4, 16),
-            ('IDENT', 4, 17), (':', 4, 24), ('S', 4, 25), ('STRING', 4, 26),
-            ('S', 5, 5), ('}', 5, 6)])
-
-    def test_tokens(self):
-        for css_source, expected_tokens in [
-            ('', []),
-            ('red -->',
-                [('IDENT', 'red'), ('S', ' '), ('CDC', '-->')]),
-            # Longest match rule: no CDC
-            ('red-->',
-                [('IDENT', 'red--'), ('DELIM', '>')]),
-
-    (r'''p[example="\
-foo(int x) {\
-    this.x = x;\
-}\
-"]''', [
-                ('IDENT', 'p'),
-                ('[', '['),
-                ('IDENT', 'example'),
-                ('DELIM', '='),
-                ('STRING', 'foo(int x) {    this.x = x;}'),
-                (']', ']')]),
-
-            # Numbers are parsed
-            ('42 .5 -4pX 1.25em 30%',
-                [('INTEGER', 42), ('S', ' '),
-                ('NUMBER', .5), ('S', ' '),
-                # units are normalized to lower-case:
-                ('DIMENSION', -4, 'px'), ('S', ' '),
-                ('DIMENSION', 1.25, 'em'), ('S', ' '),
-                ('PERCENTAGE', 30, '%')]),
-
-            # URLs are extracted
-            ('url(foo.png)', [('URI', 'foo.png')]),
-            ('url("foo.png")', [('URI', 'foo.png')]),
-
-            # Escaping
-
-            (r'/* Comment with a \ backslash */',
-                [('COMMENT', '/* Comment with a \ backslash */')]),  # Unchanged
-
-            # backslash followed by a newline in a string: ignored
-            ('"Lorem\\\nIpsum"', [('STRING', 'LoremIpsum')]),
-
-            # backslash followed by a newline outside a string: stands for itself
-            ('Lorem\\\nIpsum', [
-                ('IDENT', 'Lorem'), ('DELIM', '\\'),
-                ('S', '\n'), ('IDENT', 'Ipsum')]),
-
-            # Cancel the meaning of special characters
-            (r'"Lore\m Ipsum"', [('STRING', 'Lorem Ipsum')]),  # or not specal
-            (r'"Lorem \49psum"', [('STRING', 'Lorem Ipsum')]),
-            (r'"Lorem \49 psum"', [('STRING', 'Lorem Ipsum')]),
-            (r'"Lorem\"Ipsum"', [('STRING', 'Lorem"Ipsum')]),
-            (r'"Lorem\\Ipsum"', [('STRING', r'Lorem\Ipsum')]),
-            (r'"Lorem\5c Ipsum"', [('STRING', r'Lorem\Ipsum')]),
-            (r'Lorem\+Ipsum', [('IDENT', 'Lorem+Ipsum')]),
-            (r'Lorem+Ipsum', [('IDENT', 'Lorem'), ('DELIM', '+'), ('IDENT', 'Ipsum')]),
-            (r'url(foo\).png)', [('URI', 'foo).png')]),
-
-            # Unicode and backslash escaping
-            ('\\26 B', [('IDENT', '&B')]),
-            ('\\&B', [('IDENT', '&B')]),
-            ('@\\26\tB', [('ATKEYWORD', '@&B')]),
-            ('@\\&B', [('ATKEYWORD', '@&B')]),
-            ('#\\26\nB', [('HASH', '#&B')]),
-            ('#\\&B', [('HASH', '#&B')]),
-            ('\\26\r\nB(', [('FUNCTION', '&B(')]),
-            ('\\&B(', [('FUNCTION', '&B(')]),
-            (r'12.5\000026B', [('DIMENSION', 12.5, '&b')]),
-            (r'12.5\0000263B', [('DIMENSION', 12.5, '&3b')]),  # max 6 digits
-            (r'12.5\&B', [('DIMENSION', 12.5, '&b')]),
-            (r'"\26 B"', [('STRING', '&B')]),
-            (r"'\000026B'", [('STRING', '&B')]),
-            (r'"\&B"', [('STRING', '&B')]),
-            (r'url("\26 B")', [('URI', '&B')]),
-            (r'url(\26 B)', [('URI', '&B')]),
-            (r'url("\&B")', [('URI', '&B')]),
-            (r'url(\&B)', [('URI', '&B')]),
-            (r'Lorem\110000Ipsum', [('IDENT', 'Lorem\uFFFDIpsum')]),
-
-            # Bad strings
-
-            # String ends at EOF without closing: no error, parsed
-            ('"Lorem\\26Ipsum', [('STRING', 'Lorem&Ipsum')]),
-            # Unescaped newline: ends the string, error, unparsed
-            ('"Lorem\\26Ipsum\n', [
-                ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n')]),
-            # Tokenization restarts after the newline, so the second " starts
-            # a new string (which ends at EOF without errors, as above.)
-            ('"Lorem\\26Ipsum\ndolor" sit', [
-                ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n'),
-                ('IDENT', 'dolor'), ('STRING', ' sit')]),
-
-        ]:
-            sources = [css_source]
-            for css_source in sources:
-                tokens = tokenize(css_source, ignore_comments=False)
-                result = [
-                    (token.type, token.value) + (
-                        () if token.unit is None else (token.unit,))
-                    for token in tokens
-                ]
-                self.ae(result, expected_tokens)
-
-
-
--- a/src/tinycss/tests/tokenizing.py
+++ b/src/tinycss/tests/tokenizing.py
@ -0,0 +1,281 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
+
+from tinycss.tests import BaseTest
+from tinycss.tokenizer import python_tokenize_flat, c_tokenize_flat, regroup
+
+def jsonify(tokens):
+    """Turn tokens into "JSON-compatible" data structures."""
+    for token in tokens:
+        if token.type == 'FUNCTION':
+            yield (token.type, token.function_name,
+                   list(jsonify(token.content)))
+        elif token.is_container:
+            yield token.type, list(jsonify(token.content))
+        else:
+            yield token.type, token.value
+
+if c_tokenize_flat is None:
+    tokenizers = (python_tokenize_flat,)
+else:
+    tokenizers = (python_tokenize_flat, c_tokenize_flat)
+
+def token_api(self, tokenize):
+    for css_source in [
+            '(8, foo, [z])', '[8, foo, (z)]', '{8, foo, [z]}', 'func(8, foo, [z])'
+    ]:
+        tokens = list(regroup(tokenize(css_source)))
+        self.ae(len(tokens), 1)
+        self.ae(len(tokens[0].content), 7)
+
+def token_serialize_css(self, tokenize):
+    """Check that as_css() on the tokens from `tokenize` reproduces the source."""
+    for css_source in [
+r'''p[example="\
+foo(int x) {\
+    this.x = x;\
+}\
+"]''',
+        '"Lorem\\26Ipsum\ndolor" sit',
+        '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }',
+        'not([[lorem]]{ipsum (42)})',
+        'a[b{d]e}',
+        'a[b{"d',
+    ]:
+        for _regroup in (regroup, lambda x: x):
+            tokens = _regroup(tokenize(css_source, ignore_comments=False))
+            result = ''.join(token.as_css() for token in tokens)
+            self.ae(result, css_source)
+
+def comments(self, tokenize):
+    for ignore_comments, expected_tokens in [
+        (False, [
+            ('COMMENT', '/* lorem */'),
+            ('S', ' '),
+            ('IDENT', 'ipsum'),
+            ('[', [
+                ('IDENT', 'dolor'),
+                ('COMMENT', '/* sit */'),
+            ]),
+            ('BAD_COMMENT', '/* amet')
+        ]),
+        (True, [
+            ('S', ' '),
+            ('IDENT', 'ipsum'),
+            ('[', [
+                ('IDENT', 'dolor'),
+            ]),
+        ]),
+    ]:
+        css_source = '/* lorem */ ipsum[dolor/* sit */]/* amet'
+        tokens = regroup(tokenize(css_source, ignore_comments))
+        result = list(jsonify(tokens))
+        self.ae(result, expected_tokens)
+
+def token_grouping(self, tokenize):
+    for css_source, expected_tokens in [
+        ('', []),
+        (r'Lorem\26 "i\psum"4px', [
+            ('IDENT', 'Lorem&'), ('STRING', 'ipsum'), ('DIMENSION', 4)]),
+
+        ('not([[lorem]]{ipsum (42)})', [
+            ('FUNCTION', 'not', [
+                ('[', [
+                    ('[', [
+                        ('IDENT', 'lorem'),
+                    ]),
+                ]),
+                ('{', [
+                    ('IDENT', 'ipsum'),
+                    ('S', ' '),
+                    ('(', [
+                        ('INTEGER', 42),
+                    ])
+                ])
+            ])]),
+
+        # Close everything at EOF, no error
+        ('a[b{"d', [
+            ('IDENT', 'a'),
+            ('[', [
+                ('IDENT', 'b'),
+                ('{', [
+                    ('STRING', 'd'),
+                ]),
+            ]),
+        ]),
+
+        # Any remaining ), ] or } token is a nesting error
+        ('a[b{d]e}', [
+            ('IDENT', 'a'),
+            ('[', [
+                ('IDENT', 'b'),
+                ('{', [
+                    ('IDENT', 'd'),
+                    (']', ']'),  # The error is visible here
+                    ('IDENT', 'e'),
+                ]),
+            ]),
+        ]),
+        # ref:
+        ('a[b{d}e]', [
+            ('IDENT', 'a'),
+            ('[', [
+                ('IDENT', 'b'),
+                ('{', [
+                    ('IDENT', 'd'),
+                ]),
+                ('IDENT', 'e'),
+            ]),
+        ]),
+    ]:
+        tokens = regroup(tokenize(css_source, ignore_comments=False))
+        result = list(jsonify(tokens))
+        self.ae(result, expected_tokens)
+
+def positions(self, tokenize):
+    css = '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }'
+    tokens = tokenize(css, ignore_comments=False)
+    result = [(token.type, token.line, token.column) for token in tokens]
+    self.ae(result, [
+        ('COMMENT', 1, 1), ('S', 2, 9),
+        ('IDENT', 3, 1), ('S', 3, 2), ('{', 3, 3),
+        ('S', 3, 4), ('IDENT', 4, 5), (':', 4, 10),
+        ('S', 4, 11), ('IDENT', 4, 12), (';', 4, 15), ('S', 4, 16),
+        ('IDENT', 4, 17), (':', 4, 24), ('S', 4, 25), ('STRING', 4, 26),
+        ('S', 5, 5), ('}', 5, 6)])
+
+def tokens(self, tokenize):
+    for css_source, expected_tokens in [
+        ('', []),
+        ('red -->',
+            [('IDENT', 'red'), ('S', ' '), ('CDC', '-->')]),
+        # Longest match rule: no CDC
+        ('red-->',
+            [('IDENT', 'red--'), ('DELIM', '>')]),
+
+(r'''p[example="\
+foo(int x) {\
+    this.x = x;\
+}\
+"]''', [
+            ('IDENT', 'p'),
+            ('[', '['),
+            ('IDENT', 'example'),
+            ('DELIM', '='),
+            ('STRING', 'foo(int x) {    this.x = x;}'),
+            (']', ']')]),
+
+        # Numbers are parsed
+        ('42 .5 -4pX 1.25em 30%',
+            [('INTEGER', 42), ('S', ' '),
+            ('NUMBER', .5), ('S', ' '),
+            # units are normalized to lower-case:
+            ('DIMENSION', -4, 'px'), ('S', ' '),
+            ('DIMENSION', 1.25, 'em'), ('S', ' '),
+            ('PERCENTAGE', 30, '%')]),
+
+        # URLs are extracted
+        ('url(foo.png)', [('URI', 'foo.png')]),
+        ('url("foo.png")', [('URI', 'foo.png')]),
+
+        # Escaping
+
+        (r'/* Comment with a \ backslash */',
+            [('COMMENT', '/* Comment with a \ backslash */')]),  # Unchanged
+
+        # backslash followed by a newline in a string: ignored
+        ('"Lorem\\\nIpsum"', [('STRING', 'LoremIpsum')]),
+
+        # backslash followed by a newline outside a string: stands for itself
+        ('Lorem\\\nIpsum', [
+            ('IDENT', 'Lorem'), ('DELIM', '\\'),
+            ('S', '\n'), ('IDENT', 'Ipsum')]),
+
+        # Cancel the meaning of special characters
+        (r'"Lore\m Ipsum"', [('STRING', 'Lorem Ipsum')]),  # or not special
+        (r'"Lorem \49psum"', [('STRING', 'Lorem Ipsum')]),
+        (r'"Lorem \49 psum"', [('STRING', 'Lorem Ipsum')]),
+        (r'"Lorem\"Ipsum"', [('STRING', 'Lorem"Ipsum')]),
+        (r'"Lorem\\Ipsum"', [('STRING', r'Lorem\Ipsum')]),
+        (r'"Lorem\5c Ipsum"', [('STRING', r'Lorem\Ipsum')]),
+        (r'Lorem\+Ipsum', [('IDENT', 'Lorem+Ipsum')]),
+        (r'Lorem+Ipsum', [('IDENT', 'Lorem'), ('DELIM', '+'), ('IDENT', 'Ipsum')]),
+        (r'url(foo\).png)', [('URI', 'foo).png')]),
+
+        # Unicode and backslash escaping
+        ('\\26 B', [('IDENT', '&B')]),
+        ('\\&B', [('IDENT', '&B')]),
+        ('@\\26\tB', [('ATKEYWORD', '@&B')]),
+        ('@\\&B', [('ATKEYWORD', '@&B')]),
+        ('#\\26\nB', [('HASH', '#&B')]),
+        ('#\\&B', [('HASH', '#&B')]),
+        ('\\26\r\nB(', [('FUNCTION', '&B(')]),
+        ('\\&B(', [('FUNCTION', '&B(')]),
+        (r'12.5\000026B', [('DIMENSION', 12.5, '&b')]),
+        (r'12.5\0000263B', [('DIMENSION', 12.5, '&3b')]),  # max 6 digits
+        (r'12.5\&B', [('DIMENSION', 12.5, '&b')]),
+        (r'"\26 B"', [('STRING', '&B')]),
+        (r"'\000026B'", [('STRING', '&B')]),
+        (r'"\&B"', [('STRING', '&B')]),
+        (r'url("\26 B")', [('URI', '&B')]),
+        (r'url(\26 B)', [('URI', '&B')]),
+        (r'url("\&B")', [('URI', '&B')]),
+        (r'url(\&B)', [('URI', '&B')]),
+        (r'Lorem\110000Ipsum', [('IDENT', 'Lorem\uFFFDIpsum')]),
+
+        # Bad strings
+
+        # String ends at EOF without closing: no error, parsed
+        ('"Lorem\\26Ipsum', [('STRING', 'Lorem&Ipsum')]),
+        # Unescaped newline: ends the string, error, unparsed
+        ('"Lorem\\26Ipsum\n', [
+            ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n')]),
+        # Tokenization restarts after the newline, so the second " starts
+        # a new string (which ends at EOF without errors, as above.)
+        ('"Lorem\\26Ipsum\ndolor" sit', [
+            ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n'),
+            ('IDENT', 'dolor'), ('STRING', ' sit')]),
+
+    ]:
+        sources = [css_source]
+        for css_source in sources:
+            tokens = tokenize(css_source, ignore_comments=False)
+            result = [
+                (token.type, token.value) + (
+                    () if token.unit is None else (token.unit,))
+                for token in tokens
+            ]
+            self.ae(result, expected_tokens)
+
+
+class TestTokenizer(BaseTest):
+
+    def run_test(self, func):
+        for tokenize in tokenizers:
+            func(self, tokenize)
+
+    def test_token_api(self):
+        self.run_test(token_api)
+
+    def test_token_serialize_css(self):
+        self.run_test(token_serialize_css)
+
+    def test_comments(self):
+        self.run_test(comments)
+
+    def test_token_grouping(self):
+        self.run_test(token_grouping)
+
+    def test_positions(self):
+        """Test the reported line/column position of each token."""
+        self.run_test(positions)
+
+    def test_tokens(self):
+        self.run_test(tokens)
+
--- a/src/tinycss/tokenizer.c
+++ b/src/tinycss/tokenizer.c
@ -14,6 +14,7 @@
 typedef struct {
    PyObject_HEAD
    // Type-specific fields go here.
+    PyObject *is_container;
    PyObject *type;
    PyObject *_as_css;
    PyObject *value;
@ -26,6 +27,7 @@ typedef struct {
 static void
 tokenizer_Token_dealloc(tokenizer_Token* self)
 {
+    Py_XDECREF(self->is_container); self->is_container = NULL;
    Py_XDECREF(self->type); self->type = NULL;
    Py_XDECREF(self->_as_css); self->_as_css = NULL;
    Py_XDECREF(self->value); self->value = NULL;
@ -47,6 +49,7 @@ tokenizer_Token_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
        self->ob_type->tp_free((PyObject*)self); return NULL;
    }
    Py_INCREF(self->type); Py_INCREF(self->_as_css); Py_INCREF(self->value); Py_INCREF(self->unit); Py_INCREF(self->line); Py_INCREF(self->column);
+    self->is_container = Py_False; Py_INCREF(self->is_container);

    return (PyObject *)self;
 }
@ -81,6 +84,7 @@ tokenizer_Token_as_css(tokenizer_Token *self, PyObject *args, PyObject *kwargs)
 }

 static PyMemberDef tokenizer_Token_members[] = {
+    {"is_container", T_OBJECT_EX, offsetof(tokenizer_Token, is_container), 0, "False unless this token is a  container for other tokens"},
    {"type", T_OBJECT_EX, offsetof(tokenizer_Token, type), 0, "The token type"},
    {"_as_css", T_OBJECT_EX, offsetof(tokenizer_Token, _as_css), 0, "Internal variable, use as_css() method instead."},
    {"value", T_OBJECT_EX, offsetof(tokenizer_Token, value), 0, "The token value"},
@ -217,7 +221,7 @@ static PyObject* clone_unicode(Py_UNICODE *x, Py_ssize_t sz) {
 #endif
    PyObject *ans = PyUnicode_FromUnicode(NULL, sz);
    if (ans == NULL) return PyErr_NoMemory();
-    memcpy(PyUnicode_AS_UNICODE(ans), x, sz);
+    memcpy(PyUnicode_AS_UNICODE(ans), x, sz * sizeof(Py_UNICODE));
    return ans;
 }

@ -237,8 +241,8 @@ tokenize_flat(PyObject *self, PyObject *args) {
        PyErr_SetString(PyExc_RuntimeError, "tokenizer module not initialized. You must call init() first."); return NULL;
    }

-    if (!PyArg_ParseTuple(args, "U|O", &py_source, &ic)) return NULL;
-    if (ic != NULL && PyObject_IsTrue(ic)) ignore_comments = 1;
+    if (!PyArg_ParseTuple(args, "UO", &py_source, &ic)) return NULL;
+    if (PyObject_IsTrue(ic)) ignore_comments = 1;
    source_len = PyUnicode_GET_SIZE(py_source);
    css_source = PyUnicode_AS_UNICODE(py_source);

@ -300,8 +304,7 @@ tokenize_flat(PyObject *self, PyObject *args) {
                if (PyUnicode_GET_SIZE(css_value) > 0) {
                    value = clone_unicode(PyUnicode_AS_UNICODE(css_value), PyUnicode_GET_SIZE(css_value) - 1);
                    if (value == NULL) goto error;
-                }
-                else { value = css_value; Py_INCREF(value); }
+                } else { value = css_value; Py_INCREF(value); }
                if (value == NULL) goto error;
                TONUMBER(value);
                unit = PyUnicode_FromString("%");
@ -331,7 +334,10 @@ tokenize_flat(PyObject *self, PyObject *args) {
                    item = clone_unicode(PyUnicode_AS_UNICODE(value) + 1, PyUnicode_GET_SIZE(value) - 2);
                    if (item == NULL) goto error;
                    Py_DECREF(value); value = item; item = NULL;
+                    UNESCAPE(value, NEWLINE_UNESCAPE);
                }
+                UNESCAPE(value, SIMPLE_UNESCAPE);
+                UNESCAPE(value, UNICODE_UNESCAPE);
            } else

            if (type_ == STRING) {
@ -394,7 +400,7 @@ error:

 static PyMethodDef tokenizer_methods[] = {
    {"tokenize_flat", tokenize_flat, METH_VARARGS,
-        "tokenize_flat()\n\n"
+        "tokenize_flat(css_source, ignore_comments)\n\n Convert CSS source into a flat list of tokens"
    },

    {"init", tokenize_init, METH_VARARGS,
--- a/src/tinycss/tokenizer.py
+++ b/src/tinycss/tokenizer.py
@ -14,7 +14,7 @@

 from __future__ import unicode_literals

-from . import token_data
+from tinycss import token_data


 def tokenize_flat(css_source, ignore_comments=True,
@ -206,11 +206,10 @@ def tokenize_grouped(css_source, ignore_comments=True):
 # Optional Cython version of tokenize_flat
 # Make both versions available with explicit names for tests.
 python_tokenize_flat = tokenize_flat
+
 try:
-    from . import speedups
-except ImportError:
-    cython_tokenize_flat = None
+    tok = token_data.load_c_tokenizer()
+except (ImportError, RuntimeError):
+    c_tokenize_flat = None
 else:
-    cython_tokenize_flat = speedups.tokenize_flat
-    # Default to the Cython version if available
-    tokenize_flat = cython_tokenize_flat
+    c_tokenize_flat = lambda s, ignore_comments=True: tok.tokenize_flat(s, ignore_comments)  # same default as python_tokenize_flat