Initial import of tinycss

2025-07-09 03:04:10 -04:00 · 2014-05-21 10:15:50 +05:30 · 2014-05-21 10:15:50 +05:30 · d993534dfc
commit d993534dfc
parent 062d38a156
10 changed files with 2481 additions and 0 deletions
--- a/4
+++ b/4
@ -53,6 +53,10 @@ License: other
 are permitted in any medium without royalty provided the copyright
 notice and this notice are preserved.
 Files: src/tinycss/*
 Copyright: Simon Sapin
 License: BSD
 Files: src/calibre/ebooks/readability/*
 Copyright: Unknown
 License: Apache 2.0
--- a/src/tinycss/init.py
+++ b/src/tinycss/init.py
@ -0,0 +1,44 @@
 # coding: utf8
 """
    tinycss
    -------
    A CSS parser, and nothing else.
    :copyright: (c) 2012 by Simon Sapin.
    :license: BSD, see LICENSE for more details.
 """
 import sys
 from .version import VERSION
 __version__ = VERSION
 from .css21 import CSS21Parser
 from .page3 import CSSPage3Parser
 # Short feature names accepted by make_parser(), mapped to the parser class
 # each one stands for. Anything not listed here is used as a base class as-is.
 PARSER_MODULES = {
    'page3': CSSPage3Parser,
 }
 def make_parser(*features, **kwargs):
    """Build and instantiate a parser with the requested features.
    :param features:
        Base classes to mix into the new parser class, in order.
        A string found in ``PARSER_MODULES`` (currently ``'page3'``, short
        for :class:`~page3.CSSPage3Parser`) is replaced by the class it names.
    :param kwargs:
        Passed unchanged to the parser’s constructor.
    :returns:
        A plain :class:`CSS21Parser` instance when no feature is given,
        otherwise an instance of a new ``CustomCSSParser`` class that extends
        the features and then :class:`CSS21Parser`.
    """
    if not features:
        return CSS21Parser(**kwargs)
    # Resolve shorthand names; CSS21Parser always comes last in the MRO.
    resolved = [PARSER_MODULES.get(feature, feature) for feature in features]
    resolved.append(CSS21Parser)
    custom_class = type('CustomCSSParser', tuple(resolved), {})
    return custom_class(**kwargs)
--- a/src/tinycss/color3.py
+++ b/src/tinycss/color3.py
@ -0,0 +1,382 @@
 # coding: utf8
 """
    tinycss.color3
    --------------
    Parser for CSS 3 color values
    http://www.w3.org/TR/css3-color/
    This module does not provide anything that integrates in a parser class,
    only functions that parse single tokens from (eg.) a property value.
    :copyright: (c) 2012 by Simon Sapin.
    :license: BSD, see LICENSE for more details.
 """
 from __future__ import unicode_literals, division
 import collections
 import itertools
 import re
 from .tokenizer import tokenize_grouped
 class RGBA(collections.namedtuple('RGBA', ['red', 'green', 'blue', 'alpha'])):
    """An RGBA color.
    A tuple of four floats in the 0..1 range: ``(r, g, b, a)``.
    The same values are also available as the ``red``, ``green``, ``blue``
    and ``alpha`` attributes.
    """
    # Without this, every instance of the subclass would carry its own
    # (unused) ``__dict__`` on top of the tuple storage.
    __slots__ = ()
 def parse_color_string(css_string):
    """Parse a string in CSS syntax as a color value.
    Convenience wrapper around :func:`parse_color` for strings that do not
    come from a parsed stylesheet.
    :param css_string:
        An unicode string in CSS syntax. Surrounding white space is ignored.
    :returns:
        Same as :func:`parse_color`; ``None`` if the string is not made of
        exactly one token.
    """
    stripped = css_string.strip()
    token_list = list(tokenize_grouped(stripped))
    if len(token_list) != 1:
        return None
    return parse_color(token_list[0])
 def parse_color(token):
    """Parse a single token as a color value.
    :param token:
        A single :class:`~.token_data.Token` or
        :class:`~.token_data.ContainerToken`, as found eg. in a
        property value.
    :returns:
        * ``None``, if the token is not a valid CSS 3 color value.
          (No exception is raised.)
        * The string ``'currentColor'`` for the *currentColor* keyword.
        * An :class:`RGBA` object (a 4-tuple with attribute access) for
          every other value; keywords, HSL and HSLA are converted to RGBA.
          The alpha channel is clipped to [0, 1], but R, G, or B can be
          out of range (eg. ``rgb(-51, 306, 0)`` is represented as
          ``(-.2, 1.2, 0, 1)``.)
    """
    kind = token.type
    if kind == 'IDENT':
        return COLOR_KEYWORDS.get(token.value.lower())
    if kind == 'HASH':
        # The multiplier repeats each digit of the short #rgb form so that
        # both notations end up as two hex digits per channel.
        for multiplier, regexp in HASH_REGEXPS:
            match = regexp(token.value)
            if not match:
                continue
            red, green, blue = [int(digits * multiplier, 16) / 255
                                for digits in match.groups()]
            return RGBA(red, green, blue, 1.)
        return None
    if kind != 'FUNCTION':
        return None
    args = parse_comma_separated(token.content)
    if not args:
        return None
    name = token.function_name.lower()
    if name == 'rgb':
        return parse_rgb(args, alpha=1.)
    if name == 'hsl':
        return parse_hsl(args, alpha=1.)
    if name in ('rgba', 'hsla'):
        # The fourth argument is the alpha channel, the first three are
        # handled exactly like their rgb() / hsl() counterparts.
        alpha = parse_alpha(args[3:])
        if alpha is not None:
            convert = parse_rgb if name == 'rgba' else parse_hsl
            return convert(args[:3], alpha)
    return None
 def parse_alpha(args):
    """
    Return the alpha value held by ``args``, clipped to the 0..1 range.
    ``args`` must be a list of exactly one INTEGER or NUMBER token;
    otherwise, return None.
    """
    if len(args) != 1:
        return None
    token = args[0]
    if token.type not in ('NUMBER', 'INTEGER'):
        return None
    return min(1, max(0, token.value))
 def parse_rgb(args, alpha):
    """
    If args is a list of 3 INTEGER tokens or 3 PERCENTAGE tokens,
    return an RGBA built from them (integers are divided by 255,
    percentages by 100) and the given alpha.
    Otherwise, return None.
    """
    kinds = [arg.type for arg in args]
    if kinds == ['INTEGER', 'INTEGER', 'INTEGER']:
        scale = 255
    elif kinds == ['PERCENTAGE', 'PERCENTAGE', 'PERCENTAGE']:
        scale = 100
    else:
        return None
    red, green, blue = [arg.value / scale for arg in args[:3]]
    return RGBA(red, green, blue, alpha)
 def parse_hsl(args, alpha):
    """
    If args is a list of 1 INTEGER token (hue) followed by 2 PERCENTAGE
    tokens (saturation, lightness), convert them with hsl_to_rgb() and
    return the result as an RGBA with the given alpha.
    Otherwise, return None.
    """
    kinds = [arg.type for arg in args]
    if kinds != ['INTEGER', 'PERCENTAGE', 'PERCENTAGE']:
        return None
    hue, saturation, lightness = [arg.value for arg in args[:3]]
    red, green, blue = hsl_to_rgb(hue, saturation, lightness)
    return RGBA(red, green, blue, alpha)
 def hsl_to_rgb(hue, saturation, lightness):
    """
    Convert an HSL color to RGB.
    :param hue: degrees (any value; wrapped onto a single turn)
    :param saturation: percentage (clipped to 0..100)
    :param lightness: percentage (clipped to 0..100)
    :returns: (r, g, b) as floats in the 0..1 range
    """
    turn = (hue / 360) % 1
    sat = min(1, max(0, saturation / 100))
    light = min(1, max(0, lightness / 100))
    # Translated from ABC: http://www.w3.org/TR/css3-color/#hsl-color
    if light > 0.5:
        upper = light + sat - light * sat
    else:
        upper = light * (sat + 1)
    lower = light * 2 - upper
    def channel(h):
        # ``h`` is the hue shifted by a third of a turn for red and blue,
        # so bring it back into 0..1 first.
        if h < 0:
            h += 1
        if h > 1:
            h -= 1
        if h * 6 < 1:
            return lower + (upper - lower) * h * 6
        elif h * 2 < 1:
            return upper
        elif h * 3 < 2:
            return lower + (upper - lower) * (2 / 3 - h) * 6
        return lower
    red = channel(turn + 1 / 3)
    green = channel(turn)
    blue = channel(turn - 1 / 3)
    return (red, green, blue)
 def parse_comma_separated(tokens):
    """Split a list of tokens (typically the content of a function token)
    into its arguments.
    Each argument must be a single token, arguments must be separated by
    commas, and white space is allowed anywhere around them.
    Return the list of argument tokens (without commas or white space),
    an empty list if there is nothing but white space,
    or None if the tokens do not follow the layout described above.
    """
    significant = [token for token in tokens if token.type != 'S']
    if not significant:
        return []
    # "arg , arg , arg": an odd number of tokens with commas at odd indexes.
    if len(significant) % 2 == 0:
        return None
    for separator in significant[1::2]:
        if separator.type != 'DELIM' or separator.value != ',':
            return None
    return significant[::2]
 # (multiplier, matcher) pairs tried in order by parse_color() on HASH tokens.
 # Each matcher captures the red, green and blue hex digits; the multiplier
 # says how many times a captured group is repeated to get two hex digits
 # (2 for the short #rgb form, 1 for #rrggbb).
 # Raw strings avoid invalid escape sequences in the literal. The explicit
 # [0-9a-f] class keeps non-ASCII decimal digits out (``\d`` accepts them in
 # unicode patterns, and so does int(..., 16)), and ``\Z`` -- unlike ``$`` --
 # does not let a trailing newline through.
 HASH_REGEXPS = (
    (2, re.compile(r'^#([0-9a-f])([0-9a-f])([0-9a-f])\Z', re.I).match),
    (1, re.compile(r'^#([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})\Z', re.I).match),
 )
 # The 16 basic color keywords as (keyword, (r, g, b)) pairs,
 # with each channel in 0..255. Merged into COLOR_KEYWORDS.
 BASIC_COLOR_KEYWORDS = [
    ('black', (0, 0, 0)),
    ('silver', (192, 192, 192)),
    ('gray', (128, 128, 128)),
    ('white', (255, 255, 255)),
    ('maroon', (128, 0, 0)),
    ('red', (255, 0, 0)),
    ('purple', (128, 0, 128)),
    ('fuchsia', (255, 0, 255)),
    ('green', (0, 128, 0)),
    ('lime', (0, 255, 0)),
    ('olive', (128, 128, 0)),
    ('yellow', (255, 255, 0)),
    ('navy', (0, 0, 128)),
    ('blue', (0, 0, 255)),
    ('teal', (0, 128, 128)),
    ('aqua', (0, 255, 255)),
 ]
 # The extended color keywords as (keyword, (r, g, b)) pairs, in alphabetical
 # order, with each channel in 0..255. The basic keywords appear here again
 # with the same values. Merged into COLOR_KEYWORDS.
 EXTENDED_COLOR_KEYWORDS = [
    ('aliceblue', (240, 248, 255)),
    ('antiquewhite', (250, 235, 215)),
    ('aqua', (0, 255, 255)),
    ('aquamarine', (127, 255, 212)),
    ('azure', (240, 255, 255)),
    ('beige', (245, 245, 220)),
    ('bisque', (255, 228, 196)),
    ('black', (0, 0, 0)),
    ('blanchedalmond', (255, 235, 205)),
    ('blue', (0, 0, 255)),
    ('blueviolet', (138, 43, 226)),
    ('brown', (165, 42, 42)),
    ('burlywood', (222, 184, 135)),
    ('cadetblue', (95, 158, 160)),
    ('chartreuse', (127, 255, 0)),
    ('chocolate', (210, 105, 30)),
    ('coral', (255, 127, 80)),
    ('cornflowerblue', (100, 149, 237)),
    ('cornsilk', (255, 248, 220)),
    ('crimson', (220, 20, 60)),
    ('cyan', (0, 255, 255)),
    ('darkblue', (0, 0, 139)),
    ('darkcyan', (0, 139, 139)),
    ('darkgoldenrod', (184, 134, 11)),
    ('darkgray', (169, 169, 169)),
    ('darkgreen', (0, 100, 0)),
    ('darkgrey', (169, 169, 169)),
    ('darkkhaki', (189, 183, 107)),
    ('darkmagenta', (139, 0, 139)),
    ('darkolivegreen', (85, 107, 47)),
    ('darkorange', (255, 140, 0)),
    ('darkorchid', (153, 50, 204)),
    ('darkred', (139, 0, 0)),
    ('darksalmon', (233, 150, 122)),
    ('darkseagreen', (143, 188, 143)),
    ('darkslateblue', (72, 61, 139)),
    ('darkslategray', (47, 79, 79)),
    ('darkslategrey', (47, 79, 79)),
    ('darkturquoise', (0, 206, 209)),
    ('darkviolet', (148, 0, 211)),
    ('deeppink', (255, 20, 147)),
    ('deepskyblue', (0, 191, 255)),
    ('dimgray', (105, 105, 105)),
    ('dimgrey', (105, 105, 105)),
    ('dodgerblue', (30, 144, 255)),
    ('firebrick', (178, 34, 34)),
    ('floralwhite', (255, 250, 240)),
    ('forestgreen', (34, 139, 34)),
    ('fuchsia', (255, 0, 255)),
    ('gainsboro', (220, 220, 220)),
    ('ghostwhite', (248, 248, 255)),
    ('gold', (255, 215, 0)),
    ('goldenrod', (218, 165, 32)),
    ('gray', (128, 128, 128)),
    ('green', (0, 128, 0)),
    ('greenyellow', (173, 255, 47)),
    ('grey', (128, 128, 128)),
    ('honeydew', (240, 255, 240)),
    ('hotpink', (255, 105, 180)),
    ('indianred', (205, 92, 92)),
    ('indigo', (75, 0, 130)),
    ('ivory', (255, 255, 240)),
    ('khaki', (240, 230, 140)),
    ('lavender', (230, 230, 250)),
    ('lavenderblush', (255, 240, 245)),
    ('lawngreen', (124, 252, 0)),
    ('lemonchiffon', (255, 250, 205)),
    ('lightblue', (173, 216, 230)),
    ('lightcoral', (240, 128, 128)),
    ('lightcyan', (224, 255, 255)),
    ('lightgoldenrodyellow', (250, 250, 210)),
    ('lightgray', (211, 211, 211)),
    ('lightgreen', (144, 238, 144)),
    ('lightgrey', (211, 211, 211)),
    ('lightpink', (255, 182, 193)),
    ('lightsalmon', (255, 160, 122)),
    ('lightseagreen', (32, 178, 170)),
    ('lightskyblue', (135, 206, 250)),
    ('lightslategray', (119, 136, 153)),
    ('lightslategrey', (119, 136, 153)),
    ('lightsteelblue', (176, 196, 222)),
    ('lightyellow', (255, 255, 224)),
    ('lime', (0, 255, 0)),
    ('limegreen', (50, 205, 50)),
    ('linen', (250, 240, 230)),
    ('magenta', (255, 0, 255)),
    ('maroon', (128, 0, 0)),
    ('mediumaquamarine', (102, 205, 170)),
    ('mediumblue', (0, 0, 205)),
    ('mediumorchid', (186, 85, 211)),
    ('mediumpurple', (147, 112, 219)),
    ('mediumseagreen', (60, 179, 113)),
    ('mediumslateblue', (123, 104, 238)),
    ('mediumspringgreen', (0, 250, 154)),
    ('mediumturquoise', (72, 209, 204)),
    ('mediumvioletred', (199, 21, 133)),
    ('midnightblue', (25, 25, 112)),
    ('mintcream', (245, 255, 250)),
    ('mistyrose', (255, 228, 225)),
    ('moccasin', (255, 228, 181)),
    ('navajowhite', (255, 222, 173)),
    ('navy', (0, 0, 128)),
    ('oldlace', (253, 245, 230)),
    ('olive', (128, 128, 0)),
    ('olivedrab', (107, 142, 35)),
    ('orange', (255, 165, 0)),
    ('orangered', (255, 69, 0)),
    ('orchid', (218, 112, 214)),
    ('palegoldenrod', (238, 232, 170)),
    ('palegreen', (152, 251, 152)),
    ('paleturquoise', (175, 238, 238)),
    ('palevioletred', (219, 112, 147)),
    ('papayawhip', (255, 239, 213)),
    ('peachpuff', (255, 218, 185)),
    ('peru', (205, 133, 63)),
    ('pink', (255, 192, 203)),
    ('plum', (221, 160, 221)),
    ('powderblue', (176, 224, 230)),
    ('purple', (128, 0, 128)),
    ('red', (255, 0, 0)),
    ('rosybrown', (188, 143, 143)),
    ('royalblue', (65, 105, 225)),
    ('saddlebrown', (139, 69, 19)),
    ('salmon', (250, 128, 114)),
    ('sandybrown', (244, 164, 96)),
    ('seagreen', (46, 139, 87)),
    ('seashell', (255, 245, 238)),
    ('sienna', (160, 82, 45)),
    ('silver', (192, 192, 192)),
    ('skyblue', (135, 206, 235)),
    ('slateblue', (106, 90, 205)),
    ('slategray', (112, 128, 144)),
    ('slategrey', (112, 128, 144)),
    ('snow', (255, 250, 250)),
    ('springgreen', (0, 255, 127)),
    ('steelblue', (70, 130, 180)),
    ('tan', (210, 180, 140)),
    ('teal', (0, 128, 128)),
    ('thistle', (216, 191, 216)),
    ('tomato', (255, 99, 71)),
    ('turquoise', (64, 224, 208)),
    ('violet', (238, 130, 238)),
    ('wheat', (245, 222, 179)),
    ('white', (255, 255, 255)),
    ('whitesmoke', (245, 245, 245)),
    ('yellow', (255, 255, 0)),
    ('yellowgreen', (154, 205, 50)),
 ]
 # Keywords that are not plain opaque colors: the value is either
 # an RGBA with (r, g, b, a) in 0..1, or a string marker.
 SPECIAL_COLOR_KEYWORDS = {
    'currentcolor': 'currentColor',
    'transparent': RGBA(0., 0., 0., 0.),
 }
 # Lower-case keyword -> RGBA namedtuple of (r, g, b, a) in 0..1,
 # or a string marker. This is the table parse_color() looks keywords up in.
 COLOR_KEYWORDS = dict(SPECIAL_COLOR_KEYWORDS)
 COLOR_KEYWORDS.update(
    # Channels are rescaled linearly: 0 stays 0 and 255 becomes 1.
    # Named colors are always fully opaque.
    (name, RGBA(red / 255., green / 255., blue / 255., 1.))
    for name, (red, green, blue) in itertools.chain(
        BASIC_COLOR_KEYWORDS, EXTENDED_COLOR_KEYWORDS))
--- a/src/tinycss/css21.py
+++ b/src/tinycss/css21.py
@ -0,0 +1,815 @@
 # coding: utf8
 """
    tinycss.css21
    -------------
    Parser for CSS 2.1
    http://www.w3.org/TR/CSS21/syndata.html
    :copyright: (c) 2012 by Simon Sapin.
    :license: BSD, see LICENSE for more details.
 """
 from __future__ import unicode_literals
 from itertools import chain, islice
 from .decoding import decode
 from .token_data import TokenList
 from .tokenizer import tokenize_grouped
 from .parsing import (strip_whitespace, remove_whitespace, split_on_comma,
                      validate_value, validate_block, validate_any, ParseError)
 #  stylesheet  : [ CDO | CDC | S | statement ]*;
 #  statement   : ruleset | at-rule;
 #  at-rule     : ATKEYWORD S* any* [ block | ';' S* ];
 #  block       : '{' S* [ any | block | ATKEYWORD S* | ';' S* ]* '}' S*;
 #  ruleset     : selector? '{' S* declaration? [ ';' S* declaration? ]* '}' S*;
 #  selector    : any+;
 #  declaration : property S* ':' S* value;
 #  property    : IDENT;
 #  value       : [ any | block | ATKEYWORD S* ]+;
 #  any         : [ IDENT | NUMBER | PERCENTAGE | DIMENSION | STRING
 #                | DELIM | URI | HASH | UNICODE-RANGE | INCLUDES
 #                | DASHMATCH | ':' | FUNCTION S* [any|unused]* ')'
 #                | '(' S* [any|unused]* ')' | '[' S* [any|unused]* ']'
 #                ] S*;
 #  unused      : block | ATKEYWORD S* | ';' S* | CDO S* | CDC S*;
 class Stylesheet(object):
    """
    The result of parsing a CSS stylesheet.
    .. attribute:: rules
        The parsed rules in source order: a mix of :class:`RuleSet` and
        at-rules such as :class:`ImportRule`, :class:`MediaRule`
        and :class:`PageRule`.
        Their :obj:`at_keyword` attribute tells them apart.
    .. attribute:: errors
        A list of :class:`~.parsing.ParseError`. Invalid rules and
        declarations are dropped from the result and reported here instead.
    .. attribute:: encoding
        The character encoding that was used to decode the stylesheet
        from bytes, or ``None`` for Unicode stylesheets.
    """
    def __init__(self, rules, errors, encoding):
        self.rules = rules
        self.errors = errors
        self.encoding = encoding
    def __repr__(self):
        template = '<{0.__class__.__name__} {1} rules {2} errors>'
        rule_count = len(self.rules)
        error_count = len(self.errors)
        return template.format(self, rule_count, error_count)
 class AtRule(object):
    """
    An at-rule that has been read but not parsed yet.
    .. attribute:: at_keyword
        The normalized (lower-case) at-keyword as a string. Eg: ``'@page'``
    .. attribute:: head
        Everything between the at-keyword and either the ``{`` that opens
        the body or the ``;`` that ends a body-less at-rule.
        A :class:`~.token_data.TokenList`.
    .. attribute:: body
        The content between ``{`` and ``}`` as a
        :class:`~.token_data.TokenList`, or ``None`` if there is no body
        (ie. if the rule ends with ``;``).
    Only the head was validated against the core grammar. The body was
    **not**: it might contain declarations, and an error in one declaration
    should only skip to the next declaration rather than discard the whole
    rule, as an error in the head would.
    These at-rules are expected to be parsed further before reaching
    the user API.
    """
    def __init__(self, at_keyword, head, body, line, column):
        self.at_keyword = at_keyword
        self.head = TokenList(head)
        # Keep None as the "no body" marker instead of wrapping it.
        if body is not None:
            body = TokenList(body)
        self.body = body
        self.line = line
        self.column = column
    def __repr__(self):
        template = '<{0.__class__.__name__} {0.line}:{0.column} {0.at_keyword}>'
        return template.format(self)
 class RuleSet(object):
    """A ruleset: a selector and its block of declarations.
    .. attribute:: at_keyword
        Always ``None``. Helps to tell rulesets apart from at-rules.
    .. attribute:: selector
        The selector as a :class:`~.token_data.TokenList`.
        In CSS 3, this is actually called a selector group.
        ``rule.selector.as_css()`` gives the selector as a string.
        This string can be used with *cssselect*, see :ref:`selectors3`.
    .. attribute:: declarations
        The list of :class:`Declaration`, in source order.
    """
    at_keyword = None
    def __init__(self, selector, declarations, line, column):
        self.selector = TokenList(selector)
        self.declarations = declarations
        self.line = line
        self.column = column
    def __repr__(self):
        template = '<{0.__class__.__name__} at {0.line}:{0.column} {1}>'
        selector_css = self.selector.as_css()
        return template.format(self, selector_css)
 class Declaration(object):
    """A single property declaration.
    .. attribute:: name
        The property name as a normalized (lower-case) string.
    .. attribute:: value
        The property value as a :class:`~.token_data.TokenList`.
        The value is left unparsed: UAs using tinycss may only support
        some properties or some values, and tinycss does not know which.
        They need to parse values themselves, ignore declarations with
        unknown or unsupported properties or values, and fall back
        on any previous declaration.
        :mod:`tinycss.color3` parses color values, but other values
        will need specific parsing/validation code.
    .. attribute:: priority
        Either the string ``'important'`` or ``None``.
    """
    def __init__(self, name, value, priority, line, column):
        self.name = name
        self.value = TokenList(value)
        self.priority = priority
        self.line = line
        self.column = column
    def __repr__(self):
        # Only show the priority when there is one.
        if self.priority:
            suffix = ' !' + self.priority
        else:
            suffix = ''
        template = ('<{0.__class__.__name__} {0.line}:{0.column}'
                    ' {0.name}: {1}{2}>')
        return template.format(self, self.value.as_css(), suffix)
 class PageRule(object):
    """A parsed CSS 2.1 @page rule.
    .. attribute:: at_keyword
        Always ``'@page'``
    .. attribute:: selector
        The page selector.
        In CSS 2.1 this is either ``None`` (no selector), or one of the
        strings ``'first'``, ``'left'`` or ``'right'``, naming the pseudo
        class that was used.
    .. attribute:: specificity
        Specificity of the page selector. This is a tuple of four integers,
        but these tuples are mostly meant to be compared to each other.
    .. attribute:: declarations
        A list of :class:`Declaration`, in source order.
    .. attribute:: at_rules
        The list of parsed at-rules inside the @page block, in source order.
        Always empty for CSS 2.1.
    """
    at_keyword = '@page'
    def __init__(self, selector, specificity, declarations, at_rules,
                 line, column):
        self.selector = selector
        self.specificity = specificity
        self.declarations = declarations
        self.at_rules = at_rules
        self.line = line
        self.column = column
    def __repr__(self):
        template = ('<{0.__class__.__name__} {0.line}:{0.column}'
                    ' {0.selector}>')
        return template.format(self)
 class MediaRule(object):
    """A parsed @media rule.
    .. attribute:: at_keyword
        Always ``'@media'``
    .. attribute:: media
        For CSS 2.1 without media queries: the media types
        as a list of strings.
    .. attribute:: rules
        The :class:`RuleSet` and at-rules found inside the @media block,
        as a list in source order.
    """
    at_keyword = '@media'
    def __init__(self, media, rules, line, column):
        self.media = media
        self.rules = rules
        self.line = line
        self.column = column
    def __repr__(self):
        template = ('<{0.__class__.__name__} {0.line}:{0.column}'
                    ' {0.media}>')
        return template.format(self)
 class ImportRule(object):
    """A parsed @import rule.
    .. attribute:: at_keyword
        Always ``'@import'``
    .. attribute:: uri
        The URI to be imported, exactly as read from the stylesheet.
        (URIs are not made absolute.)
    .. attribute:: media
        For CSS 2.1 without media queries: the media types
        as a list of strings.
        When the source gives no media, this is explicitly ``['all']``.
    """
    at_keyword = '@import'
    def __init__(self, uri, media, line, column):
        self.uri = uri
        self.media = media
        self.line = line
        self.column = column
    def __repr__(self):
        template = ('<{0.__class__.__name__} {0.line}:{0.column}'
                    ' {0.uri}>')
        return template.format(self)
 def _remove_at_charset(tokens):
    """Drop a valid @charset rule from the beginning of a token stream.
    The rule is only recognized when written exactly as ``@charset``,
    one space, a double-quoted string and ``;``.
    :param tokens:
        An iterable of tokens
    :returns:
        An iterable of the same tokens, minus the leading @charset rule
        if there was one
    """
    stream = iter(tokens)
    # Peek at the first four tokens; they are put back if they do not match.
    leading = list(islice(stream, 4))
    kinds = [token.type for token in leading]
    if kinds == ['ATKEYWORD', 'S', 'STRING', ';']:
        keyword, whitespace, encoding_name, _ = leading
        exact_spelling = (
            (keyword.value, whitespace.value) == ('@charset', ' '))
        if exact_spelling and encoding_name.as_css()[0] == '"':
            # Found a valid @charset rule, only keep what’s after it.
            return stream
    return chain(leading, stream)
 class CSS21Parser(object):
    """Parser for CSS 2.1
    This parser supports the core CSS syntax as well as @import, @media,
    @page and !important.
    Note that property values are still not parsed, as UAs using this
    parser may only support some properties or some values.
    Currently the parser holds no state. It being a class only allows
    subclassing and overriding its methods.
    """
    # User API:
    def parse_stylesheet_file(self, css_file, protocol_encoding=None,
                             linking_encoding=None, document_encoding=None):
        """Parse a stylesheet from a file or filename.
        The character encoding-related parameters and behavior are those
        of :meth:`parse_stylesheet_bytes`, which does the actual work.
        :param css_file:
            Either a file (any object with a :meth:`~file.read` method)
            or a filename, which is then opened in binary mode.
        :return:
            A :class:`Stylesheet`.
        """
        if not hasattr(css_file, 'read'):
            # Not file-like: treat it as a filename.
            with open(css_file, 'rb') as fd:
                css_bytes = fd.read()
        else:
            css_bytes = css_file.read()
        return self.parse_stylesheet_bytes(
            css_bytes, protocol_encoding, linking_encoding, document_encoding)
    def parse_stylesheet_bytes(self, css_bytes, protocol_encoding=None,
                               linking_encoding=None, document_encoding=None):
        """Parse a stylesheet from a byte string.
        The character encoding is determined from the passed metadata and the
        ``@charset`` rule in the stylesheet (if any).
        If no encoding information is available or decoding fails,
        decoding defaults to UTF-8 and then falls back on ISO-8859-1.
        :param css_bytes:
            A CSS stylesheet as a byte string.
        :param protocol_encoding:
            The "charset" parameter of a "Content-Type" HTTP header (if any),
            or similar metadata for other protocols.
        :param linking_encoding:
            ``<link charset="">`` or other metadata from the linking mechanism
            (if any)
        :param document_encoding:
            Encoding of the referring style sheet or document (if any)
        :return:
            A :class:`Stylesheet`.
        """
        decoded = decode(css_bytes, protocol_encoding,
                         linking_encoding, document_encoding)
        # decode() gives back the text and the encoding it settled on.
        text, used_encoding = decoded
        return self.parse_stylesheet(text, encoding=used_encoding)
    def parse_stylesheet(self, css_unicode, encoding=None):
        """Parse a stylesheet from an Unicode string.
        :param css_unicode:
            A CSS stylesheet as an unicode string.
        :param encoding:
            The character encoding used to decode the stylesheet from bytes,
            if any. A leading @charset rule is only dropped when this is set.
        :return:
            A :class:`Stylesheet`.
        """
        stream = tokenize_grouped(css_unicode)
        if encoding:
            stream = _remove_at_charset(stream)
        parsed = self.parse_rules(stream, context='stylesheet')
        rules, errors = parsed
        return Stylesheet(rules, errors, encoding)
    def parse_style_attr(self, css_source):
        """Parse a "style" attribute (eg. of an HTML element).
        Only Unicode is accepted: the source (HTML) document is supposed
        to have taken care of the character encoding.
        :param css_source:
            The attribute value, as an unicode string.
        :return:
            A tuple of the list of valid :class:`Declaration` and
            a list of :class:`~.parsing.ParseError`.
        """
        token_stream = tokenize_grouped(css_source)
        return self.parse_declaration_list(token_stream)
    # API for subclasses:
    def parse_rules(self, tokens, context):
        """Parse a sequence of rules (rulesets and at-rules).
        White space, ``CDO`` and ``CDC`` tokens between rules are skipped.
        A rule that raises :class:`~.parsing.ParseError` is left out of the
        result and the error is recorded instead.
        :param tokens:
            An iterable of tokens.
        :param context:
            Either ``'stylesheet'`` or an at-keyword such as ``'@media'``.
            (Most at-rules are only allowed in some contexts.)
        :return:
            A tuple of a list of parsed rules and a list of
            :class:`~.parsing.ParseError`.
        """
        parsed = []
        problems = []
        # A single shared iterator: the rule readers consume from it the
        # tokens that belong to the rule they are reading.
        stream = iter(tokens)
        for token in stream:
            if token.type in ('S', 'CDO', 'CDC'):
                continue
            try:
                if token.type != 'ATKEYWORD':
                    ruleset, ruleset_errors = self.parse_ruleset(token, stream)
                    parsed.append(ruleset)
                    problems.extend(ruleset_errors)
                else:
                    unparsed = self.read_at_rule(token, stream)
                    at_rule = self.parse_at_rule(
                        unparsed, parsed, problems, context)
                    parsed.append(at_rule)
            except ParseError as exc:
                # Skip the entire rule
                problems.append(exc)
        return parsed, problems
    def read_at_rule(self, at_keyword_token, tokens):
        """Read an at-rule from a token stream.
        :param at_keyword_token:
            The ATKEYWORD token that starts this at-rule
            You may have read it already to distinguish the rule
            from a ruleset.
        :param tokens:
            An iterator of subsequent tokens. Will be consumed just enough
            for one at-rule.
        :return:
            An unparsed :class:`AtRule`.
        :raises:
            :class:`~.parsing.ParseError` if the head is invalid for the core
            grammar. The body is **not** validated. See :class:`AtRule`.
        """
        # CSS syntax is case-insensitive
        at_keyword = at_keyword_token.value.lower()
        head = []
        # Keeps ``token`` bound after the loop in case `tokens` is empty.
        token = at_keyword_token
        for token in tokens:
            # Membership in a tuple, not ``in '{;'``: on a string that is
            # a substring test, which would also accept '' and '{;'.
            if token.type in ('{', ';'):
                break
            head.append(token)
        # On unexpected end of stylesheet, pretend that a ';' was there:
        # the loop ended without finding a block, so the body is None below.
        # White space around the head (eg. just after the at-keyword)
        # is not part of it.
        head = strip_whitespace(head)
        for head_token in head:
            validate_any(head_token, 'at-rule head')
        body = token.content if token.type == '{' else None
        return AtRule(at_keyword, head, body,
                      at_keyword_token.line, at_keyword_token.column)
    def parse_at_rule(self, rule, previous_rules, errors, context):
        """Parse an at-rule.
        Subclasses that override this method must use ``super()`` and
        pass its return value for at-rules they do not know.
        In CSS 2.1, this method handles @charset, @import, @media and @page
        rules. (@charset is always rejected here with a
        :class:`~.parsing.ParseError`.)
        :param rule:
            An unparsed :class:`AtRule`.
        :param previous_rules:
            The list of at-rules and rulesets that have been parsed so far
            in this context. This list can be used to decide if the current
            rule is valid. (For example, @import rules are only allowed
            before anything but a @charset rule.)
        :param errors:
            A list that is extended in place with the errors found while
            parsing the content of a @page or @media block. Such errors do
            not invalidate the rule itself.
        :param context:
            Either ``'stylesheet'`` or an at-keyword such as ``'@media'``.
            (Most at-rules are only allowed in some contexts.)
        :raises:
            :class:`~.parsing.ParseError` if the rule is invalid.
        :return:
            A parsed at-rule
        """
        if rule.at_keyword == '@page':
            if context != 'stylesheet':
                raise ParseError(rule, '@page rule not allowed in ' + context)
            # The selector is checked before the block, so an invalid
            # selector is reported even when the block is missing too.
            selector, specificity = self.parse_page_selector(rule.head)
            if rule.body is None:
                raise ParseError(rule,
                    'invalid {0} rule: missing block'.format(rule.at_keyword))
            declarations, at_rules, rule_errors = \
                self.parse_declarations_and_at_rules(rule.body, '@page')
            errors.extend(rule_errors)
            return PageRule(selector, specificity, declarations, at_rules,
                            rule.line, rule.column)
        elif rule.at_keyword == '@media':
            if context != 'stylesheet':
                raise ParseError(rule, '@media rule not allowed in ' + context)
            if not rule.head:
                raise ParseError(rule, 'expected media types for @media')
            media = self.parse_media(rule.head)
            if rule.body is None:
                raise ParseError(rule,
                    'invalid {0} rule: missing block'.format(rule.at_keyword))
            # The nested rules are parsed with '@media' as their context.
            rules, rule_errors = self.parse_rules(rule.body, '@media')
            errors.extend(rule_errors)
            return MediaRule(media, rules, rule.line, rule.column)
        elif rule.at_keyword == '@import':
            if context != 'stylesheet':
                raise ParseError(rule,
                    '@import rule not allowed in ' + context)
            # Anything other than @charset or @import before this rule
            # makes it invalid.
            for previous_rule in previous_rules:
                if previous_rule.at_keyword not in ('@charset', '@import'):
                    if previous_rule.at_keyword:
                        type_ = 'an {0} rule'.format(previous_rule.at_keyword)
                    else:
                        type_ = 'a ruleset'
                    # NOTE(review): the error is attached to the earlier
                    # rule, not to this @import -- confirm this is intended.
                    raise ParseError(previous_rule,
                        '@import rule not allowed after ' + type_)
            head = rule.head
            if not head:
                raise ParseError(rule,
                    'expected URI or STRING for @import rule')
            if head[0].type not in ('URI', 'STRING'):
                raise ParseError(rule,
                    'expected URI or STRING for @import rule, got '
                    + head[0].type)
            uri = head[0].value
            # Whatever follows the URI is the (optional) list of media.
            media = self.parse_media(strip_whitespace(head[1:]))
            if rule.body is not None:
                # The position of the ';' token would be best, but we don’t
                # have it anymore here.
                raise ParseError(head[-1], "expected ';', got a block")
            return ImportRule(uri, media, rule.line, rule.column)
        elif rule.at_keyword == '@charset':
            raise ParseError(rule, 'mis-placed or malformed @charset rule')
        else:
            raise ParseError(rule, 'unknown at-rule in {0} context: {1}'
                                    .format(context, rule.at_keyword))
    def parse_media(self, tokens):
        """Parse the comma-separated media type list of a CSS 2.1 rule.
        Parsers supporting Media Queries are expected to override this.
        :param tokens:
            A list of tokens. An empty list stands for every medium.
        :raises:
            :class:`~.parsing.ParseError` if a comma-separated part is
            anything other than a single IDENT token.
        :returns:
            For CSS 2.1, a list of media types as strings;
            ``['all']`` when ``tokens`` is empty.
        """
        if not tokens:
            return ['all']
        media_types = []
        # White space carries no meaning here, so drop it before splitting.
        for part in split_on_comma(remove_whitespace(tokens)):
            kinds = [token.type for token in part]
            if kinds != ['IDENT']:
                got = (', got ' + ', '.join(kinds)) if kinds else ''
                raise ParseError(tokens[0], 'expected a media type' + got)
            media_types.append(part[0].value)
        return media_types
    def parse_page_selector(self, tokens):
        """Parse the selector found in the head of an @page rule.
        :param tokens:
            A list of tokens, typically the ``head`` attribute of
            an unparsed :class:`AtRule`.
        :returns:
            A ``(selector, specificity)`` tuple. For CSS 2.1 the selector
            is ``'first'``, ``'left'``, ``'right'`` or ``None`` (empty
            head), and the specificity is a tuple of two integers.
        :raises:
            :class:`~.parsing.ParseError` on invalid selectors
        """
        if not tokens:
            return None, (0, 0)
        known_pseudo_classes = {
            'first': (1, 0),
            'left': (0, 1),
            'right': (0, 1),
        }
        # The only valid non-empty form is ':' immediately followed by IDENT.
        if len(tokens) == 2:
            colon, ident = tokens[0], tokens[1]
            if colon.type == ':' and ident.type == 'IDENT':
                specificity = known_pseudo_classes.get(ident.value)
                if specificity:
                    return ident.value, specificity
        raise ParseError(tokens[0], 'invalid @page selector')
    def parse_declarations_and_at_rules(self, tokens, context):
        """Parse a body that mixes declarations and at-rules, such as the
        content of an @page rule.
        Note that to add supported at-rules inside @page,
        :class:`~.page3.CSSPage3Parser` extends :meth:`parse_at_rule`,
        not this method.
        :param tokens:
            An iterable of token, typically from the  ``body`` attribute of
            an unparsed :class:`AtRule`.
        :param context:
            An at-keyword such as ``'@page'``, passed on to
            :meth:`parse_at_rule`.
        :returns:
            A tuple of:
            * A list of :class:`Declaration`
            * A list of parsed at-rules (empty for CSS 2.1)
            * A list of :class:`~.parsing.ParseError`
        """
        declarations, at_rules, errors = [], [], []
        # A single shared iterator: read_at_rule() and the declaration loop
        # below both advance it, so the outer loop resumes after them.
        stream = iter(tokens)
        for token in stream:
            if token.type == 'S':
                continue
            if token.type == 'ATKEYWORD':
                try:
                    unparsed = self.read_at_rule(token, stream)
                    parsed = self.parse_at_rule(
                        unparsed, at_rules, errors, context)
                    at_rules.append(parsed)
                except ParseError as exc:
                    errors.append(exc)
                continue
            # Anything else starts a declaration that runs up to the next
            # ';' or to the end of the token stream.
            pending = []
            while token and token.type != ';':
                pending.append(token)
                token = next(stream, None)
            if not pending:
                continue  # a stray ';'
            try:
                declarations.append(self.parse_declaration(pending))
            except ParseError as exc:
                errors.append(exc)
        return declarations, at_rules, errors
    def parse_ruleset(self, first_token, tokens):
        """Parse a ruleset: a selector followed by a declaration block.
        :param first_token:
            The first token of the ruleset (probably of the selector).
            You may have read it already to distinguish the rule
            from an at-rule.
        :param tokens:
            an iterator of subsequent tokens. It is consumed up to and
            including the ``{`` block token of this ruleset, no further.
        :return:
            a tuple of a :class:`RuleSet` and an error list.
            The errors are recovered :class:`~.parsing.ParseError` in
            declarations. (Parsing continues from the next declaration
            on such errors.)
        :raises:
            :class:`~.parsing.ParseError` if no block is found, if the
            selector is empty, or if the selector is invalid for the
            core grammar.
            Note that a selector can be valid for the core grammar but
            not for CSS 2.1 or another level.
        """
        selector = []
        block = None
        for token in chain([first_token], tokens):
            if token.type == '{':
                block = token
                break
            selector.append(token)
        if block is None:
            # ``token`` is the last token read (``first_token`` at least).
            raise ParseError(token, 'no declaration block found for ruleset')
        # Parse/validate once we’ve read the whole rule
        selector = strip_whitespace(selector)
        if not selector:
            raise ParseError(first_token, 'empty selector')
        for selector_token in selector:
            validate_any(selector_token, 'selector')
        declarations, errors = self.parse_declaration_list(block.content)
        ruleset = RuleSet(
            selector, declarations, first_token.line, first_token.column)
        return ruleset, errors
    def parse_declaration_list(self, tokens):
        """Parse a list of declarations separated by ``;``.
        You may want to use :meth:`parse_declarations_and_at_rules` (or
        some other method that uses :func:`parse_declaration` directly)
        instead if you have not just declarations in the same context.
        :param tokens:
            an iterable of tokens. Should stop at (before) the end
            of the block, as marked by ``}``.
        :return:
            a tuple of the list of valid :class:`Declaration` and a list
            of :class:`~.parsing.ParseError`
        """
        # Split at ';': every ';' closes the current group and opens a new one.
        groups = [[]]
        for token in tokens:
            if token.type == ';':
                groups.append([])
            else:
                groups[-1].append(token)
        declarations, errors = [], []
        for group in groups:
            group = strip_whitespace(group)
            if not group:
                continue  # nothing but white space between two ';'
            try:
                declarations.append(self.parse_declaration(group))
            except ParseError as exc:
                # Record the error and skip the entire declaration
                errors.append(exc)
        return declarations, errors
    def parse_declaration(self, tokens):
        """Parse one ``name: value`` declaration.
        :param tokens:
            an iterable of at least one token. Should stop at (before)
            the end of the declaration, as marked by a ``;`` or ``}``.
            Empty declarations (ie. consecutive ``;`` with only white space
            in-between) should be skipped earlier and not passed to
            this method.
        :returns:
            a :class:`Declaration`
        :raises:
            :class:`~.parsing.ParseError` if the tokens do not match the
            'declaration' production of the core grammar.
        """
        remaining = iter(tokens)
        name_token = next(remaining)  # assume there is at least one
        if name_token.type != 'IDENT':
            raise ParseError(name_token,
                'expected a property name, got {0}'.format(name_token.type))
        # CSS syntax is case-insensitive
        property_name = name_token.value.lower()
        # Only white space may sit between the name and the ':'.
        token = name_token  # In case ``remaining`` is now empty
        for token in remaining:
            if token.type == ':':
                break
            if token.type != 'S':
                raise ParseError(
                    token, "expected ':', got {0}".format(token.type))
        else:
            raise ParseError(token, "expected ':'")
        # Everything after the ':' is the value.
        value = strip_whitespace(list(remaining))
        if not value:
            raise ParseError(token, 'expected a property value')
        validate_value(value)
        value, priority = self.parse_value_priority(value)
        return Declaration(
            property_name, value, priority, name_token.line, name_token.column)
    def parse_value_priority(self, tokens):
        """Split a trailing ``!important`` marker off a property value.
        :param tokens:
            A non-empty list of tokens for the property value.
        :returns:
            A tuple of the actual property value (a list of tokens)
            and the :attr:`~Declaration.priority`: ``'important'`` if the
            marker was found, otherwise ``None`` (and ``tokens`` is
            returned unchanged as the value).
        :raises:
            :class:`~.parsing.ParseError` if nothing precedes the marker.
        """
        # Work backwards on a copy so ``tokens`` itself is left untouched.
        remaining = list(tokens)
        last = remaining.pop()
        if not (last.type == 'IDENT' and last.value.lower() == 'important'):
            return tokens, None
        while remaining:
            candidate = remaining.pop()
            if candidate.type == 'S':
                # Skip white space between '!' and 'important'
                continue
            if not (candidate.type == 'DELIM' and candidate.value == '!'):
                # 'important' was an ordinary identifier of the value.
                break
            # Skip any white space before the '!'
            while remaining and remaining[-1].type == 'S':
                remaining.pop()
            if not remaining:
                raise ParseError(
                    candidate, 'expected a value before !important')
            return remaining, 'important'
        return tokens, None
--- a/src/tinycss/decoding.py
+++ b/src/tinycss/decoding.py
@ -0,0 +1,254 @@
 # coding: utf8
 """
    tinycss.decoding
    ----------------
    Decoding stylesheets from bytes to Unicode.
    http://www.w3.org/TR/CSS21/syndata.html#charset
    :copyright: (c) 2012 by Simon Sapin.
    :license: BSD, see LICENSE for more details.
 """
 from __future__ import unicode_literals
 from binascii import unhexlify
 import operator
 import re
 import sys
 __all__ = ['decode']  # Everything else is implementation detail
 def decode(css_bytes, protocol_encoding=None,
            linking_encoding=None, document_encoding=None):
    """
    Determine the character encoding from the passed metadata and the
    ``@charset`` rule in the stylesheet (if any); and decode accordingly.
    Candidates are tried in this order: ``protocol_encoding``, the BOM
    and/or ``@charset`` rule found at the start of ``css_bytes``,
    ``linking_encoding``, ``document_encoding``, UTF-8 and finally
    ISO-8859-1. A candidate that is unknown or fails to decode is skipped.
    :param css_bytes:
        a CSS stylesheet as a byte string
    :param protocol_encoding:
        The "charset" parameter of a "Content-Type" HTTP header (if any),
        or similar metadata for other protocols.
    :param linking_encoding:
        ``<link charset="">`` or other metadata from the linking mechanism
        (if any)
    :param document_encoding:
        Encoding of the referring style sheet or document (if any)
    :return:
        A tuple of an Unicode string, with any BOM removed, and the
        encoding that was used.
    """
    if protocol_encoding:
        css_unicode = try_encoding(css_bytes, protocol_encoding)
        if css_unicode is not None:
            return css_unicode, protocol_encoding
    for encoding, pattern in ENCODING_MAGIC_NUMBERS:
        match = pattern(css_bytes)
        if match:
            # A tuple entry means "use the name given in the @charset rule".
            has_at_charset = isinstance(encoding, tuple)
            if has_at_charset:
                extract, endianness = encoding
                encoding = extract(match.group(1))
                # Get an ASCII-only unicode value.
                # This is the only thing that works on both Python 2 and 3
                # for bytes.decode()
                # Non-ASCII encoding names are invalid anyway,
                # but make sure they stay invalid.
                encoding = encoding.decode('ascii', 'replace')
                encoding = encoding.replace('\ufffd', '?')
                if encoding.replace('-', '').replace('_', '').lower() in [
                        'utf16', 'utf32']:
                    encoding += endianness
                encoding = encoding.encode('ascii', 'replace').decode('ascii')
            css_unicode = try_encoding(css_bytes, encoding)
            # Compare with None rather than testing truthiness: a stylesheet
            # made of nothing but a BOM decodes to '' and that is a success,
            # not a reason to fall back on another encoding.
            # With an @charset rule, the decoded text must still start with
            # that rule, otherwise the declared encoding cannot be right.
            if css_unicode is not None and (
                    not has_at_charset
                    or css_unicode.startswith('@charset "')):
                return css_unicode, encoding
            # Only the first matching pattern is ever considered.
            break
    for encoding in [linking_encoding, document_encoding]:
        if encoding:
            css_unicode = try_encoding(css_bytes, encoding)
            if css_unicode is not None:
                return css_unicode, encoding
    css_unicode = try_encoding(css_bytes, 'UTF-8')
    if css_unicode is not None:
        return css_unicode, 'UTF-8'
    # Last resort: decoding errors are not caught for this attempt.
    return try_encoding(css_bytes, 'ISO-8859-1', fallback=False), 'ISO-8859-1'
 def try_encoding(css_bytes, encoding, fallback=True):
    """Decode ``css_bytes`` with ``encoding`` and drop a leading BOM.
    :param css_bytes: the byte string to decode
    :param encoding: name of the encoding to try
    :param fallback:
        if true, return ``None`` when the encoding is unknown or the bytes
        are invalid for it; if false, let the exception propagate.
    :return: the decoded text without its Byte Order Mark, or ``None``
    """
    try:
        css_unicode = css_bytes.decode(encoding)
    except (UnicodeDecodeError, LookupError):
        # LookupError means unknown encoding
        if not fallback:
            raise
        return None
    # Remove any Byte Order Mark
    if css_unicode[:1] == '\ufeff':
        return css_unicode[1:]
    return css_unicode
 def hex2re(hex_data):
    """Build a regex fragment matching literally the bytes written in
    ``hex_data``, a string of hexadecimal digits where spaces are ignored.
    """
    compact = hex_data.replace(' ', '')
    raw_bytes = unhexlify(compact.encode('ascii'))
    return re.escape(raw_bytes)
 class Slicer(object):
    """Factory of slicing callables, used through the ``Slice`` instance.
    ``Slice[start:stop:step]`` returns a callable; calling it with a
    sequence returns ``sequence[start:stop:step]``.
    """
    def __getitem__(self, slice_):
        # itemgetter(key)(obj) evaluates obj[key]
        extract = operator.itemgetter(slice_)
        return extract
 Slice = Slicer()
 # List of (encoding, pattern) pairs, tried in order by decode(): only the
 # first pattern that matches the start of the byte string is considered.
 #   encoding is either an encoding name (a BOM without @charset rule) or
 #     a tuple (extract, endianness) for "as specified by @charset":
 #     extract is a callable made with Slice[...] that pulls the bytes of
 #     the encoding name out of the matched group, and endianness is the
 #     suffix decode() appends to a bare UTF-16 / UTF-32 name.
 #   pattern is the bound ``match`` method of a compiled bytes regex.
 ENCODING_MAGIC_NUMBERS = [
    ((Slice[:], ''), re.compile(
        hex2re('EF BB BF 40 63 68 61 72 73 65 74 20 22')
        + b'([^\x22]*?)'
        + hex2re('22 3B')).match),
    ('UTF-8', re.compile(
        hex2re('EF BB BF')).match),
    ((Slice[:], ''), re.compile(
        hex2re('40 63 68 61 72 73 65 74 20 22')
        + b'([^\x22]*?)'
        + hex2re('22 3B')).match),
    ((Slice[1::2], '-BE'), re.compile(
        hex2re('FE FF 00 40 00 63 00 68 00 61 00 72 00 73 00 65 00'
               '74 00 20 00 22')
        + b'((\x00[^\x22])*?)'
        + hex2re('00 22 00 3B')).match),
    ((Slice[1::2], '-BE'), re.compile(
        hex2re('00 40 00 63 00 68 00 61 00 72 00 73 00 65 00 74 00'
               '20 00 22')
        + b'((\x00[^\x22])*?)'
        + hex2re('00 22 00 3B')).match),
    ((Slice[::2], '-LE'), re.compile(
        hex2re('FF FE 40 00 63 00 68 00 61 00 72 00 73 00 65 00 74'
               '00 20 00 22 00')
        + b'(([^\x22]\x00)*?)'
        + hex2re('22 00 3B 00')).match),
    ((Slice[::2], '-LE'), re.compile(
        hex2re('40 00 63 00 68 00 61 00 72 00 73 00 65 00 74 00 20'
               '00 22 00')
        + b'(([^\x22]\x00)*?)'
        + hex2re('22 00 3B 00')).match),
    ((Slice[3::4], '-BE'), re.compile(
        hex2re('00 00 FE FF 00 00 00 40 00 00 00 63 00 00 00 68 00'
               '00 00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00'
               '00 74 00 00 00 20 00 00 00 22')
        + b'((\x00\x00\x00[^\x22])*?)'
        + hex2re('00 00 00 22 00 00 00 3B')).match),
    ((Slice[3::4], '-BE'), re.compile(
        hex2re('00 00 00 40 00 00 00 63 00 00 00 68 00 00 00 61 00'
               '00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00'
               '00 20 00 00 00 22')
        + b'((\x00\x00\x00[^\x22])*?)'
        + hex2re('00 00 00 22 00 00 00 3B')).match),
 # Python does not support 2143 or 3412 endianness, AFAIK.
 # I guess we could fix it up ourselves but meh. Patches welcome.
 #    ((Slice[2::4], '-2143'), re.compile(
 #        hex2re('00 00 FF FE 00 00 40 00 00 00 63 00 00 00 68 00 00'
 #               '00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00'
 #               '74 00 00 00 20 00 00 00 22 00')
 #        + b'((\x00\x00[^\x22]\x00)*?)'
 #        + hex2re('00 00 22 00 00 00 3B 00')).match),
 #    ((Slice[2::4], '-2143'), re.compile(
 #        hex2re('00 00 40 00 00 00 63 00 00 00 68 00 00 00 61 00 00'
 #               '00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00'
 #               '20 00 00 00 22 00')
 #        + b'((\x00\x00[^\x22]\x00)*?)'
 #        + hex2re('00 00 22 00 00 00 3B 00')).match),
 #    ((Slice[1::4], '-3412'), re.compile(
 #        hex2re('FE FF 00 00 00 40 00 00 00 63 00 00 00 68 00 00 00'
 #               '61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74'
 #               '00 00 00 20 00 00 00 22 00 00')
 #        + b'((\x00[^\x22]\x00\x00)*?)'
 #        + hex2re('00 22 00 00 00 3B 00 00')).match),
 #    ((Slice[1::4], '-3412'), re.compile(
 #        hex2re('00 40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00'
 #               '72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20'
 #               '00 00 00 22 00 00')
 #        + b'((\x00[^\x22]\x00\x00)*?)'
 #        + hex2re('00 22 00 00 00 3B 00 00')).match),
    ((Slice[::4], '-LE'), re.compile(
        hex2re('FF FE 00 00 40 00 00 00 63 00 00 00 68 00 00 00 61'
               '00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00'
               '00 00 20 00 00 00 22 00 00 00')
        + b'(([^\x22]\x00\x00\x00)*?)'
        + hex2re('22 00 00 00 3B 00 00 00')).match),
    ((Slice[::4], '-LE'), re.compile(
        hex2re('40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00 72'
               '00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20 00'
               '00 00 22 00 00 00')
        + b'(([^\x22]\x00\x00\x00)*?)'
        + hex2re('22 00 00 00 3B 00 00 00')).match),
    ('UTF-32-BE', re.compile(
        hex2re('00 00 FE FF')).match),
    ('UTF-32-LE', re.compile(
        hex2re('FF FE 00 00')).match),
 #    ('UTF-32-2143', re.compile(
 #        hex2re('00 00 FF FE')).match),
 #    ('UTF-32-3412', re.compile(
 #        hex2re('FE FF 00 00')).match),
    ('UTF-16-BE', re.compile(
        hex2re('FE FF')).match),
    ('UTF-16-LE', re.compile(
        hex2re('FF FE')).match),
 # Some of these are supported by Python, but I didn’t bother.
 # You know the story with patches ...
 #    # as specified, transcoded from EBCDIC to ASCII
 #    ('as_specified-EBCDIC', re.compile(
 #        hex2re('7C 83 88 81 99 A2 85 A3 40 7F')
 #        + b'([^\x7F]*?)'
 #        + hex2re('7F 5E')).match),
 #    # as specified, transcoded from IBM1026 to ASCII
 #    ('as_specified-IBM1026', re.compile(
 #        hex2re('AE 83 88 81 99 A2 85 A3 40 FC')
 #        + b'([^\xFC]*?)'
 #        + hex2re('FC 5E')).match),
 #    # as specified, transcoded from GSM 03.38 to ASCII
 #    ('as_specified-GSM_03.38', re.compile(
 #        hex2re('00 63 68 61 72 73 65 74 20 22')
 #        + b'([^\x22]*?)'
 #        + hex2re('22 3B')).match),
 ]
--- a/src/tinycss/page3.py
+++ b/src/tinycss/page3.py
@ -0,0 +1,159 @@
 # coding: utf8
 """
    tinycss.page3
    ------------------
    Support for CSS 3 Paged Media syntax:
    http://dev.w3.org/csswg/css3-page/
    Adds support for named page selectors and margin rules.
    :copyright: (c) 2012 by Simon Sapin.
    :license: BSD, see LICENSE for more details.
 """
 from __future__ import unicode_literals, division
 from .css21 import CSS21Parser, ParseError
 class MarginRule(object):
    """A parsed margin box at-rule, as found inside an @page rule.
    .. attribute:: at_keyword
        One of the 16 following strings:
        * ``@top-left-corner``
        * ``@top-left``
        * ``@top-center``
        * ``@top-right``
        * ``@top-right-corner``
        * ``@bottom-left-corner``
        * ``@bottom-left``
        * ``@bottom-center``
        * ``@bottom-right``
        * ``@bottom-right-corner``
        * ``@left-top``
        * ``@left-middle``
        * ``@left-bottom``
        * ``@right-top``
        * ``@right-middle``
        * ``@right-bottom``
    .. attribute:: declarations
        A list of :class:`~.css21.Declaration` objects.
    .. attribute:: line
        Source line where this was read.
    .. attribute:: column
        Source column where this was read.
    """
    def __init__(self, at_keyword, declarations, line, column):
        # Position of the rule in the source
        self.line, self.column = line, column
        # Content of the rule
        self.at_keyword = at_keyword
        self.declarations = declarations
 class CSSPage3Parser(CSS21Parser):
    """Extend :class:`~.css21.CSS21Parser` for `CSS 3 Paged Media`_ syntax.
    .. _CSS 3 Paged Media: http://dev.w3.org/csswg/css3-page/
    Compared to CSS 2.1, the ``at_rules`` and ``selector`` attributes of
    :class:`~.css21.PageRule` objects are modified:
    * ``at_rules`` is not always empty, it is a list of :class:`MarginRule`
      objects.
    * ``selector``, instead of a single string, is a tuple of the page name
      and the pseudo class. Each of these may be a ``None`` or a string.
    +--------------------------+------------------------+
    | CSS                      | Parsed selectors       |
    +==========================+========================+
    | .. code-block:: css      | .. code-block:: python |
    |                          |                        |
    |     @page {}             |     (None, None)       |
    |     @page :first {}      |     (None, 'first')    |
    |     @page chapter {}     |     ('chapter', None)  |
    |     @page table:right {} |     ('table', 'right') |
    +--------------------------+------------------------+
    """
    # At-keywords of the margin boxes allowed inside an @page rule.
    PAGE_MARGIN_AT_KEYWORDS = [
        '@top-left-corner',
        '@top-left',
        '@top-center',
        '@top-right',
        '@top-right-corner',
        '@bottom-left-corner',
        '@bottom-left',
        '@bottom-center',
        '@bottom-right',
        '@bottom-right-corner',
        '@left-top',
        '@left-middle',
        '@left-bottom',
        '@right-top',
        '@right-middle',
        '@right-bottom',
    ]
    def parse_at_rule(self, rule, previous_rules, errors, context):
        """Parse margin box at-rules; defer any other at-rule to the
        parent class.
        :param rule:
            An unparsed at-rule with ``at_keyword``, ``head``, ``body``,
            ``line`` and ``column`` attributes.
        :param previous_rules:
            The rules already parsed in this context (only passed on to
            the parent class).
        :param errors:
            A list; errors recovered in the margin rule’s declarations
            are appended to it.
        :param context:
            Either ``'stylesheet'`` or an at-keyword such as ``'@page'``.
            Margin rules are only allowed in ``'@page'``.
        :raises:
            :class:`~.parsing.ParseError` if a margin rule is in the wrong
            context, has a non-empty head or has no block.
        :return:
            A :class:`MarginRule`, or whatever the parent class returns.
        """
        if rule.at_keyword in self.PAGE_MARGIN_AT_KEYWORDS:
            if context != '@page':
                raise ParseError(rule,
                    '%s rule not allowed in %s' % (rule.at_keyword, context))
            if rule.head:
                raise ParseError(rule.head[0],
                    'unexpected %s token in %s rule header'
                    % (rule.head[0].type, rule.at_keyword))
            # A rule ended by ';' has no body. Report it as a parse error,
            # like CSS21Parser does for @page and @media, rather than
            # failing with a TypeError when iterating over None below.
            if rule.body is None:
                raise ParseError(rule,
                    'invalid {0} rule: missing block'.format(rule.at_keyword))
            declarations, body_errors = self.parse_declaration_list(rule.body)
            errors.extend(body_errors)
            return MarginRule(rule.at_keyword, declarations,
                              rule.line, rule.column)
        return super(CSSPage3Parser, self).parse_at_rule(
            rule, previous_rules, errors, context)
    def parse_page_selector(self, head):
        """Parse an @page selector.
        :param head:
            The ``head`` attribute of an unparsed :class:`AtRule`.
            It is not modified.
        :returns:
            A ``(selector, specificity)`` tuple. The selector is a
            ``(page_name, pseudo_class)`` tuple where each item is a string
            or ``None``; accepted pseudo-classes are ``'first'``,
            ``'blank'``, ``'left'`` and ``'right'``. The specificity is a
            tuple of three integers.
        :raises:
            :class:`~.parsing.ParseError` on invalid selectors
        """
        if not head:
            return (None, None), (0, 0, 0)
        # Work on a copy: the tokens consumed below must not disappear
        # from the caller’s list.
        head = list(head)
        if head[0].type == 'IDENT':
            name = head.pop(0).value
            while head and head[0].type == 'S':
                head.pop(0)
            if not head:
                return (name, None), (1, 0, 0)
            name_specificity = (1,)
        else:
            name = None
            name_specificity = (0,)
        # What remains must be exactly ':' followed by a known pseudo-class.
        if (len(head) == 2 and head[0].type == ':'
                and head[1].type == 'IDENT'):
            pseudo_class = head[1].value
            specificity = {
                'first': (1, 0), 'blank': (1, 0),
                'left': (0, 1), 'right': (0, 1),
            }.get(pseudo_class)
            if specificity:
                return (name, pseudo_class), (name_specificity + specificity)
        raise ParseError(head[0], 'invalid @page selector')
--- a/src/tinycss/parsing.py
+++ b/src/tinycss/parsing.py
@ -0,0 +1,165 @@
 # coding: utf8
 """
    tinycss.parsing
    ---------------
    Utilities for parsing lists of tokens.
    :copyright: (c) 2012 by Simon Sapin.
    :license: BSD, see LICENSE for more details.
 """
 from __future__ import unicode_literals
 # TODO: unit tests
 def split_on_comma(tokens):
    """Split tokens into groups at every ``,`` DELIM token.
    Only the tokens of the given iterable are looked at, so commas inside
    a function or other :class:`ContainerToken` are not splitting points.
    :param tokens:
        An iterable of :class:`~.token_data.Token` or
        :class:`~.token_data.ContainerToken`.
    :returns:
        A list of lists of tokens: one more list than there are commas,
        some of them possibly empty.
    """
    parts = [[]]
    for token in tokens:
        is_comma = token.type == 'DELIM' and token.value == ','
        if is_comma:
            # Close the current group and open the next one.
            parts.append([])
        else:
            parts[-1].append(token)
    return parts
 def strip_whitespace(tokens):
    """Remove whitespace at the beginning and end of a token list.
    Whitespace tokens in-between other tokens in the list are preserved.
    :param tokens:
        A list of :class:`~.token_data.Token` or
        :class:`~.token_data.ContainerToken`. It is not modified.
    :return:
        A new sub-sequence of the list; an empty list if there is
        nothing but whitespace.
    """
    first = next(
        (index for index, token in enumerate(tokens) if token.type != 'S'),
        None)
    if first is None:
        return []  # only whitespace
    # The slice is a copy, so trimming its end leaves ``tokens`` untouched.
    stripped = tokens[first:]
    while stripped and stripped[-1].type == 'S':
        stripped.pop()
    return stripped
 def remove_whitespace(tokens):
    """Drop every whitespace token found directly in a token list.
    Whitespace tokens inside recursive :class:`~.token_data.ContainerToken`
    are preserved.
    :param tokens:
        A list of :class:`~.token_data.Token` or
        :class:`~.token_data.ContainerToken`.
    :return:
        A new list with the remaining tokens, in their original order.
    """
    kept = []
    for token in tokens:
        if token.type == 'S':
            continue
        kept.append(token)
    return kept
 def validate_value(tokens):
    """Check a property value against the core grammar.
    :param tokens:
        an iterable of tokens
    :raises:
        :class:`ParseError` if there is any invalid token for the 'value'
        production of the core grammar.
    """
    context = 'property value'
    for token in tokens:
        if token.type != '{':
            validate_any(token, context)
        else:
            # A {} block is validated through its content.
            validate_block(token.content, context)
 def validate_block(tokens, context):
    """Check the content of a {} block against the core grammar.
    :raises:
        :class:`ParseError` if there is any invalid token for the 'block'
        production of the core grammar.
    :param tokens: an iterable of tokens
    :param context: a string for the 'unexpected in ...' message
    """
    for token in tokens:
        kind = token.type
        if kind in (';', 'ATKEYWORD'):
            # Accepted as-is inside a block
            continue
        if kind == '{':
            validate_block(token.content, context)
        else:
            validate_any(token, context)
 def validate_any(token, context):
    """Check a single token against the core grammar.
    :raises:
        :class:`ParseError` if this is an invalid token for the
        'any' production of the core grammar.
    :param token: a single token
    :param context: a string for the 'unexpected in ...' message
    """
    kind = token.type
    if kind in ('FUNCTION', '(', '['):
        # A container is valid when all of its content is; errors inside
        # it are reported with the container type as their context.
        for child in token.content:
            validate_any(child, kind)
        return
    if kind in ('S', 'IDENT', 'DIMENSION', 'PERCENTAGE', 'NUMBER',
                'INTEGER', 'URI', 'DELIM', 'STRING', 'HASH', ':',
                'UNICODE-RANGE'):
        return
    adjective = 'unmatched' if kind in ('}', ')', ']') else 'unexpected'
    raise ParseError(token,
        '{0} {1} token in {2}'.format(adjective, kind, context))
 class ParseError(ValueError):
    """Details about a CSS syntax error. Usually indicates that something
    (a rule or a declaration) was ignored and will not appear as a parsed
    object.
    This exception is typically logged in a list rather than being propagated
    to the user API.
    .. attribute:: line
        Source line where the error occurred.
    .. attribute:: column
        Column in the source line where the error occurred.
    .. attribute:: reason
        What happened (a string).
    """
    def __init__(self, subject, reason):
        # ``subject`` is the token or rule the error is about; only its
        # position is kept.
        self.reason = reason
        self.line, self.column = subject.line, subject.column
        message = 'Parse error at {0.line}:{0.column}, {0.reason}'.format(self)
        super(ParseError, self).__init__(message)
--- a/src/tinycss/token_data.py
+++ b/src/tinycss/token_data.py
@ -0,0 +1,441 @@
 # coding: utf8
 """
    tinycss.token_data
    ------------------
    Shared data for both implementations (Cython and Python) of the tokenizer.
    :copyright: (c) 2012 by Simon Sapin.
    :license: BSD, see LICENSE for more details.
 """
 from __future__ import unicode_literals
 import re
 import sys
 import operator
 import functools
 import string
 # * Raw strings with the r'' notation are used so that \ do not need
 #   to be escaped.
 # * Names and regexps are separated by a tabulation.
 # * Macros are re-ordered so that only previous definitions are needed.
 # * {} are used for macro substitution with ``string.Formatter``,
 #   so other uses of { or } have been doubled.
 # * The syntax is otherwise compatible with re.compile.
 # * Some parentheses were added to add capturing groups.
 #   (in unicode, DIMENSION and URI)
 # *** Willful violation: ***
 # Numbers can take a + or - sign, but the sign is a separate DELIM token.
 # Since comments are allowed anywhere between tokens, this makes
 # the following valid. It means 10 negative pixels:
 #    margin-top: -/**/10px
 # This makes parsing numbers a pain, so instead we’ll do the same as Firefox
 # and make the sign part of the 'num' macro. The above CSS will be invalid.
 # See discussion:
 # http://lists.w3.org/Archives/Public/www-style/2011Oct/0028.html
 # Macro table read by ``_init()``: one ``name<TAB>regexp`` definition per
 # non-blank line, expanded from top to bottom into COMPILED_MACROS.
 # The two ``.replace()`` calls at the end swap the ``\0`` and ``\237``
 # sequences (kept verbatim by the raw string) for the actual characters,
 # which are the bounds of the ``nonascii`` character class.
 MACROS = r'''
    nl	\n|\r\n|\r|\f
    w	[ \t\r\n\f]*
    nonascii	[^\0-\237]
    unicode	\\([0-9a-f]{{1,6}})(\r\n|[ \n\r\t\f])?
    simple_escape	[^\n\r\f0-9a-f]
    escape	{unicode}|\\{simple_escape}
    nmstart	[_a-z]|{nonascii}|{escape}
    nmchar	[_a-z0-9-]|{nonascii}|{escape}
    name	{nmchar}+
    ident	[-]?{nmstart}{nmchar}*
    num	[-+]?(?:[0-9]*\.[0-9]+|[0-9]+)
    string1	\"([^\n\r\f\\"]|\\{nl}|{escape})*\"
    string2	\'([^\n\r\f\\']|\\{nl}|{escape})*\'
    string	{string1}|{string2}
    badstring1	\"([^\n\r\f\\"]|\\{nl}|{escape})*\\?
    badstring2	\'([^\n\r\f\\']|\\{nl}|{escape})*\\?
    badstring	{badstring1}|{badstring2}
    badcomment1	\/\*[^*]*\*+([^/*][^*]*\*+)*
    badcomment2	\/\*[^*]*(\*+[^/*][^*]*)*
    badcomment	{badcomment1}|{badcomment2}
    baduri1	url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}
    baduri2	url\({w}{string}{w}
    baduri3	url\({w}{badstring}
    baduri	{baduri1}|{baduri2}|{baduri3}
 '''.replace(r'\0', '\0').replace(r'\237', '\237')
 # Removed these tokens. Instead, they’re tokenized as two DELIM each.
 #    INCLUDES	~=
 #    DASHMATCH	|=
 # They are only used in selectors but selectors3 also have ^=, *= and $=.
 # We don’t actually parse selectors anyway
 # Re-ordered so that the longest match is always the first.
 # For example, "url('foo')" matches URI, BAD_URI, FUNCTION and IDENT,
 # but URI would always be a longer match than the others.
 # Token table read by ``_init()``: one ``NAME<TAB>regexp`` definition per
 # non-blank line. Each regexp has its {macro} references expanded and is
 # compiled case-insensitively; the order of the lines is the order of
 # COMPILED_TOKEN_REGEXPS.
 TOKENS = r'''
    S	[ \t\r\n\f]+
    URI	url\({w}({string}|([!#$%&*-\[\]-~]|{nonascii}|{escape})*){w}\)
    BAD_URI	{baduri}
    FUNCTION	{ident}\(
    UNICODE-RANGE	u\+[0-9a-f?]{{1,6}}(-[0-9a-f]{{1,6}})?
    IDENT	{ident}
    ATKEYWORD	@{ident}
    HASH	#{name}
    DIMENSION	({num})({ident})
    PERCENTAGE	{num}%
    NUMBER	{num}
    STRING	{string}
    BAD_STRING	{badstring}
    COMMENT	\/\*[^*]*\*+([^/*][^*]*\*+)*\/
    BAD_COMMENT	{badcomment}
    :	:
    ;	;
    {	\{{
    }	\}}
    (	\(
    )	\)
    [	\[
    ]	\]
    CDO	<!--
    CDC	-->
 '''
 # The four containers below start empty and are filled in place by _init().
 # Strings with {macro} expanded: {macro name: '(?:...)' regexp source}
 COMPILED_MACROS = {}
 COMPILED_TOKEN_REGEXPS = []  # [(name, regexp.match)]  ordered
 COMPILED_TOKEN_INDEXES = {}  # {name: i}  helper for the C speedups
 # Indexed by codepoint value of the first character of a token.
 # Codepoints >= 160 (aka nonascii) all use the index 160.
 # Each entry is a list of candidates to try in order;
 # values are (i, name, regexp.match)
 TOKEN_DISPATCH = []
 # Probe for the Python 2 builtin; when it is missing, define the Python 2
 # names ``unichr`` and ``unicode`` as aliases of ``chr`` and ``str``.
 try:
    unichr
 except NameError:
    # Python 3
    unichr = chr
    unicode = str
 def _init():
    """Compile MACROS and TOKENS into the module-level lookup tables.

    Fills COMPILED_MACROS, COMPILED_TOKEN_REGEXPS, COMPILED_TOKEN_INDEXES
    and TOKEN_DISPATCH. Every container is updated in place (``clear()`` or
    slice assignment), never rebound.
    """
    # Expand the macros from top to bottom: a definition can only refer to
    # macros that appear on earlier lines.
    COMPILED_MACROS.clear()
    for macro_line in MACROS.splitlines():
        if not macro_line.strip():
            continue
        macro_name, macro_source = macro_line.split('\t')
        expanded = macro_source.format(**COMPILED_MACROS)
        COMPILED_MACROS[macro_name.strip()] = '(?:%s)' % expanded
    # Compile one regexp per token type, keeping the order of TOKENS.
    compiled = []
    for token_line in TOKENS.splitlines():
        if not token_line.strip():
            continue
        token_name, token_source = token_line.split('\t')
        # Case-insensitive when matching eg. uRL(foo)
        # but preserve the case in extracted groups
        pattern = re.compile(token_source.format(**COMPILED_MACROS), re.I)
        compiled.append((token_name.strip(), pattern.match))
    COMPILED_TOKEN_REGEXPS[:] = compiled
    # Reverse mapping from token name to its position in the list above.
    COMPILED_TOKEN_INDEXES.clear()
    for position, entry in enumerate(COMPILED_TOKEN_REGEXPS):
        COMPILED_TOKEN_INDEXES[entry[0]] = position
    # For each possible first character, the token types worth trying,
    # in the order they must be tried.
    first_characters = [
        (' \t\r\n\f', ['S']),
        ('uU', ['URI', 'BAD_URI', 'UNICODE-RANGE']),
        # \ is an escape outside of another token
        (string.ascii_letters + '\\_-' + unichr(160), ['FUNCTION', 'IDENT']),
        (string.digits + '.+-', ['DIMENSION', 'PERCENTAGE', 'NUMBER']),
        ('@', ['ATKEYWORD']),
        ('#', ['HASH']),
        ('\'"', ['STRING', 'BAD_STRING']),
        ('/', ['COMMENT', 'BAD_COMMENT']),
        ('<', ['CDO']),
        ('-', ['CDC']),
    ]
    # 160 slots for the codepoints below 160, plus one shared by the rest.
    candidates = [[] for _ in range(161)]
    for characters, token_names in first_characters:
        for character in characters:
            candidates[ord(character)].extend(token_names)
    # Single-character tokens replace whatever was registered for them.
    for character in ':;{}()[]':
        candidates[ord(character)] = [character]
    # Resolve the names into (index, name, regexp.match) tuples.
    table = []
    for token_names in candidates:
        row = []
        for token_name in token_names:
            index = COMPILED_TOKEN_INDEXES[token_name]
            row.append((index,) + COMPILED_TOKEN_REGEXPS[index])
        table.append(row)
    TOKEN_DISPATCH[:] = table
 # Fill the lookup tables once, when the module is imported.
 _init()
 def _unicode_replace(match, int=int, unichr=unichr, maxunicode=sys.maxunicode):
    """Return the replacement text for one matched hexadecimal escape.

    :param match:
        a regexp match object whose group 1 holds the hexadecimal digits
        of the codepoint
    :param int, unichr, maxunicode:
        default to the builtins and to ``sys.maxunicode`` as they are when
        the function is defined
    :returns:
        the character for that codepoint, or U+FFFD (the replacement
        character) when the codepoint is greater than ``maxunicode``
    """
    codepoint = int(match.group(1), 16)
    if codepoint > maxunicode:
        return '\N{REPLACEMENT CHARACTER}'  # U+FFFD
    return unichr(codepoint)
 # Each of the three *_UNESCAPE helpers is a pre-bound ``regexp.sub``:
 # call it with a string to get the string with the substitution applied.
 # Replace each hexadecimal escape (the ``unicode`` macro) with the
 # result of _unicode_replace().
 UNICODE_UNESCAPE = functools.partial(
    re.compile(COMPILED_MACROS['unicode'], re.I).sub,
    _unicode_replace)
 # Remove each backslash that is followed by a newline, together with
 # that newline.
 NEWLINE_UNESCAPE = functools.partial(
    re.compile(r'()\\' + COMPILED_MACROS['nl']).sub,
    '')
 # Replace each backslash followed by a ``simple_escape`` character with
 # that character alone.
 SIMPLE_UNESCAPE = functools.partial(
    re.compile(r'\\(%s)' % COMPILED_MACROS['simple_escape'] , re.I).sub,
    # Same as r'\1', but faster on CPython
    operator.methodcaller('group', 1))
 # Iterate over the matches of the ``nl`` macro (newlines) in a string.
 FIND_NEWLINES = re.compile(COMPILED_MACROS['nl']).finditer
 class Token(object):
    r"""A single atomic token.

    .. attribute:: is_container

        Always ``False``.
        Helps to tell :class:`Token` apart from :class:`ContainerToken`.

    .. attribute:: type

        The type of token as a string:

        ``S``
            A sequence of white space
        ``IDENT``
            An identifier: a name that does not start with a digit.
            A name is a sequence of letters, digits, ``_``, ``-``, escaped
            characters and non-ASCII characters. Eg: ``margin-left``
        ``HASH``
            ``#`` followed immediately by a name. Eg: ``#ff8800``
        ``ATKEYWORD``
            ``@`` followed immediately by an identifier. Eg: ``@page``
        ``URI``
            Eg: ``url(foo)`` The content may or may not be quoted.
        ``UNICODE-RANGE``
            ``U+`` followed by one or two hexadecimal
            Unicode codepoints. Eg: ``U+20-00FF``
        ``INTEGER``
            An integer with an optional ``+`` or ``-`` sign
        ``NUMBER``
            A non-integer number with an optional ``+`` or ``-`` sign
        ``DIMENSION``
            An integer or number followed immediately by an
            identifier (the unit). Eg: ``12px``
        ``PERCENTAGE``
            An integer or number followed immediately by ``%``
        ``STRING``
            A string, quoted with ``"`` or ``'``
        ``:`` or ``;``
            That character.
        ``DELIM``
            A single character not matched in another token. Eg: ``,``

        See the source of the :mod:`.token_data` module for the precise
        regular expressions that match various tokens.

        Note that other token types exist in the early tokenization steps,
        but these are ignored, are syntax errors, or are later transformed
        into :class:`ContainerToken` or :class:`FunctionToken`.

    .. attribute:: value

        The parsed value:

        * INTEGER, NUMBER, PERCENTAGE or DIMENSION tokens: the numeric value
          as an int or float.
        * STRING tokens: the unescaped string without quotes
        * URI tokens: the unescaped URI without quotes or
          ``url(`` and ``)`` markers.
        * IDENT, ATKEYWORD or HASH tokens: the unescaped token,
          with ``@`` or ``#`` markers left as-is
        * Other tokens: same as :attr:`as_css`

        *Unescaped* refers to the various escaping methods based on the
        backslash ``\`` character in CSS syntax.

    .. attribute:: unit

        * DIMENSION tokens: the normalized (unescaped, lower-case)
          unit name as a string. eg. ``'px'``
        * PERCENTAGE tokens: the string ``'%'``
        * Other tokens: ``None``

    .. attribute:: line

        The line number in the CSS source of the start of this token.

    .. attribute:: column

        The column number (inside a source line) of the start of this token.
    """
    # NOTE: the docstring above is a raw string because it contains a literal
    # backslash; in a regular string literal that backslash would start an
    # invalid escape sequence.
    is_container = False
    __slots__ = 'type', '_as_css', 'value', 'unit', 'line', 'column'
    def __init__(self, type_, css_value, value, unit, line, column):
        # ``css_value`` is the text of the token as found in the source; it
        # is kept separately from the parsed ``value`` for as_css().
        self.type = type_
        self._as_css = css_value
        self.value = value
        self.unit = unit
        self.line = line
        self.column = column
    def as_css(self):
        """
        Return as an Unicode string the CSS representation of the token,
        as parsed in the source.
        """
        return self._as_css
    def __repr__(self):
        # The unit, when there is one, is appended right after the value.
        return ('<Token {0.type} at {0.line}:{0.column} {0.value!r}{1}>'
                .format(self, self.unit or ''))
 class ContainerToken(object):
    """A token that holds other (nested) tokens.

    .. attribute:: is_container

        Always ``True``.
        Helps to tell :class:`ContainerToken` apart from :class:`Token`.

    .. attribute:: type

        The type of token as a string. One of ``{``, ``(``, ``[`` or
        ``FUNCTION``. For ``FUNCTION``, the object is actually a
        :class:`FunctionToken`.

    .. attribute:: unit

        Always ``None``. Included to make :class:`ContainerToken` behave
        more like :class:`Token`.

    .. attribute:: content

        A list of :class:`Token` or nested :class:`ContainerToken`,
        not including the opening or closing token.

    .. attribute:: line

        The line number in the CSS source of the start of this token.

    .. attribute:: column

        The column number (inside a source line) of the start of this token.
    """
    __slots__ = 'type', '_css_start', '_css_end', 'content', 'line', 'column'
    is_container = True
    unit = None
    # Template for the first part of repr(); subclasses may override it.
    format_string = '<ContainerToken {0.type} at {0.line}:{0.column}>'
    def __init__(self, type_, css_start, css_end, content, line, column):
        # ``css_start`` and ``css_end`` are the source text of the opening
        # and closing tokens; as_css() puts them back around the content.
        self.type = type_
        self.content = content
        self._css_start = css_start
        self._css_end = css_end
        self.line = line
        self.column = column
    def as_css(self):
        """
        Return as an Unicode string the CSS representation of the token,
        as parsed in the source: the opening text, each content token in
        order, then the closing text.
        """
        inner = ''.join(child.as_css() for child in self.content)
        return ''.join((self._css_start, inner, self._css_end))
    def __repr__(self):
        template = self.format_string + ' {0.content}'
        return template.format(self)
 class FunctionToken(ContainerToken):
    """A :class:`ContainerToken` specialized for a ``FUNCTION`` group.

    On top of the attributes of :class:`ContainerToken`, it has:

    .. attribute:: function_name

        The unescaped name of the function, with the ``(`` marker removed.
    """
    __slots__ = ('function_name',)
    # Used by the inherited __repr__ in place of the generic template.
    format_string = ('<FunctionToken {0.function_name}() at '
                     '{0.line}:{0.column}>')
    def __init__(self, type_, css_start, css_end, function_name, content,
                 line, column):
        # Everything except the name is handled by the base class.
        super(FunctionToken, self).__init__(
            type_, css_start, css_end, content, line, column)
        # The name is stored without its last character (the ( marker).
        name_only = function_name[:-1]
        self.function_name = name_only
 class TokenList(list):
    """
    A mixed list of :class:`~.token_data.Token` and
    :class:`~.token_data.ContainerToken` objects.

    This is a subclass of the builtin :class:`~builtins.list` type.
    It can be iterated, indexed and sliced as usual, but also has some
    additional API:
    """
    @property
    def line(self):
        """The line number in the CSS source of the first token."""
        first = self[0]
        return first.line
    @property
    def column(self):
        """The column number (inside a source line) of the first token."""
        first = self[0]
        return first.column
    def as_css(self):
        """
        Return as an Unicode string the CSS representation of the tokens,
        as parsed in the source: the representation of every token of the
        list, joined in order.
        """
        pieces = [token.as_css() for token in self]
        return ''.join(pieces)
--- a/src/tinycss/tokenizer.py
+++ b/src/tinycss/tokenizer.py
@ -0,0 +1,216 @@
 # coding: utf8
 """
    tinycss.tokenizer
    -----------------
    Tokenizer for the CSS core syntax:
    http://www.w3.org/TR/CSS21/syndata.html#tokenization
    This is the pure-python implementation. See also speedups.pyx
    :copyright: (c) 2012 by Simon Sapin.
    :license: BSD, see LICENSE for more details.
 """
 from __future__ import unicode_literals
 from . import token_data
 def tokenize_flat(css_source, ignore_comments=True,
    # Make these local variables to avoid global lookups in the loop.
    # They are not meant to be passed by callers.
    tokens_dispatch=token_data.TOKEN_DISPATCH,
    unicode_unescape=token_data.UNICODE_UNESCAPE,
    newline_unescape=token_data.NEWLINE_UNESCAPE,
    simple_unescape=token_data.SIMPLE_UNESCAPE,
    find_newlines=token_data.FIND_NEWLINES,
    Token=token_data.Token,
    len=len,
    int=int,
    float=float,
    list=list,
    _None=None,
 ):
    """Split a stylesheet into a flat sequence of tokens.

    Grouping tokens (brackets, braces, function calls) are not nested at
    this stage.

    :param css_source:
        CSS as an unicode string
    :param ignore_comments:
        if true (the default) comments will not be included in the
        return value
    :return:
        A list of :class:`Token`
    """
    pos = 0
    line = 1
    column = 1
    source_len = len(css_source)
    tokens = []
    while pos < source_len:
        char = css_source[pos]
        # Single-character tokens do not need a regexp at all.
        if char in ':;{}()[]':
            type_ = char
            css_value = char
        else:
            # All codepoints >= 160 share the dispatch entry 160.
            codepoint = min(ord(char), 160)
            # Try only the token types that can start with this character.
            for _index, type_, regexp in tokens_dispatch[codepoint]:
                match = regexp(css_source, pos)
                if match:
                    # First match is the longest. See comments on TOKENS above.
                    css_value = match.group()
                    break
            else:
                # No match.
                # "Any other character not matched by the above rules,
                #  and neither a single nor a double quote."
                # ... but quotes at the start of a token are always matched
                # by STRING or BAD_STRING. So DELIM is any single character.
                type_ = 'DELIM'
                css_value = char
        length = len(css_value)
        next_pos = pos + length
        # A BAD_COMMENT is a comment at EOF. Ignore it too.
        if not (ignore_comments and type_ in ('COMMENT', 'BAD_COMMENT')):
            # Parse numbers, extract strings and URIs, unescape
            # NOTE(review): simple_unescape always runs before
            # unicode_unescape, so an escaped backslash followed by
            # hexadecimal digits (eg. \\41) looks like it gets unescaped
            # twice -- confirm whether this is intended.
            unit = _None
            if type_ == 'DIMENSION':
                # Group 1 is the number, group 2 is the unit.
                value = match.group(1)
                value = float(value) if '.' in value else int(value)
                unit = match.group(2)
                unit = simple_unescape(unit)
                unit = unicode_unescape(unit)
                unit = unit.lower()  # normalize
            elif type_ == 'PERCENTAGE':
                value = css_value[:-1]  # Remove the % sign
                value = float(value) if '.' in value else int(value)
                unit = '%'
            elif type_ == 'NUMBER':
                value = css_value
                if '.' in value:
                    value = float(value)
                else:
                    # A number without a dot is reported as INTEGER.
                    value = int(value)
                    type_ = 'INTEGER'
            elif type_ in ('IDENT', 'ATKEYWORD', 'HASH', 'FUNCTION'):
                value = simple_unescape(css_value)
                value = unicode_unescape(value)
            elif type_ == 'URI':
                # Group 1 is what is between url( and ), without the
                # surrounding white space.
                value = match.group(1)
                if value and value[0] in '"\'':
                    value = value[1:-1]  # Remove quotes
                    value = newline_unescape(value)
                value = simple_unescape(value)
                value = unicode_unescape(value)
            elif type_ == 'STRING':
                value = css_value[1:-1]  # Remove quotes
                value = newline_unescape(value)
                value = simple_unescape(value)
                value = unicode_unescape(value)
            # BAD_STRING can only be one of:
            # * Unclosed string at the end of the stylesheet:
            #   Close the string, but this is not an error.
            #   Make it a "good" STRING token.
            # * Unclosed string at the (unescaped) end of the line:
            #   Close the string, but this is an error.
            #   Leave it as a BAD_STRING, don’t bother parsing it.
            # See http://www.w3.org/TR/CSS21/syndata.html#parsing-errors
            elif type_ == 'BAD_STRING' and next_pos == source_len:
                type_ = 'STRING'
                value = css_value[1:]  # Remove quote
                value = newline_unescape(value)
                value = simple_unescape(value)
                value = unicode_unescape(value)
            else:
                value = css_value
            tokens.append(Token(type_, css_value, value, unit, line, column))
        # Advance the position, also for the comments that were skipped.
        pos = next_pos
        newlines = list(find_newlines(css_value))
        if newlines:
            line += len(newlines)
            # The new column is the number of characters after the last
            # newline of the token.
            # Add 1 to have lines start at column 1, not 0
            column = length - newlines[-1].end() + 1
        else:
            column += length
    return tokens
 def regroup(tokens):
    """
    Turn a flat sequence of tokens into a tree by matching pairs of
    tokens: () [] {} function()
    (Strings in "" or '' are taken care of by the tokenizer.)

    Opening tokens are replaced by a :class:`ContainerToken`.
    Closing tokens are removed. Unmatched closing tokens are invalid
    but left as-is. All nested structures that are still open at
    the end of the stylesheet are implicitly closed.

    :param tokens:
        a *flat* iterable of tokens, as returned by :func:`tokenize_flat`.
    :return:
        A tree of tokens.
    """
    # Shared by every level of the recursion: all levels consume the same
    # iterator, and the flag is set once that iterator is exhausted.
    closing_for = {'FUNCTION': ')', '(': ')', '[': ']', '{': '}'}
    stream = iter(tokens)
    exhausted = [False]
    def _regroup_inner(stop_at=None,
            tokens=stream, pairs=closing_for, eof=exhausted,
            ContainerToken=token_data.ContainerToken,
            FunctionToken=token_data.FunctionToken):
        for token in tokens:
            kind = token.type
            if kind == stop_at:
                # The closing token of this level: drop it and go back up.
                return
            closing = pairs.get(kind)
            if closing is None:
                yield token  # Not a grouping token
                continue
            assert not isinstance(token, ContainerToken), (
                'Token looks already grouped: {0}'.format(token))
            # Consume everything up to the matching closing token.
            children = list(_regroup_inner(closing))
            if eof[0]:
                closing = ''  # Implicit end of structure at EOF.
            if kind == 'FUNCTION':
                yield FunctionToken(token.type, token.as_css(), closing,
                                    token.value, children,
                                    token.line, token.column)
            else:
                yield ContainerToken(token.type, token.as_css(), closing,
                                     children,
                                     token.line, token.column)
        else:
            # Only reached when the loop ran out of tokens, not on return.
            eof[0] = True  # end of file/stylesheet
    return _regroup_inner()
 def tokenize_grouped(css_source, ignore_comments=True):
    """Tokenize a stylesheet, then nest the tokens with :func:`regroup`.

    :param css_source:
        CSS as an unicode string
    :param ignore_comments:
        if true (the default) comments will not be included in the
        return value
    :return:
        An iterator of :class:`Token`, with grouping tokens replaced as
        described in :func:`regroup`
    """
    flat_tokens = tokenize_flat(css_source, ignore_comments)
    return regroup(flat_tokens)
 # Optional Cython version of tokenize_flat
 # Make both versions available with explicit names for tests:
 # python_tokenize_flat is always the function defined in this module,
 # cython_tokenize_flat is None when the speedups module cannot be imported.
 python_tokenize_flat = tokenize_flat
 try:
    from . import speedups
 except ImportError:
    cython_tokenize_flat = None
 else:
    cython_tokenize_flat = speedups.tokenize_flat
    # Default to the Cython version if available.
    # tokenize_grouped() looks the name up when it is called, so it uses
    # whichever version is bound here.
    tokenize_flat = cython_tokenize_flat
--- a/src/tinycss/version.py
+++ b/src/tinycss/version.py
@ -0,0 +1 @@
 # Version of the package, as a string. The top-level module of the package
 # imports it and exposes it as ``__version__``.
 VERSION = '0.3'