From 0ece679a1b0efb329b46e7b99f8a9468a11ba762 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 27 May 2017 10:39:25 +0530 Subject: [PATCH] No longer bundle python-regex --- setup/extensions.json | 6 - src/calibre/constants.py | 1 - src/regex/README | 5 - src/regex/__init__.py | 704 - src/regex/_regex.c | 25183 ----------------------------------- src/regex/_regex.h | 243 - src/regex/_regex_core.py | 4413 ------ src/regex/_regex_unicode.c | 14789 -------------------- src/regex/_regex_unicode.h | 237 - 9 files changed, 45581 deletions(-) delete mode 100644 src/regex/README delete mode 100644 src/regex/__init__.py delete mode 100644 src/regex/_regex.c delete mode 100644 src/regex/_regex.h delete mode 100644 src/regex/_regex_core.py delete mode 100644 src/regex/_regex_unicode.c delete mode 100644 src/regex/_regex_unicode.h diff --git a/setup/extensions.json b/setup/extensions.json index 1f6d1fb5fd..ca374c474e 100644 --- a/setup/extensions.json +++ b/setup/extensions.json @@ -115,12 +115,6 @@ "sources": "calibre/ebooks/djvu/bzzdecoder.c", "windows_inc_dirs": "calibre/utils/chm" }, - { - "name": "_regex", - "sources": "regex/_regex.c regex/_regex_unicode.c", - "headers": "regex/_regex.h", - "optimize_level": 2 - }, { "name": "dukpy", "sources": "duktape/errors.c duktape/context.c duktape/conversions.c duktape/proxy.c duktape/module.c duktape/duktape/duktape.c", diff --git a/src/calibre/constants.py b/src/calibre/constants.py index 4313388ad4..9b76177bcf 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -161,7 +161,6 @@ class Plugins(collections.Mapping): 'unrar', 'imageops', 'qt_hack', - '_regex', 'hunspell', '_patiencediff_c', 'bzzdec', diff --git a/src/regex/README b/src/regex/README deleted file mode 100644 index 2ff93d4dc6..0000000000 --- a/src/regex/README +++ /dev/null @@ -1,5 +0,0 @@ -This regex engine is taken, with thanks, from: https://bitbucket.org/mrabarnett/mrab-regex - -It is licensed under the Python Software Foundation License - 
-Author: Matthew Barnett diff --git a/src/regex/__init__.py b/src/regex/__init__.py deleted file mode 100644 index 40a5f373a4..0000000000 --- a/src/regex/__init__.py +++ /dev/null @@ -1,704 +0,0 @@ -# -# Secret Labs' Regular Expression Engine -# -# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. -# -# This version of the SRE library can be redistributed under CNRI's -# Python 1.6 license. For any other use, please contact Secret Labs -# AB (info@pythonware.com). -# -# Portions of this engine have been developed in cooperation with -# CNRI. Hewlett-Packard provided funding for 1.6 integration and -# other compatibility work. -# -# 2010-01-16 mrab Python front-end re-written and extended - -r"""Support for regular expressions (RE). - -This module provides regular expression matching operations similar to those -found in Perl. It supports both 8-bit and Unicode strings; both the pattern and -the strings being processed can contain null bytes and characters outside the -US ASCII range. - -Regular expressions can contain both special and ordinary characters. Most -ordinary characters, like "A", "a", or "0", are the simplest regular -expressions; they simply match themselves. You can concatenate ordinary -characters, so last matches the string 'last'. - -There are a few differences between the old (legacy) behaviour and the new -(enhanced) behaviour, which are indicated by VERSION0 or VERSION1. - -The special characters are: - "." Matches any character except a newline. - "^" Matches the start of the string. - "$" Matches the end of the string or just before the - newline at the end of the string. - "*" Matches 0 or more (greedy) repetitions of the preceding - RE. Greedy means that it will match as many repetitions - as possible. - "+" Matches 1 or more (greedy) repetitions of the preceding - RE. - "?" Matches 0 or 1 (greedy) of the preceding RE. - *?,+?,?? Non-greedy versions of the previous three special - characters. 
- *+,++,?+ Possessive versions of the previous three special - characters. - {m,n} Matches from m to n repetitions of the preceding RE. - {m,n}? Non-greedy version of the above. - {m,n}+ Possessive version of the above. - {...} Fuzzy matching constraints. - "\\" Either escapes special characters or signals a special - sequence. - [...] Indicates a set of characters. A "^" as the first - character indicates a complementing set. - "|" A|B, creates an RE that will match either A or B. - (...) Matches the RE inside the parentheses. The contents are - captured and can be retrieved or matched later in the - string. - (?flags-flags) VERSION1: Sets/clears the flags for the remainder of - the group or pattern; VERSION0: Sets the flags for the - entire pattern. - (?:...) Non-capturing version of regular parentheses. - (?>...) Atomic non-capturing version of regular parentheses. - (?flags-flags:...) Non-capturing version of regular parentheses with local - flags. - (?P...) The substring matched by the group is accessible by - name. - (?...) The substring matched by the group is accessible by - name. - (?P=name) Matches the text matched earlier by the group named - name. - (?#...) A comment; ignored. - (?=...) Matches if ... matches next, but doesn't consume the - string. - (?!...) Matches if ... doesn't match next. - (?<=...) Matches if preceded by .... - (? Matches the text matched by the group named name. - \G Matches the empty string, but only at the position where - the search started. - \K Keeps only what follows for the entire match. - \L Named list. The list is provided as a keyword argument. - \m Matches the empty string, but only at the start of a word. - \M Matches the empty string, but only at the end of a word. - \n Matches the newline character. - \N{name} Matches the named character. - \p{name=value} Matches the character if its property has the specified - value. - \P{name=value} Matches the character if its property hasn't the specified - value. 
- \r Matches the carriage-return character. - \s Matches any whitespace character; equivalent to - [ \t\n\r\f\v]. - \S Matches any non-whitespace character; equivalent to [^\s]. - \t Matches the tab character. - \uXXXX Matches the Unicode codepoint with 4-digit hex code XXXX. - \UXXXXXXXX Matches the Unicode codepoint with 8-digit hex code - XXXXXXXX. - \v Matches the vertical tab character. - \w Matches any alphanumeric character; equivalent to - [a-zA-Z0-9_] when matching a bytestring or a Unicode string - with the ASCII flag, or the whole range of Unicode - alphanumeric characters (letters plus digits plus - underscore) when matching a Unicode string. With LOCALE, it - will match the set [0-9_] plus characters defined as - letters for the current locale. - \W Matches the complement of \w; equivalent to [^\w]. - \xXX Matches the character with 2-digit hex code XX. - \X Matches a grapheme. - \Z Matches only at the end of the string. - \\ Matches a literal backslash. - -This module exports the following functions: - match Match a regular expression pattern at the beginning of a string. - fullmatch Match a regular expression pattern against all of a string. - search Search a string for the presence of a pattern. - sub Substitute occurrences of a pattern found in a string using a - template string. - subf Substitute occurrences of a pattern found in a string using a - format string. - subn Same as sub, but also return the number of substitutions made. - subfn Same as subf, but also return the number of substitutions made. - split Split a string by the occurrences of a pattern. VERSION1: will - split at zero-width match; VERSION0: won't split at zero-width - match. - splititer Return an iterator yielding the parts of a split string. - findall Find all occurrences of a pattern in a string. - finditer Return an iterator yielding a match object for each match. - compile Compile a pattern into a Pattern object. - purge Clear the regular expression cache. 
- escape Backslash all non-alphanumerics or special characters in a - string. - -Most of the functions support a concurrent parameter: if True, the GIL will be -released during matching, allowing other Python threads to run concurrently. If -the string changes during matching, the behaviour is undefined. This parameter -is not needed when working on the builtin (immutable) string classes. - -Some of the functions in this module take flags as optional parameters. Most of -these flags can also be set within an RE: - A a ASCII Make \w, \W, \b, \B, \d, and \D match the - corresponding ASCII character categories. Default - when matching a bytestring. - B b BESTMATCH Find the best fuzzy match (default is first). - D DEBUG Print the parsed pattern. - E e ENHANCEMATCH Attempt to improve the fit after finding the first - fuzzy match. - F f FULLCASE Use full case-folding when performing - case-insensitive matching in Unicode. - I i IGNORECASE Perform case-insensitive matching. - L L LOCALE Make \w, \W, \b, \B, \d, and \D dependent on the - current locale. (One byte per character only.) - M m MULTILINE "^" matches the beginning of lines (after a newline) - as well as the string. "$" matches the end of lines - (before a newline) as well as the end of the string. - P p POSIX Perform POSIX-standard matching (leftmost longest). - R r REVERSE Searches backwards. - S s DOTALL "." matches any character at all, including the - newline. - U u UNICODE Make \w, \W, \b, \B, \d, and \D dependent on the - Unicode locale. Default when matching a Unicode - string. - V0 V0 VERSION0 Turn on the old legacy behaviour. - V1 V1 VERSION1 Turn on the new enhanced behaviour. This flag - includes the FULLCASE flag. - W w WORD Make \b and \B work with default Unicode word breaks - and make ".", "^" and "$" work with Unicode line - breaks. - X x VERBOSE Ignore whitespace and comments for nicer looking REs. - -This module also defines an exception 'error'. - -""" - -# Public symbols. 
-__all__ = ["compile", "escape", "findall", "finditer", "fullmatch", "match", - "purge", "search", "split", "splititer", "sub", "subf", "subfn", "subn", - "template", "Scanner", "A", "ASCII", "B", "BESTMATCH", "D", "DEBUG", "E", - "ENHANCEMATCH", "S", "DOTALL", "F", "FULLCASE", "I", "IGNORECASE", "L", - "LOCALE", "M", "MULTILINE", "P", "POSIX", "R", "REVERSE", "T", "TEMPLATE", - "U", "UNICODE", "V0", "VERSION0", "V1", "VERSION1", "X", "VERBOSE", "W", - "WORD", "error", "Regex"] - -__version__ = "2.4.113" - -# -------------------------------------------------------------------- -# Public interface. - -def match(pattern, string, flags=0, pos=None, endpos=None, partial=False, - concurrent=None, **kwargs): - """Try to apply the pattern at the start of the string, returning a match - object, or None if no match was found.""" - return _compile(pattern, flags, kwargs).match(string, pos, endpos, - concurrent, partial) - -def fullmatch(pattern, string, flags=0, pos=None, endpos=None, partial=False, - concurrent=None, **kwargs): - """Try to apply the pattern against all of the string, returning a match - object, or None if no match was found.""" - return _compile(pattern, flags, kwargs).fullmatch(string, pos, endpos, - concurrent, partial) - -def search(pattern, string, flags=0, pos=None, endpos=None, partial=False, - concurrent=None, **kwargs): - """Search through string looking for a match to the pattern, returning a - match object, or None if no match was found.""" - return _compile(pattern, flags, kwargs).search(string, pos, endpos, - concurrent, partial) - -def sub(pattern, repl, string, count=0, flags=0, pos=None, endpos=None, - concurrent=None, **kwargs): - """Return the string obtained by replacing the leftmost (or rightmost with a - reverse pattern) non-overlapping occurrences of the pattern in string by the - replacement repl. 
repl can be either a string or a callable; if a string, - backslash escapes in it are processed; if a callable, it's passed the match - object and must return a replacement string to be used.""" - return _compile(pattern, flags, kwargs).sub(repl, string, count, pos, - endpos, concurrent) - -def subf(pattern, format, string, count=0, flags=0, pos=None, endpos=None, - concurrent=None, **kwargs): - """Return the string obtained by replacing the leftmost (or rightmost with a - reverse pattern) non-overlapping occurrences of the pattern in string by the - replacement format. format can be either a string or a callable; if a string, - it's treated as a format string; if a callable, it's passed the match object - and must return a replacement string to be used.""" - return _compile(pattern, flags, kwargs).subf(format, string, count, pos, - endpos, concurrent) - -def subn(pattern, repl, string, count=0, flags=0, pos=None, endpos=None, - concurrent=None, **kwargs): - """Return a 2-tuple containing (new_string, number). new_string is the string - obtained by replacing the leftmost (or rightmost with a reverse pattern) - non-overlapping occurrences of the pattern in the source string by the - replacement repl. number is the number of substitutions that were made. repl - can be either a string or a callable; if a string, backslash escapes in it - are processed; if a callable, it's passed the match object and must return a - replacement string to be used.""" - return _compile(pattern, flags, kwargs).subn(repl, string, count, pos, - endpos, concurrent) - -def subfn(pattern, format, string, count=0, flags=0, pos=None, endpos=None, - concurrent=None, **kwargs): - """Return a 2-tuple containing (new_string, number). new_string is the string - obtained by replacing the leftmost (or rightmost with a reverse pattern) - non-overlapping occurrences of the pattern in the source string by the - replacement format. number is the number of substitutions that were made. 
format - can be either a string or a callable; if a string, it's treated as a format - string; if a callable, it's passed the match object and must return a - replacement string to be used.""" - return _compile(pattern, flags, kwargs).subfn(format, string, count, pos, - endpos, concurrent) - -def split(pattern, string, maxsplit=0, flags=0, concurrent=None, **kwargs): - """Split the source string by the occurrences of the pattern, returning a - list containing the resulting substrings. If capturing parentheses are used - in pattern, then the text of all groups in the pattern are also returned as - part of the resulting list. If maxsplit is nonzero, at most maxsplit splits - occur, and the remainder of the string is returned as the final element of - the list.""" - return _compile(pattern, flags, kwargs).split(string, maxsplit, concurrent) - -def splititer(pattern, string, maxsplit=0, flags=0, concurrent=None, **kwargs): - "Return an iterator yielding the parts of a split string." - return _compile(pattern, flags, kwargs).splititer(string, maxsplit, - concurrent) - -def findall(pattern, string, flags=0, pos=None, endpos=None, overlapped=False, - concurrent=None, **kwargs): - """Return a list of all matches in the string. The matches may be overlapped - if overlapped is True. If one or more groups are present in the pattern, - return a list of groups; this will be a list of tuples if the pattern has - more than one group. Empty matches are included in the result.""" - return _compile(pattern, flags, kwargs).findall(string, pos, endpos, - overlapped, concurrent) - -def finditer(pattern, string, flags=0, pos=None, endpos=None, overlapped=False, - partial=False, concurrent=None, **kwargs): - """Return an iterator over all matches in the string. The matches may be - overlapped if overlapped is True. For each match, the iterator returns a - match object. 
Empty matches are included in the result.""" - return _compile(pattern, flags, kwargs).finditer(string, pos, endpos, - overlapped, concurrent, partial) - -def compile(pattern, flags=0, **kwargs): - "Compile a regular expression pattern, returning a pattern object." - return _compile(pattern, flags, kwargs) - -def purge(): - "Clear the regular expression cache" - _cache.clear() - _locale_sensitive.clear() - -def template(pattern, flags=0): - "Compile a template pattern, returning a pattern object." - return _compile(pattern, flags | TEMPLATE) - -def escape(pattern, special_only=False): - "Escape all non-alphanumeric characters or special characters in pattern." - s = [] - if special_only: - for c in pattern: - if c in _METACHARS: - s.append("\\") - s.append(c) - elif c == "\x00": - s.append("\\000") - else: - s.append(c) - else: - for c in pattern: - if c in _ALNUM: - s.append(c) - elif c == "\x00": - s.append("\\000") - else: - s.append("\\") - s.append(c) - - return pattern[ : 0].join(s) - -# -------------------------------------------------------------------- -# Internals. - -from . import _regex_core -from calibre.constants import plugins -_regex = plugins['_regex'][0] -from threading import RLock as _RLock -from locale import getlocale as _getlocale -from ._regex_core import * -from ._regex_core import (_ALL_VERSIONS, _ALL_ENCODINGS, _FirstSetError, - _UnscopedFlagSet, _check_group_features, _compile_firstset, - _compile_replacement, _flatten_code, _fold_case, _get_required_string, - _parse_pattern, _shrink_cache) -from ._regex_core import (ALNUM as _ALNUM, Info as _Info, OP as _OP, Source as - _Source, Fuzzy as _Fuzzy) - -# Version 0 is the old behaviour, compatible with the original 're' module. -# Version 1 is the new behaviour, which differs slightly. - -DEFAULT_VERSION = VERSION0 - -_METACHARS = frozenset("()[]{}?*+|^$\\.") - -_regex_core.DEFAULT_VERSION = DEFAULT_VERSION - -# Caches for the patterns and replacements. 
-_cache = {} -_cache_lock = _RLock() -_named_args = {} -_replacement_cache = {} -_locale_sensitive = {} - -# Maximum size of the cache. -_MAXCACHE = 500 -_MAXREPCACHE = 500 - -def _compile(pattern, flags=0, kwargs={}): - "Compiles a regular expression to a PatternObject." - - # We won't bother to cache the pattern if we're debugging. - debugging = (flags & DEBUG) != 0 - - # What locale is this pattern using? - locale_key = (type(pattern), pattern) - if _locale_sensitive.get(locale_key, True) or (flags & LOCALE) != 0: - # This pattern is, or might be, locale-sensitive. - pattern_locale = _getlocale()[1] - else: - # This pattern is definitely not locale-sensitive. - pattern_locale = None - - if not debugging: - try: - # Do we know what keyword arguments are needed? - args_key = pattern, type(pattern), flags - args_needed = _named_args[args_key] - - # Are we being provided with its required keyword arguments? - args_supplied = set() - if args_needed: - for k, v in args_needed: - try: - args_supplied.add((k, frozenset(kwargs[k]))) - except KeyError: - raise error("missing named list: {!r}".format(k)) - - args_supplied = frozenset(args_supplied) - - # Have we already seen this regular expression and named list? - pattern_key = (pattern, type(pattern), flags, args_supplied, - DEFAULT_VERSION, pattern_locale) - return _cache[pattern_key] - except KeyError: - # It's a new pattern, or new named list for a known pattern. - pass - - # Guess the encoding from the class of the pattern string. - if isinstance(pattern, unicode): - guess_encoding = UNICODE - elif isinstance(pattern, str): - guess_encoding = ASCII - elif isinstance(pattern, _pattern_type): - if flags: - raise ValueError("cannot process flags argument with a compiled pattern") - - return pattern - else: - raise TypeError("first argument must be a string or compiled pattern") - - # Set the default version in the core code in case it has been changed. 
- _regex_core.DEFAULT_VERSION = DEFAULT_VERSION - - global_flags = flags - - while True: - caught_exception = None - try: - source = _Source(pattern) - info = _Info(global_flags, source.char_type, kwargs) - info.guess_encoding = guess_encoding - source.ignore_space = bool(info.flags & VERBOSE) - parsed = _parse_pattern(source, info) - break - except _UnscopedFlagSet: - # Remember the global flags for the next attempt. - global_flags = info.global_flags - except error, e: - caught_exception = e - - if caught_exception: - raise error(caught_exception.msg, caught_exception.pattern, - caught_exception.pos) - - if not source.at_end(): - raise error("unbalanced parenthesis", pattern, source.pos) - - # Check the global flags for conflicts. - version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION - if version not in (0, VERSION0, VERSION1): - raise ValueError("VERSION0 and VERSION1 flags are mutually incompatible") - - if (info.flags & _ALL_ENCODINGS) not in (0, ASCII, LOCALE, UNICODE): - raise ValueError("ASCII, LOCALE and UNICODE flags are mutually incompatible") - - if not (info.flags & _ALL_ENCODINGS): - if isinstance(pattern, unicode): - info.flags |= UNICODE - else: - info.flags |= ASCII - - reverse = bool(info.flags & REVERSE) - fuzzy = isinstance(parsed, _Fuzzy) - - # Remember whether this pattern as an inline locale flag. - _locale_sensitive[locale_key] = info.inline_locale - - # Fix the group references. - caught_exception = None - try: - parsed.fix_groups(pattern, reverse, False) - except error, e: - caught_exception = e - - if caught_exception: - raise error(caught_exception.msg, caught_exception.pattern, - caught_exception.pos) - - # Should we print the parsed pattern? - if flags & DEBUG: - parsed.dump(indent=0, reverse=reverse) - - # Optimise the parsed pattern. - parsed = parsed.optimise(info, reverse) - parsed = parsed.pack_characters(info) - - # Get the required string. 
- req_offset, req_chars, req_flags = _get_required_string(parsed, info.flags) - - # Build the named lists. - named_lists = {} - named_list_indexes = [None] * len(info.named_lists_used) - args_needed = set() - for key, index in info.named_lists_used.items(): - name, case_flags = key - values = frozenset(kwargs[name]) - if case_flags: - items = frozenset(_fold_case(info, v) for v in values) - else: - items = values - named_lists[name] = values - named_list_indexes[index] = items - args_needed.add((name, values)) - - # Check the features of the groups. - _check_group_features(info, parsed) - - # Compile the parsed pattern. The result is a list of tuples. - code = parsed.compile(reverse) - - # Is there a group call to the pattern as a whole? - key = (0, reverse, fuzzy) - ref = info.call_refs.get(key) - if ref is not None: - code = [(_OP.CALL_REF, ref)] + code + [(_OP.END, )] - - # Add the final 'success' opcode. - code += [(_OP.SUCCESS, )] - - # Compile the additional copies of the groups that we need. - for group, rev, fuz in info.additional_groups: - code += group.compile(rev, fuz) - - # Flatten the code into a list of ints. - code = _flatten_code(code) - - if not parsed.has_simple_start(): - # Get the first set, if possible. - try: - fs_code = _compile_firstset(info, parsed.get_firstset(reverse)) - fs_code = _flatten_code(fs_code) - code = fs_code + code - except _FirstSetError: - pass - - # The named capture groups. - index_group = dict((v, n) for n, v in info.group_index.items()) - - # Create the PatternObject. - # - # Local flags like IGNORECASE affect the code generation, but aren't needed - # by the PatternObject itself. Conversely, global flags like LOCALE _don't_ - # affect the code generation but _are_ needed by the PatternObject. 
- compiled_pattern = _regex.compile(pattern, info.flags | version, code, - info.group_index, index_group, named_lists, named_list_indexes, - req_offset, req_chars, req_flags, info.group_count) - - # Do we need to reduce the size of the cache? - if len(_cache) >= _MAXCACHE: - _cache_lock.acquire() - try: - _shrink_cache(_cache, _named_args, _locale_sensitive, _MAXCACHE) - finally: - _cache_lock.release() - - if not debugging: - if (info.flags & LOCALE) == 0: - pattern_locale = None - - args_needed = frozenset(args_needed) - - # Store this regular expression and named list. - pattern_key = (pattern, type(pattern), flags, args_needed, - DEFAULT_VERSION, pattern_locale) - _cache[pattern_key] = compiled_pattern - - # Store what keyword arguments are needed. - _named_args[args_key] = args_needed - - return compiled_pattern - -def _compile_replacement_helper(pattern, template): - "Compiles a replacement template." - # This function is called by the _regex module. - - # Have we seen this before? - key = pattern.pattern, pattern.flags, template - compiled = _replacement_cache.get(key) - if compiled is not None: - return compiled - - if len(_replacement_cache) >= _MAXREPCACHE: - _replacement_cache.clear() - - is_unicode = isinstance(template, unicode) - source = _Source(template) - if is_unicode: - def make_string(char_codes): - return u"".join(unichr(c) for c in char_codes) - else: - def make_string(char_codes): - return "".join(chr(c) for c in char_codes) - - compiled = [] - literal = [] - while True: - ch = source.get() - if not ch: - break - if ch == "\\": - # '_compile_replacement' will return either an int group reference - # or a string literal. It returns items (plural) in order to handle - # a 2-character literal (an invalid escape sequence). - is_group, items = _compile_replacement(source, pattern, is_unicode) - if is_group: - # It's a group, so first flush the literal. 
- if literal: - compiled.append(make_string(literal)) - literal = [] - compiled.extend(items) - else: - literal.extend(items) - else: - literal.append(ord(ch)) - - # Flush the literal. - if literal: - compiled.append(make_string(literal)) - - _replacement_cache[key] = compiled - - return compiled - -# We define _pattern_type here after all the support objects have been defined. -_pattern_type = type(_compile("", 0, {})) - -# We'll define an alias for the 'compile' function so that the repr of a -# pattern object is eval-able. -Regex = compile - -# Register myself for pickling. -import copy_reg as _copy_reg - -def _pickle(pattern): - return _regex.compile, pattern._pickled_data - -_copy_reg.pickle(_pattern_type, _pickle) - -if not hasattr(str, "format"): - # Strings don't have the .format method (below Python 2.6). - while True: - _start = __doc__.find(" subf") - if _start < 0: - break - - _end = __doc__.find("\n", _start) + 1 - while __doc__.startswith(" ", _end): - _end = __doc__.find("\n", _end) + 1 - - __doc__ = __doc__[ : _start] + __doc__[_end : ] - - __all__ = [_name for _name in __all__ if not _name.startswith("subf")] - - del _start, _end - - del subf, subfn diff --git a/src/regex/_regex.c b/src/regex/_regex.c deleted file mode 100644 index bd77820d3f..0000000000 --- a/src/regex/_regex.c +++ /dev/null @@ -1,25183 +0,0 @@ -/* Secret Labs' Regular Expression Engine - * - * regular expression matching engine - * - * partial history: - * 1999-10-24 fl created (based on existing template matcher code) - * 2000-03-06 fl first alpha, sort of - * 2000-08-01 fl fixes for 1.6b1 - * 2000-08-07 fl use PyOS_CheckStack() if available - * 2000-09-20 fl added expand method - * 2001-03-20 fl lots of fixes for 2.1b2 - * 2001-04-15 fl export copyright as Python attribute, not global - * 2001-04-28 fl added __copy__ methods (work in progress) - * 2001-05-14 fl fixes for 1.5.2 compatibility - * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis) - * 2001-10-18 fl 
fixed group reset issue (from Matthew Mueller) - * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1 - * 2001-10-21 fl added sub/subn primitive - * 2001-10-24 fl added finditer primitive (for 2.2 only) - * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum) - * 2002-11-09 fl fixed empty sub/subn return type - * 2003-04-18 mvl fully support 4-byte codes - * 2003-10-17 gn implemented non recursive scheme - * 2009-07-26 mrab completely re-designed matcher code - * 2011-11-18 mrab added support for PEP 393 strings - * - * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. - * - * This version of the SRE library can be redistributed under CNRI's - * Python 1.6 license. For any other use, please contact Secret Labs - * AB (info@pythonware.com). - * - * Portions of this engine have been developed in cooperation with - * CNRI. Hewlett-Packard provided funding for 1.6 integration and - * other compatibility work. - */ - -/* #define VERBOSE */ - -#if defined(VERBOSE) -#define TRACE(X) printf X; -#else -#define TRACE(X) -#endif - -#include "Python.h" -#include "structmember.h" /* offsetof */ -#include -#include "_regex.h" -#include "pyport.h" -#include "pythread.h" - -#if PY_VERSION_HEX < 0x02060000 -#if SIZEOF_SIZE_T == SIZEOF_LONG_LONG -#define T_PYSSIZET T_LONGLONG -#elif SIZEOF_SIZE_T == SIZEOF_LONG -#define T_PYSSIZET T_LONG -#else -#error size_t is the same size as neither LONG nor LONGLONG -#endif - -#endif -typedef unsigned char Py_UCS1; -typedef unsigned short Py_UCS2; - -typedef RE_UINT32 RE_CODE; - -/* Properties in the General Category. */ -#define RE_PROP_GC_CN ((RE_PROP_GC << 16) | RE_PROP_CN) -#define RE_PROP_GC_LU ((RE_PROP_GC << 16) | RE_PROP_LU) -#define RE_PROP_GC_LL ((RE_PROP_GC << 16) | RE_PROP_LL) -#define RE_PROP_GC_LT ((RE_PROP_GC << 16) | RE_PROP_LT) -#define RE_PROP_GC_P ((RE_PROP_GC << 16) | RE_PROP_P) - -/* Unlimited repeat count. */ -#define RE_UNLIMITED (~(RE_CODE)0) - -/* The status of a . 
*/ -typedef RE_UINT32 RE_STATUS_T; - -/* Whether to match concurrently, i.e. release the GIL while matching. */ -#define RE_CONC_NO 0 -#define RE_CONC_YES 1 -#define RE_CONC_DEFAULT 2 - -/* The side that could truncate in a partial match. - * - * The values RE_PARTIAL_LEFT and RE_PARTIAL_RIGHT are also used as array - * indexes, so they need to be 0 and 1. - */ -#define RE_PARTIAL_NONE -1 -#define RE_PARTIAL_LEFT 0 -#define RE_PARTIAL_RIGHT 1 - -/* Flags for the kind of 'sub' call: 'sub', 'subn', 'subf', 'subfn'. */ -#define RE_SUB 0x0 -#define RE_SUBN 0x1 -#if PY_VERSION_HEX >= 0x02060000 -#define RE_SUBF 0x2 -#endif - -/* The name of this module, minus the leading underscore. */ -#define RE_MODULE "regex" - -/* Error codes. */ -#define RE_ERROR_INITIALISING 2 /* Initialising object. */ -#define RE_ERROR_SUCCESS 1 /* Successful match. */ -#define RE_ERROR_FAILURE 0 /* Unsuccessful match. */ -#define RE_ERROR_ILLEGAL -1 /* Illegal code. */ -#define RE_ERROR_INTERNAL -2 /* Internal error. */ -#define RE_ERROR_CONCURRENT -3 /* "concurrent" invalid. */ -#define RE_ERROR_MEMORY -4 /* Out of memory. */ -#define RE_ERROR_INTERRUPTED -5 /* Signal handler raised exception. */ -#define RE_ERROR_REPLACEMENT -6 /* Invalid replacement string. */ -#define RE_ERROR_INVALID_GROUP_REF -7 /* Invalid group reference. */ -#define RE_ERROR_GROUP_INDEX_TYPE -8 /* Group index type error. */ -#define RE_ERROR_NO_SUCH_GROUP -9 /* No such group. */ -#define RE_ERROR_INDEX -10 /* String index. */ -#define RE_ERROR_BACKTRACKING -11 /* Too much backtracking. */ -#define RE_ERROR_NOT_STRING -12 /* Not a string. */ -#define RE_ERROR_NOT_UNICODE -13 /* Not a Unicode string. */ -#define RE_ERROR_PARTIAL -15 /* Partial match. */ - -/* The number of backtrack entries per allocated block. */ -#define RE_BACKTRACK_BLOCK_SIZE 64 - -/* The maximum number of backtrack entries to allocate. */ -#define RE_MAX_BACKTRACK_ALLOC (1024 * 1024) - -/* The number of atomic entries per allocated block. 
*/ -#define RE_ATOMIC_BLOCK_SIZE 64 - -/* The initial maximum capacity of the guard block. */ -#define RE_INIT_GUARDS_BLOCK_SIZE 16 - -/* The initial maximum capacity of the node list. */ -#define RE_INIT_NODE_LIST_SIZE 16 - -/* The size increment for various allocation lists. */ -#define RE_LIST_SIZE_INC 16 - -/* The initial maximum capacity of the capture groups. */ -#define RE_INIT_CAPTURE_SIZE 16 - -/* Node bitflags. */ -#define RE_POSITIVE_OP 0x1 -#define RE_ZEROWIDTH_OP 0x2 -#define RE_FUZZY_OP 0x4 -#define RE_REVERSE_OP 0x8 -#define RE_REQUIRED_OP 0x10 - -/* Guards against further matching can occur at the start of the body and the - * tail of a repeat containing a repeat. - */ -#define RE_STATUS_BODY 0x1 -#define RE_STATUS_TAIL 0x2 - -/* Whether a guard is added depends on whether there's a repeat in the body of - * the repeat or a group reference in the body or tail of the repeat. - */ -#define RE_STATUS_NEITHER 0x0 -#define RE_STATUS_REPEAT 0x4 -#define RE_STATUS_LIMITED 0x8 -#define RE_STATUS_REF 0x10 -#define RE_STATUS_VISITED_AG 0x20 -#define RE_STATUS_VISITED_REP 0x40 - -/* Whether a string node has been initialised for fast searching. */ -#define RE_STATUS_FAST_INIT 0x80 - -/* Whether a node us being used. (Additional nodes may be created while the - * pattern is being built. - */ -#define RE_STATUS_USED 0x100 - -/* Whether a node is a string node. */ -#define RE_STATUS_STRING 0x200 - -/* Whether a repeat node is within another repeat. */ -#define RE_STATUS_INNER 0x400 - -/* Various flags stored in a node status member. */ -#define RE_STATUS_SHIFT 11 - -#define RE_STATUS_FUZZY (RE_FUZZY_OP << RE_STATUS_SHIFT) -#define RE_STATUS_REVERSE (RE_REVERSE_OP << RE_STATUS_SHIFT) -#define RE_STATUS_REQUIRED (RE_REQUIRED_OP << RE_STATUS_SHIFT) -#define RE_STATUS_HAS_GROUPS 0x10000 -#define RE_STATUS_HAS_REPEATS 0x20000 - -/* The different error types for fuzzy matching. 
*/ -#define RE_FUZZY_SUB 0 -#define RE_FUZZY_INS 1 -#define RE_FUZZY_DEL 2 -#define RE_FUZZY_ERR 3 -#define RE_FUZZY_COUNT 3 - -/* The various values in a FUZZY node. */ -#define RE_FUZZY_VAL_MAX_BASE 1 -#define RE_FUZZY_VAL_MAX_SUB (RE_FUZZY_VAL_MAX_BASE + RE_FUZZY_SUB) -#define RE_FUZZY_VAL_MAX_INS (RE_FUZZY_VAL_MAX_BASE + RE_FUZZY_INS) -#define RE_FUZZY_VAL_MAX_DEL (RE_FUZZY_VAL_MAX_BASE + RE_FUZZY_DEL) -#define RE_FUZZY_VAL_MAX_ERR (RE_FUZZY_VAL_MAX_BASE + RE_FUZZY_ERR) - -#define RE_FUZZY_VAL_COST_BASE 5 -#define RE_FUZZY_VAL_SUB_COST (RE_FUZZY_VAL_COST_BASE + RE_FUZZY_SUB) -#define RE_FUZZY_VAL_INS_COST (RE_FUZZY_VAL_COST_BASE + RE_FUZZY_INS) -#define RE_FUZZY_VAL_DEL_COST (RE_FUZZY_VAL_COST_BASE + RE_FUZZY_DEL) -#define RE_FUZZY_VAL_MAX_COST (RE_FUZZY_VAL_COST_BASE + RE_FUZZY_ERR) - -/* The various values in an END_FUZZY node. */ -#define RE_FUZZY_VAL_MIN_BASE 1 -#define RE_FUZZY_VAL_MIN_SUB (RE_FUZZY_VAL_MIN_BASE + RE_FUZZY_SUB) -#define RE_FUZZY_VAL_MIN_INS (RE_FUZZY_VAL_MIN_BASE + RE_FUZZY_INS) -#define RE_FUZZY_VAL_MIN_DEL (RE_FUZZY_VAL_MIN_BASE + RE_FUZZY_DEL) -#define RE_FUZZY_VAL_MIN_ERR (RE_FUZZY_VAL_MIN_BASE + RE_FUZZY_ERR) - -/* The maximum number of errors when trying to improve a fuzzy match. */ -#define RE_MAX_ERRORS 10 - -/* The flags which will be set for full Unicode case folding. */ -#define RE_FULL_CASE_FOLDING (RE_FLAG_UNICODE | RE_FLAG_FULLCASE | RE_FLAG_IGNORECASE) - -/* The shortest string prefix for which we'll use a fast string search. */ -#define RE_MIN_FAST_LENGTH 5 - -static char copyright[] = - " RE 2.3.0 Copyright (c) 1997-2002 by Secret Labs AB "; - -/* The exception to raise on error. */ -static PyObject* error_exception; - -/* The dictionary of Unicode properties. */ -static PyObject* property_dict; - -typedef struct RE_State* RE_StatePtr; - -/* Bit-flags for the common character properties supported by locale-sensitive - * matching. 
- */ -#define RE_LOCALE_ALNUM 0x001 -#define RE_LOCALE_ALPHA 0x002 -#define RE_LOCALE_CNTRL 0x004 -#define RE_LOCALE_DIGIT 0x008 -#define RE_LOCALE_GRAPH 0x010 -#define RE_LOCALE_LOWER 0x020 -#define RE_LOCALE_PRINT 0x040 -#define RE_LOCALE_PUNCT 0x080 -#define RE_LOCALE_SPACE 0x100 -#define RE_LOCALE_UPPER 0x200 - -/* Info about the current locale. - * - * Used by patterns that are locale-sensitive. - */ -typedef struct RE_LocaleInfo { - unsigned short properties[0x100]; - unsigned char uppercase[0x100]; - unsigned char lowercase[0x100]; -} RE_LocaleInfo; - -/* Handlers for ASCII, locale and Unicode. */ -typedef struct RE_EncodingTable { - BOOL (*has_property)(RE_LocaleInfo* locale_info, RE_CODE property, Py_UCS4 - ch); - BOOL (*at_boundary)(RE_StatePtr state, Py_ssize_t text_pos); - BOOL (*at_word_start)(RE_StatePtr state, Py_ssize_t text_pos); - BOOL (*at_word_end)(RE_StatePtr state, Py_ssize_t text_pos); - BOOL (*at_default_boundary)(RE_StatePtr state, Py_ssize_t text_pos); - BOOL (*at_default_word_start)(RE_StatePtr state, Py_ssize_t text_pos); - BOOL (*at_default_word_end)(RE_StatePtr state, Py_ssize_t text_pos); - BOOL (*at_grapheme_boundary)(RE_StatePtr state, Py_ssize_t text_pos); - BOOL (*is_line_sep)(Py_UCS4 ch); - BOOL (*at_line_start)(RE_StatePtr state, Py_ssize_t text_pos); - BOOL (*at_line_end)(RE_StatePtr state, Py_ssize_t text_pos); - BOOL (*possible_turkic)(RE_LocaleInfo* locale_info, Py_UCS4 ch); - int (*all_cases)(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* - codepoints); - Py_UCS4 (*simple_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch); - int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* - folded); - int (*all_turkic_i)(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* - cases); -} RE_EncodingTable; - -/* Position within the regex and text. */ -typedef struct RE_Position { - struct RE_Node* node; - Py_ssize_t text_pos; -} RE_Position; - -/* Info about fuzzy matching. 
*/ -typedef struct RE_FuzzyInfo { - struct RE_Node* node; - size_t counts[RE_FUZZY_COUNT + 1]; /* Add 1 for total errors. */ - size_t total_cost; -} RE_FuzzyInfo; - -/* Storage for backtrack data. */ -typedef struct RE_BacktrackData { - union { - struct { - size_t capture_change; - BOOL too_few_errors; - } atomic; - struct { - RE_Position position; - } branch; - struct { - RE_FuzzyInfo fuzzy_info; - Py_ssize_t text_pos; - RE_CODE index; - } fuzzy; - struct { - RE_Position position; - size_t count; - struct RE_Node* fuzzy_node; - BOOL too_few_errors; - } fuzzy_insert; - struct { - RE_Position position; - RE_INT8 fuzzy_type; - RE_INT8 step; - } fuzzy_item; - struct { - RE_Position position; - Py_ssize_t string_pos; - RE_INT8 fuzzy_type; - RE_INT8 folded_pos; - RE_INT8 folded_len; - RE_INT8 gfolded_pos; - RE_INT8 gfolded_len; - RE_INT8 step; - } fuzzy_string; - struct { - Py_ssize_t text_pos; - Py_ssize_t current_capture; - RE_CODE private_index; - RE_CODE public_index; - BOOL capture; - } group; - struct { - struct RE_Node* node; - size_t capture_change; - } group_call; - struct { - Py_ssize_t match_pos; - } keep; - struct { - struct RE_Node* node; - size_t capture_change; - BOOL too_few_errors; - BOOL inside; - } lookaround; - struct { - RE_Position position; - Py_ssize_t text_pos; - size_t count; - Py_ssize_t start; - size_t capture_change; - RE_CODE index; - } repeat; - }; - RE_UINT8 op; -} RE_BacktrackData; - -/* Storage for backtrack data is allocated in blocks for speed. */ -typedef struct RE_BacktrackBlock { - RE_BacktrackData items[RE_BACKTRACK_BLOCK_SIZE]; - struct RE_BacktrackBlock* previous; - struct RE_BacktrackBlock* next; - size_t capacity; - size_t count; -} RE_BacktrackBlock; - -/* Storage for atomic data. 
*/ -typedef struct RE_AtomicData { - RE_BacktrackBlock* current_backtrack_block; - size_t backtrack_count; - struct RE_Node* node; - RE_BacktrackData* backtrack; - struct RE_SavedGroups* saved_groups; - struct RE_SavedRepeats* saved_repeats; - struct RE_GroupCallFrame* call_frame; - Py_ssize_t slice_start; - Py_ssize_t slice_end; - Py_ssize_t text_pos; - BOOL is_lookaround; - BOOL has_groups; - BOOL has_repeats; -} RE_AtomicData; - -/* Storage for atomic data is allocated in blocks for speed. */ -typedef struct RE_AtomicBlock { - RE_AtomicData items[RE_ATOMIC_BLOCK_SIZE]; - struct RE_AtomicBlock* previous; - struct RE_AtomicBlock* next; - size_t capacity; - size_t count; -} RE_AtomicBlock; - -/* Storage for saved groups. */ -typedef struct RE_SavedGroups { - struct RE_SavedGroups* previous; - struct RE_SavedGroups* next; - struct RE_GroupSpan* spans; - size_t* counts; -} RE_SavedGroups; - -/* Storage for info around a recursive by 'basic'match'. */ -typedef struct RE_Info { - RE_BacktrackBlock* current_backtrack_block; - size_t backtrack_count; - RE_SavedGroups* current_saved_groups; - struct RE_GroupCallFrame* current_group_call_frame; - BOOL must_advance; -} RE_Info; - -/* Storage for the next node. */ -typedef struct RE_NextNode { - struct RE_Node* node; - struct RE_Node* test; - struct RE_Node* match_next; - Py_ssize_t match_step; -} RE_NextNode; - -/* A pattern node. */ -typedef struct RE_Node { - RE_NextNode next_1; - union { - struct { - RE_NextNode next_2; - } nonstring; - struct { - /* Used only if (node->status & RE_STATUS_STRING) is true. */ - Py_ssize_t* bad_character_offset; - Py_ssize_t* good_suffix_offset; - } string; - }; - Py_ssize_t step; - size_t value_count; - RE_CODE* values; - RE_STATUS_T status; - RE_UINT8 op; - BOOL match; -} RE_Node; - -/* Info about a group's span. */ -typedef struct RE_GroupSpan { - Py_ssize_t start; - Py_ssize_t end; -} RE_GroupSpan; - -/* Span of a guard (inclusive range). 
*/ -typedef struct RE_GuardSpan { - Py_ssize_t low; - Py_ssize_t high; - BOOL protect; -} RE_GuardSpan; - -/* Spans guarded against further matching. */ -typedef struct RE_GuardList { - size_t capacity; - size_t count; - RE_GuardSpan* spans; - Py_ssize_t last_text_pos; - size_t last_low; -} RE_GuardList; - -/* Info about a group. */ -typedef struct RE_GroupData { - RE_GroupSpan span; - size_t capture_count; - size_t capture_capacity; - Py_ssize_t current_capture; - RE_GroupSpan* captures; -} RE_GroupData; - -/* Info about a repeat. */ -typedef struct RE_RepeatData { - RE_GuardList body_guard_list; - RE_GuardList tail_guard_list; - size_t count; - Py_ssize_t start; - size_t capture_change; -} RE_RepeatData; - -/* Storage for saved repeats. */ -typedef struct RE_SavedRepeats { - struct RE_SavedRepeats* previous; - struct RE_SavedRepeats* next; - RE_RepeatData* repeats; -} RE_SavedRepeats; - -/* Guards for fuzzy sections. */ -typedef struct RE_FuzzyGuards { - RE_GuardList body_guard_list; - RE_GuardList tail_guard_list; -} RE_FuzzyGuards; - -/* Info about a capture group. */ -typedef struct RE_GroupInfo { - Py_ssize_t end_index; - RE_Node* node; - BOOL referenced; - BOOL has_name; -} RE_GroupInfo; - -/* Info about a call_ref. */ -typedef struct RE_CallRefInfo { - RE_Node* node; - BOOL defined; - BOOL used; -} RE_CallRefInfo; - -/* Info about a repeat. */ -typedef struct RE_RepeatInfo { - RE_STATUS_T status; -} RE_RepeatInfo; - -/* Stack frame for a group call. */ -typedef struct RE_GroupCallFrame { - struct RE_GroupCallFrame* previous; - struct RE_GroupCallFrame* next; - RE_Node* node; - RE_GroupData* groups; - RE_RepeatData* repeats; -} RE_GroupCallFrame; - -/* Info about a string argument. */ -typedef struct RE_StringInfo { -#if PY_VERSION_HEX >= 0x02060000 - Py_buffer view; /* View of the string if it's a buffer object. */ -#endif - void* characters; /* Pointer to the characters of the string. */ - Py_ssize_t length; /* Length of the string. 
*/ - Py_ssize_t charsize; /* Size of the characters in the string. */ - BOOL is_unicode; /* Whether the string is Unicode. */ - BOOL should_release; /* Whether the buffer should be released. */ -} RE_StringInfo; - -/* Info about where the next match was found, starting from a certain search - * position. This is used when a pattern starts with a BRANCH. - */ -#define MAX_SEARCH_POSITIONS 7 - -/* Info about a search position. */ -typedef struct { - Py_ssize_t start_pos; - Py_ssize_t match_pos; -} RE_SearchPosition; - -/* The state object used during matching. */ -typedef struct RE_State { - struct PatternObject* pattern; /* Parent PatternObject. */ - /* Info about the string being matched. */ - PyObject* string; -#if PY_VERSION_HEX >= 0x02060000 - Py_buffer view; /* View of the string if it's a buffer object. */ -#endif - Py_ssize_t charsize; - void* text; - Py_ssize_t text_length; - /* The slice of the string being searched. */ - Py_ssize_t slice_start; - Py_ssize_t slice_end; - /* Info about the capture groups. */ - RE_GroupData* groups; - Py_ssize_t lastindex; - Py_ssize_t lastgroup; - /* Info about the repeats. */ - RE_RepeatData* repeats; - Py_ssize_t search_anchor; /* Where the last match finished. */ - Py_ssize_t match_pos; /* The start position of the match. */ - Py_ssize_t text_pos; /* The current position of the match. */ - Py_ssize_t final_newline; /* The index of newline at end of string, or -1. */ - Py_ssize_t final_line_sep; /* The index of line separator at end of string, or -1. */ - /* Storage for backtrack info. */ - RE_BacktrackBlock backtrack_block; - RE_BacktrackBlock* current_backtrack_block; - Py_ssize_t backtrack_allocated; - RE_BacktrackData* backtrack; - RE_AtomicBlock* current_atomic_block; - /* Storage for saved capture groups. 
*/ - RE_SavedGroups* first_saved_groups; - RE_SavedGroups* current_saved_groups; - RE_SavedRepeats* first_saved_repeats; - RE_SavedRepeats* current_saved_repeats; - /* Info about the best POSIX match (leftmost longest). */ - Py_ssize_t best_match_pos; - Py_ssize_t best_text_pos; - RE_GroupData* best_match_groups; - /* Miscellaneous. */ - Py_ssize_t min_width; /* The minimum width of the string to match (assuming it's not a fuzzy pattern). */ - RE_EncodingTable* encoding; /* The 'encoding' of the string being searched. */ - RE_LocaleInfo* locale_info; /* Info about the locale, if needed. */ - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); - void* (*point_to)(void* text, Py_ssize_t pos); - PyThread_type_lock lock; /* A lock for accessing the state across threads. */ - RE_FuzzyInfo fuzzy_info; /* Info about fuzzy matching. */ - size_t total_fuzzy_counts[RE_FUZZY_COUNT]; /* Totals for fuzzy matching. */ - size_t best_fuzzy_counts[RE_FUZZY_COUNT]; /* Best totals for fuzzy matching. */ - RE_FuzzyGuards* fuzzy_guards; /* The guards for a fuzzy match. */ - size_t total_errors; /* The total number of errors of a fuzzy match. */ - size_t max_errors; /* The maximum permitted number of errors. */ - size_t fewest_errors; /* The fewest errors so far of an enhanced fuzzy match. */ - /* The group call stack. */ - RE_GroupCallFrame* first_group_call_frame; - RE_GroupCallFrame* current_group_call_frame; - RE_GuardList* group_call_guard_list; - RE_SearchPosition search_positions[MAX_SEARCH_POSITIONS]; /* Where the search matches next. */ - size_t capture_change; /* Incremented every time a captive group changes. */ - Py_ssize_t req_pos; /* The position where the required string matched. */ - Py_ssize_t req_end; /* The end position where the required string matched. */ - int partial_side; /* The side that could truncate in a partial match. 
*/ - RE_UINT16 iterations; /* The number of iterations the matching engine has performed since checking for KeyboardInterrupt. */ - BOOL is_unicode; /* Whether the string to be matched is Unicode. */ - BOOL should_release; /* Whether the buffer should be released. */ - BOOL overlapped; /* Whether the matches can be overlapped. */ - BOOL reverse; /* Whether it's a reverse pattern. */ - BOOL visible_captures; /* Whether the 'captures' method will be visible. */ - BOOL version_0; /* Whether to perform version_0 behaviour (same as re module). */ - BOOL must_advance; /* Whether the end of the match must advance past its start. */ - BOOL is_multithreaded; /* Whether to release the GIL while matching. */ - BOOL too_few_errors; /* Whether there were too few fuzzy errors. */ - BOOL match_all; /* Whether to match all of the string ('fullmatch'). */ - BOOL found_match; /* Whether a POSIX match has been found. */ -} RE_State; - -/* Storage for the regex state and thread state. - * - * Scanner objects can sometimes be shared across threads, which means that - * their RE_State structs are also shared. This isn't safe when the GIL is - * released, so in such instances we have a lock (mutex) in the RE_State struct - * to protect it during matching. We also need a thread-safe place to store the - * thread state when releasing the GIL. - */ -typedef struct RE_SafeState { - RE_State* re_state; - PyThreadState* thread_state; -} RE_SafeState; - -/* The PatternObject created from a regular expression. */ -typedef struct PatternObject { - PyObject_HEAD - PyObject* pattern; /* Pattern source (or None). */ - Py_ssize_t flags; /* Flags used when compiling pattern source. */ - PyObject* packed_code_list; - PyObject* weakreflist; /* List of weak references */ - /* Nodes into which the regular expression is compiled. */ - RE_Node* start_node; - RE_Node* start_test; - size_t true_group_count; /* The true number of capture groups. 
*/ - size_t public_group_count; /* The number of public capture groups. */ - size_t repeat_count; /* The number of repeats. */ - Py_ssize_t group_end_index; /* The number of group closures. */ - PyObject* groupindex; - PyObject* indexgroup; - PyObject* named_lists; - size_t named_lists_count; - PyObject** partial_named_lists[2]; - PyObject* named_list_indexes; - /* Storage for the pattern nodes. */ - size_t node_capacity; - size_t node_count; - RE_Node** node_list; - /* Info about the capture groups. */ - size_t group_info_capacity; - RE_GroupInfo* group_info; - /* Info about the call_refs. */ - size_t call_ref_info_capacity; - size_t call_ref_info_count; - RE_CallRefInfo* call_ref_info; - Py_ssize_t pattern_call_ref; - /* Info about the repeats. */ - size_t repeat_info_capacity; - RE_RepeatInfo* repeat_info; - Py_ssize_t min_width; /* The minimum width of the string to match (assuming it isn't a fuzzy pattern). */ - RE_EncodingTable* encoding; /* Encoding handlers. */ - RE_LocaleInfo* locale_info; /* Info about the locale, if needed. */ - RE_GroupData* groups_storage; - RE_RepeatData* repeats_storage; - size_t fuzzy_count; /* The number of fuzzy sections. */ - /* Additional info. */ - Py_ssize_t req_offset; /* The offset to the required string. */ - PyObject* required_chars; - Py_ssize_t req_flags; - RE_Node* req_string; /* The required string. */ - BOOL is_fuzzy; /* Whether it's a fuzzy pattern. */ - BOOL do_search_start; /* Whether to do an initial search. */ - BOOL recursive; /* Whether the entire pattern is recursive. */ -} PatternObject; - -/* The MatchObject created when a match is found. */ -typedef struct MatchObject { - PyObject_HEAD - PyObject* string; /* Link to the target string or NULL if detached. */ - PyObject* substring; /* Link to (a substring of) the target string. */ - Py_ssize_t substring_offset; /* Offset into the target string. */ - PatternObject* pattern; /* Link to the regex (pattern) object. */ - Py_ssize_t pos; /* Start of current slice. 
*/ - Py_ssize_t endpos; /* End of current slice. */ - Py_ssize_t match_start; /* Start of matched slice. */ - Py_ssize_t match_end; /* End of matched slice. */ - Py_ssize_t lastindex; /* Last group seen by the engine (-1 if none). */ - Py_ssize_t lastgroup; /* Last named group seen by the engine (-1 if none). */ - size_t group_count; /* The number of groups. */ - RE_GroupData* groups; /* The capture groups. */ - PyObject* regs; - size_t fuzzy_counts[RE_FUZZY_COUNT]; - BOOL partial; /* Whether it's a partial match. */ -} MatchObject; - -/* The ScannerObject. */ -typedef struct ScannerObject { - PyObject_HEAD - PatternObject* pattern; - RE_State state; - int status; -} ScannerObject; - -/* The SplitterObject. */ -typedef struct SplitterObject { - PyObject_HEAD - PatternObject* pattern; - RE_State state; - Py_ssize_t maxsplit; - Py_ssize_t last_pos; - Py_ssize_t split_count; - Py_ssize_t index; - int status; -} SplitterObject; -#if PY_VERSION_HEX >= 0x02060000 - -/* The CaptureObject. */ -typedef struct CaptureObject { - PyObject_HEAD - Py_ssize_t group_index; - MatchObject** match_indirect; -} CaptureObject; -#endif - -/* Info used when compiling a pattern to nodes. */ -typedef struct RE_CompileArgs { - RE_CODE* code; /* The start of the compiled pattern. */ - RE_CODE* end_code; /* The end of the compiled pattern. */ - PatternObject* pattern; /* The pattern object. */ - Py_ssize_t min_width; /* The minimum width of the string to match (assuming it isn't a fuzzy pattern). */ - RE_Node* start; /* The start node. */ - RE_Node* end; /* The end node. */ - size_t repeat_depth; /* The nesting depth of the repeat. */ - BOOL forward; /* Whether it's a forward (not reverse) pattern. */ - BOOL visible_captures; /* Whether all of the captures will be visible. */ - BOOL has_captures; /* Whether the pattern has capture groups. */ - BOOL is_fuzzy; /* Whether the pattern (or some part of it) is fuzzy. */ - BOOL within_fuzzy; /* Whether the subpattern is within a fuzzy section. 
*/ - BOOL has_groups; /* Whether the subpattern contains captures. */ - BOOL has_repeats; /* Whether the subpattern contains repeats. */ -} RE_CompileArgs; - -/* The string slices which will be concatenated to make the result string of - * the 'sub' method. - * - * This allows us to avoid creating a list of slices if there of fewer than 2 - * of them. Empty strings aren't recorded, so if 'list' and 'item' are both - * NULL then the result is an empty string. - */ -typedef struct RE_JoinInfo { - PyObject* list; /* The list of slices if there are more than 2 of them. */ - PyObject* item; /* The slice if there is only 1 of them. */ - BOOL reversed; /* Whether the slices have been found in reverse order. */ - BOOL is_unicode; /* Whether the string is Unicode. */ -} RE_JoinInfo; - -/* Info about fuzzy matching. */ -typedef struct { - RE_Node* new_node; - Py_ssize_t new_text_pos; - Py_ssize_t limit; - Py_ssize_t new_string_pos; - int step; - int new_folded_pos; - int folded_len; - int new_gfolded_pos; - int new_group_pos; - int fuzzy_type; - BOOL permit_insertion; -} RE_FuzzyData; - -typedef struct RE_BestEntry { - Py_ssize_t match_pos; - Py_ssize_t text_pos; -} RE_BestEntry; - -typedef struct RE_BestList { - size_t capacity; - size_t count; - RE_BestEntry* entries; -} RE_BestList; - -/* A stack of guard checks. */ -typedef struct RE_Check { - RE_Node* node; - RE_STATUS_T result; -} RE_Check; - -typedef struct RE_CheckStack { - Py_ssize_t capacity; - Py_ssize_t count; - RE_Check* items; -} RE_CheckStack; - -/* A stack of nodes. */ -typedef struct RE_NodeStack { - Py_ssize_t capacity; - Py_ssize_t count; - RE_Node** items; -} RE_NodeStack; - -/* Function types for getting info from a MatchObject. */ -typedef PyObject* (*RE_GetByIndexFunc)(MatchObject* self, Py_ssize_t index); - -/* Returns the magnitude of a 'Py_ssize_t' value. */ -Py_LOCAL_INLINE(Py_ssize_t) abs_ssize_t(Py_ssize_t x) { - return x >= 0 ? x : -x; -} - -/* Returns the minimum of 2 'Py_ssize_t' values. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) min_ssize_t(Py_ssize_t x, Py_ssize_t y) { - return x <= y ? x : y; -} - -/* Returns the maximum of 2 'Py_ssize_t' values. */ -Py_LOCAL_INLINE(Py_ssize_t) max_ssize_t(Py_ssize_t x, Py_ssize_t y) { - return x >= y ? x : y; -} - -/* Returns the minimum of 2 'size_t' values. */ -Py_LOCAL_INLINE(size_t) min_size_t(size_t x, size_t y) { - return x <= y ? x : y; -} - -/* Returns the maximum of 2 'size_t' values. */ -Py_LOCAL_INLINE(size_t) max_size_t(size_t x, size_t y) { - return x >= y ? x : y; -} - -/* Returns the 'maximum' of 2 RE_STATUS_T values. */ -Py_LOCAL_INLINE(RE_STATUS_T) max_status_2(RE_STATUS_T x, RE_STATUS_T y) { - return x >= y ? x : y; -} - -/* Returns the 'maximum' of 3 RE_STATUS_T values. */ -Py_LOCAL_INLINE(RE_STATUS_T) max_status_3(RE_STATUS_T x, RE_STATUS_T y, - RE_STATUS_T z) { - return max_status_2(x, max_status_2(y, z)); -} - -/* Returns the 'maximum' of 4 RE_STATUS_T values. */ -Py_LOCAL_INLINE(RE_STATUS_T) max_status_4(RE_STATUS_T w, RE_STATUS_T x, - RE_STATUS_T y, RE_STATUS_T z) { - return max_status_2(max_status_2(w, x), max_status_2(y, z)); -} - -/* Gets a character at a position assuming 1 byte per character. */ -static Py_UCS4 bytes1_char_at(void* text, Py_ssize_t pos) { - return *((Py_UCS1*)text + pos); -} - -/* Sets a character at a position assuming 1 byte per character. */ -static void bytes1_set_char_at(void* text, Py_ssize_t pos, Py_UCS4 ch) { - *((Py_UCS1*)text + pos) = (Py_UCS1)ch; -} - -/* Gets a pointer to a position assuming 1 byte per character. */ -static void* bytes1_point_to(void* text, Py_ssize_t pos) { - return (Py_UCS1*)text + pos; -} - -/* Gets a character at a position assuming 2 bytes per character. */ -static Py_UCS4 bytes2_char_at(void* text, Py_ssize_t pos) { - return *((Py_UCS2*)text + pos); -} - -/* Sets a character at a position assuming 2 bytes per character. 
*/ -static void bytes2_set_char_at(void* text, Py_ssize_t pos, Py_UCS4 ch) { - *((Py_UCS2*)text + pos) = (Py_UCS2)ch; -} - -/* Gets a pointer to a position assuming 2 bytes per character. */ -static void* bytes2_point_to(void* text, Py_ssize_t pos) { - return (Py_UCS2*)text + pos; -} - -/* Gets a character at a position assuming 4 bytes per character. */ -static Py_UCS4 bytes4_char_at(void* text, Py_ssize_t pos) { - return *((Py_UCS4*)text + pos); -} - -/* Sets a character at a position assuming 4 bytes per character. */ -static void bytes4_set_char_at(void* text, Py_ssize_t pos, Py_UCS4 ch) { - *((Py_UCS4*)text + pos) = (Py_UCS4)ch; -} - -/* Gets a pointer to a position assuming 4 bytes per character. */ -static void* bytes4_point_to(void* text, Py_ssize_t pos) { - return (Py_UCS4*)text + pos; -} - -/* Default for whether a position is on a word boundary. */ -static BOOL at_boundary_always(RE_State* state, Py_ssize_t text_pos) { - return TRUE; -} - -/* Converts a BOOL to success/failure. */ -Py_LOCAL_INLINE(int) bool_as_status(BOOL value) { - return value ? RE_ERROR_SUCCESS : RE_ERROR_FAILURE; -} - -/* ASCII-specific. */ - -Py_LOCAL_INLINE(BOOL) unicode_has_property(RE_CODE property, Py_UCS4 ch); - -/* Checks whether a character has a property. */ -Py_LOCAL_INLINE(BOOL) ascii_has_property(RE_CODE property, Py_UCS4 ch) { - if (ch > RE_ASCII_MAX) { - /* Outside the ASCII range. */ - RE_UINT32 value; - - value = property & 0xFFFF; - - return value == 0; - } - - return unicode_has_property(property, ch); -} - -/* Wrapper for calling 'ascii_has_property' via a pointer. */ -static BOOL ascii_has_property_wrapper(RE_LocaleInfo* locale_info, RE_CODE - property, Py_UCS4 ch) { - return ascii_has_property(property, ch); -} - -/* Checks whether there's a word character to the left. 
*/ -Py_LOCAL_INLINE(BOOL) ascii_word_left(RE_State* state, Py_ssize_t text_pos) { - return text_pos > 0 && ascii_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos - 1)); -} - -/* Checks whether there's a word character to the right. */ -Py_LOCAL_INLINE(BOOL) ascii_word_right(RE_State* state, Py_ssize_t text_pos) { - return text_pos < state->text_length && ascii_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos)); -} - -/* Checks whether a position is on a word boundary. */ -static BOOL ascii_at_boundary(RE_State* state, Py_ssize_t text_pos) { - BOOL left; - BOOL right; - - left = ascii_word_left(state, text_pos); - right = ascii_word_right(state, text_pos); - - return left != right; -} - -/* Checks whether a position is at the start of a word. */ -static BOOL ascii_at_word_start(RE_State* state, Py_ssize_t text_pos) { - BOOL left; - BOOL right; - - left = ascii_word_left(state, text_pos); - right = ascii_word_right(state, text_pos); - - return !left && right; -} - -/* Checks whether a position is at the end of a word. */ -static BOOL ascii_at_word_end(RE_State* state, Py_ssize_t text_pos) { - BOOL left; - BOOL right; - - left = ascii_word_left(state, text_pos); - right = ascii_word_right(state, text_pos); - - return left && !right; -} - -/* Checks whether a character is a line separator. */ -static BOOL ascii_is_line_sep(Py_UCS4 ch) { - return 0x0A <= ch && ch <= 0x0D; -} - -/* Checks whether a position is at the start of a line. */ -static BOOL ascii_at_line_start(RE_State* state, Py_ssize_t text_pos) { - Py_UCS4 ch; - - if (text_pos <= 0) - return TRUE; - - ch = state->char_at(state->text, text_pos - 1); - - if (ch == 0x0D) { - if (text_pos >= state->text_length) - return TRUE; - - /* No line break inside CRLF. */ - return state->char_at(state->text, text_pos) != 0x0A; - } - - return 0x0A <= ch && ch <= 0x0D; -} - -/* Checks whether a position is at the end of a line. 
*/ -static BOOL ascii_at_line_end(RE_State* state, Py_ssize_t text_pos) { - Py_UCS4 ch; - - if (text_pos >= state->text_length) - return TRUE; - - ch = state->char_at(state->text, text_pos); - - if (ch == 0x0A) { - if (text_pos <= 0) - return TRUE; - - /* No line break inside CRLF. */ - return state->char_at(state->text, text_pos - 1) != 0x0D; - } - - return 0x0A <= ch && ch <= 0x0D; -} - -/* Checks whether a character could be Turkic (variants of I/i). For ASCII, it - * won't be. - */ -static BOOL ascii_possible_turkic(RE_LocaleInfo* locale_info, Py_UCS4 ch) { - return FALSE; -} - -/* Gets all the cases of a character. */ -static int ascii_all_cases(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* - codepoints) { - int count; - - count = 0; - - codepoints[count++] = ch; - - if (('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z')) - /* It's a letter, so add the other case. */ - codepoints[count++] = ch ^ 0x20; - - return count; -} - -/* Returns a character with its case folded. */ -static Py_UCS4 ascii_simple_case_fold(RE_LocaleInfo* locale_info, Py_UCS4 ch) { - if ('A' <= ch && ch <= 'Z') - /* Uppercase folds to lowercase. */ - return ch ^ 0x20; - - return ch; -} - -/* Returns a character with its case folded. */ -static int ascii_full_case_fold(RE_LocaleInfo* locale_info, Py_UCS4 ch, - Py_UCS4* folded) { - if ('A' <= ch && ch <= 'Z') - /* Uppercase folds to lowercase. */ - folded[0] = ch ^ 0x20; - else - folded[0] = ch; - - return 1; -} - -/* Gets all the case variants of Turkic 'I'. The given character will be listed - * first. - */ -static int ascii_all_turkic_i(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* - cases) { - int count; - - count = 0; - - cases[count++] = ch; - - if (ch != 'I') - cases[count++] = 'I'; - - if (ch != 'i') - cases[count++] = 'i'; - - return count; -} - -/* The handlers for ASCII characters. 
*/ -static RE_EncodingTable ascii_encoding = { - ascii_has_property_wrapper, - ascii_at_boundary, - ascii_at_word_start, - ascii_at_word_end, - ascii_at_boundary, /* No special "default word boundary" for ASCII. */ - ascii_at_word_start, /* No special "default start of word" for ASCII. */ - ascii_at_word_end, /* No special "default end of a word" for ASCII. */ - at_boundary_always, /* No special "grapheme boundary" for ASCII. */ - ascii_is_line_sep, - ascii_at_line_start, - ascii_at_line_end, - ascii_possible_turkic, - ascii_all_cases, - ascii_simple_case_fold, - ascii_full_case_fold, - ascii_all_turkic_i, -}; - -/* Locale-specific. */ - -/* Checks whether a character has the 'alnum' property in the given locale. */ -Py_LOCAL_INLINE(BOOL) locale_isalnum(RE_LocaleInfo* locale_info, Py_UCS4 ch) { - return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & - RE_LOCALE_ALNUM) != 0; -} - -/* Checks whether a character has the 'alpha' property in the given locale. */ -Py_LOCAL_INLINE(BOOL) locale_isalpha(RE_LocaleInfo* locale_info, Py_UCS4 ch) { - return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & - RE_LOCALE_ALPHA) != 0; -} - -/* Checks whether a character has the 'cntrl' property in the given locale. */ -Py_LOCAL_INLINE(BOOL) locale_iscntrl(RE_LocaleInfo* locale_info, Py_UCS4 ch) { - return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & - RE_LOCALE_CNTRL) != 0; -} - -/* Checks whether a character has the 'digit' property in the given locale. */ -Py_LOCAL_INLINE(BOOL) locale_isdigit(RE_LocaleInfo* locale_info, Py_UCS4 ch) { - return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & - RE_LOCALE_DIGIT) != 0; -} - -/* Checks whether a character has the 'graph' property in the given locale. */ -Py_LOCAL_INLINE(BOOL) locale_isgraph(RE_LocaleInfo* locale_info, Py_UCS4 ch) { - return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & - RE_LOCALE_GRAPH) != 0; -} - -/* Checks whether a character has the 'lower' property in the given locale. 
*/ -Py_LOCAL_INLINE(BOOL) locale_islower(RE_LocaleInfo* locale_info, Py_UCS4 ch) { - return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & - RE_LOCALE_LOWER) != 0; -} - -/* Checks whether a character has the 'print' property in the given locale. */ -Py_LOCAL_INLINE(BOOL) locale_isprint(RE_LocaleInfo* locale_info, Py_UCS4 ch) { - return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & - RE_LOCALE_PRINT) != 0; -} - -/* Checks whether a character has the 'punct' property in the given locale. */ -Py_LOCAL_INLINE(BOOL) locale_ispunct(RE_LocaleInfo* locale_info, Py_UCS4 ch) { - return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & - RE_LOCALE_PUNCT) != 0; -} - -/* Checks whether a character has the 'space' property in the given locale. */ -Py_LOCAL_INLINE(BOOL) locale_isspace(RE_LocaleInfo* locale_info, Py_UCS4 ch) { - return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & - RE_LOCALE_SPACE) != 0; -} - -/* Checks whether a character has the 'upper' property in the given locale. */ -Py_LOCAL_INLINE(BOOL) locale_isupper(RE_LocaleInfo* locale_info, Py_UCS4 ch) { - return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & - RE_LOCALE_UPPER) != 0; -} - -/* Converts a character to lowercase in the given locale. */ -Py_LOCAL_INLINE(Py_UCS4) locale_tolower(RE_LocaleInfo* locale_info, Py_UCS4 ch) - { - return ch <= RE_LOCALE_MAX ? locale_info->lowercase[ch] : ch; -} - -/* Converts a character to uppercase in the given locale. */ -Py_LOCAL_INLINE(Py_UCS4) locale_toupper(RE_LocaleInfo* locale_info, Py_UCS4 ch) - { - return ch <= RE_LOCALE_MAX ? locale_info->uppercase[ch] : ch; -} - -/* Checks whether a character has a property. */ -Py_LOCAL_INLINE(BOOL) locale_has_property(RE_LocaleInfo* locale_info, RE_CODE - property, Py_UCS4 ch) { - RE_UINT32 value; - RE_UINT32 v; - - value = property & 0xFFFF; - - if (ch > RE_LOCALE_MAX) - /* Outside the locale range. 
*/ - return value == 0; - - switch (property >> 16) { - case RE_PROP_ALNUM >> 16: - v = locale_isalnum(locale_info, ch) != 0; - break; - case RE_PROP_ALPHA >> 16: - v = locale_isalpha(locale_info, ch) != 0; - break; - case RE_PROP_ANY >> 16: - v = 1; - break; - case RE_PROP_ASCII >> 16: - v = ch <= RE_ASCII_MAX; - break; - case RE_PROP_BLANK >> 16: - v = ch == '\t' || ch == ' '; - break; - case RE_PROP_GC: - switch (property) { - case RE_PROP_ASSIGNED: - v = ch <= RE_LOCALE_MAX; - break; - case RE_PROP_CASEDLETTER: - v = locale_isalpha(locale_info, ch) ? value : 0xFFFF; - break; - case RE_PROP_CNTRL: - v = locale_iscntrl(locale_info, ch) ? value : 0xFFFF; - break; - case RE_PROP_DIGIT: - v = locale_isdigit(locale_info, ch) ? value : 0xFFFF; - break; - case RE_PROP_GC_CN: - v = ch > RE_LOCALE_MAX; - break; - case RE_PROP_GC_LL: - v = locale_islower(locale_info, ch) ? value : 0xFFFF; - break; - case RE_PROP_GC_LU: - v = locale_isupper(locale_info, ch) ? value : 0xFFFF; - break; - case RE_PROP_GC_P: - v = locale_ispunct(locale_info, ch) ? 
value : 0xFFFF; - break; - default: - v = 0xFFFF; - break; - } - break; - case RE_PROP_GRAPH >> 16: - v = locale_isgraph(locale_info, ch) != 0; - break; - case RE_PROP_LOWER >> 16: - v = locale_islower(locale_info, ch) != 0; - break; - case RE_PROP_POSIX_ALNUM >> 16: - v = re_get_posix_alnum(ch) != 0; - break; - case RE_PROP_POSIX_DIGIT >> 16: - v = re_get_posix_digit(ch) != 0; - break; - case RE_PROP_POSIX_PUNCT >> 16: - v = re_get_posix_punct(ch) != 0; - break; - case RE_PROP_POSIX_XDIGIT >> 16: - v = re_get_posix_xdigit(ch) != 0; - break; - case RE_PROP_PRINT >> 16: - v = locale_isprint(locale_info, ch) != 0; - break; - case RE_PROP_SPACE >> 16: - v = locale_isspace(locale_info, ch) != 0; - break; - case RE_PROP_UPPER >> 16: - v = locale_isupper(locale_info, ch) != 0; - break; - case RE_PROP_WORD >> 16: - v = ch == '_' || locale_isalnum(locale_info, ch) != 0; - break; - case RE_PROP_XDIGIT >> 16: - v = re_get_hex_digit(ch) != 0; - break; - default: - v = 0; - break; - } - - return v == value; -} - -/* Wrapper for calling 'locale_has_property' via a pointer. */ -static BOOL locale_has_property_wrapper(RE_LocaleInfo* locale_info, RE_CODE - property, Py_UCS4 ch) { - return locale_has_property(locale_info, property, ch); -} - -/* Checks whether there's a word character to the left. */ -Py_LOCAL_INLINE(BOOL) locale_word_left(RE_State* state, Py_ssize_t text_pos) { - return text_pos > 0 && locale_has_property(state->locale_info, - RE_PROP_WORD, state->char_at(state->text, text_pos - 1)); -} - -/* Checks whether there's a word character to the right. */ -Py_LOCAL_INLINE(BOOL) locale_word_right(RE_State* state, Py_ssize_t text_pos) { - return text_pos < state->text_length && - locale_has_property(state->locale_info, RE_PROP_WORD, - state->char_at(state->text, text_pos)); -} - -/* Checks whether a position is on a word boundary. 
*/ -static BOOL locale_at_boundary(RE_State* state, Py_ssize_t text_pos) { - BOOL left; - BOOL right; - - left = locale_word_left(state, text_pos); - right = locale_word_right(state, text_pos); - - return left != right; -} - -/* Checks whether a position is at the start of a word. */ -static BOOL locale_at_word_start(RE_State* state, Py_ssize_t text_pos) { - BOOL left; - BOOL right; - - left = locale_word_left(state, text_pos); - right = locale_word_right(state, text_pos); - - return !left && right; -} - -/* Checks whether a position is at the end of a word. */ -static BOOL locale_at_word_end(RE_State* state, Py_ssize_t text_pos) { - BOOL left; - BOOL right; - - left = locale_word_left(state, text_pos); - right = locale_word_right(state, text_pos); - - return left && !right; -} - -/* Checks whether a character could be Turkic (variants of I/i). */ -static BOOL locale_possible_turkic(RE_LocaleInfo* locale_info, Py_UCS4 ch) { - return locale_toupper(locale_info, ch) == 'I' || - locale_tolower(locale_info, ch) == 'i'; -} - -/* Gets all the cases of a character. */ -static int locale_all_cases(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* - codepoints) { - int count; - Py_UCS4 other; - - count = 0; - - codepoints[count++] = ch; - - other = locale_toupper(locale_info, ch); - if (other != ch) - codepoints[count++] = other; - - other = locale_tolower(locale_info, ch); - if (other != ch) - codepoints[count++] = other; - - return count; -} - -/* Returns a character with its case folded. */ -static Py_UCS4 locale_simple_case_fold(RE_LocaleInfo* locale_info, Py_UCS4 ch) - { - return locale_tolower(locale_info, ch); -} - -/* Returns a character with its case folded. */ -static int locale_full_case_fold(RE_LocaleInfo* locale_info, Py_UCS4 ch, - Py_UCS4* folded) { - folded[0] = locale_tolower(locale_info, ch); - - return 1; -} - -/* Gets all the case variants of Turkic 'I'. The given character will be listed - * first. 
- */ -static int locale_all_turkic_i(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* - cases) { - int count; - Py_UCS4 other; - - count = 0; - - cases[count++] = ch; - - if (ch != 'I') - cases[count++] = 'I'; - - if (ch != 'i') - cases[count++] = 'i'; - - /* Uppercase 'i' will be either dotted (Turkic) or dotless (non-Turkic). */ - other = locale_toupper(locale_info, 'i'); - if (other != ch && other != 'I') - cases[count++] = other; - - /* Lowercase 'I' will be either dotless (Turkic) or dotted (non-Turkic). */ - other = locale_tolower(locale_info, 'I'); - if (other != ch && other != 'i') - cases[count++] = other; - - return count; -} - -/* The handlers for locale characters. */ -static RE_EncodingTable locale_encoding = { - locale_has_property_wrapper, - locale_at_boundary, - locale_at_word_start, - locale_at_word_end, - locale_at_boundary, /* No special "default word boundary" for locale. */ - locale_at_word_start, /* No special "default start of a word" for locale. */ - locale_at_word_end, /* No special "default end of a word" for locale. */ - at_boundary_always, /* No special "grapheme boundary" for locale. */ - ascii_is_line_sep, /* Assume locale line separators are same as ASCII. */ - ascii_at_line_start, /* Assume locale line separators are same as ASCII. */ - ascii_at_line_end, /* Assume locale line separators are same as ASCII. */ - locale_possible_turkic, - locale_all_cases, - locale_simple_case_fold, - locale_full_case_fold, - locale_all_turkic_i, -}; - -/* Unicode-specific. */ - -/* Checks whether a Unicode character has a property. 
*/ -Py_LOCAL_INLINE(BOOL) unicode_has_property(RE_CODE property, Py_UCS4 ch) { - RE_UINT32 prop; - RE_UINT32 value; - RE_UINT32 v; - - prop = property >> 16; - if (prop >= sizeof(re_get_property) / sizeof(re_get_property[0])) - return FALSE; - - value = property & 0xFFFF; - v = re_get_property[prop](ch); - - if (v == value) - return TRUE; - - if (prop == RE_PROP_GC) { - switch (value) { - case RE_PROP_ASSIGNED: - return v != RE_PROP_CN; - case RE_PROP_C: - return (RE_PROP_C_MASK & (1 << v)) != 0; - case RE_PROP_CASEDLETTER: - return v == RE_PROP_LU || v == RE_PROP_LL || v == RE_PROP_LT; - case RE_PROP_L: - return (RE_PROP_L_MASK & (1 << v)) != 0; - case RE_PROP_M: - return (RE_PROP_M_MASK & (1 << v)) != 0; - case RE_PROP_N: - return (RE_PROP_N_MASK & (1 << v)) != 0; - case RE_PROP_P: - return (RE_PROP_P_MASK & (1 << v)) != 0; - case RE_PROP_S: - return (RE_PROP_S_MASK & (1 << v)) != 0; - case RE_PROP_Z: - return (RE_PROP_Z_MASK & (1 << v)) != 0; - } - } - - return FALSE; -} - -/* Wrapper for calling 'unicode_has_property' via a pointer. */ -static BOOL unicode_has_property_wrapper(RE_LocaleInfo* locale_info, RE_CODE - property, Py_UCS4 ch) { - return unicode_has_property(property, ch); -} - -/* Checks whether there's a word character to the left. */ -Py_LOCAL_INLINE(BOOL) unicode_word_left(RE_State* state, Py_ssize_t text_pos) { - return text_pos > 0 && unicode_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos - 1)); -} - -/* Checks whether there's a word character to the right. */ -Py_LOCAL_INLINE(BOOL) unicode_word_right(RE_State* state, Py_ssize_t text_pos) - { - return text_pos < state->text_length && unicode_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos)); -} - -/* Checks whether a position is on a word boundary. 
*/ -static BOOL unicode_at_boundary(RE_State* state, Py_ssize_t text_pos) { - BOOL left; - BOOL right; - - left = unicode_word_left(state, text_pos); - right = unicode_word_right(state, text_pos); - - return left != right; -} - -/* Checks whether a position is at the start of a word. */ -static BOOL unicode_at_word_start(RE_State* state, Py_ssize_t text_pos) { - BOOL left; - BOOL right; - - left = unicode_word_left(state, text_pos); - right = unicode_word_right(state, text_pos); - - return !left && right; -} - -/* Checks whether a position is at the end of a word. */ -static BOOL unicode_at_word_end(RE_State* state, Py_ssize_t text_pos) { - BOOL left; - BOOL right; - - left = unicode_word_left(state, text_pos); - right = unicode_word_right(state, text_pos); - - return left && !right; -} - -/* Checks whether a character is a Unicode vowel. - * - * Only a limited number are treated as vowels. - */ -Py_LOCAL_INLINE(BOOL) is_unicode_vowel(Py_UCS4 ch) { - switch (Py_UNICODE_TOLOWER((Py_UNICODE)ch)) { - case 'a': case 0xE0: case 0xE1: case 0xE2: - case 'e': case 0xE8: case 0xE9: case 0xEA: - case 'i': case 0xEC: case 0xED: case 0xEE: - case 'o': case 0xF2: case 0xF3: case 0xF4: - case 'u': case 0xF9: case 0xFA: case 0xFB: - return TRUE; - default: - return FALSE; - } -} - -/* Checks whether a character is a Unicode apostrophe. - * - * This could be U+0027 (APOSTROPHE) or U+2019 (RIGHT SINGLE QUOTATION MARK / - * curly apostrophe). - */ -static BOOL is_unicode_apostrophe(Py_UCS4 ch) { - return ch == 0x27 || ch == 0x2019; -} - -/* Checks whether a position is on a default word boundary. 
- * - * The rules are defined here: - * http://www.unicode.org/reports/tr29/#Default_Word_Boundaries - */ -static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) { - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - int prop; - int prop_m1; - Py_ssize_t pos_m1; - Py_ssize_t pos_m2; - int prop_m2; - Py_ssize_t pos_p0; - int prop_p0; - Py_ssize_t pos_p1; - int prop_p1; - - /* Break at the start and end of the text, unless the text is empty. */ - if (state->text_length == 0) - return FALSE; - - /* WB1 */ - if (text_pos <= 0) - return TRUE; - - /* WB2 */ - if (text_pos >= state->text_length) - return TRUE; - - char_at = state->char_at; - - prop = (int)re_get_word_break(char_at(state->text, text_pos)); - prop_m1 = (int)re_get_word_break(char_at(state->text, text_pos - 1)); - - /* Don't break within CRLF. */ - /* WB3 */ - if (prop_m1 == RE_BREAK_CR && prop == RE_BREAK_LF) - return FALSE; - - /* Otherwise break before and after Newlines (including CR and LF). */ - /* WB3a */ - if (prop_m1 == RE_BREAK_NEWLINE || prop_m1 == RE_BREAK_CR || prop_m1 == - RE_BREAK_LF) - return TRUE; - - /* WB3b */ - if (prop == RE_BREAK_NEWLINE || prop == RE_BREAK_CR || prop == RE_BREAK_LF) - return TRUE; - - /* Don't break within emoji zwj sequences. */ - /* WB3c */ - if (prop_m1 == RE_BREAK_ZWJ && (prop == RE_BREAK_GLUEAFTERZWJ || prop == - RE_BREAK_EBASEGAZ)) - return FALSE; - - /* WB4 */ - /* Get the property of the previous character, ignoring Format and Extend - * characters. - */ - pos_m1 = text_pos - 1; - prop_m1 = RE_BREAK_OTHER; - while (pos_m1 >= 0) { - prop_m1 = (int)re_get_word_break(char_at(state->text, pos_m1)); - if (prop_m1 != RE_BREAK_EXTEND && prop_m1 != RE_BREAK_FORMAT && prop_m1 - != RE_BREAK_ZWJ) - break; - - --pos_m1; - } - - /* Get the property of the preceding character, ignoring Format and Extend - * characters. 
- */ - pos_m2 = pos_m1 - 1; - prop_m2 = RE_BREAK_OTHER; - while (pos_m2 >= 0) { - prop_m2 = (int)re_get_word_break(char_at(state->text, pos_m2)); - if (prop_m2 != RE_BREAK_EXTEND && prop_m2 != RE_BREAK_FORMAT && prop_m2 - != RE_BREAK_ZWJ) - break; - - --pos_m2; - } - - /* Get the property of the next character, ignoring Format and Extend - * characters. - */ - pos_p0 = text_pos; - prop_p0 = prop; - while (pos_p0 < state->text_length) { - prop_p0 = (int)re_get_word_break(char_at(state->text, pos_p0)); - if (prop_p0 != RE_BREAK_EXTEND && prop_p0 != RE_BREAK_FORMAT && prop_p0 - != RE_BREAK_ZWJ) - break; - - ++pos_p0; - } - - /* Get the property of the following character, ignoring Format and Extend - * characters. - */ - pos_p1 = pos_p0 + 1; - prop_p1 = RE_BREAK_OTHER; - while (pos_p1 < state->text_length) { - prop_p1 = (int)re_get_word_break(char_at(state->text, pos_p1)); - if (prop_p1 != RE_BREAK_EXTEND && prop_p1 != RE_BREAK_FORMAT && prop_p1 - != RE_BREAK_ZWJ) - break; - - ++pos_p1; - } - - /* Don't break between most letters. */ - /* WB5 */ - if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER) && - (prop_p0 == RE_BREAK_ALETTER || prop_p0 == RE_BREAK_HEBREWLETTER)) - return FALSE; - - /* Break between apostrophe and vowels (French, Italian). */ - /* WB5a */ - if (pos_m1 >= 0 && is_unicode_apostrophe(char_at(state->text, pos_m1)) && - is_unicode_vowel(char_at(state->text, text_pos))) - return TRUE; - - /* Don't break letters across certain punctuation. 
*/ - /* WB6 */ - if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER) && - (prop_p0 == RE_BREAK_MIDLETTER || prop_p0 == RE_BREAK_MIDNUMLET || - prop_p0 == RE_BREAK_SINGLEQUOTE) && (prop_p1 == RE_BREAK_ALETTER || - prop_p1 == RE_BREAK_HEBREWLETTER)) - return FALSE; - - /* WB7 */ - if ((prop_m2 == RE_BREAK_ALETTER || prop_m2 == RE_BREAK_HEBREWLETTER) && - (prop_m1 == RE_BREAK_MIDLETTER || prop_m1 == RE_BREAK_MIDNUMLET || - prop_m1 == RE_BREAK_SINGLEQUOTE) && (prop_p0 == RE_BREAK_ALETTER || - prop_p0 == RE_BREAK_HEBREWLETTER)) - return FALSE; - - /* WB7a */ - if (prop_m1 == RE_BREAK_HEBREWLETTER && prop_p0 == RE_BREAK_SINGLEQUOTE) - return FALSE; - - /* WB7b */ - if (prop_m1 == RE_BREAK_HEBREWLETTER && prop_p0 == RE_BREAK_DOUBLEQUOTE && - prop_p1 == RE_BREAK_HEBREWLETTER) - return FALSE; - - /* WB7c */ - if (prop_m2 == RE_BREAK_HEBREWLETTER && prop_m1 == RE_BREAK_DOUBLEQUOTE && - prop_p0 == RE_BREAK_HEBREWLETTER) - return FALSE; - - /* Don't break within sequences of digits, or digits adjacent to letters - * ("3a", or "A3"). - */ - /* WB8 */ - if (prop_m1 == RE_BREAK_NUMERIC && prop_p0 == RE_BREAK_NUMERIC) - return FALSE; - - /* WB9 */ - if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER) && - prop_p0 == RE_BREAK_NUMERIC) - return FALSE; - - /* WB10 */ - if (prop_m1 == RE_BREAK_NUMERIC && (prop_p0 == RE_BREAK_ALETTER || prop_p0 - == RE_BREAK_HEBREWLETTER)) - return FALSE; - - /* Don't break within sequences, such as "3.2" or "3,456.789". */ - /* WB11 */ - if (prop_m2 == RE_BREAK_NUMERIC && (prop_m1 == RE_BREAK_MIDNUM || prop_m1 - == RE_BREAK_MIDNUMLET || prop_m1 == RE_BREAK_SINGLEQUOTE) && prop_p0 == - RE_BREAK_NUMERIC) - return FALSE; - - /* WB12 */ - if (prop_m1 == RE_BREAK_NUMERIC && (prop_p0 == RE_BREAK_MIDNUM || prop_p0 - == RE_BREAK_MIDNUMLET || prop_p0 == RE_BREAK_SINGLEQUOTE) && prop_p1 == - RE_BREAK_NUMERIC) - return FALSE; - - /* Don't break between Katakana. 
*/ - /* WB13 */ - if (prop_m1 == RE_BREAK_KATAKANA && prop_p0 == RE_BREAK_KATAKANA) - return FALSE; - - /* Don't break from extenders. */ - /* WB13a */ - if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER || - prop_m1 == RE_BREAK_NUMERIC || prop_m1 == RE_BREAK_KATAKANA || prop_m1 == - RE_BREAK_EXTENDNUMLET) && prop_p0 == RE_BREAK_EXTENDNUMLET) - return FALSE; - - /* WB13b */ - if (prop_m1 == RE_BREAK_EXTENDNUMLET && (prop_p0 == RE_BREAK_ALETTER || - prop_p0 == RE_BREAK_HEBREWLETTER || prop_p0 == RE_BREAK_NUMERIC || - prop_p0 == RE_BREAK_KATAKANA)) - return FALSE; - - /* Don't break within emoji modifier sequences. */ - /* WB14 */ - if ((prop_m1 == RE_BREAK_EBASE || prop_m1 == RE_BREAK_EBASEGAZ) && prop_p0 - == RE_BREAK_EMODIFIER) - return FALSE; - - /* Don't break within emoji flag sequences. That is, don't break between - * regional indicator (RI) symbols if there is an odd number of RI - * characters before the break point. - */ - /* WB15 and WB16 */ - prop = (int)re_get_word_break(char_at(state->text, text_pos)); - if (prop == RE_BREAK_REGIONALINDICATOR) { - Py_ssize_t pos; - - pos = text_pos - 1; - while (pos >= 0) { - prop = (int)re_get_word_break(char_at(state->text, pos)); - if (prop != RE_BREAK_REGIONALINDICATOR) - break; - - --pos; - } - ++pos; - - if ((text_pos - pos) % 2 != 0) - return FALSE; - } - - /* Otherwise, break everywhere (including around ideographs). */ - /* WB999 */ - return TRUE; -} - -/* Checks whether a position is at the start/end of a word. */ -Py_LOCAL_INLINE(BOOL) unicode_at_default_word_start_or_end(RE_State* state, - Py_ssize_t text_pos, BOOL at_start) { - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - BOOL before; - BOOL after; - Py_UCS4 char_0; - Py_UCS4 char_m1; - int prop; - int prop_m1; - Py_ssize_t pos_m1; - Py_ssize_t pos_p1; - int prop_p1; - Py_UCS4 char_p1; - Py_ssize_t pos_m2; - int prop_m2; - Py_UCS4 char_m2; - - char_at = state->char_at; - - /* At the start or end of the text. 
*/ - if (text_pos <= 0 || text_pos >= state->text_length) { - before = unicode_word_left(state, text_pos); - after = unicode_word_right(state, text_pos); - - return before != at_start && after == at_start; - } - - char_0 = char_at(state->text, text_pos); - char_m1 = char_at(state->text, text_pos - 1); - prop = (int)re_get_word_break(char_0); - prop_m1 = (int)re_get_word_break(char_m1); - - /* No break within CRLF. */ - if (prop_m1 == RE_BREAK_CR && prop == RE_BREAK_LF) - return FALSE; - - /* Break before and after Newlines (including CR and LF). */ - if (prop_m1 == RE_BREAK_NEWLINE || prop_m1 == RE_BREAK_CR || prop_m1 == - RE_BREAK_LF || prop == RE_BREAK_NEWLINE || prop == RE_BREAK_CR || prop == - RE_BREAK_LF) { - before = unicode_has_property(RE_PROP_WORD, char_m1); - after = unicode_has_property(RE_PROP_WORD, char_0); - - return before != at_start && after == at_start; - } - - /* No break just before Format or Extend characters. */ - if (prop == RE_BREAK_EXTEND || prop == RE_BREAK_FORMAT) - return FALSE; - - /* Get the property of the previous character. */ - pos_m1 = text_pos - 1; - prop_m1 = RE_BREAK_OTHER; - while (pos_m1 >= 0) { - char_m1 = char_at(state->text, pos_m1); - prop_m1 = (int)re_get_word_break(char_m1); - if (prop_m1 != RE_BREAK_EXTEND && prop_m1 != RE_BREAK_FORMAT) - break; - - --pos_m1; - } - - /* No break between most letters. */ - if (prop_m1 == RE_BREAK_ALETTER && prop == RE_BREAK_ALETTER) - return FALSE; - - if (pos_m1 >= 0 && is_unicode_apostrophe(char_m1) && - is_unicode_vowel(char_0)) - return TRUE; - - pos_p1 = text_pos + 1; - prop_p1 = RE_BREAK_OTHER; - while (pos_p1 < state->text_length) { - char_p1 = char_at(state->text, pos_p1); - prop_p1 = (int)re_get_word_break(char_p1); - if (prop_p1 != RE_BREAK_EXTEND && prop_p1 != RE_BREAK_FORMAT) - break; - - ++pos_p1; - } - - /* No break letters across certain punctuation. 
*/ - if (prop_m1 == RE_BREAK_ALETTER && (prop == RE_BREAK_MIDLETTER || prop == - RE_BREAK_MIDNUMLET) && prop_p1 == RE_BREAK_ALETTER) - return FALSE; - - pos_m2 = pos_m1 - 1; - prop_m2 = RE_BREAK_OTHER; - while (pos_m2 >= 0) { - char_m2 = char_at(state->text, pos_m2); - prop_m2 = (int)re_get_word_break(char_m2); - if (prop_m2 != RE_BREAK_EXTEND && prop_m1 != RE_BREAK_FORMAT) - break; - - --pos_m2; - } - - if (prop_m2 == RE_BREAK_ALETTER && (prop_m1 == RE_BREAK_MIDLETTER || - prop_m1 == RE_BREAK_MIDNUMLET) && prop == RE_BREAK_ALETTER) - return FALSE; - - /* No break within sequences of digits, or digits adjacent to letters - * ("3a", or "A3"). - */ - if ((prop_m1 == RE_BREAK_NUMERIC || prop_m1 == RE_BREAK_ALETTER) && prop == - RE_BREAK_NUMERIC) - return FALSE; - - if (prop_m1 == RE_BREAK_NUMERIC && prop == RE_BREAK_ALETTER) - return FALSE; - - /* No break within sequences, such as "3.2" or "3,456.789". */ - if (prop_m2 == RE_BREAK_NUMERIC && (prop_m1 == RE_BREAK_MIDNUM || prop_m1 - == RE_BREAK_MIDNUMLET) && prop == RE_BREAK_NUMERIC) - return FALSE; - - if (prop_m1 == RE_BREAK_NUMERIC && (prop == RE_BREAK_MIDNUM || prop == - RE_BREAK_MIDNUMLET) && prop_p1 == RE_BREAK_NUMERIC) - return FALSE; - - /* No break between Katakana. */ - if (prop_m1 == RE_BREAK_KATAKANA && prop == RE_BREAK_KATAKANA) - return FALSE; - - /* No break from extenders. */ - if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_NUMERIC || prop_m1 - == RE_BREAK_KATAKANA || prop_m1 == RE_BREAK_EXTENDNUMLET) && prop == - RE_BREAK_EXTENDNUMLET) - return FALSE; - - if (prop_m1 == RE_BREAK_EXTENDNUMLET && (prop == RE_BREAK_ALETTER || prop - == RE_BREAK_NUMERIC || prop == RE_BREAK_KATAKANA)) - return FALSE; - - /* Otherwise, break everywhere (including around ideographs). */ - before = unicode_has_property(RE_PROP_WORD, char_m1); - after = unicode_has_property(RE_PROP_WORD, char_0); - - return before != at_start && after == at_start; -} - -/* Checks whether a position is at the start of a word. 
*/ -static BOOL unicode_at_default_word_start(RE_State* state, Py_ssize_t text_pos) - { - return unicode_at_default_word_start_or_end(state, text_pos, TRUE); -} - -/* Checks whether a position is at the end of a word. */ -static BOOL unicode_at_default_word_end(RE_State* state, Py_ssize_t text_pos) { - return unicode_at_default_word_start_or_end(state, text_pos, FALSE); -} - -/* Checks whether a position is on a grapheme boundary. - * - * The rules are defined here: - * http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries - */ -static BOOL unicode_at_grapheme_boundary(RE_State* state, Py_ssize_t text_pos) - { - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - int prop; - int prop_m1; - - /* Break at the start and end of text, unless the text is empty. */ - if (state->text_length == 0) - return FALSE; - - /* Break at the start and end of the text. */ - /* GB1 */ - if (text_pos <= 0) - return TRUE; - - /* GB2 */ - if (text_pos >= state->text_length) - return TRUE; - - char_at = state->char_at; - - prop = (int)re_get_grapheme_cluster_break(char_at(state->text, text_pos)); - prop_m1 = (int)re_get_grapheme_cluster_break(char_at(state->text, text_pos - - 1)); - - /* Don't break within CRLF. */ - /* GB3 */ - if (prop_m1 == RE_GBREAK_CR && prop == RE_GBREAK_LF) - return FALSE; - - /* Otherwise break before and after controls (including CR and LF). */ - /* GB4 */ - if (prop_m1 == RE_GBREAK_CONTROL || prop_m1 == RE_GBREAK_CR || prop_m1 == - RE_GBREAK_LF) - return TRUE; - - /* GB5 */ - if (prop == RE_GBREAK_CONTROL || prop == RE_GBREAK_CR || prop == - RE_GBREAK_LF) - return TRUE; - - /* Don't break Hangul syllable sequences. 
*/ - /* GB6 */ - if (prop_m1 == RE_GBREAK_L && (prop == RE_GBREAK_L || prop == RE_GBREAK_V - || prop == RE_GBREAK_LV || prop == RE_GBREAK_LVT)) - return FALSE; - - /* GB7 */ - if ((prop_m1 == RE_GBREAK_LV || prop_m1 == RE_GBREAK_V) && (prop == - RE_GBREAK_V || prop == RE_GBREAK_T)) - return FALSE; - - /* GB8 */ - if ((prop_m1 == RE_GBREAK_LVT || prop_m1 == RE_GBREAK_T) && (prop == - RE_GBREAK_T)) - return FALSE; - - /* Don't break just before Extend characters. */ - /* GB9 */ - if (prop == RE_GBREAK_EXTEND || prop == RE_GBREAK_ZWJ) - return FALSE; - - /* Don't break before SpacingMarks, or after Prepend characters. */ - /* GB9a */ - if (prop == RE_GBREAK_SPACINGMARK) - return FALSE; - - /* GB9b */ - if (prop_m1 == RE_GBREAK_PREPEND) - return FALSE; - - /* Don't break within emoji modifier sequences or emoji zwj sequences. */ - /* GB10 */ - if (prop == RE_GBREAK_EMODIFIER) { - Py_ssize_t pos; - - pos = text_pos - 1; - while (pos >= 0) { - int prev_prop; - - prev_prop = (int)re_get_grapheme_cluster_break(char_at(state->text, - pos)); - if (prev_prop != RE_GBREAK_EXTEND) { - if (prev_prop == RE_GBREAK_EBASE || prev_prop == - RE_GBREAK_EBASEGAZ) - return FALSE; - break; - } - --pos; - } - } - - /* GB11 */ - if (prop_m1 == RE_GBREAK_ZWJ && (prop == RE_GBREAK_GLUEAFTERZWJ || prop == - RE_GBREAK_EBASEGAZ)) - return FALSE; - - /* Don't break within emoji flag sequences. That is, don't break between - * regional indicator (RI) symbols if there is an odd number of RI - * characters before the break point. - */ - /* GB12 and GB13 */ - if (prop == RE_GBREAK_REGIONALINDICATOR) { - Py_ssize_t pos; - - pos = text_pos - 1; - while (pos >= 0) { - prop = (int)re_get_grapheme_cluster_break(char_at(state->text, - pos)); - if (prop != RE_GBREAK_REGIONALINDICATOR) - break; - - --pos; - } - ++pos; - - if ((text_pos - pos) % 2 != 0) - return FALSE; - } - - /* Otherwise, break everywhere. */ - /* GB999 */ - return TRUE; -} - -/* Checks whether a character is a line separator. 
*/ -static BOOL unicode_is_line_sep(Py_UCS4 ch) { - return (0x0A <= ch && ch <= 0x0D) || ch == 0x85 || ch == 0x2028 || ch == - 0x2029; -} - -/* Checks whether a position is at the start of a line. */ -static BOOL unicode_at_line_start(RE_State* state, Py_ssize_t text_pos) { - Py_UCS4 ch; - - if (text_pos <= 0) - return TRUE; - - ch = state->char_at(state->text, text_pos - 1); - - if (ch == 0x0D) { - if (text_pos >= state->text_length) - return TRUE; - - /* No line break inside CRLF. */ - return state->char_at(state->text, text_pos) != 0x0A; - } - - return (0x0A <= ch && ch <= 0x0D) || ch == 0x85 || ch == 0x2028 || ch == - 0x2029; -} - -/* Checks whether a position is at the end of a line. */ -static BOOL unicode_at_line_end(RE_State* state, Py_ssize_t text_pos) { - Py_UCS4 ch; - - if (text_pos >= state->text_length) - return TRUE; - - ch = state->char_at(state->text, text_pos); - - if (ch == 0x0A) { - if (text_pos <= 0) - return TRUE; - - /* No line break inside CRLF. */ - return state->char_at(state->text, text_pos - 1) != 0x0D; - } - - return (0x0A <= ch && ch <= 0x0D) || ch == 0x85 || ch == 0x2028 || ch == - 0x2029; -} - -/* Checks whether a character could be Turkic (variants of I/i). */ -static BOOL unicode_possible_turkic(RE_LocaleInfo* locale_info, Py_UCS4 ch) { - return ch == 'I' || ch == 'i' || ch == 0x0130 || ch == 0x0131; -} - -/* Gets all the cases of a character. */ -static int unicode_all_cases(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* - codepoints) { - return re_get_all_cases(ch, codepoints); -} - -/* Returns a character with its case folded, unless it could be Turkic - * (variants of I/i). - */ -static Py_UCS4 unicode_simple_case_fold(RE_LocaleInfo* locale_info, Py_UCS4 ch) - { - /* Is it a possible Turkic character? If so, pass it through unchanged. 
*/ - if (ch == 'I' || ch == 'i' || ch == 0x0130 || ch == 0x0131) - return ch; - - return (Py_UCS4)re_get_simple_case_folding(ch); -} - -/* Returns a character with its case folded, unless it could be Turkic - * (variants of I/i). - */ -static int unicode_full_case_fold(RE_LocaleInfo* locale_info, Py_UCS4 ch, - Py_UCS4* folded) { - /* Is it a possible Turkic character? If so, pass it through unchanged. */ - if (ch == 'I' || ch == 'i' || ch == 0x0130 || ch == 0x0131) { - folded[0] = ch; - return 1; - } - - return re_get_full_case_folding(ch, folded); -} - -/* Gets all the case variants of Turkic 'I'. */ -static int unicode_all_turkic_i(RE_LocaleInfo* locale_info, Py_UCS4 ch, - Py_UCS4* cases) { - int count; - - count = 0; - - cases[count++] = ch; - - if (ch != 'I') - cases[count++] = 'I'; - - if (ch != 'i') - cases[count++] = 'i'; - - if (ch != 0x130) - cases[count++] = 0x130; - - if (ch != 0x131) - cases[count++] = 0x131; - - return count; - -} - -/* The handlers for Unicode characters. */ -static RE_EncodingTable unicode_encoding = { - unicode_has_property_wrapper, - unicode_at_boundary, - unicode_at_word_start, - unicode_at_word_end, - unicode_at_default_boundary, - unicode_at_default_word_start, - unicode_at_default_word_end, - unicode_at_grapheme_boundary, - unicode_is_line_sep, - unicode_at_line_start, - unicode_at_line_end, - unicode_possible_turkic, - unicode_all_cases, - unicode_simple_case_fold, - unicode_full_case_fold, - unicode_all_turkic_i, -}; - -Py_LOCAL_INLINE(PyObject*) get_object(char* module_name, char* object_name); - -/* Sets the error message. 
*/ -Py_LOCAL_INLINE(void) set_error(int status, PyObject* object) { - TRACE(("<>\n")) - - PyErr_Clear(); - - if (!error_exception) - error_exception = get_object("_" RE_MODULE "_core", "error"); - - switch (status) { - case RE_ERROR_BACKTRACKING: - PyErr_SetString(error_exception, "too much backtracking"); - break; - case RE_ERROR_CONCURRENT: - PyErr_SetString(PyExc_ValueError, "concurrent not int or None"); - break; - case RE_ERROR_GROUP_INDEX_TYPE: - if (object) - PyErr_Format(PyExc_TypeError, - "group indices must be integers or strings, not %.200s", - object->ob_type->tp_name); - else - PyErr_Format(PyExc_TypeError, - "group indices must be integers or strings"); - break; - case RE_ERROR_ILLEGAL: - PyErr_SetString(PyExc_RuntimeError, "invalid RE code"); - break; - case RE_ERROR_INDEX: - PyErr_SetString(PyExc_TypeError, "string indices must be integers"); - break; - case RE_ERROR_INTERRUPTED: - /* An exception has already been raised, so let it fly. */ - break; - case RE_ERROR_INVALID_GROUP_REF: - PyErr_SetString(error_exception, "invalid group reference"); - break; - case RE_ERROR_MEMORY: - PyErr_NoMemory(); - break; - case RE_ERROR_NOT_STRING: - PyErr_Format(PyExc_TypeError, "expected string instance, %.200s found", - object->ob_type->tp_name); - break; - case RE_ERROR_NOT_UNICODE: - PyErr_Format(PyExc_TypeError, "expected unicode instance, not %.200s", - object->ob_type->tp_name); - break; - case RE_ERROR_NO_SUCH_GROUP: - PyErr_SetString(PyExc_IndexError, "no such group"); - break; - case RE_ERROR_REPLACEMENT: - PyErr_SetString(error_exception, "invalid replacement"); - break; - default: - /* Other error codes indicate compiler/engine bugs. */ - PyErr_SetString(PyExc_RuntimeError, - "internal error in regular expression engine"); - break; - } -} - -/* Allocates memory. - * - * Sets the Python error handler and returns NULL if the allocation fails. 
- */ -Py_LOCAL_INLINE(void*) re_alloc(size_t size) { - void* new_ptr; - - new_ptr = PyMem_Malloc(size); - if (!new_ptr) - set_error(RE_ERROR_MEMORY, NULL); - - return new_ptr; -} - -/* Reallocates memory. - * - * Sets the Python error handler and returns NULL if the reallocation fails. - */ -Py_LOCAL_INLINE(void*) re_realloc(void* ptr, size_t size) { - void* new_ptr; - - new_ptr = PyMem_Realloc(ptr, size); - if (!new_ptr) - set_error(RE_ERROR_MEMORY, NULL); - - return new_ptr; -} - -/* Deallocates memory. */ -Py_LOCAL_INLINE(void) re_dealloc(void* ptr) { - PyMem_Free(ptr); -} - -/* Releases the GIL if multithreading is enabled. */ -Py_LOCAL_INLINE(void) release_GIL(RE_SafeState* safe_state) { - if (safe_state->re_state->is_multithreaded) - safe_state->thread_state = PyEval_SaveThread(); -} - -/* Acquires the GIL if multithreading is enabled. */ -Py_LOCAL_INLINE(void) acquire_GIL(RE_SafeState* safe_state) { - if (safe_state->re_state->is_multithreaded) - PyEval_RestoreThread(safe_state->thread_state); -} - -/* Allocates memory, holding the GIL during the allocation. - * - * Sets the Python error handler and returns NULL if the allocation fails. - */ -Py_LOCAL_INLINE(void*) safe_alloc(RE_SafeState* safe_state, size_t size) { - void* new_ptr; - - acquire_GIL(safe_state); - - new_ptr = re_alloc(size); - - release_GIL(safe_state); - - return new_ptr; -} - -/* Reallocates memory, holding the GIL during the reallocation. - * - * Sets the Python error handler and returns NULL if the reallocation fails. - */ -Py_LOCAL_INLINE(void*) safe_realloc(RE_SafeState* safe_state, void* ptr, size_t - size) { - void* new_ptr; - - acquire_GIL(safe_state); - - new_ptr = re_realloc(ptr, size); - - release_GIL(safe_state); - - return new_ptr; -} - -/* Deallocates memory, holding the GIL during the deallocation. 
*/ -Py_LOCAL_INLINE(void) safe_dealloc(RE_SafeState* safe_state, void* ptr) { - acquire_GIL(safe_state); - - re_dealloc(ptr); - - release_GIL(safe_state); -} - -/* Checks for KeyboardInterrupt, holding the GIL during the check. */ -Py_LOCAL_INLINE(BOOL) safe_check_signals(RE_SafeState* safe_state) { - BOOL result; - - acquire_GIL(safe_state); - - result = (BOOL)PyErr_CheckSignals(); - - release_GIL(safe_state); - - return result; -} - -/* Checks whether a character is in a range. */ -Py_LOCAL_INLINE(BOOL) in_range(Py_UCS4 lower, Py_UCS4 upper, Py_UCS4 ch) { - return lower <= ch && ch <= upper; -} - -/* Checks whether a character is in a range, ignoring case. */ -Py_LOCAL_INLINE(BOOL) in_range_ign(RE_EncodingTable* encoding, RE_LocaleInfo* - locale_info, Py_UCS4 lower, Py_UCS4 upper, Py_UCS4 ch) { - int count; - Py_UCS4 cases[RE_MAX_CASES]; - int i; - - count = encoding->all_cases(locale_info, ch, cases); - - for (i = 0; i < count; i++) { - if (in_range(lower, upper, cases[i])) - return TRUE; - } - - return FALSE; -} - -/* Checks whether 2 characters are the same. */ -Py_LOCAL_INLINE(BOOL) same_char(Py_UCS4 ch1, Py_UCS4 ch2) { - return ch1 == ch2; -} - -/* Wrapper for calling 'same_char' via a pointer. */ -static BOOL same_char_wrapper(RE_EncodingTable* encoding, RE_LocaleInfo* - locale_info, Py_UCS4 ch1, Py_UCS4 ch2) { - return same_char(ch1, ch2); -} - -/* Checks whether 2 characters are the same, ignoring case. */ -Py_LOCAL_INLINE(BOOL) same_char_ign(RE_EncodingTable* encoding, RE_LocaleInfo* - locale_info, Py_UCS4 ch1, Py_UCS4 ch2) { - int count; - Py_UCS4 cases[RE_MAX_CASES]; - int i; - - if (ch1 == ch2) - return TRUE; - - count = encoding->all_cases(locale_info, ch1, cases); - - for (i = 1; i < count; i++) { - if (cases[i] == ch2) - return TRUE; - } - - return FALSE; -} - -/* Wrapper for calling 'same_char' via a pointer. 
*/ -static BOOL same_char_ign_wrapper(RE_EncodingTable* encoding, RE_LocaleInfo* - locale_info, Py_UCS4 ch1, Py_UCS4 ch2) { - return same_char_ign(encoding, locale_info, ch1, ch2); -} - -/* Checks whether a character is anything except a newline. */ -Py_LOCAL_INLINE(BOOL) matches_ANY(RE_EncodingTable* encoding, RE_Node* node, - Py_UCS4 ch) { - return ch != '\n'; -} - -/* Checks whether a character is anything except a line separator. */ -Py_LOCAL_INLINE(BOOL) matches_ANY_U(RE_EncodingTable* encoding, RE_Node* node, - Py_UCS4 ch) { - return !encoding->is_line_sep(ch); -} - -/* Checks whether 2 characters are the same. */ -Py_LOCAL_INLINE(BOOL) matches_CHARACTER(RE_EncodingTable* encoding, - RE_LocaleInfo* locale_info, RE_Node* node, Py_UCS4 ch) { - return same_char(node->values[0], ch); -} - -/* Checks whether 2 characters are the same, ignoring case. */ -Py_LOCAL_INLINE(BOOL) matches_CHARACTER_IGN(RE_EncodingTable* encoding, - RE_LocaleInfo* locale_info, RE_Node* node, Py_UCS4 ch) { - return same_char_ign(encoding, locale_info, node->values[0], ch); -} - -/* Checks whether a character has a property. */ -Py_LOCAL_INLINE(BOOL) matches_PROPERTY(RE_EncodingTable* encoding, - RE_LocaleInfo* locale_info, RE_Node* node, Py_UCS4 ch) { - return encoding->has_property(locale_info, node->values[0], ch); -} - -/* Checks whether a character has a property, ignoring case. */ -Py_LOCAL_INLINE(BOOL) matches_PROPERTY_IGN(RE_EncodingTable* encoding, - RE_LocaleInfo* locale_info, RE_Node* node, Py_UCS4 ch) { - RE_UINT32 property; - RE_UINT32 prop; - - property = node->values[0]; - prop = property >> 16; - - /* We need to do special handling of case-sensitive properties according to - * the 'encoding'. - */ - if (encoding == &unicode_encoding) { - /* We are working with Unicode. 
*/ - if (property == RE_PROP_GC_LU || property == RE_PROP_GC_LL || property - == RE_PROP_GC_LT) { - RE_UINT32 value; - - value = re_get_general_category(ch); - - return value == RE_PROP_LU || value == RE_PROP_LL || value == - RE_PROP_LT; - } else if (prop == RE_PROP_UPPERCASE || prop == RE_PROP_LOWERCASE) - return (BOOL)re_get_cased(ch); - - /* The property is case-insensitive. */ - return unicode_has_property(property, ch); - } else if (encoding == &ascii_encoding) { - /* We are working with ASCII. */ - if (property == RE_PROP_GC_LU || property == RE_PROP_GC_LL || property - == RE_PROP_GC_LT) { - RE_UINT32 value; - - value = re_get_general_category(ch); - - return value == RE_PROP_LU || value == RE_PROP_LL || value == - RE_PROP_LT; - } else if (prop == RE_PROP_UPPERCASE || prop == RE_PROP_LOWERCASE) - return (BOOL)re_get_cased(ch); - - /* The property is case-insensitive. */ - return ascii_has_property(property, ch); - } else { - /* We are working with Locale. */ - if (property == RE_PROP_GC_LU || property == RE_PROP_GC_LL || property - == RE_PROP_GC_LT) - return locale_isupper(locale_info, ch) || - locale_islower(locale_info, ch); - else if (prop == RE_PROP_UPPERCASE || prop == RE_PROP_LOWERCASE) - return locale_isupper(locale_info, ch) || - locale_islower(locale_info, ch); - - /* The property is case-insensitive. */ - return locale_has_property(locale_info, property, ch); - } -} - -/* Checks whether a character is in a range. */ -Py_LOCAL_INLINE(BOOL) matches_RANGE(RE_EncodingTable* encoding, RE_LocaleInfo* - locale_info, RE_Node* node, Py_UCS4 ch) { - return in_range(node->values[0], node->values[1], ch); -} - -/* Checks whether a character is in a range, ignoring case. 
*/ -Py_LOCAL_INLINE(BOOL) matches_RANGE_IGN(RE_EncodingTable* encoding, - RE_LocaleInfo* locale_info, RE_Node* node, Py_UCS4 ch) { - return in_range_ign(encoding, locale_info, node->values[0], - node->values[1], ch); -} - -Py_LOCAL_INLINE(BOOL) in_set_diff(RE_EncodingTable* encoding, RE_LocaleInfo* - locale_info, RE_Node* node, Py_UCS4 ch); -Py_LOCAL_INLINE(BOOL) in_set_inter(RE_EncodingTable* encoding, RE_LocaleInfo* - locale_info, RE_Node* node, Py_UCS4 ch); -Py_LOCAL_INLINE(BOOL) in_set_sym_diff(RE_EncodingTable* encoding, - RE_LocaleInfo* locale_info, RE_Node* node, Py_UCS4 ch); -Py_LOCAL_INLINE(BOOL) in_set_union(RE_EncodingTable* encoding, RE_LocaleInfo* - locale_info, RE_Node* node, Py_UCS4 ch); - -/* Checks whether a character matches a set member. */ -Py_LOCAL_INLINE(BOOL) matches_member(RE_EncodingTable* encoding, RE_LocaleInfo* - locale_info, RE_Node* member, Py_UCS4 ch) { - switch (member->op) { - case RE_OP_CHARACTER: - /* values are: char_code */ - TRACE(("%s %d %d\n", re_op_text[member->op], member->match, - member->values[0])) - return ch == member->values[0]; - case RE_OP_PROPERTY: - /* values are: property */ - TRACE(("%s %d %d\n", re_op_text[member->op], member->match, - member->values[0])) - return encoding->has_property(locale_info, member->values[0], ch); - case RE_OP_RANGE: - /* values are: lower, upper */ - TRACE(("%s %d %d %d\n", re_op_text[member->op], member->match, - member->values[0], member->values[1])) - return in_range(member->values[0], member->values[1], ch); - case RE_OP_SET_DIFF: - TRACE(("%s\n", re_op_text[member->op])) - return in_set_diff(encoding, locale_info, member, ch); - case RE_OP_SET_INTER: - TRACE(("%s\n", re_op_text[member->op])) - return in_set_inter(encoding, locale_info, member, ch); - case RE_OP_SET_SYM_DIFF: - TRACE(("%s\n", re_op_text[member->op])) - return in_set_sym_diff(encoding, locale_info, member, ch); - case RE_OP_SET_UNION: - TRACE(("%s\n", re_op_text[member->op])) - return in_set_union(encoding, 
locale_info, member, ch); - case RE_OP_STRING: - { - /* values are: char_code, char_code, ... */ - size_t i; - TRACE(("%s %d %d\n", re_op_text[member->op], member->match, - member->value_count)) - - for (i = 0; i < member->value_count; i++) { - if (ch == member->values[i]) - return TRUE; - } - return FALSE; - } - default: - return FALSE; - } -} - -/* Checks whether a character matches a set member, ignoring case. */ -Py_LOCAL_INLINE(BOOL) matches_member_ign(RE_EncodingTable* encoding, - RE_LocaleInfo* locale_info, RE_Node* member, int case_count, Py_UCS4* cases) - { - int i; - - for (i = 0; i < case_count; i++) { - switch (member->op) { - case RE_OP_CHARACTER: - /* values are: char_code */ - TRACE(("%s %d %d\n", re_op_text[member->op], member->match, - member->values[0])) - if (cases[i] == member->values[0]) - return TRUE; - break; - case RE_OP_PROPERTY: - /* values are: property */ - TRACE(("%s %d %d\n", re_op_text[member->op], member->match, - member->values[0])) - if (encoding->has_property(locale_info, member->values[0], - cases[i])) - return TRUE; - break; - case RE_OP_RANGE: - /* values are: lower, upper */ - TRACE(("%s %d %d %d\n", re_op_text[member->op], member->match, - member->values[0], member->values[1])) - if (in_range(member->values[0], member->values[1], cases[i])) - return TRUE; - break; - case RE_OP_SET_DIFF: - TRACE(("%s\n", re_op_text[member->op])) - if (in_set_diff(encoding, locale_info, member, cases[i])) - return TRUE; - break; - case RE_OP_SET_INTER: - TRACE(("%s\n", re_op_text[member->op])) - if (in_set_inter(encoding, locale_info, member, cases[i])) - return TRUE; - break; - case RE_OP_SET_SYM_DIFF: - TRACE(("%s\n", re_op_text[member->op])) - if (in_set_sym_diff(encoding, locale_info, member, cases[i])) - return TRUE; - break; - case RE_OP_SET_UNION: - TRACE(("%s\n", re_op_text[member->op])) - if (in_set_union(encoding, locale_info, member, cases[i])) - return TRUE; - break; - case RE_OP_STRING: - { - size_t j; - TRACE(("%s %d %d\n", 
re_op_text[member->op], member->match, - member->value_count)) - - for (j = 0; j < member->value_count; j++) { - if (cases[i] == member->values[j]) - return TRUE; - } - break; - } - default: - return TRUE; - } - } - - return FALSE; -} - -/* Checks whether a character is in a set difference. */ -Py_LOCAL_INLINE(BOOL) in_set_diff(RE_EncodingTable* encoding, RE_LocaleInfo* - locale_info, RE_Node* node, Py_UCS4 ch) { - RE_Node* member; - - member = node->nonstring.next_2.node; - - if (matches_member(encoding, locale_info, member, ch) != member->match) - return FALSE; - - member = member->next_1.node; - - while (member) { - if (matches_member(encoding, locale_info, member, ch) == member->match) - return FALSE; - - member = member->next_1.node; - } - - return TRUE; -} - -/* Checks whether a character is in a set difference, ignoring case. */ -Py_LOCAL_INLINE(BOOL) in_set_diff_ign(RE_EncodingTable* encoding, - RE_LocaleInfo* locale_info, RE_Node* node, int case_count, Py_UCS4* cases) { - RE_Node* member; - - member = node->nonstring.next_2.node; - - if (matches_member_ign(encoding, locale_info, member, case_count, cases) != - member->match) - return FALSE; - - member = member->next_1.node; - - while (member) { - if (matches_member_ign(encoding, locale_info, member, case_count, - cases) == member->match) - return FALSE; - - member = member->next_1.node; - } - - return TRUE; -} - -/* Checks whether a character is in a set intersection. */ -Py_LOCAL_INLINE(BOOL) in_set_inter(RE_EncodingTable* encoding, RE_LocaleInfo* - locale_info, RE_Node* node, Py_UCS4 ch) { - RE_Node* member; - - member = node->nonstring.next_2.node; - - while (member) { - if (matches_member(encoding, locale_info, member, ch) != member->match) - return FALSE; - - member = member->next_1.node; - } - - return TRUE; -} - -/* Checks whether a character is in a set intersection, ignoring case. 
*/ -Py_LOCAL_INLINE(BOOL) in_set_inter_ign(RE_EncodingTable* encoding, - RE_LocaleInfo* locale_info, RE_Node* node, int case_count, Py_UCS4* cases) { - RE_Node* member; - - member = node->nonstring.next_2.node; - - while (member) { - if (matches_member_ign(encoding, locale_info, member, case_count, - cases) != member->match) - return FALSE; - - member = member->next_1.node; - } - - return TRUE; -} - -/* Checks whether a character is in a set symmetric difference. */ -Py_LOCAL_INLINE(BOOL) in_set_sym_diff(RE_EncodingTable* encoding, - RE_LocaleInfo* locale_info, RE_Node* node, Py_UCS4 ch) { - RE_Node* member; - BOOL result; - - member = node->nonstring.next_2.node; - - result = FALSE; - - while (member) { - if (matches_member(encoding, locale_info, member, ch) == member->match) - result = !result; - - member = member->next_1.node; - } - - return result; -} - -/* Checks whether a character is in a set symmetric difference, ignoring case. - */ -Py_LOCAL_INLINE(BOOL) in_set_sym_diff_ign(RE_EncodingTable* encoding, - RE_LocaleInfo* locale_info, RE_Node* node, int case_count, Py_UCS4* cases) { - RE_Node* member; - BOOL result; - - member = node->nonstring.next_2.node; - - result = FALSE; - - while (member) { - if (matches_member_ign(encoding, locale_info, member, case_count, - cases) == member->match) - result = !result; - - member = member->next_1.node; - } - - return result; -} - -/* Checks whether a character is in a set union. */ -Py_LOCAL_INLINE(BOOL) in_set_union(RE_EncodingTable* encoding, RE_LocaleInfo* - locale_info, RE_Node* node, Py_UCS4 ch) { - RE_Node* member; - - member = node->nonstring.next_2.node; - - while (member) { - if (matches_member(encoding, locale_info, member, ch) == member->match) - return TRUE; - - member = member->next_1.node; - } - - return FALSE; -} - -/* Checks whether a character is in a set union, ignoring case. 
*/ -Py_LOCAL_INLINE(BOOL) in_set_union_ign(RE_EncodingTable* encoding, - RE_LocaleInfo* locale_info, RE_Node* node, int case_count, Py_UCS4* cases) { - RE_Node* member; - - member = node->nonstring.next_2.node; - - while (member) { - if (matches_member_ign(encoding, locale_info, member, case_count, - cases) == member->match) - return TRUE; - - member = member->next_1.node; - } - - return FALSE; -} - -/* Checks whether a character is in a set. */ -Py_LOCAL_INLINE(BOOL) matches_SET(RE_EncodingTable* encoding, -RE_LocaleInfo* locale_info, RE_Node* node, Py_UCS4 ch) { - switch (node->op) { - case RE_OP_SET_DIFF: - case RE_OP_SET_DIFF_REV: - return in_set_diff(encoding, locale_info, node, ch); - case RE_OP_SET_INTER: - case RE_OP_SET_INTER_REV: - return in_set_inter(encoding, locale_info, node, ch); - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_SYM_DIFF_REV: - return in_set_sym_diff(encoding, locale_info, node, ch); - case RE_OP_SET_UNION: - case RE_OP_SET_UNION_REV: - return in_set_union(encoding, locale_info, node, ch); - } - - return FALSE; -} - -/* Checks whether a character is in a set, ignoring case. */ -Py_LOCAL_INLINE(BOOL) matches_SET_IGN(RE_EncodingTable* encoding, -RE_LocaleInfo* locale_info, RE_Node* node, Py_UCS4 ch) { - Py_UCS4 cases[RE_MAX_CASES]; - int case_count; - - case_count = encoding->all_cases(locale_info, ch, cases); - - switch (node->op) { - case RE_OP_SET_DIFF_IGN: - case RE_OP_SET_DIFF_IGN_REV: - return in_set_diff_ign(encoding, locale_info, node, case_count, cases); - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_INTER_IGN_REV: - return in_set_inter_ign(encoding, locale_info, node, case_count, - cases); - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_SYM_DIFF_IGN_REV: - return in_set_sym_diff_ign(encoding, locale_info, node, case_count, - cases); - case RE_OP_SET_UNION_IGN: - case RE_OP_SET_UNION_IGN_REV: - return in_set_union_ign(encoding, locale_info, node, case_count, - cases); - } - - return FALSE; -} - -/* Resets a guard list. 
*/ -Py_LOCAL_INLINE(void) reset_guard_list(RE_GuardList* guard_list) { - guard_list->count = 0; - guard_list->last_text_pos = -1; -} - -/* Clears the groups. */ -Py_LOCAL_INLINE(void) clear_groups(RE_State* state) { - size_t i; - - for (i = 0; i < state->pattern->true_group_count; i++) { - RE_GroupData* group; - - group = &state->groups[i]; - group->span.start = -1; - group->span.end = -1; - group->capture_count = 0; - group->current_capture = -1; - } -} - -/* Resets the various guards. */ -Py_LOCAL_INLINE(void) reset_guards(RE_State* state) { - size_t i; - - /* Reset the guards for the repeats. */ - for (i = 0; i < state->pattern->repeat_count; i++) { - reset_guard_list(&state->repeats[i].body_guard_list); - reset_guard_list(&state->repeats[i].tail_guard_list); - } - - /* Reset the guards for the fuzzy sections. */ - for (i = 0; i < state->pattern->fuzzy_count; i++) { - reset_guard_list(&state->fuzzy_guards[i].body_guard_list); - reset_guard_list(&state->fuzzy_guards[i].tail_guard_list); - } - - /* Reset the guards for the group calls. */ - for (i = 0; i < state->pattern->call_ref_info_count; i++) - reset_guard_list(&state->group_call_guard_list[i]); -} - -/* Initialises the state for a match. */ -Py_LOCAL_INLINE(void) init_match(RE_State* state) { - RE_AtomicBlock* current; - - /* Reset the backtrack. */ - state->current_backtrack_block = &state->backtrack_block; - state->current_backtrack_block->count = 0; - state->current_saved_groups = state->first_saved_groups; - state->backtrack = NULL; - state->search_anchor = state->text_pos; - state->match_pos = state->text_pos; - - /* Reset the atomic stack. */ - current = state->current_atomic_block; - if (current) { - while (current->previous) - current = current->previous; - - state->current_atomic_block = current; - state->current_atomic_block->count = 0; - } - - /* Clear the groups. */ - clear_groups(state); - - /* Reset the guards. */ - reset_guards(state); - - /* Clear the counts and cost for matching. 
*/ - if (state->pattern->is_fuzzy) { - memset(state->fuzzy_info.counts, 0, sizeof(state->fuzzy_info.counts)); - memset(state->total_fuzzy_counts, 0, - sizeof(state->total_fuzzy_counts)); - } - - state->fuzzy_info.total_cost = 0; - state->total_errors = 0; - state->too_few_errors = FALSE; - state->found_match = FALSE; - state->capture_change = 0; - state->iterations = 0; -} - -/* Adds a new backtrack entry. */ -Py_LOCAL_INLINE(BOOL) add_backtrack(RE_SafeState* safe_state, RE_UINT8 op) { - RE_State* state; - RE_BacktrackBlock* current; - - state = safe_state->re_state; - - current = state->current_backtrack_block; - if (current->count >= current->capacity) { - if (!current->next) { - RE_BacktrackBlock* next; - - /* Is there too much backtracking? */ - if (state->backtrack_allocated >= RE_MAX_BACKTRACK_ALLOC) - return FALSE; - - next = (RE_BacktrackBlock*)safe_alloc(safe_state, - sizeof(RE_BacktrackBlock)); - if (!next) - return FALSE; - - next->previous = current; - next->next = NULL; - next->capacity = RE_BACKTRACK_BLOCK_SIZE; - current->next = next; - - state->backtrack_allocated += RE_BACKTRACK_BLOCK_SIZE; - } - - current = current->next; - current->count = 0; - state->current_backtrack_block = current; - } - - state->backtrack = &current->items[current->count++]; - state->backtrack->op = op; - - return TRUE; -} - -/* Gets the last backtrack entry. - * - * It'll never be called when there are _no_ entries. - */ -Py_LOCAL_INLINE(RE_BacktrackData*) last_backtrack(RE_State* state) { - RE_BacktrackBlock* current; - - current = state->current_backtrack_block; - state->backtrack = &current->items[current->count - 1]; - - return state->backtrack; -} - -/* Discards the last backtrack entry. - * - * It'll never be called to discard the _only_ entry. 
- */ -Py_LOCAL_INLINE(void) discard_backtrack(RE_State* state) { - RE_BacktrackBlock* current; - - current = state->current_backtrack_block; - --current->count; - if (current->count == 0 && current->previous) - state->current_backtrack_block = current->previous; -} - -/* Pushes a new empty entry onto the atomic stack. */ -Py_LOCAL_INLINE(RE_AtomicData*) push_atomic(RE_SafeState* safe_state) { - RE_State* state; - RE_AtomicBlock* current; - - state = safe_state->re_state; - - current = state->current_atomic_block; - if (!current || current->count >= current->capacity) { - /* The current block is full. */ - if (current && current->next) - /* Advance to the next block. */ - current = current->next; - else { - /* Add a new block. */ - RE_AtomicBlock* next; - - next = (RE_AtomicBlock*)safe_alloc(safe_state, - sizeof(RE_AtomicBlock)); - if (!next) - return NULL; - - next->previous = current; - next->next = NULL; - next->capacity = RE_ATOMIC_BLOCK_SIZE; - - current = next; - } - - current->count = 0; - state->current_atomic_block = current; - } - - return &current->items[current->count++]; -} - -/* Pops the top entry from the atomic stack. */ -Py_LOCAL_INLINE(RE_AtomicData*) pop_atomic(RE_SafeState* safe_state) { - RE_State* state; - RE_AtomicBlock* current; - RE_AtomicData* atomic; - - state = safe_state->re_state; - - current = state->current_atomic_block; - atomic = &current->items[--current->count]; - if (current->count == 0 && current->previous) - state->current_atomic_block = current->previous; - - return atomic; -} - -/* Gets the top entry from the atomic stack. */ -Py_LOCAL_INLINE(RE_AtomicData*) top_atomic(RE_SafeState* safe_state) { - RE_State* state; - RE_AtomicBlock* current; - - state = safe_state->re_state; - - current = state->current_atomic_block; - return &current->items[current->count - 1]; -} - -/* Copies a repeat guard list. 
*/ -Py_LOCAL_INLINE(BOOL) copy_guard_data(RE_SafeState* safe_state, RE_GuardList* - dst, RE_GuardList* src) { - if (dst->capacity < src->count) { - RE_GuardSpan* new_spans; - - if (!safe_state) - return FALSE; - - dst->capacity = src->count; - new_spans = (RE_GuardSpan*)safe_realloc(safe_state, dst->spans, - dst->capacity * sizeof(RE_GuardSpan)); - if (!new_spans) - return FALSE; - - dst->spans = new_spans; - } - - dst->count = src->count; - memmove(dst->spans, src->spans, dst->count * sizeof(RE_GuardSpan)); - - dst->last_text_pos = -1; - - return TRUE; -} - -/* Copies a repeat. */ -Py_LOCAL_INLINE(BOOL) copy_repeat_data(RE_SafeState* safe_state, RE_RepeatData* - dst, RE_RepeatData* src) { - if (!copy_guard_data(safe_state, &dst->body_guard_list, - &src->body_guard_list) || !copy_guard_data(safe_state, - &dst->tail_guard_list, &src->tail_guard_list)) { - safe_dealloc(safe_state, dst->body_guard_list.spans); - safe_dealloc(safe_state, dst->tail_guard_list.spans); - - return FALSE; - } - - dst->count = src->count; - dst->start = src->start; - dst->capture_change = src->capture_change; - - return TRUE; -} - -/* Pushes a return node onto the group call stack. */ -Py_LOCAL_INLINE(BOOL) push_group_return(RE_SafeState* safe_state, RE_Node* - return_node) { - RE_State* state; - PatternObject* pattern; - RE_GroupCallFrame* frame; - - state = safe_state->re_state; - pattern = state->pattern; - - if (state->current_group_call_frame && - state->current_group_call_frame->next) - /* Advance to the next allocated frame. */ - frame = state->current_group_call_frame->next; - else if (!state->current_group_call_frame && state->first_group_call_frame) - /* Advance to the first allocated frame. */ - frame = state->first_group_call_frame; - else { - /* Create a new frame. 
*/ - frame = (RE_GroupCallFrame*)safe_alloc(safe_state, - sizeof(RE_GroupCallFrame)); - if (!frame) - return FALSE; - - frame->groups = (RE_GroupData*)safe_alloc(safe_state, - pattern->true_group_count * sizeof(RE_GroupData)); - frame->repeats = (RE_RepeatData*)safe_alloc(safe_state, - pattern->repeat_count * sizeof(RE_RepeatData)); - if (!frame->groups || !frame->repeats) { - safe_dealloc(safe_state, frame->groups); - safe_dealloc(safe_state, frame->repeats); - safe_dealloc(safe_state, frame); - - return FALSE; - } - - memset(frame->groups, 0, pattern->true_group_count * - sizeof(RE_GroupData)); - memset(frame->repeats, 0, pattern->repeat_count * - sizeof(RE_RepeatData)); - - frame->previous = state->current_group_call_frame; - frame->next = NULL; - - if (frame->previous) - frame->previous->next = frame; - else - state->first_group_call_frame = frame; - } - - frame->node = return_node; - - /* Push the groups and guards. */ - if (return_node) { - size_t g; - size_t r; - - for (g = 0; g < pattern->true_group_count; g++) { - frame->groups[g].span = state->groups[g].span; - frame->groups[g].current_capture = - state->groups[g].current_capture; - } - - for (r = 0; r < pattern->repeat_count; r++) { - if (!copy_repeat_data(safe_state, &frame->repeats[r], - &state->repeats[r])) - return FALSE; - } - } - - state->current_group_call_frame = frame; - - return TRUE; -} - -/* Pops a return node from the group call stack. */ -Py_LOCAL_INLINE(RE_Node*) pop_group_return(RE_State* state) { - RE_GroupCallFrame* frame; - - frame = state->current_group_call_frame; - - /* Pop the groups and repeats. 
*/ - if (frame->node) { - PatternObject* pattern; - size_t g; - size_t r; - - pattern = state->pattern; - - for (g = 0; g < pattern->true_group_count; g++) { - state->groups[g].span = frame->groups[g].span; - state->groups[g].current_capture = - frame->groups[g].current_capture; - } - - for (r = 0; r < pattern->repeat_count; r++) - copy_repeat_data(NULL, &state->repeats[r], &frame->repeats[r]); - } - - /* Withdraw to previous frame. */ - state->current_group_call_frame = frame->previous; - - return frame->node; -} - -/* Returns the return node from the top of the group call stack. */ -Py_LOCAL_INLINE(RE_Node*) top_group_return(RE_State* state) { - RE_GroupCallFrame* frame; - - frame = state->current_group_call_frame; - - return frame->node; -} - -/* Checks whether a node matches only 1 character. */ -Py_LOCAL_INLINE(BOOL) node_matches_one_character(RE_Node* node) { - switch (node->op) { - case RE_OP_ANY: - case RE_OP_ANY_ALL: - case RE_OP_ANY_ALL_REV: - case RE_OP_ANY_REV: - case RE_OP_ANY_U: - case RE_OP_ANY_U_REV: - case RE_OP_CHARACTER: - case RE_OP_CHARACTER_IGN: - case RE_OP_CHARACTER_IGN_REV: - case RE_OP_CHARACTER_REV: - case RE_OP_PROPERTY: - case RE_OP_PROPERTY_IGN: - case RE_OP_PROPERTY_IGN_REV: - case RE_OP_PROPERTY_REV: - case RE_OP_RANGE: - case RE_OP_RANGE_IGN: - case RE_OP_RANGE_IGN_REV: - case RE_OP_RANGE_REV: - case RE_OP_SET_DIFF: - case RE_OP_SET_DIFF_IGN: - case RE_OP_SET_DIFF_IGN_REV: - case RE_OP_SET_DIFF_REV: - case RE_OP_SET_INTER: - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_INTER_IGN_REV: - case RE_OP_SET_INTER_REV: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_SYM_DIFF_IGN_REV: - case RE_OP_SET_SYM_DIFF_REV: - case RE_OP_SET_UNION: - case RE_OP_SET_UNION_IGN: - case RE_OP_SET_UNION_IGN_REV: - case RE_OP_SET_UNION_REV: - return TRUE; - default: - return FALSE; - } -} - -/* Checks whether the node is a firstset. 
*/ -Py_LOCAL_INLINE(BOOL) is_firstset(RE_Node* node) { - if (node->step != 0) - return FALSE; - - return node_matches_one_character(node); -} - -/* Locates the start node for testing ahead. */ -Py_LOCAL_INLINE(RE_Node*) locate_test_start(RE_Node* node) { - for (;;) { - switch (node->op) { - case RE_OP_BOUNDARY: - switch (node->next_1.node->op) { - case RE_OP_STRING: - case RE_OP_STRING_FLD: - case RE_OP_STRING_FLD_REV: - case RE_OP_STRING_IGN: - case RE_OP_STRING_IGN_REV: - case RE_OP_STRING_REV: - return node->next_1.node; - default: - return node; - } - case RE_OP_CALL_REF: - case RE_OP_END_GROUP: - case RE_OP_START_GROUP: - node = node->next_1.node; - break; - case RE_OP_GREEDY_REPEAT: - case RE_OP_LAZY_REPEAT: - if (node->values[1] == 0) - return node; - node = node->next_1.node; - break; - case RE_OP_GREEDY_REPEAT_ONE: - case RE_OP_LAZY_REPEAT_ONE: - if (node->values[1] == 0) - return node; - return node->nonstring.next_2.node; - case RE_OP_LOOKAROUND: - node = node->nonstring.next_2.node; - break; - default: - if (is_firstset(node)) { - switch (node->next_1.node->op) { - case RE_OP_END_OF_STRING: - case RE_OP_START_OF_STRING: - return node->next_1.node; - } - } - - return node; - } - } -} - -/* Checks whether a character matches any of a set of case characters. */ -Py_LOCAL_INLINE(BOOL) any_case(Py_UCS4 ch, int case_count, Py_UCS4* cases) { - int i; - - for (i = 0; i < case_count; i++) { - if (ch == cases[i]) - return TRUE; - } - - return FALSE; -} - -/* Matches many ANYs, up to a limit. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY(RE_State* state, RE_Node* node, - Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - - text = state->text; - encoding = state->encoding; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr < limit_ptr && matches_ANY(encoding, node, text_ptr[0]) - == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr < limit_ptr && matches_ANY(encoding, node, text_ptr[0]) - == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr < limit_ptr && matches_ANY(encoding, node, text_ptr[0]) - == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many ANYs, up to a limit, backwards. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - - text = state->text; - encoding = state->encoding; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr > limit_ptr && matches_ANY(encoding, node, - text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr > limit_ptr && matches_ANY(encoding, node, - text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr > limit_ptr && matches_ANY(encoding, node, - text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many ANY_Us, up to a limit. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U(RE_State* state, RE_Node* node, - Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - - text = state->text; - encoding = state->encoding; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr < limit_ptr && matches_ANY_U(encoding, node, - text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr < limit_ptr && matches_ANY_U(encoding, node, - text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr < limit_ptr && matches_ANY_U(encoding, node, - text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many ANY_Us, up to a limit, backwards. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U_REV(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - - text = state->text; - encoding = state->encoding; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr > limit_ptr && matches_ANY_U(encoding, node, - text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr > limit_ptr && matches_ANY_U(encoding, node, - text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr > limit_ptr && matches_ANY_U(encoding, node, - text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many CHARACTERs, up to a limit. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - Py_UCS4 ch; - - text = state->text; - match = node->match == match; - ch = node->values[0]; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr < limit_ptr && (text_ptr[0] == ch) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr < limit_ptr && (text_ptr[0] == ch) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr < limit_ptr && (text_ptr[0] == ch) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many CHARACTERs, up to a limit, ignoring case. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_IGN(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - Py_UCS4 cases[RE_MAX_CASES]; - int case_count; - - text = state->text; - match = node->match == match; - case_count = state->encoding->all_cases(state->locale_info, - node->values[0], cases); - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr < limit_ptr && any_case(text_ptr[0], case_count, cases) - == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr < limit_ptr && any_case(text_ptr[0], case_count, cases) - == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr < limit_ptr && any_case(text_ptr[0], case_count, cases) - == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many CHARACTERs, up to a limit, backwards, ignoring case. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_IGN_REV(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - Py_UCS4 cases[RE_MAX_CASES]; - int case_count; - - text = state->text; - match = node->match == match; - case_count = state->encoding->all_cases(state->locale_info, - node->values[0], cases); - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr > limit_ptr && any_case(text_ptr[-1], case_count, - cases) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr > limit_ptr && any_case(text_ptr[-1], case_count, - cases) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr > limit_ptr && any_case(text_ptr[-1], case_count, - cases) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many CHARACTERs, up to a limit, backwards. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_REV(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - Py_UCS4 ch; - - text = state->text; - match = node->match == match; - ch = node->values[0]; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr > limit_ptr && (text_ptr[-1] == ch) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr > limit_ptr && (text_ptr[-1] == ch) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr > limit_ptr && (text_ptr[-1] == ch) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many PROPERTYs, up to a limit. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY(RE_State* state, RE_Node* node, - Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - - text = state->text; - match = node->match == match; - encoding = state->encoding; - locale_info = state->locale_info; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr < limit_ptr && matches_PROPERTY(encoding, locale_info, - node, text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr < limit_ptr && matches_PROPERTY(encoding, locale_info, - node, text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr < limit_ptr && matches_PROPERTY(encoding, locale_info, - node, text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many PROPERTYs, up to a limit, ignoring case. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_IGN(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - - text = state->text; - match = node->match == match; - encoding = state->encoding; - locale_info = state->locale_info; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr < limit_ptr && matches_PROPERTY_IGN(encoding, - locale_info, node, text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr < limit_ptr && matches_PROPERTY_IGN(encoding, - locale_info, node, text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr < limit_ptr && matches_PROPERTY_IGN(encoding, - locale_info, node, text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many PROPERTYs, up to a limit, backwards, ignoring case. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_IGN_REV(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - - text = state->text; - match = node->match == match; - encoding = state->encoding; - locale_info = state->locale_info; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr > limit_ptr && matches_PROPERTY_IGN(encoding, - locale_info, node, text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr > limit_ptr && matches_PROPERTY_IGN(encoding, - locale_info, node, text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr > limit_ptr && matches_PROPERTY_IGN(encoding, - locale_info, node, text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many PROPERTYs, up to a limit, backwards. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_REV(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - - text = state->text; - match = node->match == match; - encoding = state->encoding; - locale_info = state->locale_info; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr > limit_ptr && matches_PROPERTY(encoding, locale_info, - node, text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr > limit_ptr && matches_PROPERTY(encoding, locale_info, - node, text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr > limit_ptr && matches_PROPERTY(encoding, locale_info, - node, text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many RANGEs, up to a limit. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE(RE_State* state, RE_Node* node, - Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - - text = state->text; - match = node->match == match; - encoding = state->encoding; - locale_info = state->locale_info; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr < limit_ptr && matches_RANGE(encoding, locale_info, - node, text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr < limit_ptr && matches_RANGE(encoding, locale_info, - node, text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr < limit_ptr && matches_RANGE(encoding, locale_info, - node, text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many RANGEs, up to a limit, ignoring case. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_IGN(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - - text = state->text; - match = node->match == match; - encoding = state->encoding; - locale_info = state->locale_info; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr < limit_ptr && matches_RANGE_IGN(encoding, locale_info, - node, text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr < limit_ptr && matches_RANGE_IGN(encoding, locale_info, - node, text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr < limit_ptr && matches_RANGE_IGN(encoding, locale_info, - node, text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many RANGEs, up to a limit, backwards, ignoring case. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_IGN_REV(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - - text = state->text; - match = node->match == match; - encoding = state->encoding; - locale_info = state->locale_info; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr > limit_ptr && matches_RANGE_IGN(encoding, locale_info, - node, text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr > limit_ptr && matches_RANGE_IGN(encoding, locale_info, - node, text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr > limit_ptr && matches_RANGE_IGN(encoding, locale_info, - node, text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many RANGEs, up to a limit, backwards. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_REV(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - - text = state->text; - match = node->match == match; - encoding = state->encoding; - locale_info = state->locale_info; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr > limit_ptr && matches_RANGE(encoding, locale_info, - node, text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr > limit_ptr && matches_RANGE(encoding, locale_info, - node, text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr > limit_ptr && matches_RANGE(encoding, locale_info, - node, text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many SETs, up to a limit. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_SET(RE_State* state, RE_Node* node, - Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - - text = state->text; - match = node->match == match; - encoding = state->encoding; - locale_info = state->locale_info; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr < limit_ptr && matches_SET(encoding, locale_info, node, - text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr < limit_ptr && matches_SET(encoding, locale_info, node, - text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr < limit_ptr && matches_SET(encoding, locale_info, node, - text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many SETs, up to a limit, ignoring case. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_IGN(RE_State* state, RE_Node* node, - Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - - text = state->text; - match = node->match == match; - encoding = state->encoding; - locale_info = state->locale_info; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr < limit_ptr && matches_SET_IGN(encoding, locale_info, - node, text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr < limit_ptr && matches_SET_IGN(encoding, locale_info, - node, text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr < limit_ptr && matches_SET_IGN(encoding, locale_info, - node, text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many SETs, up to a limit, backwards, ignoring case. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_IGN_REV(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - - text = state->text; - match = node->match == match; - encoding = state->encoding; - locale_info = state->locale_info; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr > limit_ptr && matches_SET_IGN(encoding, locale_info, - node, text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr > limit_ptr && matches_SET_IGN(encoding, locale_info, - node, text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr > limit_ptr && matches_SET_IGN(encoding, locale_info, - node, text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many SETs, up to a limit, backwards. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - - text = state->text; - match = node->match == match; - encoding = state->encoding; - locale_info = state->locale_info; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr > limit_ptr && matches_SET(encoding, locale_info, node, - text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr > limit_ptr && matches_SET(encoding, locale_info, node, - text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr > limit_ptr && matches_SET(encoding, locale_info, node, - text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Counts a repeated character pattern. 
*/ -Py_LOCAL_INLINE(size_t) count_one(RE_State* state, RE_Node* node, Py_ssize_t - text_pos, size_t max_count, BOOL* is_partial) { - size_t count; - - *is_partial = FALSE; - - if (max_count < 1) - return 0; - - switch (node->op) { - case RE_OP_ANY: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - count = (size_t)(match_many_ANY(state, node, text_pos, text_pos + - (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_ANY_ALL: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_ANY_ALL_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - *is_partial = count == (size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_ANY_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_ANY_REV(state, node, text_pos, - text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == (size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_ANY_U: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - count = (size_t)(match_many_ANY_U(state, node, text_pos, text_pos + - (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_ANY_U_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_ANY_U_REV(state, node, text_pos, - text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == 
(size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_CHARACTER: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - count = (size_t)(match_many_CHARACTER(state, node, text_pos, text_pos + - (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_CHARACTER_IGN: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - count = (size_t)(match_many_CHARACTER_IGN(state, node, text_pos, - text_pos + (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_CHARACTER_IGN_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_CHARACTER_IGN_REV(state, node, - text_pos, text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == (size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_CHARACTER_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_CHARACTER_REV(state, node, - text_pos, text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == (size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_PROPERTY: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - count = (size_t)(match_many_PROPERTY(state, node, text_pos, text_pos + - (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_PROPERTY_IGN: - count = min_size_t((size_t)(state->slice_end - text_pos), 
max_count); - - count = (size_t)(match_many_PROPERTY_IGN(state, node, text_pos, - text_pos + (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_PROPERTY_IGN_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_PROPERTY_IGN_REV(state, node, - text_pos, text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == (size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_PROPERTY_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_PROPERTY_REV(state, node, - text_pos, text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == (size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_RANGE: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - count = (size_t)(match_many_RANGE(state, node, text_pos, text_pos + - (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_RANGE_IGN: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - count = (size_t)(match_many_RANGE_IGN(state, node, text_pos, text_pos + - (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_RANGE_IGN_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_RANGE_IGN_REV(state, node, - text_pos, text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == (size_t)(text_pos) && count < max_count 
&& - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_RANGE_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_RANGE_REV(state, node, text_pos, - text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == (size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_SET_DIFF: - case RE_OP_SET_INTER: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_UNION: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - count = (size_t)(match_many_SET(state, node, text_pos, text_pos + - (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_SET_DIFF_IGN: - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_UNION_IGN: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - count = (size_t)(match_many_SET_IGN(state, node, text_pos, text_pos + - (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_SET_DIFF_IGN_REV: - case RE_OP_SET_INTER_IGN_REV: - case RE_OP_SET_SYM_DIFF_IGN_REV: - case RE_OP_SET_UNION_IGN_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_SET_IGN_REV(state, node, - text_pos, text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == (size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_SET_DIFF_REV: - case RE_OP_SET_INTER_REV: - case RE_OP_SET_SYM_DIFF_REV: - case RE_OP_SET_UNION_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_SET_REV(state, node, 
text_pos, - text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == (size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - } - - return 0; -} - -/* Performs a simple string search. */ -Py_LOCAL_INLINE(Py_ssize_t) simple_string_search(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { - Py_ssize_t length; - RE_CODE* values; - Py_UCS4 check_char; - - length = (Py_ssize_t)node->value_count; - values = node->values; - check_char = values[0]; - - *is_partial = FALSE; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text = (Py_UCS1*)state->text; - Py_UCS1* text_ptr = text + text_pos; - Py_UCS1* limit_ptr = text + limit; - - while (text_ptr < limit_ptr) { - if (text_ptr[0] == check_char) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr + s_pos >= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_RIGHT) { - /* Partial match. */ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char(text_ptr[s_pos], values[s_pos])) - break; - - ++s_pos; - } - } - - ++text_ptr; - } - text_pos = text_ptr - text; - break; - } - case 2: - { - Py_UCS2* text = (Py_UCS2*)state->text; - Py_UCS2* text_ptr = text + text_pos; - Py_UCS2* limit_ptr = text + limit; - - while (text_ptr < limit_ptr) { - if (text_ptr[0] == check_char) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr + s_pos >= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_RIGHT) { - /* Partial match. 
*/ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char(text_ptr[s_pos], values[s_pos])) - break; - - ++s_pos; - } - } - - ++text_ptr; - } - text_pos = text_ptr - text; - break; - } - case 4: - { - Py_UCS4* text = (Py_UCS4*)state->text; - Py_UCS4* text_ptr = text + text_pos; - Py_UCS4* limit_ptr = text + limit; - - while (text_ptr < limit_ptr) { - if (text_ptr[0] == check_char) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr + s_pos >= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_RIGHT) { - /* Partial match. */ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char(text_ptr[s_pos], values[s_pos])) - break; - - ++s_pos; - } - } - - ++text_ptr; - } - text_pos = text_ptr - text; - break; - } - } - - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_RIGHT) { - /* Partial match. */ - *is_partial = TRUE; - return text_pos; - } - - return -1; -} - -/* Performs a simple string search, ignoring case. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) simple_string_search_ign(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { - Py_ssize_t length; - RE_CODE* values; - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - Py_UCS4 cases[RE_MAX_CASES]; - int case_count; - - length = (Py_ssize_t)node->value_count; - values = node->values; - encoding = state->encoding; - locale_info = state->locale_info; - case_count = encoding->all_cases(locale_info, values[0], cases); - - *is_partial = FALSE; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text = (Py_UCS1*)state->text; - Py_UCS1* text_ptr = text + text_pos; - Py_UCS1* limit_ptr = text + limit; - - while (text_ptr < limit_ptr) { - if (any_case(text_ptr[0], case_count, cases)) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr + s_pos >= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_RIGHT) { - /* Partial match. */ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char_ign(encoding, locale_info, text_ptr[s_pos], - values[s_pos])) - break; - - ++s_pos; - } - } - - ++text_ptr; - } - text_pos = text_ptr - text; - break; - } - case 2: - { - Py_UCS2* text = (Py_UCS2*)state->text; - Py_UCS2* text_ptr = text + text_pos; - Py_UCS2* limit_ptr = text + limit; - - while (text_ptr < limit_ptr) { - if (any_case(text_ptr[0], case_count, cases)) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr + s_pos >= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_RIGHT) { - /* Partial match. 
*/ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char_ign(encoding, locale_info, text_ptr[s_pos], - values[s_pos])) - break; - - ++s_pos; - } - } - - ++text_ptr; - } - text_pos = text_ptr - text; - break; - } - case 4: - { - Py_UCS4* text = (Py_UCS4*)state->text; - Py_UCS4* text_ptr = text + text_pos; - Py_UCS4* limit_ptr = text + limit; - - while (text_ptr < limit_ptr) { - if (any_case(text_ptr[0], case_count, cases)) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr + s_pos >= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_RIGHT) { - /* Partial match. */ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char_ign(encoding, locale_info, text_ptr[s_pos], - values[s_pos])) - break; - - ++s_pos; - } - } - - ++text_ptr; - } - text_pos = text_ptr - text; - break; - } - } - - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_RIGHT) { - /* Partial match. */ - *is_partial = TRUE; - return text_pos; - } - - return -1; -} - -/* Performs a simple string search, backwards, ignoring case. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) simple_string_search_ign_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { - Py_ssize_t length; - RE_CODE* values; - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - Py_UCS4 cases[RE_MAX_CASES]; - int case_count; - - length = (Py_ssize_t)node->value_count; - values = node->values; - encoding = state->encoding; - locale_info = state->locale_info; - case_count = encoding->all_cases(locale_info, values[length - 1], cases); - - *is_partial = FALSE; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text = (Py_UCS1*)state->text; - Py_UCS1* text_ptr = text + text_pos; - Py_UCS1* limit_ptr = text + limit; - - while (text_ptr > limit_ptr) { - if (any_case(text_ptr[-1], case_count, cases)) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr - s_pos <= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_LEFT) { - /* Partial match. */ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char_ign(encoding, locale_info, text_ptr[- s_pos - - 1], values[length - s_pos - 1])) - break; - - ++s_pos; - } - } - - --text_ptr; - } - text_pos = text_ptr - text; - break; - } - case 2: - { - Py_UCS2* text = (Py_UCS2*)state->text; - Py_UCS2* text_ptr = text + text_pos; - Py_UCS2* limit_ptr = text + limit; - - while (text_ptr > limit_ptr) { - if (any_case(text_ptr[-1], case_count, cases)) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr - s_pos <= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_LEFT) { - /* Partial match. 
*/ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char_ign(encoding, locale_info, text_ptr[- s_pos - - 1], values[length - s_pos - 1])) - break; - - ++s_pos; - } - } - - --text_ptr; - } - text_pos = text_ptr - text; - break; - } - case 4: - { - Py_UCS4* text = (Py_UCS4*)state->text; - Py_UCS4* text_ptr = text + text_pos; - Py_UCS4* limit_ptr = text + limit; - - while (text_ptr > limit_ptr) { - if (any_case(text_ptr[-1], case_count, cases)) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr - s_pos <= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_LEFT) { - /* Partial match. */ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char_ign(encoding, locale_info, text_ptr[- s_pos - - 1], values[length - s_pos - 1])) - break; - - ++s_pos; - } - } - - --text_ptr; - } - text_pos = text_ptr - text; - break; - } - } - - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_LEFT) { - /* Partial match. */ - *is_partial = TRUE; - return text_pos; - } - - return -1; -} - -/* Performs a simple string search, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) simple_string_search_rev(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { - Py_ssize_t length; - RE_CODE* values; - Py_UCS4 check_char; - - length = (Py_ssize_t)node->value_count; - values = node->values; - check_char = values[length - 1]; - - *is_partial = FALSE; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text = (Py_UCS1*)state->text; - Py_UCS1* text_ptr = text + text_pos; - Py_UCS1* limit_ptr = text + limit; - - while (text_ptr > limit_ptr) { - if (text_ptr[-1] == check_char) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. 
*/ - return text_ptr - text; - - if (text_ptr - s_pos <= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_LEFT) { - /* Partial match. */ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char(text_ptr[- s_pos - 1], values[length - s_pos - - 1])) - break; - - ++s_pos; - } - } - - --text_ptr; - } - text_pos = text_ptr - text; - break; - } - case 2: - { - Py_UCS2* text = (Py_UCS2*)state->text; - Py_UCS2* text_ptr = text + text_pos; - Py_UCS2* limit_ptr = text + limit; - - while (text_ptr > limit_ptr) { - if (text_ptr[-1] == check_char) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr - s_pos <= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_LEFT) { - /* Partial match. */ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char(text_ptr[- s_pos - 1], values[length - s_pos - - 1])) - break; - - ++s_pos; - } - } - - --text_ptr; - } - text_pos = text_ptr - text; - break; - } - case 4: - { - Py_UCS4* text = (Py_UCS4*)state->text; - Py_UCS4* text_ptr = text + text_pos; - Py_UCS4* limit_ptr = text + limit; - - while (text_ptr > limit_ptr) { - if (text_ptr[-1] == check_char) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr - s_pos <= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_LEFT) { - /* Partial match. */ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char(text_ptr[- s_pos - 1], values[length - s_pos - - 1])) - break; - - ++s_pos; - } - } - - --text_ptr; - } - text_pos = text_ptr - text; - break; - } - } - - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_LEFT) { - /* Partial match. 
*/ - *is_partial = TRUE; - return text_pos; - } - - return -1; -} - -/* Performs a Boyer-Moore fast string search. */ -Py_LOCAL_INLINE(Py_ssize_t) fast_string_search(RE_State* state, RE_Node* node, - Py_ssize_t text_pos, Py_ssize_t limit) { - void* text; - Py_ssize_t length; - RE_CODE* values; - Py_ssize_t* bad_character_offset; - Py_ssize_t* good_suffix_offset; - Py_ssize_t last_pos; - Py_UCS4 check_char; - - text = state->text; - length = (Py_ssize_t)node->value_count; - values = node->values; - good_suffix_offset = node->string.good_suffix_offset; - bad_character_offset = node->string.bad_character_offset; - last_pos = length - 1; - check_char = values[last_pos]; - limit -= length; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr <= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[last_pos]; - if (ch == check_char) { - Py_ssize_t pos; - - pos = last_pos - 1; - while (pos >= 0 && same_char(text_ptr[pos], values[pos])) - --pos; - - if (pos < 0) - return text_ptr - (Py_UCS1*)text; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr <= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[last_pos]; - if (ch == check_char) { - Py_ssize_t pos; - - pos = last_pos - 1; - while (pos >= 0 && same_char(text_ptr[pos], values[pos])) - --pos; - - if (pos < 0) - return text_ptr - (Py_UCS2*)text; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr <= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[last_pos]; - if (ch == 
check_char) { - Py_ssize_t pos; - - pos = last_pos - 1; - while (pos >= 0 && same_char(text_ptr[pos], values[pos])) - --pos; - - if (pos < 0) - return text_ptr - (Py_UCS4*)text; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - } - - return -1; -} - -/* Performs a Boyer-Moore fast string search, ignoring case. */ -Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit) { - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - void* text; - Py_ssize_t length; - RE_CODE* values; - Py_ssize_t* bad_character_offset; - Py_ssize_t* good_suffix_offset; - Py_ssize_t last_pos; - Py_UCS4 cases[RE_MAX_CASES]; - int case_count; - - encoding = state->encoding; - locale_info = state->locale_info; - text = state->text; - length = (Py_ssize_t)node->value_count; - values = node->values; - good_suffix_offset = node->string.good_suffix_offset; - bad_character_offset = node->string.bad_character_offset; - last_pos = length - 1; - case_count = encoding->all_cases(locale_info, values[last_pos], cases); - limit -= length; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr <= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[last_pos]; - if (any_case(ch, case_count, cases)) { - Py_ssize_t pos; - - pos = last_pos - 1; - while (pos >= 0 && same_char_ign(encoding, locale_info, - text_ptr[pos], values[pos])) - --pos; - - if (pos < 0) - return text_ptr - (Py_UCS1*)text; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr <= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[last_pos]; - if (any_case(ch, case_count, 
cases)) { - Py_ssize_t pos; - - pos = last_pos - 1; - while (pos >= 0 && same_char_ign(encoding, locale_info, - text_ptr[pos], values[pos])) - --pos; - - if (pos < 0) - return text_ptr - (Py_UCS2*)text; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr <= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[last_pos]; - if (any_case(ch, case_count, cases)) { - Py_ssize_t pos; - - pos = last_pos - 1; - while (pos >= 0 && same_char_ign(encoding, locale_info, - text_ptr[pos], values[pos])) - --pos; - - if (pos < 0) - return text_ptr - (Py_UCS4*)text; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - } - - return -1; -} - -/* Performs a Boyer-Moore fast string search, backwards, ignoring case. */ -Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit) { - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - void* text; - Py_ssize_t length; - RE_CODE* values; - Py_ssize_t* bad_character_offset; - Py_ssize_t* good_suffix_offset; - Py_UCS4 cases[RE_MAX_CASES]; - int case_count; - - encoding = state->encoding; - locale_info = state->locale_info; - text = state->text; - length = (Py_ssize_t)node->value_count; - values = node->values; - good_suffix_offset = node->string.good_suffix_offset; - bad_character_offset = node->string.bad_character_offset; - case_count = encoding->all_cases(locale_info, values[0], cases); - text_pos -= length; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr >= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[0]; - if (any_case(ch, case_count, cases)) { - 
Py_ssize_t pos; - - pos = 1; - while (pos < length && same_char_ign(encoding, locale_info, - text_ptr[pos], values[pos])) - ++pos; - - if (pos >= length) - return text_ptr - (Py_UCS1*)text + length; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr >= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[0]; - if (any_case(ch, case_count, cases)) { - Py_ssize_t pos; - - pos = 1; - while (pos < length && same_char_ign(encoding, locale_info, - text_ptr[pos], values[pos])) - ++pos; - - if (pos >= length) - return text_ptr - (Py_UCS2*)text + length; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr >= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[0]; - if (any_case(ch, case_count, cases)) { - Py_ssize_t pos; - - pos = 1; - while (pos < length && same_char_ign(encoding, locale_info, - text_ptr[pos], values[pos])) - ++pos; - - if (pos >= length) - return text_ptr - (Py_UCS4*)text + length; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - } - - return -1; -} - -/* Performs a Boyer-Moore fast string search, backwards. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_rev(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit) { - void* text; - Py_ssize_t length; - RE_CODE* values; - Py_ssize_t* bad_character_offset; - Py_ssize_t* good_suffix_offset; - Py_UCS4 check_char; - - text = state->text; - length = (Py_ssize_t)node->value_count; - values = node->values; - good_suffix_offset = node->string.good_suffix_offset; - bad_character_offset = node->string.bad_character_offset; - check_char = values[0]; - text_pos -= length; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr >= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[0]; - if (ch == check_char) { - Py_ssize_t pos; - - pos = 1; - while (pos < length && same_char(text_ptr[pos], values[pos])) - ++pos; - - if (pos >= length) - return text_ptr - (Py_UCS1*)text + length; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr >= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[0]; - if (ch == check_char) { - Py_ssize_t pos; - - pos = 1; - while (pos < length && same_char(text_ptr[pos], values[pos])) - ++pos; - - if (pos >= length) - return text_ptr - (Py_UCS2*)text + length; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr >= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[0]; - if (ch == check_char) { - Py_ssize_t pos; - - pos = 1; - while (pos < length && same_char(text_ptr[pos], values[pos])) - ++pos; - - if (pos >= length) - return text_ptr - 
(Py_UCS4*)text + length; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - } - - return -1; -} - -/* Builds the tables for a Boyer-Moore fast string search. */ -Py_LOCAL_INLINE(BOOL) build_fast_tables(RE_State* state, RE_Node* node, BOOL - ignore) { - Py_ssize_t length; - RE_CODE* values; - Py_ssize_t* bad; - Py_ssize_t* good; - Py_UCS4 ch; - Py_ssize_t last_pos; - Py_ssize_t pos; - BOOL (*is_same_char)(RE_EncodingTable* encoding, RE_LocaleInfo* - locale_info, Py_UCS4 ch1, Py_UCS4 ch2); - Py_ssize_t suffix_len; - BOOL saved_start; - Py_ssize_t s; - Py_ssize_t i; - Py_ssize_t s_start; - Py_UCS4 codepoints[RE_MAX_CASES]; - - length = (Py_ssize_t)node->value_count; - - if (length < RE_MIN_FAST_LENGTH) - return TRUE; - - values = node->values; - bad = (Py_ssize_t*)re_alloc(256 * sizeof(bad[0])); - good = (Py_ssize_t*)re_alloc((size_t)length * sizeof(good[0])); - - if (!bad || !good) { - re_dealloc(bad); - re_dealloc(good); - - return FALSE; - } - - for (ch = 0; ch < 0x100; ch++) - bad[ch] = length; - - last_pos = length - 1; - - for (pos = 0; pos < last_pos; pos++) { - Py_ssize_t offset; - - offset = last_pos - pos; - ch = values[pos]; - if (ignore) { - int count; - int i; - - count = state->encoding->all_cases(state->locale_info, ch, - codepoints); - - for (i = 0; i < count; i++) - bad[codepoints[i] & 0xFF] = offset; - } else - bad[ch & 0xFF] = offset; - } - - is_same_char = ignore ? same_char_ign_wrapper : same_char_wrapper; - - suffix_len = 2; - pos = length - suffix_len; - saved_start = FALSE; - s = pos - 1; - i = suffix_len - 1; - s_start = s; - - while (pos >= 0) { - /* Look for another occurrence of the suffix. */ - while (i > 0) { - /* Have we dropped off the end of the string? */ - if (s + i < 0) - break; - - if (is_same_char(state->encoding, state->locale_info, values[s + - i], values[pos + i])) - /* It still matches. */ - --i; - else { - /* Start again further along. 
*/ - --s; - i = suffix_len - 1; - } - } - - if (s >= 0 && is_same_char(state->encoding, state->locale_info, - values[s], values[pos])) { - /* We haven't dropped off the end of the string, and the suffix has - * matched this far, so this is a good starting point for the next - * iteration. - */ - --s; - if (!saved_start) { - s_start = s; - saved_start = TRUE; - } - } else { - /* Calculate the suffix offset. */ - good[pos] = pos - s; - - /* Extend the suffix and start searching for _this_ one. */ - --pos; - ++suffix_len; - - /* Where's a good place to start searching? */ - if (saved_start) { - s = s_start; - saved_start = FALSE; - } else - --s; - - /* Can we short-circuit the searching? */ - if (s < 0) - break; - } - - i = suffix_len - 1; - } - - /* Fill-in any remaining entries. */ - while (pos >= 0) { - good[pos] = pos - s; - --pos; - --s; - } - - node->string.bad_character_offset = bad; - node->string.good_suffix_offset = good; - - return TRUE; -} - -/* Builds the tables for a Boyer-Moore fast string search, backwards. 
*/ -Py_LOCAL_INLINE(BOOL) build_fast_tables_rev(RE_State* state, RE_Node* node, - BOOL ignore) { - Py_ssize_t length; - RE_CODE* values; - Py_ssize_t* bad; - Py_ssize_t* good; - Py_UCS4 ch; - Py_ssize_t last_pos; - Py_ssize_t pos; - BOOL (*is_same_char)(RE_EncodingTable* encoding, RE_LocaleInfo* - locale_info, Py_UCS4 ch1, Py_UCS4 ch2); - Py_ssize_t suffix_len; - BOOL saved_start; - Py_ssize_t s; - Py_ssize_t i; - Py_ssize_t s_start; - Py_UCS4 codepoints[RE_MAX_CASES]; - - length = (Py_ssize_t)node->value_count; - - if (length < RE_MIN_FAST_LENGTH) - return TRUE; - - values = node->values; - bad = (Py_ssize_t*)re_alloc(256 * sizeof(bad[0])); - good = (Py_ssize_t*)re_alloc((size_t)length * sizeof(good[0])); - - if (!bad || !good) { - re_dealloc(bad); - re_dealloc(good); - - return FALSE; - } - - for (ch = 0; ch < 0x100; ch++) - bad[ch] = -length; - - last_pos = length - 1; - - for (pos = last_pos; pos > 0; pos--) { - Py_ssize_t offset; - - offset = -pos; - ch = values[pos]; - if (ignore) { - int count; - int i; - - count = state->encoding->all_cases(state->locale_info, ch, - codepoints); - - for (i = 0; i < count; i++) - bad[codepoints[i] & 0xFF] = offset; - } else - bad[ch & 0xFF] = offset; - } - - is_same_char = ignore ? same_char_ign_wrapper : same_char_wrapper; - - suffix_len = 2; - pos = suffix_len - 1; - saved_start = FALSE; - s = pos + 1; - i = suffix_len - 1; - s_start = s; - - while (pos < length) { - /* Look for another occurrence of the suffix. */ - while (i > 0) { - /* Have we dropped off the end of the string? */ - if (s - i >= length) - break; - - if (is_same_char(state->encoding, state->locale_info, values[s - - i], values[pos - i])) - /* It still matches. */ - --i; - else { - /* Start again further along. 
*/ - ++s; - i = suffix_len - 1; - } - } - - if (s < length && is_same_char(state->encoding, state->locale_info, - values[s], values[pos])) { - /* We haven't dropped off the end of the string, and the suffix has - * matched this far, so this is a good starting point for the next - * iteration. - */ - ++s; - if (!saved_start) { - s_start = s; - saved_start = TRUE; - } - } else { - /* Calculate the suffix offset. */ - good[pos] = pos - s; - - /* Extend the suffix and start searching for _this_ one. */ - ++pos; - ++suffix_len; - - /* Where's a good place to start searching? */ - if (saved_start) { - s = s_start; - saved_start = FALSE; - } else - ++s; - - /* Can we short-circuit the searching? */ - if (s >= length) - break; - } - - i = suffix_len - 1; - } - - /* Fill-in any remaining entries. */ - while (pos < length) { - good[pos] = pos - s; - ++pos; - ++s; - } - - node->string.bad_character_offset = bad; - node->string.good_suffix_offset = good; - - return TRUE; -} - -/* Performs a string search. */ -Py_LOCAL_INLINE(Py_ssize_t) string_search(RE_SafeState* safe_state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { - RE_State* state; - Py_ssize_t found_pos; - - state = safe_state->re_state; - - *is_partial = FALSE; - - /* Has the node been initialised for fast searching, if necessary? */ - if (!(node->status & RE_STATUS_FAST_INIT)) { - /* Ideally the pattern should immutable and shareable across threads. - * Internally, however, it isn't. For safety we need to hold the GIL. - */ - acquire_GIL(safe_state); - - /* Double-check because of multithreading. */ - if (!(node->status & RE_STATUS_FAST_INIT)) { - build_fast_tables(state, node, FALSE); - node->status |= RE_STATUS_FAST_INIT; - } - - release_GIL(safe_state); - } - - if (node->string.bad_character_offset) { - /* Start with a fast search. This will find the string if it's complete - * (i.e. not truncated). 
- */ - found_pos = fast_string_search(state, node, text_pos, limit); - if (found_pos < 0 && state->partial_side == RE_PARTIAL_RIGHT) - /* We didn't find the string, but it could've been truncated, so - * try again, starting close to the end. - */ - found_pos = simple_string_search(state, node, limit - - (Py_ssize_t)(node->value_count - 1), limit, is_partial); - } else - found_pos = simple_string_search(state, node, text_pos, limit, - is_partial); - - return found_pos; -} - -/* Performs a string search, ignoring case. */ -Py_LOCAL_INLINE(Py_ssize_t) string_search_fld(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, Py_ssize_t* new_pos, - BOOL* is_partial) { - RE_State* state; - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* - folded); - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - void* text; - RE_CODE* values; - Py_ssize_t start_pos; - int f_pos; - int folded_len; - Py_ssize_t length; - Py_ssize_t s_pos; - Py_UCS4 folded[RE_MAX_FOLDED]; - - state = safe_state->re_state; - encoding = state->encoding; - locale_info = state->locale_info; - full_case_fold = encoding->full_case_fold; - char_at = state->char_at; - text = state->text; - - values = node->values; - start_pos = text_pos; - f_pos = 0; - folded_len = 0; - length = (Py_ssize_t)node->value_count; - s_pos = 0; - - *is_partial = FALSE; - - while (s_pos < length || f_pos < folded_len) { - if (f_pos >= folded_len) { - /* Fetch and casefold another character. 
*/ - if (text_pos >= limit) { - if (text_pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) { - *is_partial = TRUE; - return start_pos; - } - - return -1; - } - - folded_len = full_case_fold(locale_info, char_at(text, text_pos), - folded); - f_pos = 0; - } - - if (s_pos < length && same_char_ign(encoding, locale_info, - values[s_pos], folded[f_pos])) { - ++s_pos; - ++f_pos; - - if (f_pos >= folded_len) - ++text_pos; - } else { - ++start_pos; - text_pos = start_pos; - f_pos = 0; - folded_len = 0; - s_pos = 0; - } - } - - /* We found the string. */ - if (new_pos) - *new_pos = text_pos; - - return start_pos; -} - -/* Performs a string search, backwards, ignoring case. */ -Py_LOCAL_INLINE(Py_ssize_t) string_search_fld_rev(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, Py_ssize_t* new_pos, - BOOL* is_partial) { - RE_State* state; - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* - folded); - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - void* text; - RE_CODE* values; - Py_ssize_t start_pos; - int f_pos; - int folded_len; - Py_ssize_t length; - Py_ssize_t s_pos; - Py_UCS4 folded[RE_MAX_FOLDED]; - - state = safe_state->re_state; - encoding = state->encoding; - locale_info = state->locale_info; - full_case_fold = encoding->full_case_fold; - char_at = state->char_at; - text = state->text; - - values = node->values; - start_pos = text_pos; - f_pos = 0; - folded_len = 0; - length = (Py_ssize_t)node->value_count; - s_pos = 0; - - *is_partial = FALSE; - - while (s_pos < length || f_pos < folded_len) { - if (f_pos >= folded_len) { - /* Fetch and casefold another character. 
*/ - if (text_pos <= limit) { - if (text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) { - *is_partial = TRUE; - return start_pos; - } - - return -1; - } - - folded_len = full_case_fold(locale_info, char_at(text, text_pos - - 1), folded); - f_pos = 0; - } - - if (s_pos < length && same_char_ign(encoding, locale_info, - values[length - s_pos - 1], folded[folded_len - f_pos - 1])) { - ++s_pos; - ++f_pos; - - if (f_pos >= folded_len) - --text_pos; - } else { - --start_pos; - text_pos = start_pos; - f_pos = 0; - folded_len = 0; - s_pos = 0; - } - } - - /* We found the string. */ - if (new_pos) - *new_pos = text_pos; - - return start_pos; -} - -/* Performs a string search, ignoring case. */ -Py_LOCAL_INLINE(Py_ssize_t) string_search_ign(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { - RE_State* state; - Py_ssize_t found_pos; - - state = safe_state->re_state; - - *is_partial = FALSE; - - /* Has the node been initialised for fast searching, if necessary? */ - if (!(node->status & RE_STATUS_FAST_INIT)) { - /* Ideally the pattern should immutable and shareable across threads. - * Internally, however, it isn't. For safety we need to hold the GIL. - */ - acquire_GIL(safe_state); - - /* Double-check because of multithreading. */ - if (!(node->status & RE_STATUS_FAST_INIT)) { - build_fast_tables(state, node, TRUE); - node->status |= RE_STATUS_FAST_INIT; - } - - release_GIL(safe_state); - } - - if (node->string.bad_character_offset) { - /* Start with a fast search. This will find the string if it's complete - * (i.e. not truncated). - */ - found_pos = fast_string_search_ign(state, node, text_pos, limit); - if (found_pos < 0 && state->partial_side == RE_PARTIAL_RIGHT) - /* We didn't find the string, but it could've been truncated, so - * try again, starting close to the end. 
- */ - found_pos = simple_string_search_ign(state, node, limit - - (Py_ssize_t)(node->value_count - 1), limit, is_partial); - } else - found_pos = simple_string_search_ign(state, node, text_pos, limit, - is_partial); - - return found_pos; -} - -/* Performs a string search, backwards, ignoring case. */ -Py_LOCAL_INLINE(Py_ssize_t) string_search_ign_rev(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { - RE_State* state; - Py_ssize_t found_pos; - - state = safe_state->re_state; - - *is_partial = FALSE; - - /* Has the node been initialised for fast searching, if necessary? */ - if (!(node->status & RE_STATUS_FAST_INIT)) { - /* Ideally the pattern should immutable and shareable across threads. - * Internally, however, it isn't. For safety we need to hold the GIL. - */ - acquire_GIL(safe_state); - - /* Double-check because of multithreading. */ - if (!(node->status & RE_STATUS_FAST_INIT)) { - build_fast_tables_rev(state, node, TRUE); - node->status |= RE_STATUS_FAST_INIT; - } - - release_GIL(safe_state); - } - - if (node->string.bad_character_offset) { - /* Start with a fast search. This will find the string if it's complete - * (i.e. not truncated). - */ - found_pos = fast_string_search_ign_rev(state, node, text_pos, limit); - if (found_pos < 0 && state->partial_side == RE_PARTIAL_LEFT) - /* We didn't find the string, but it could've been truncated, so - * try again, starting close to the end. - */ - found_pos = simple_string_search_ign_rev(state, node, limit + - (Py_ssize_t)(node->value_count - 1), limit, is_partial); - } else - found_pos = simple_string_search_ign_rev(state, node, text_pos, limit, - is_partial); - - return found_pos; -} - -/* Performs a string search, backwards. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) string_search_rev(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { - RE_State* state; - Py_ssize_t found_pos; - - state = safe_state->re_state; - - *is_partial = FALSE; - - /* Has the node been initialised for fast searching, if necessary? */ - if (!(node->status & RE_STATUS_FAST_INIT)) { - /* Ideally the pattern should immutable and shareable across threads. - * Internally, however, it isn't. For safety we need to hold the GIL. - */ - acquire_GIL(safe_state); - - /* Double-check because of multithreading. */ - if (!(node->status & RE_STATUS_FAST_INIT)) { - build_fast_tables_rev(state, node, FALSE); - node->status |= RE_STATUS_FAST_INIT; - } - - release_GIL(safe_state); - } - - if (node->string.bad_character_offset) { - /* Start with a fast search. This will find the string if it's complete - * (i.e. not truncated). - */ - found_pos = fast_string_search_rev(state, node, text_pos, limit); - if (found_pos < 0 && state->partial_side == RE_PARTIAL_LEFT) - /* We didn't find the string, but it could've been truncated, so - * try again, starting close to the end. - */ - found_pos = simple_string_search_rev(state, node, limit + - (Py_ssize_t)(node->value_count - 1), limit, is_partial); - } else - found_pos = simple_string_search_rev(state, node, text_pos, limit, - is_partial); - - return found_pos; -} - -/* Returns how many characters there could be before full case-folding. */ -Py_LOCAL_INLINE(Py_ssize_t) possible_unfolded_length(Py_ssize_t length) { - if (length == 0) - return 0; - - if (length < RE_MAX_FOLDED) - return 1; - - return length / RE_MAX_FOLDED; -} - -/* Checks whether there's any character except a newline at a position. 
*/ -Py_LOCAL_INLINE(int) try_match_ANY(RE_State* state, RE_Node* node, Py_ssize_t - text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_ANY(state->encoding, node, state->char_at(state->text, - text_pos))); -} - -/* Checks whether there's any character at all at a position. */ -Py_LOCAL_INLINE(int) try_match_ANY_ALL(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end); -} - -/* Checks whether there's any character at all at a position, backwards. */ -Py_LOCAL_INLINE(int) try_match_ANY_ALL_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start); -} - -/* Checks whether there's any character except a newline at a position, - * backwards. - */ -Py_LOCAL_INLINE(int) try_match_ANY_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_ANY(state->encoding, node, state->char_at(state->text, text_pos - - 1))); -} - -/* Checks whether there's any character except a line separator at a position. 
- */ -Py_LOCAL_INLINE(int) try_match_ANY_U(RE_State* state, RE_Node* node, Py_ssize_t - text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_ANY_U(state->encoding, node, state->char_at(state->text, - text_pos))); -} - -/* Checks whether there's any character except a line separator at a position, - * backwards. - */ -Py_LOCAL_INLINE(int) try_match_ANY_U_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_ANY_U(state->encoding, node, state->char_at(state->text, text_pos - - 1))); -} - -/* Checks whether a position is on a word boundary. */ -Py_LOCAL_INLINE(int) try_match_BOUNDARY(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(state->encoding->at_boundary(state, text_pos) == - node->match); -} - -/* Checks whether there's a character at a position. */ -Py_LOCAL_INLINE(int) try_match_CHARACTER(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_CHARACTER(state->encoding, state->locale_info, node, - state->char_at(state->text, text_pos)) == node->match); -} - -/* Checks whether there's a character at a position, ignoring case. 
*/ -Py_LOCAL_INLINE(int) try_match_CHARACTER_IGN(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_CHARACTER_IGN(state->encoding, state->locale_info, node, - state->char_at(state->text, text_pos)) == node->match); -} - -/* Checks whether there's a character at a position, ignoring case, backwards. - */ -Py_LOCAL_INLINE(int) try_match_CHARACTER_IGN_REV(RE_State* state, RE_Node* - node, Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_CHARACTER_IGN(state->encoding, state->locale_info, node, - state->char_at(state->text, text_pos - 1)) == node->match); -} - -/* Checks whether there's a character at a position, backwards. */ -Py_LOCAL_INLINE(int) try_match_CHARACTER_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_CHARACTER(state->encoding, state->locale_info, node, - state->char_at(state->text, text_pos - 1)) == node->match); -} - -/* Checks whether a position is on a default word boundary. */ -Py_LOCAL_INLINE(int) try_match_DEFAULT_BOUNDARY(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(state->encoding->at_default_boundary(state, text_pos) - == node->match); -} - -/* Checks whether a position is at the default end of a word. 
*/ -Py_LOCAL_INLINE(int) try_match_DEFAULT_END_OF_WORD(RE_State* state, RE_Node* - node, Py_ssize_t text_pos) { - return bool_as_status(state->encoding->at_default_word_end(state, - text_pos)); -} - -/* Checks whether a position is at the default start of a word. */ -Py_LOCAL_INLINE(int) try_match_DEFAULT_START_OF_WORD(RE_State* state, RE_Node* - node, Py_ssize_t text_pos) { - return bool_as_status(state->encoding->at_default_word_start(state, - text_pos)); -} - -/* Checks whether a position is at the end of a line. */ -Py_LOCAL_INLINE(int) try_match_END_OF_LINE(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(text_pos >= state->slice_end || - state->char_at(state->text, text_pos) == '\n'); -} - -/* Checks whether a position is at the end of a line. */ -Py_LOCAL_INLINE(int) try_match_END_OF_LINE_U(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(state->encoding->at_line_end(state, text_pos)); -} - -/* Checks whether a position is at the end of the string. */ -Py_LOCAL_INLINE(int) try_match_END_OF_STRING(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(text_pos >= state->text_length); -} - -/* Checks whether a position is at the end of a line or the string. */ -Py_LOCAL_INLINE(int) try_match_END_OF_STRING_LINE(RE_State* state, RE_Node* - node, Py_ssize_t text_pos) { - return bool_as_status(text_pos >= state->text_length || text_pos == - state->final_newline); -} - -/* Checks whether a position is at the end of the string. */ -Py_LOCAL_INLINE(int) try_match_END_OF_STRING_LINE_U(RE_State* state, RE_Node* - node, Py_ssize_t text_pos) { - return bool_as_status(text_pos >= state->text_length || text_pos == - state->final_line_sep); -} - -/* Checks whether a position is at the end of a word. 
*/ -Py_LOCAL_INLINE(int) try_match_END_OF_WORD(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(state->encoding->at_word_end(state, text_pos)); -} - -/* Checks whether a position is on a grapheme boundary. */ -Py_LOCAL_INLINE(int) try_match_GRAPHEME_BOUNDARY(RE_State* state, RE_Node* - node, Py_ssize_t text_pos) { - return bool_as_status(state->encoding->at_grapheme_boundary(state, - text_pos)); -} - -/* Checks whether there's a character with a certain property at a position. */ -Py_LOCAL_INLINE(int) try_match_PROPERTY(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_PROPERTY(state->encoding, state->locale_info, node, - state->char_at(state->text, text_pos)) == node->match); -} - -/* Checks whether there's a character with a certain property at a position, - * ignoring case. - */ -Py_LOCAL_INLINE(int) try_match_PROPERTY_IGN(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_PROPERTY_IGN(state->encoding, state->locale_info, node, - state->char_at(state->text, text_pos)) == node->match); -} - -/* Checks whether there's a character with a certain property at a position, - * ignoring case, backwards. 
- */ -Py_LOCAL_INLINE(int) try_match_PROPERTY_IGN_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_PROPERTY_IGN(state->encoding, state->locale_info, node, - state->char_at(state->text, text_pos - 1)) == node->match); -} - -/* Checks whether there's a character with a certain property at a position, - * backwards. - */ -Py_LOCAL_INLINE(int) try_match_PROPERTY_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_PROPERTY(state->encoding, state->locale_info, node, - state->char_at(state->text, text_pos - 1)) == node->match); -} - -/* Checks whether there's a character in a certain range at a position. */ -Py_LOCAL_INLINE(int) try_match_RANGE(RE_State* state, RE_Node* node, Py_ssize_t - text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_RANGE(state->encoding, state->locale_info, node, - state->char_at(state->text, text_pos)) == node->match); -} - -/* Checks whether there's a character in a certain range at a position, - * ignoring case. 
- */ -Py_LOCAL_INLINE(int) try_match_RANGE_IGN(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_RANGE_IGN(state->encoding, state->locale_info, node, - state->char_at(state->text, text_pos)) == node->match); -} - -/* Checks whether there's a character in a certain range at a position, - * ignoring case, backwards. - */ -Py_LOCAL_INLINE(int) try_match_RANGE_IGN_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_RANGE_IGN(state->encoding, state->locale_info, node, - state->char_at(state->text, text_pos - 1)) == node->match); -} - -/* Checks whether there's a character in a certain range at a position, - * backwards. - */ -Py_LOCAL_INLINE(int) try_match_RANGE_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_RANGE(state->encoding, state->locale_info, node, - state->char_at(state->text, text_pos - 1)) == node->match); -} - -/* Checks whether a position is at the search anchor. */ -Py_LOCAL_INLINE(int) try_match_SEARCH_ANCHOR(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(text_pos == state->search_anchor); -} - -/* Checks whether there's a character in a certain set at a position. 
*/ -Py_LOCAL_INLINE(int) try_match_SET(RE_State* state, RE_Node* node, Py_ssize_t - text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_SET(state->encoding, state->locale_info, node, - state->char_at(state->text, text_pos)) == node->match); -} - -/* Checks whether there's a character in a certain set at a position, ignoring - * case. - */ -Py_LOCAL_INLINE(int) try_match_SET_IGN(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_SET_IGN(state->encoding, state->locale_info, node, - state->char_at(state->text, text_pos)) == node->match); -} - -/* Checks whether there's a character in a certain set at a position, ignoring - * case, backwards. - */ -Py_LOCAL_INLINE(int) try_match_SET_IGN_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_SET_IGN(state->encoding, state->locale_info, node, - state->char_at(state->text, text_pos - 1)) == node->match); -} - -/* Checks whether there's a character in a certain set at a position, - * backwards. 
- */ -Py_LOCAL_INLINE(int) try_match_SET_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_SET(state->encoding, state->locale_info, node, - state->char_at(state->text, text_pos - 1)) == node->match); -} - -/* Checks whether a position is at the start of a line. */ -Py_LOCAL_INLINE(int) try_match_START_OF_LINE(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(text_pos <= 0 || state->char_at(state->text, text_pos - - 1) == '\n'); -} - -/* Checks whether a position is at the start of a line. */ -Py_LOCAL_INLINE(int) try_match_START_OF_LINE_U(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(state->encoding->at_line_start(state, text_pos)); -} - -/* Checks whether a position is at the start of the string. */ -Py_LOCAL_INLINE(int) try_match_START_OF_STRING(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(text_pos <= 0); -} - -/* Checks whether a position is at the start of a word. */ -Py_LOCAL_INLINE(int) try_match_START_OF_WORD(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(state->encoding->at_word_start(state, text_pos)); -} - -/* Checks whether there's a certain string at a position. 
*/
-Py_LOCAL_INLINE(int) try_match_STRING(RE_State* state, RE_NextNode* next,
-  RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) {
-    Py_ssize_t length;
-    Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
-    RE_CODE* values;
-    Py_ssize_t s_pos;
-
-    length = (Py_ssize_t)node->value_count;
-    char_at = state->char_at;
-    values = node->values;
-
-    for (s_pos = 0; s_pos < length; s_pos++) {
-        if (text_pos + s_pos >= state->slice_end) { /* Slice exhausted before the whole string was compared. */
-            if (state->partial_side == RE_PARTIAL_RIGHT) {
-                next_position->text_pos = text_pos; /* Only text_pos is written; next_position->node is left untouched. */
-                return RE_ERROR_PARTIAL;
-            }
-
-            return RE_ERROR_FAILURE;
-        }
-
-        if (!same_char(char_at(state->text, text_pos + s_pos), values[s_pos]))
-            return RE_ERROR_FAILURE;
-    }
-
-    next_position->node = next->match_next;
-    next_position->text_pos = text_pos + next->match_step; /* Advance by the step recorded on the link, not by length. */
-
-    return RE_ERROR_SUCCESS;
-}
-
-/* Checks whether there's a certain string at a position, ignoring case.
- *
- * Each text character is expanded with the encoding's full_case_fold() into
- * up to RE_MAX_FOLDED code points, which are compared one by one against
- * node->values using same_char_ign().
- *
- * Returns RE_ERROR_SUCCESS, RE_ERROR_FAILURE or RE_ERROR_PARTIAL. On success
- * and on a partial match next_position->text_pos is set to the original
- * position when next->match_step is 0 and to the current position otherwise.
- */
-Py_LOCAL_INLINE(int) try_match_STRING_FLD(RE_State* state, RE_NextNode* next,
-  RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) {
-    Py_ssize_t length;
-    Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
-    RE_EncodingTable* encoding;
-    RE_LocaleInfo* locale_info;
-    int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4*
-      folded);
-    Py_ssize_t s_pos;
-    RE_CODE* values;
-    int folded_len;
-    int f_pos;
-    Py_ssize_t start_pos;
-    Py_UCS4 folded[RE_MAX_FOLDED];
-
-    length = (Py_ssize_t)node->value_count;
-    char_at = state->char_at;
-    encoding = state->encoding;
-    locale_info = state->locale_info;
-    full_case_fold = encoding->full_case_fold;
-
-    s_pos = 0; /* Index into node->values. */
-    values = node->values;
-    folded_len = 0; /* Empty buffer forces a fetch on the first iteration. */
-    f_pos = 0; /* Index into folded[]. */
-    start_pos = text_pos;
-
-    while (s_pos < length) {
-        if (f_pos >= folded_len) {
-            /* Fetch and casefold another character. */
-            if (text_pos >= state->slice_end) {
-                if (state->partial_side == RE_PARTIAL_RIGHT) {
-                    if (next->match_step == 0)
-                        next_position->text_pos = start_pos;
-                    else
-                        next_position->text_pos = text_pos;
-                    return RE_ERROR_PARTIAL;
-                }
-
-                return RE_ERROR_FAILURE;
-            }
-
-            folded_len = full_case_fold(locale_info, char_at(state->text,
-              text_pos), folded);
-            f_pos = 0;
-        }
-
-        if (!same_char_ign(encoding, locale_info, folded[f_pos],
-          values[s_pos]))
-            return RE_ERROR_FAILURE;
-
-        ++s_pos;
-        ++f_pos;
-
-        if (f_pos >= folded_len) /* The text position only moves once the whole folded expansion is consumed. */
-            ++text_pos;
-    }
-
-    if (f_pos < folded_len) /* The string ended part-way through a folded character. */
-        return RE_ERROR_FAILURE;
-
-    next_position->node = next->match_next;
-    if (next->match_step == 0)
-        next_position->text_pos = start_pos;
-    else
-        next_position->text_pos = text_pos;
-
-    return RE_ERROR_SUCCESS;
-}
-
-/* Checks whether there's a certain string at a position, ignoring case,
- * backwards.
- *
- * Mirror image of try_match_STRING_FLD: the character before text_pos is
- * folded, and both the folded buffer and node->values are read from their
- * last element towards their first.
- */
-Py_LOCAL_INLINE(int) try_match_STRING_FLD_REV(RE_State* state, RE_NextNode*
-  next, RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) {
-    Py_ssize_t length;
-    Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
-    RE_EncodingTable* encoding;
-    RE_LocaleInfo* locale_info;
-    int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4*
-      folded);
-    Py_ssize_t s_pos;
-    RE_CODE* values;
-    int folded_len;
-    int f_pos;
-    Py_ssize_t start_pos;
-    Py_UCS4 folded[RE_MAX_FOLDED];
-
-    length = (Py_ssize_t)node->value_count;
-    char_at = state->char_at;
-    encoding = state->encoding;
-    locale_info = state->locale_info;
-    full_case_fold = encoding->full_case_fold;
-
-    s_pos = 0; /* Counts string code points consumed, starting from the end. */
-    values = node->values;
-    folded_len = 0; /* Empty buffer forces a fetch on the first iteration. */
-    f_pos = 0; /* Counts folded code points consumed, starting from the end. */
-    start_pos = text_pos;
-
-    while (s_pos < length) {
-        if (f_pos >= folded_len) {
-            /* Fetch and casefold another character. */
-            if (text_pos <= state->slice_start) {
-                if (state->partial_side == RE_PARTIAL_LEFT) {
-                    if (next->match_step == 0)
-                        next_position->text_pos = start_pos;
-                    else
-                        next_position->text_pos = text_pos;
-                    return RE_ERROR_PARTIAL;
-                }
-
-                return RE_ERROR_FAILURE;
-            }
-
-            folded_len = full_case_fold(locale_info, char_at(state->text,
-              text_pos - 1), folded);
-            f_pos = 0;
-        }
-
-        if (!same_char_ign(encoding, locale_info, folded[folded_len - f_pos -
-          1], values[length - s_pos - 1]))
-            return RE_ERROR_FAILURE;
-
-        ++s_pos;
-        ++f_pos;
-
-        if (f_pos >= folded_len) /* The text position only moves once the whole folded expansion is consumed. */
-            --text_pos;
-    }
-
-    if (f_pos < folded_len) /* The string ended part-way through a folded character. */
-        return RE_ERROR_FAILURE;
-
-    next_position->node = next->match_next;
-    if (next->match_step == 0)
-        next_position->text_pos = start_pos;
-    else
-        next_position->text_pos = text_pos;
-
-    return RE_ERROR_SUCCESS;
-}
-
-/* Checks whether there's a certain string at a position, ignoring case.
- *
- * Unlike try_match_STRING_FLD this compares one text character per string
- * code point with same_char_ign(); no full case-folding is performed here.
- */
-Py_LOCAL_INLINE(int) try_match_STRING_IGN(RE_State* state, RE_NextNode* next,
-  RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) {
-    Py_ssize_t length;
-    Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
-    RE_EncodingTable* encoding;
-    RE_LocaleInfo* locale_info;
-    RE_CODE* values;
-    Py_ssize_t s_pos;
-
-    length = (Py_ssize_t)node->value_count;
-    char_at = state->char_at;
-    encoding = state->encoding;
-    locale_info = state->locale_info;
-    values = node->values;
-
-    for (s_pos = 0; s_pos < length; s_pos++) {
-        if (text_pos + s_pos >= state->slice_end) { /* Slice exhausted before the whole string was compared. */
-            if (state->partial_side == RE_PARTIAL_RIGHT) {
-                next_position->text_pos = text_pos;
-                return RE_ERROR_PARTIAL;
-            }
-
-            return RE_ERROR_FAILURE;
-        }
-
-        if (!same_char_ign(encoding, locale_info, char_at(state->text, text_pos
-          + s_pos), values[s_pos]))
-            return RE_ERROR_FAILURE;
-    }
-
-    next_position->node = next->match_next;
-    next_position->text_pos = text_pos + next->match_step;
-
-    return RE_ERROR_SUCCESS;
-}
-
-/* Checks whether there's a certain string at a position, ignoring case,
- * backwards. 
- */
-Py_LOCAL_INLINE(int) try_match_STRING_IGN_REV(RE_State* state, RE_NextNode*
-  next, RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) {
-    Py_ssize_t count;
-    Py_ssize_t offset;
-
-    count = (Py_ssize_t)node->value_count;
-
-    /* Walk the string from its last code point to its first while stepping
-     * leftwards through the text.
-     */
-    for (offset = 0; offset < count; offset++) {
-        Py_ssize_t pos;
-
-        pos = text_pos - offset;
-
-        if (pos <= state->slice_start) {
-            /* Slice exhausted before the whole string was compared. */
-            if (state->partial_side != RE_PARTIAL_LEFT)
-                return RE_ERROR_FAILURE;
-
-            next_position->text_pos = text_pos;
-            return RE_ERROR_PARTIAL;
-        }
-
-        if (!same_char_ign(state->encoding, state->locale_info,
-          state->char_at(state->text, pos - 1), node->values[count - offset -
-          1]))
-            return RE_ERROR_FAILURE;
-    }
-
-    /* Every code point matched: hand back the follow-on node and position. */
-    next_position->text_pos = text_pos + next->match_step;
-    next_position->node = next->match_next;
-
-    return RE_ERROR_SUCCESS;
-}
-
-/* Checks whether there's a certain string at a position, backwards. 
*/
-Py_LOCAL_INLINE(int) try_match_STRING_REV(RE_State* state, RE_NextNode* next,
-  RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) {
-    Py_ssize_t length;
-    Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
-    RE_CODE* values;
-    Py_ssize_t s_pos;
-
-    length = (Py_ssize_t)node->value_count;
-    char_at = state->char_at;
-    values = node->values;
-
-    for (s_pos = 0; s_pos < length; s_pos++) {
-        if (text_pos - s_pos <= state->slice_start) { /* Slice exhausted before the whole string was compared. */
-            if (state->partial_side == RE_PARTIAL_LEFT) {
-                next_position->text_pos = text_pos;
-                return RE_ERROR_PARTIAL;
-            }
-
-            return RE_ERROR_FAILURE;
-        }
-
-        if (!same_char(char_at(state->text, text_pos - s_pos - 1), /* String is read last-to-first. */
-          values[length - s_pos - 1]))
-            return RE_ERROR_FAILURE;
-    }
-
-    next_position->node = next->match_next;
-    next_position->text_pos = text_pos + next->match_step;
-
-    return RE_ERROR_SUCCESS;
-}
-
-/* Tries a match at the current text position.
- *
- * Returns the next node and text position if the match succeeds.
- *
- * The test node is next->test. If it carries RE_STATUS_FUZZY, or its opcode is
- * not one of the cases below, the call succeeds immediately with
- * next_position set to next->node and the unchanged text_pos. The string
- * opcodes delegate to the try_match_STRING* helpers, which fill in
- * next_position themselves. Every other opcode calls a single-position test;
- * a non-success status is returned as is, otherwise next_position becomes
- * next->match_next at text_pos + next->match_step.
- */
-Py_LOCAL_INLINE(int) try_match(RE_State* state, RE_NextNode* next, Py_ssize_t
-  text_pos, RE_Position* next_position) {
-    RE_Node* test;
-    int status;
-
-    test = next->test;
-
-    if (test->status & RE_STATUS_FUZZY) {
-        next_position->node = next->node;
-        next_position->text_pos = text_pos;
-        return RE_ERROR_SUCCESS;
-    }
-
-    switch (test->op) {
-    case RE_OP_ANY:
-        status = try_match_ANY(state, test, text_pos);
-        break;
-    case RE_OP_ANY_ALL:
-        status = try_match_ANY_ALL(state, test, text_pos);
-        break;
-    case RE_OP_ANY_ALL_REV:
-        status = try_match_ANY_ALL_REV(state, test, text_pos);
-        break;
-    case RE_OP_ANY_REV:
-        status = try_match_ANY_REV(state, test, text_pos);
-        break;
-    case RE_OP_ANY_U:
-        status = try_match_ANY_U(state, test, text_pos);
-        break;
-    case RE_OP_ANY_U_REV:
-        status = try_match_ANY_U_REV(state, test, text_pos);
-        break;
-    case RE_OP_BOUNDARY:
-        status = try_match_BOUNDARY(state, test, text_pos);
-        break;
-    case RE_OP_CHARACTER:
-        status = try_match_CHARACTER(state, test, text_pos);
-        break;
-    case RE_OP_CHARACTER_IGN:
-        status = try_match_CHARACTER_IGN(state, test, text_pos);
-        break;
-    case RE_OP_CHARACTER_IGN_REV:
-        status = try_match_CHARACTER_IGN_REV(state, test, text_pos);
-        break;
-    case RE_OP_CHARACTER_REV:
-        status = try_match_CHARACTER_REV(state, test, text_pos);
-        break;
-    case RE_OP_DEFAULT_BOUNDARY:
-        status = try_match_DEFAULT_BOUNDARY(state, test, text_pos);
-        break;
-    case RE_OP_DEFAULT_END_OF_WORD:
-        status = try_match_DEFAULT_END_OF_WORD(state, test, text_pos);
-        break;
-    case RE_OP_DEFAULT_START_OF_WORD:
-        status = try_match_DEFAULT_START_OF_WORD(state, test, text_pos);
-        break;
-    case RE_OP_END_OF_LINE:
-        status = try_match_END_OF_LINE(state, test, text_pos);
-        break;
-    case RE_OP_END_OF_LINE_U:
-        status = try_match_END_OF_LINE_U(state, test, text_pos);
-        break;
-    case RE_OP_END_OF_STRING:
-        status = try_match_END_OF_STRING(state, test, text_pos);
-        break;
-    case RE_OP_END_OF_STRING_LINE:
-        status = try_match_END_OF_STRING_LINE(state, test, text_pos);
-        break;
-    case RE_OP_END_OF_STRING_LINE_U:
-        status = try_match_END_OF_STRING_LINE_U(state, test, text_pos);
-        break;
-    case RE_OP_END_OF_WORD:
-        status = try_match_END_OF_WORD(state, test, text_pos);
-        break;
-    case RE_OP_GRAPHEME_BOUNDARY:
-        status = try_match_GRAPHEME_BOUNDARY(state, test, text_pos);
-        break;
-    case RE_OP_PROPERTY:
-        status = try_match_PROPERTY(state, test, text_pos);
-        break;
-    case RE_OP_PROPERTY_IGN:
-        status = try_match_PROPERTY_IGN(state, test, text_pos);
-        break;
-    case RE_OP_PROPERTY_IGN_REV:
-        status = try_match_PROPERTY_IGN_REV(state, test, text_pos);
-        break;
-    case RE_OP_PROPERTY_REV:
-        status = try_match_PROPERTY_REV(state, test, text_pos);
-        break;
-    case RE_OP_RANGE:
-        status = try_match_RANGE(state, test, text_pos);
-        break;
-    case RE_OP_RANGE_IGN:
-        status = try_match_RANGE_IGN(state, test, text_pos);
-        break;
-    case RE_OP_RANGE_IGN_REV:
-        status = try_match_RANGE_IGN_REV(state, test, text_pos);
-        break;
-    case RE_OP_RANGE_REV:
-        status = try_match_RANGE_REV(state, test, text_pos);
-        break;
-    case RE_OP_SEARCH_ANCHOR:
-        status = try_match_SEARCH_ANCHOR(state, test, text_pos);
-        break;
-    case RE_OP_SET_DIFF: /* All four set opcodes of a given flavour share one test. */
-    case RE_OP_SET_INTER:
-    case RE_OP_SET_SYM_DIFF:
-    case RE_OP_SET_UNION:
-        status = try_match_SET(state, test, text_pos);
-        break;
-    case RE_OP_SET_DIFF_IGN:
-    case RE_OP_SET_INTER_IGN:
-    case RE_OP_SET_SYM_DIFF_IGN:
-    case RE_OP_SET_UNION_IGN:
-        status = try_match_SET_IGN(state, test, text_pos);
-        break;
-    case RE_OP_SET_DIFF_IGN_REV:
-    case RE_OP_SET_INTER_IGN_REV:
-    case RE_OP_SET_SYM_DIFF_IGN_REV:
-    case RE_OP_SET_UNION_IGN_REV:
-        status = try_match_SET_IGN_REV(state, test, text_pos);
-        break;
-    case RE_OP_SET_DIFF_REV:
-    case RE_OP_SET_INTER_REV:
-    case RE_OP_SET_SYM_DIFF_REV:
-    case RE_OP_SET_UNION_REV:
-        status = try_match_SET_REV(state, test, text_pos);
-        break;
-    case RE_OP_START_OF_LINE:
-        status = try_match_START_OF_LINE(state, test, text_pos);
-        break;
-    case RE_OP_START_OF_LINE_U:
-        status = try_match_START_OF_LINE_U(state, test, text_pos);
-        break;
-    case RE_OP_START_OF_STRING:
-        status = try_match_START_OF_STRING(state, test, text_pos);
-        break;
-    case RE_OP_START_OF_WORD:
-        status = try_match_START_OF_WORD(state, test, text_pos);
-        break;
-    case RE_OP_STRING: /* String opcodes return directly; the helpers set next_position. */
-        return try_match_STRING(state, next, test, text_pos, next_position);
-    case RE_OP_STRING_FLD:
-        return try_match_STRING_FLD(state, next, test, text_pos,
-          next_position);
-    case RE_OP_STRING_FLD_REV:
-        return try_match_STRING_FLD_REV(state, next, test, text_pos,
-          next_position);
-    case RE_OP_STRING_IGN:
-        return try_match_STRING_IGN(state, next, test, text_pos,
-          next_position);
-    case RE_OP_STRING_IGN_REV:
-        return try_match_STRING_IGN_REV(state, next, test, text_pos,
-          next_position);
-    case RE_OP_STRING_REV:
-        return try_match_STRING_REV(state, next, test, text_pos,
-          next_position);
-    default: /* Opcode has no quick test here: report success without consuming text. */
-        next_position->node = next->node;
-        next_position->text_pos = text_pos;
-        return RE_ERROR_SUCCESS;
-    }
-
-    if (status != RE_ERROR_SUCCESS) /* Failure and partial statuses are passed through unchanged. */
-        return status;
-
-    next_position->node = next->match_next;
-    next_position->text_pos = text_pos + next->match_step;
-
-    return RE_ERROR_SUCCESS;
-}
-
-/* Searches for a word boundary.
- *
- * Scans forwards from text_pos and returns the first position where the
- * encoding's at_boundary() result equals node->match, or -1 once slice_end has
- * been tested without success. *is_partial is always set to FALSE.
- */
-Py_LOCAL_INLINE(Py_ssize_t) search_start_BOUNDARY(RE_State* state, RE_Node*
-  node, Py_ssize_t text_pos, BOOL* is_partial) {
-    BOOL (*at_boundary)(RE_State* state, Py_ssize_t text_pos);
-
-    at_boundary = state->encoding->at_boundary;
-
-    *is_partial = FALSE;
-
-    for (;;) {
-        if (at_boundary(state, text_pos) == node->match)
-            return text_pos;
-
-        if (text_pos >= state->slice_end) /* Checked after the test, so slice_end itself is a candidate. */
-            return -1;
-
-        ++text_pos;
-    }
-}
-
-/* Searches for a word boundary, backwards.
- *
- * Scans from text_pos towards slice_start and returns the first position where
- * the encoding's at_boundary() result equals node->match, or -1 once
- * slice_start has been tested without success. *is_partial is always set to
- * FALSE.
- */
-Py_LOCAL_INLINE(Py_ssize_t) search_start_BOUNDARY_rev(RE_State* state, RE_Node*
-  node, Py_ssize_t text_pos, BOOL* is_partial) {
-    BOOL (*at_boundary)(RE_State* state, Py_ssize_t text_pos);
-
-    at_boundary = state->encoding->at_boundary;
-
-    *is_partial = FALSE;
-
-    for (;;) {
-        if (at_boundary(state, text_pos) == node->match)
-            return text_pos;
-
-        if (text_pos <= state->slice_start) /* Checked after the test, so slice_start itself is a candidate. */
-            return -1;
-
-        --text_pos;
-    }
-}
-
-/* Searches for a default word boundary.
- *
- * Same forward scan as search_start_BOUNDARY, but using the encoding's
- * at_default_boundary().
- */
-Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_BOUNDARY(RE_State* state,
-  RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) {
-    BOOL (*at_default_boundary)(RE_State* state, Py_ssize_t text_pos);
-
-    at_default_boundary = state->encoding->at_default_boundary;
-
-    *is_partial = FALSE;
-
-    for (;;) {
-        if (at_default_boundary(state, text_pos) == node->match)
-            return text_pos;
-
-        if (text_pos >= state->slice_end)
-            return -1;
-
-        ++text_pos;
-    }
-}
-
-/* Searches for a default word boundary, backwards. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_BOUNDARY_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_default_boundary)(RE_State* state, Py_ssize_t text_pos); - - at_default_boundary = state->encoding->at_default_boundary; - - *is_partial = FALSE; - - for (;;) { - if (at_default_boundary(state, text_pos) == node->match) - return text_pos; - - if (text_pos <= state->slice_start) - return -1; - - --text_pos; - } -} - -/* Searches for the default end of a word. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_END_OF_WORD(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_default_word_end)(RE_State* state, Py_ssize_t text_pos); - - at_default_word_end = state->encoding->at_default_word_end; - - *is_partial = FALSE; - - for (;;) { - if (at_default_word_end(state, text_pos) == node->match) - return text_pos; - - if (text_pos >= state->slice_end) - return -1; - - ++text_pos; - } -} - -/* Searches for the default end of a word, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_END_OF_WORD_rev(RE_State* - state, RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_default_word_end)(RE_State* state, Py_ssize_t text_pos); - - at_default_word_end = state->encoding->at_default_word_end; - - *is_partial = FALSE; - - for (;;) { - if (at_default_word_end(state, text_pos) == node->match) - return text_pos; - - if (text_pos <= state->slice_start) - return -1; - - --text_pos; - } -} - -/* Searches for the default start of a word. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_START_OF_WORD(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_default_word_start)(RE_State* state, Py_ssize_t text_pos); - - at_default_word_start = state->encoding->at_default_word_start; - - *is_partial = FALSE; - - for (;;) { - if (at_default_word_start(state, text_pos) == node->match) - return text_pos; - - if (text_pos >= state->slice_end) - return -1; - - ++text_pos; - } -} - -/* Searches for the default start of a word, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_START_OF_WORD_rev(RE_State* - state, RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_default_word_start)(RE_State* state, Py_ssize_t text_pos); - - at_default_word_start = state->encoding->at_default_word_start; - - *is_partial = FALSE; - - for (;;) { - if (at_default_word_start(state, text_pos) == node->match) - return text_pos; - - if (text_pos <= state->slice_start) - return -1; - - --text_pos; - } -} - -/* Searches for the end of line. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_LINE(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - for (;;) { - if (text_pos >= state->text_length || state->char_at(state->text, - text_pos) == '\n') - return text_pos; - - if (text_pos >= state->slice_end) - return -1; - - ++text_pos; - } -} - -/* Searches for the end of line, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_LINE_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - for (;;) { - if (text_pos >= state->text_length || state->char_at(state->text, - text_pos) == '\n') - return text_pos; - - if (text_pos <= state->slice_start) - return -1; - - --text_pos; - } -} - -/* Searches for the end of the string. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_STRING(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - if (state->slice_end >= state->text_length) - return state->text_length; - - return -1; -} - -/* Searches for the end of the string, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_STRING_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - if (text_pos >= state->text_length) - return text_pos; - - return -1; -} - -/* Searches for the end of the string or line. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_STRING_LINE(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - if (text_pos <= state->final_newline) - text_pos = state->final_newline; - else if (text_pos <= state->text_length) - text_pos = state->text_length; - - if (text_pos > state->slice_end) - return -1; - - if (text_pos >= state->text_length) - return text_pos; - - return text_pos; -} - -/* Searches for the end of the string or line, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_STRING_LINE_rev(RE_State* - state, RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - if (text_pos >= state->text_length) - text_pos = state->text_length; - else if (text_pos >= state->final_newline) - text_pos = state->final_newline; - else - return -1; - - if (text_pos < state->slice_start) - return -1; - - if (text_pos <= 0) - return text_pos; - - return text_pos; -} - -/* Searches for the end of a word. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_WORD(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_word_end)(RE_State* state, Py_ssize_t text_pos); - - at_word_end = state->encoding->at_word_end; - - *is_partial = FALSE; - - for (;;) { - if (at_word_end(state, text_pos) == node->match) - return text_pos; - - if (text_pos >= state->slice_end) - return -1; - - ++text_pos; - } -} - -/* Searches for the end of a word, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_WORD_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_word_end)(RE_State* state, Py_ssize_t text_pos); - - at_word_end = state->encoding->at_word_end; - - *is_partial = FALSE; - - for (;;) { - if (at_word_end(state, text_pos) == node->match) - return text_pos; - - if (text_pos <= state->slice_start) - return -1; - - --text_pos; - } -} - -/* Searches for a grapheme boundary. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_GRAPHEME_BOUNDARY(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_grapheme_boundary)(RE_State* state, Py_ssize_t text_pos); - - at_grapheme_boundary = state->encoding->at_grapheme_boundary; - - *is_partial = FALSE; - - for (;;) { - if (at_grapheme_boundary(state, text_pos) == node->match) - return text_pos; - - if (text_pos >= state->slice_end) - return -1; - - ++text_pos; - } -} - -/* Searches for a grapheme boundary, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_GRAPHEME_BOUNDARY_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_grapheme_boundary)(RE_State* state, Py_ssize_t text_pos); - - at_grapheme_boundary = state->encoding->at_grapheme_boundary; - - *is_partial = FALSE; - - for (;;) { - if (at_grapheme_boundary(state, text_pos) == node->match) - return text_pos; - - if (text_pos <= state->slice_start) - return -1; - - --text_pos; - } -} - -/* Searches for the start of line. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_LINE(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - for (;;) { - if (text_pos <= 0 || state->char_at(state->text, text_pos - 1) == '\n') - return text_pos; - - if (text_pos >= state->slice_end) - return -1; - - ++text_pos; - } -} - -/* Searches for the start of line, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_LINE_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - for (;;) { - if (text_pos <= 0 || state->char_at(state->text, text_pos - 1) == '\n') - return text_pos; - - if (text_pos <= state->slice_start) - return -1; - - --text_pos; - } -} - -/* Searches for the start of the string. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_STRING(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - if (text_pos <= 0) - return text_pos; - - return -1; -} - -/* Searches for the start of the string, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_STRING_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - if (state->slice_start <= 0) - return 0; - - return -1; -} - -/* Searches for the start of a word. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_WORD(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_word_start)(RE_State* state, Py_ssize_t text_pos); - - at_word_start = state->encoding->at_word_start; - - *is_partial = FALSE; - - for (;;) { - if (at_word_start(state, text_pos) == node->match) - return text_pos; - - if (text_pos >= state->slice_end) - return -1; - - ++text_pos; - } -} - -/* Searches for the start of a word, backwards. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_WORD_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_word_start)(RE_State* state, Py_ssize_t text_pos); - - at_word_start = state->encoding->at_word_start; - - *is_partial = FALSE; - - for (;;) { - if (at_word_start(state, text_pos) == node->match) - return text_pos; - - if (text_pos <= state->slice_start) - return -1; - - --text_pos; - } -} - -/* Searches for a string. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - RE_State* state; - - state = safe_state->re_state; - - *is_partial = FALSE; - - if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) - return text_pos; - - return string_search(safe_state, node, text_pos, state->slice_end, - is_partial); -} - -/* Searches for a string, ignoring case. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_FLD(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t* new_pos, BOOL* is_partial) { - RE_State* state; - - state = safe_state->re_state; - - *is_partial = FALSE; - - if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) { - *new_pos = state->req_end; - return text_pos; - } - - return string_search_fld(safe_state, node, text_pos, state->slice_end, - new_pos, is_partial); -} - -/* Searches for a string, ignoring case, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_FLD_REV(RE_SafeState* - safe_state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t* new_pos, BOOL* - is_partial) { - RE_State* state; - - state = safe_state->re_state; - - *is_partial = FALSE; - - if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) { - *new_pos = state->req_end; - return text_pos; - } - - return string_search_fld_rev(safe_state, node, text_pos, - state->slice_start, new_pos, is_partial); -} - -/* Searches for a string, ignoring case. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_IGN(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - RE_State* state; - - state = safe_state->re_state; - - *is_partial = FALSE; - - if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) - return text_pos; - - return string_search_ign(safe_state, node, text_pos, state->slice_end, - is_partial); -} - -/* Searches for a string, ignoring case, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_IGN_REV(RE_SafeState* - safe_state, RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - RE_State* state; - - state = safe_state->re_state; - - *is_partial = FALSE; - - if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) - return text_pos; - - return string_search_ign_rev(safe_state, node, text_pos, - state->slice_start, is_partial); -} - -/* Searches for a string, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_REV(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - RE_State* state; - - state = safe_state->re_state; - - *is_partial = FALSE; - - if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) - return text_pos; - - return string_search_rev(safe_state, node, text_pos, state->slice_start, - is_partial); -} - -/* Searches for the start of a match. 
*/ -Py_LOCAL_INLINE(int) search_start(RE_SafeState* safe_state, RE_NextNode* next, - RE_Position* new_position, int search_index) { - RE_State* state; - Py_ssize_t start_pos; - RE_Node* test; - RE_Node* node; - RE_SearchPosition* info; - Py_ssize_t text_pos; - - state = safe_state->re_state; - - start_pos = state->text_pos; - TRACE(("<> at %d\n", start_pos)) - - test = next->test; - node = next->node; - - if (state->reverse) { - if (start_pos < state->slice_start) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = state->slice_start; - return RE_ERROR_PARTIAL; - } - - return RE_ERROR_FAILURE; - } - } else { - if (start_pos > state->slice_end) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = state->slice_end; - return RE_ERROR_PARTIAL; - } - } - } - - if (test->status & RE_STATUS_FUZZY) { - /* Don't call 'search_start' again. */ - state->pattern->do_search_start = FALSE; - - state->match_pos = start_pos; - new_position->node = node; - new_position->text_pos = start_pos; - - return RE_ERROR_SUCCESS; - } - -again: - if (!state->pattern->is_fuzzy && state->partial_side == RE_PARTIAL_NONE) { - if (state->reverse) { - if (start_pos - state->min_width < state->slice_start) - return RE_ERROR_FAILURE; - } else { - if (start_pos + state->min_width > state->slice_end) - return RE_ERROR_FAILURE; - } - } - - if (search_index < MAX_SEARCH_POSITIONS) { - info = &state->search_positions[search_index]; - if (state->reverse) { - if (info->start_pos >= 0 && info->start_pos >= start_pos && - start_pos >= info->match_pos) { - state->match_pos = info->match_pos; - - new_position->text_pos = state->match_pos; - new_position->node = node; - - return RE_ERROR_SUCCESS; - } - } else { - if (info->start_pos >= 0 && info->start_pos <= start_pos && - start_pos <= info->match_pos) { - state->match_pos = info->match_pos; - - new_position->text_pos = state->match_pos; - new_position->node = node; - - return RE_ERROR_SUCCESS; - } - } - } else - 
info = NULL; - - switch (test->op) { - case RE_OP_ANY: - start_pos = match_many_ANY(state, test, start_pos, state->slice_end, - FALSE); - - if (start_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return RE_ERROR_FAILURE; - break; - case RE_OP_ANY_ALL: - case RE_OP_ANY_ALL_REV: - break; - case RE_OP_ANY_REV: - start_pos = match_many_ANY_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return RE_ERROR_FAILURE; - break; - case RE_OP_ANY_U: - start_pos = match_many_ANY_U(state, test, start_pos, state->slice_end, - FALSE); - - if (start_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return RE_ERROR_FAILURE; - break; - case RE_OP_ANY_U_REV: - start_pos = match_many_ANY_U_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return RE_ERROR_FAILURE; - break; - case RE_OP_BOUNDARY: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_BOUNDARY_rev(state, test, start_pos, - &is_partial); - else - start_pos = search_start_BOUNDARY(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_CHARACTER: - start_pos = match_many_CHARACTER(state, test, start_pos, - state->slice_end, FALSE); - - if (start_pos >= state->text_length) { - if 
(state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return RE_ERROR_FAILURE; - break; - case RE_OP_CHARACTER_IGN: - start_pos = match_many_CHARACTER_IGN(state, test, start_pos, - state->slice_end, FALSE); - - if (start_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return RE_ERROR_FAILURE; - break; - case RE_OP_CHARACTER_IGN_REV: - start_pos = match_many_CHARACTER_IGN_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return RE_ERROR_FAILURE; - break; - case RE_OP_CHARACTER_REV: - start_pos = match_many_CHARACTER_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return RE_ERROR_FAILURE; - break; - case RE_OP_DEFAULT_BOUNDARY: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_DEFAULT_BOUNDARY_rev(state, test, - start_pos, &is_partial); - else - start_pos = search_start_DEFAULT_BOUNDARY(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_DEFAULT_END_OF_WORD: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_DEFAULT_END_OF_WORD_rev(state, test, - start_pos, &is_partial); - else - start_pos = search_start_DEFAULT_END_OF_WORD(state, test, - start_pos, &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - 
new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_DEFAULT_START_OF_WORD: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_DEFAULT_START_OF_WORD_rev(state, test, - start_pos, &is_partial); - else - start_pos = search_start_DEFAULT_START_OF_WORD(state, test, - start_pos, &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_END_OF_LINE: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_END_OF_LINE_rev(state, test, start_pos, - &is_partial); - else - start_pos = search_start_END_OF_LINE(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_END_OF_STRING: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_END_OF_STRING_rev(state, test, start_pos, - &is_partial); - else - start_pos = search_start_END_OF_STRING(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_END_OF_STRING_LINE: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_END_OF_STRING_LINE_rev(state, test, - start_pos, &is_partial); - else - start_pos = search_start_END_OF_STRING_LINE(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_END_OF_WORD: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_END_OF_WORD_rev(state, test, start_pos, - &is_partial); - else - start_pos = search_start_END_OF_WORD(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return 
RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_GRAPHEME_BOUNDARY: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_GRAPHEME_BOUNDARY_rev(state, test, - start_pos, &is_partial); - else - start_pos = search_start_GRAPHEME_BOUNDARY(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_PROPERTY: - start_pos = match_many_PROPERTY(state, test, start_pos, - state->slice_end, FALSE); - - if (start_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return RE_ERROR_FAILURE; - break; - case RE_OP_PROPERTY_IGN: - start_pos = match_many_PROPERTY_IGN(state, test, start_pos, - state->slice_end, FALSE); - - if (start_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return RE_ERROR_FAILURE; - break; - case RE_OP_PROPERTY_IGN_REV: - start_pos = match_many_PROPERTY_IGN_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return RE_ERROR_FAILURE; - break; - case RE_OP_PROPERTY_REV: - start_pos = match_many_PROPERTY_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return RE_ERROR_FAILURE; - break; - case RE_OP_RANGE: - start_pos = match_many_RANGE(state, test, 
start_pos, state->slice_end, - FALSE); - - if (start_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return RE_ERROR_FAILURE; - break; - case RE_OP_RANGE_IGN: - start_pos = match_many_RANGE_IGN(state, test, start_pos, - state->slice_end, FALSE); - - if (start_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return RE_ERROR_FAILURE; - break; - case RE_OP_RANGE_IGN_REV: - start_pos = match_many_RANGE_IGN_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return RE_ERROR_FAILURE; - break; - case RE_OP_RANGE_REV: - start_pos = match_many_RANGE_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return RE_ERROR_FAILURE; - break; - case RE_OP_SEARCH_ANCHOR: - if (state->reverse) { - if (start_pos < state->search_anchor) - return RE_ERROR_FAILURE; - } else { - if (start_pos > state->search_anchor) - return RE_ERROR_FAILURE; - } - - start_pos = state->search_anchor; - break; - case RE_OP_SET_DIFF: - case RE_OP_SET_INTER: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_UNION: - start_pos = match_many_SET(state, test, start_pos, state->slice_end, - FALSE); - - if (start_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return FALSE; - break; - case RE_OP_SET_DIFF_IGN: - 
case RE_OP_SET_INTER_IGN: - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_UNION_IGN: - start_pos = match_many_SET_IGN(state, test, start_pos, - state->slice_end, FALSE); - - if (start_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return FALSE; - break; - case RE_OP_SET_DIFF_IGN_REV: - case RE_OP_SET_INTER_IGN_REV: - case RE_OP_SET_SYM_DIFF_IGN_REV: - case RE_OP_SET_UNION_IGN_REV: - start_pos = match_many_SET_IGN_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return FALSE; - break; - case RE_OP_SET_DIFF_REV: - case RE_OP_SET_INTER_REV: - case RE_OP_SET_SYM_DIFF_REV: - case RE_OP_SET_UNION_REV: - start_pos = match_many_SET_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return FALSE; - break; - case RE_OP_START_OF_LINE: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_START_OF_LINE_rev(state, test, start_pos, - &is_partial); - else - start_pos = search_start_START_OF_LINE(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_START_OF_STRING: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_START_OF_STRING_rev(state, test, - start_pos, &is_partial); - else - start_pos = search_start_START_OF_STRING(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos 
= start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_START_OF_WORD: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_START_OF_WORD_rev(state, test, start_pos, - &is_partial); - else - start_pos = search_start_START_OF_WORD(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_STRING: - { - BOOL is_partial; - - start_pos = search_start_STRING(safe_state, test, start_pos, - &is_partial); - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_STRING_FLD: - { - Py_ssize_t new_pos; - BOOL is_partial; - - start_pos = search_start_STRING_FLD(safe_state, test, start_pos, - &new_pos, &is_partial); - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - - /* Can we look further ahead? */ - if (test == node) { - if (test->next_1.node) { - int status; - - status = try_match(state, &test->next_1, new_pos, - new_position); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) { - ++start_pos; - - if (start_pos >= state->slice_end) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = state->slice_start; - return RE_ERROR_PARTIAL; - } - - return RE_ERROR_FAILURE; - } - - goto again; - } - } - - /* It's a possible match. 
*/ - state->match_pos = start_pos; - - if (info) { - info->start_pos = state->text_pos; - info->match_pos = state->match_pos; - } - - return RE_ERROR_SUCCESS; - } - break; - } - case RE_OP_STRING_FLD_REV: - { - Py_ssize_t new_pos; - BOOL is_partial; - - start_pos = search_start_STRING_FLD_REV(safe_state, test, start_pos, - &new_pos, &is_partial); - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - - /* Can we look further ahead? */ - if (test == node) { - if (test->next_1.node) { - int status; - - status = try_match(state, &test->next_1, new_pos, - new_position); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) { - --start_pos; - - if (start_pos <= state->slice_start) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = state->slice_start; - return RE_ERROR_PARTIAL; - } - - return RE_ERROR_FAILURE; - } - - goto again; - } - } - - /* It's a possible match. 
*/ - state->match_pos = start_pos; - - if (info) { - info->start_pos = state->text_pos; - info->match_pos = state->match_pos; - } - - return RE_ERROR_SUCCESS; - } - break; - } - case RE_OP_STRING_IGN: - { - BOOL is_partial; - - start_pos = search_start_STRING_IGN(safe_state, test, start_pos, - &is_partial); - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_STRING_IGN_REV: - { - BOOL is_partial; - - start_pos = search_start_STRING_IGN_REV(safe_state, test, start_pos, - &is_partial); - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_STRING_REV: - { - BOOL is_partial; - - start_pos = search_start_STRING_REV(safe_state, test, start_pos, - &is_partial); - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - default: - /* Don't call 'search_start' again. */ - state->pattern->do_search_start = FALSE; - - state->match_pos = start_pos; - new_position->node = node; - new_position->text_pos = start_pos; - return RE_ERROR_SUCCESS; - } - - /* Can we look further ahead? 
*/ - if (test == node) { - text_pos = start_pos + test->step; - - if (test->next_1.node) { - int status; - - status = try_match(state, &test->next_1, text_pos, new_position); - if (status == RE_ERROR_PARTIAL) { - new_position->node = node; - new_position->text_pos = start_pos; - return status; - } - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) { - if (state->reverse) { - --start_pos; - - if (start_pos < state->slice_start) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = state->slice_start; - return RE_ERROR_PARTIAL; - } - - return RE_ERROR_FAILURE; - } - } else { - ++start_pos; - - if (start_pos > state->slice_end) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = state->slice_end; - return RE_ERROR_PARTIAL; - } - - return RE_ERROR_FAILURE; - } - } - - goto again; - } - } - } else { - new_position->node = node; - new_position->text_pos = start_pos; - } - - /* It's a possible match. */ - state->match_pos = start_pos; - - if (info) { - info->start_pos = state->text_pos; - info->match_pos = state->match_pos; - } - - return RE_ERROR_SUCCESS; -} - -/* Saves a capture group. */ -Py_LOCAL_INLINE(BOOL) save_capture(RE_SafeState* safe_state, size_t - private_index, size_t public_index) { - RE_State* state; - RE_GroupData* private_group; - RE_GroupData* public_group; - - state = safe_state->re_state; - - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - private_group = &state->groups[private_index - 1]; - public_group = &state->groups[public_index - 1]; - - /* Will the repeated captures ever be visible? 
*/ - if (!state->visible_captures) { - public_group->captures[0] = private_group->span; - public_group->capture_count = 1; - - return TRUE; - } - - if (public_group->capture_count >= public_group->capture_capacity) { - size_t new_capacity; - RE_GroupSpan* new_captures; - - new_capacity = public_group->capture_capacity * 2; - new_capacity = max_size_t(new_capacity, RE_INIT_CAPTURE_SIZE); - new_captures = (RE_GroupSpan*)safe_realloc(safe_state, - public_group->captures, new_capacity * sizeof(RE_GroupSpan)); - if (!new_captures) - return FALSE; - - public_group->captures = new_captures; - public_group->capture_capacity = new_capacity; - } - - public_group->captures[public_group->capture_count++] = - private_group->span; - - return TRUE; -} - -/* Unsaves a capture group. */ -Py_LOCAL_INLINE(void) unsave_capture(RE_State* state, size_t private_index, - size_t public_index) { - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - if (state->groups[public_index - 1].capture_count > 0) - --state->groups[public_index - 1].capture_count; -} - -/* Pushes the groups for backtracking. 
*/ -Py_LOCAL_INLINE(BOOL) push_groups(RE_SafeState* safe_state) { - RE_State* state; - size_t group_count; - RE_SavedGroups* current; - size_t g; - - state = safe_state->re_state; - - group_count = state->pattern->true_group_count; - if (group_count == 0) - return TRUE; - - current = state->current_saved_groups; - - if (current && current->next) - current = current->next; - else if (!current && state->first_saved_groups) - current = state->first_saved_groups; - else { - RE_SavedGroups* new_block; - - new_block = (RE_SavedGroups*)safe_alloc(safe_state, - sizeof(RE_SavedGroups)); - if (!new_block) - return FALSE; - - new_block->spans = (RE_GroupSpan*)safe_alloc(safe_state, group_count * - sizeof(RE_GroupSpan)); - new_block->counts = (size_t*)safe_alloc(safe_state, group_count * - sizeof(Py_ssize_t)); - if (!new_block->spans || !new_block->counts) { - safe_dealloc(safe_state, new_block->spans); - safe_dealloc(safe_state, new_block->counts); - safe_dealloc(safe_state, new_block); - return FALSE; - } - - new_block->previous = current; - new_block->next = NULL; - - if (new_block->previous) - new_block->previous->next = new_block; - else - state->first_saved_groups = new_block; - - current = new_block; - } - - for (g = 0; g < group_count; g++) { - current->spans[g] = state->groups[g].span; - current->counts[g] = state->groups[g].capture_count; - } - - state->current_saved_groups = current; - - return TRUE; -} - -/* Pops the groups for backtracking. */ -Py_LOCAL_INLINE(void) pop_groups(RE_State* state) { - size_t group_count; - RE_SavedGroups* current; - size_t g; - - group_count = state->pattern->true_group_count; - if (group_count == 0) - return; - - current = state->current_saved_groups; - - for (g = 0; g < group_count; g++) { - state->groups[g].span = current->spans[g]; - state->groups[g].capture_count = current->counts[g]; - } - - state->current_saved_groups = current->previous; -} - -/* Drops the groups for backtracking. 
*/ -Py_LOCAL_INLINE(void) drop_groups(RE_State* state) { - if (state->pattern->true_group_count != 0) - state->current_saved_groups = state->current_saved_groups->previous; -} - -/* Pushes the repeats for backtracking. */ -Py_LOCAL_INLINE(BOOL) push_repeats(RE_SafeState* safe_state) { - RE_State* state; - PatternObject* pattern; - size_t repeat_count; - RE_SavedRepeats* current; - size_t r; - - state = safe_state->re_state; - pattern = state->pattern; - - repeat_count = pattern->repeat_count; - if (repeat_count == 0) - return TRUE; - - current = state->current_saved_repeats; - - if (current && current->next) - current = current->next; - else if (!current && state->first_saved_repeats) - current = state->first_saved_repeats; - else { - RE_SavedRepeats* new_block; - - new_block = (RE_SavedRepeats*)safe_alloc(safe_state, - sizeof(RE_SavedRepeats)); - if (!new_block) - return FALSE; - - new_block->repeats = (RE_RepeatData*)safe_alloc(safe_state, - repeat_count * sizeof(RE_RepeatData)); - if (!new_block->repeats) { - safe_dealloc(safe_state, new_block); - return FALSE; - } - - memset(new_block->repeats, 0, repeat_count * sizeof(RE_RepeatData)); - - new_block->previous = current; - new_block->next = NULL; - - if (new_block->previous) - new_block->previous->next = new_block; - else - state->first_saved_repeats = new_block; - - current = new_block; - } - - for (r = 0; r < repeat_count; r++) { - if (!copy_repeat_data(safe_state, ¤t->repeats[r], - &state->repeats[r])) - return FALSE; - } - - state->current_saved_repeats = current; - - return TRUE; -} - -/* Pops the repeats for backtracking. 
*/ -Py_LOCAL_INLINE(void) pop_repeats(RE_State* state) { - PatternObject* pattern; - size_t repeat_count; - RE_SavedRepeats* current; - size_t r; - - pattern = state->pattern; - - repeat_count = pattern->repeat_count; - if (repeat_count == 0) - return; - - current = state->current_saved_repeats; - - for (r = 0; r < repeat_count; r++) - copy_repeat_data(NULL, &state->repeats[r], ¤t->repeats[r]); - - state->current_saved_repeats = current->previous; -} - -/* Drops the repeats for backtracking. */ -Py_LOCAL_INLINE(void) drop_repeats(RE_State* state) { - PatternObject* pattern; - size_t repeat_count; - RE_SavedRepeats* current; - - pattern = state->pattern; - - repeat_count = pattern->repeat_count; - if (repeat_count == 0) - return; - - current = state->current_saved_repeats; - state->current_saved_repeats = current->previous; -} - -/* Inserts a new span in a guard list. */ -Py_LOCAL_INLINE(BOOL) insert_guard_span(RE_SafeState* safe_state, RE_GuardList* - guard_list, size_t index) { - size_t n; - - if (guard_list->count >= guard_list->capacity) { - size_t new_capacity; - RE_GuardSpan* new_spans; - - new_capacity = guard_list->capacity * 2; - if (new_capacity == 0) - new_capacity = RE_INIT_GUARDS_BLOCK_SIZE; - new_spans = (RE_GuardSpan*)safe_realloc(safe_state, guard_list->spans, - new_capacity * sizeof(RE_GuardSpan)); - if (!new_spans) - return FALSE; - - guard_list->capacity = new_capacity; - guard_list->spans = new_spans; - } - - n = guard_list->count - index; - if (n > 0) - memmove(guard_list->spans + index + 1, guard_list->spans + index, n * - sizeof(RE_GuardSpan)); - ++guard_list->count; - - return TRUE; -} - -/* Deletes a span in a guard list. 
*/ -Py_LOCAL_INLINE(void) delete_guard_span(RE_GuardList* guard_list, size_t index) - { - size_t n; - - n = guard_list->count - index - 1; - if (n > 0) - memmove(guard_list->spans + index, guard_list->spans + index + 1, n * - sizeof(RE_GuardSpan)); - --guard_list->count; -} - -/* Checks whether a position is guarded against further matching. */ -Py_LOCAL_INLINE(BOOL) is_guarded(RE_GuardList* guard_list, Py_ssize_t text_pos) - { - size_t low; - size_t high; - - /* Is this position in the guard list? */ - if (guard_list->count == 0 || text_pos < guard_list->spans[0].low) - guard_list->last_low = 0; - else if (text_pos > guard_list->spans[guard_list->count - 1].high) - guard_list->last_low = guard_list->count; - else { - low = 0; - high = guard_list->count; - while (low < high) { - size_t mid; - RE_GuardSpan* span; - - mid = (low + high) / 2; - span = &guard_list->spans[mid]; - if (text_pos < span->low) - high = mid; - else if (text_pos > span->high) - low = mid + 1; - else - return span->protect; - } - - guard_list->last_low = low; - } - - guard_list->last_text_pos = text_pos; - - return FALSE; -} - -/* Guards a position against further matching. */ -Py_LOCAL_INLINE(BOOL) guard(RE_SafeState* safe_state, RE_GuardList* guard_list, - Py_ssize_t text_pos, BOOL protect) { - size_t low; - size_t high; - - /* Where should be new position be added? */ - if (text_pos == guard_list->last_text_pos) - low = guard_list->last_low; - else { - low = 0; - high = guard_list->count; - while (low < high) { - size_t mid; - RE_GuardSpan* span; - - mid = (low + high) / 2; - span = &guard_list->spans[mid]; - if (text_pos < span->low) - high = mid; - else if (text_pos > span->high) - low = mid + 1; - else - return TRUE; - } - } - - /* Add the position to the guard list. */ - if (low > 0 && guard_list->spans[low - 1].high + 1 == text_pos && - guard_list->spans[low - 1].protect == protect) { - /* The new position is just above this span. 
*/ - if (low < guard_list->count && guard_list->spans[low].low - 1 == - text_pos && guard_list->spans[low].protect == protect) { - /* The new position joins 2 spans */ - guard_list->spans[low - 1].high = guard_list->spans[low].high; - delete_guard_span(guard_list, low); - } else - /* Extend the span. */ - guard_list->spans[low - 1].high = text_pos; - } else if (low < guard_list->count && guard_list->spans[low].low - 1 == - text_pos && guard_list->spans[low].protect == protect) - /* The new position is just below this span. */ - /* Extend the span. */ - guard_list->spans[low].low = text_pos; - else { - /* Insert a new span. */ - if (!insert_guard_span(safe_state, guard_list, low)) - return FALSE; - guard_list->spans[low].low = text_pos; - guard_list->spans[low].high = text_pos; - guard_list->spans[low].protect = protect; - } - - guard_list->last_text_pos = -1; - - return TRUE; -} - -/* Guards a position against further matching for a repeat. */ -Py_LOCAL_INLINE(BOOL) guard_repeat(RE_SafeState* safe_state, size_t index, - Py_ssize_t text_pos, RE_STATUS_T guard_type, BOOL protect) { - RE_State* state; - RE_GuardList* guard_list; - - state = safe_state->re_state; - - /* Is a guard active here? */ - if (!(state->pattern->repeat_info[index].status & guard_type)) - return TRUE; - - /* Which guard list? */ - if (guard_type & RE_STATUS_BODY) - guard_list = &state->repeats[index].body_guard_list; - else - guard_list = &state->repeats[index].tail_guard_list; - - return guard(safe_state, guard_list, text_pos, protect); -} - -/* Guards a range of positions against further matching for a repeat. */ -Py_LOCAL_INLINE(BOOL) guard_repeat_range(RE_SafeState* safe_state, size_t - index, Py_ssize_t lo_pos, Py_ssize_t hi_pos, RE_STATUS_T guard_type, BOOL - protect) { - RE_State* state; - RE_GuardList* guard_list; - Py_ssize_t pos; - - state = safe_state->re_state; - - /* Is a guard active here? 
*/ - if (!(state->pattern->repeat_info[index].status & guard_type)) - return TRUE; - - /* Which guard list? */ - if (guard_type & RE_STATUS_BODY) - guard_list = &state->repeats[index].body_guard_list; - else - guard_list = &state->repeats[index].tail_guard_list; - - for (pos = lo_pos; pos <= hi_pos; pos++) { - if (!guard(safe_state, guard_list, pos, protect)) - return FALSE; - } - - return TRUE; -} - -/* Checks whether a position is guarded against further matching for a repeat. - */ -Py_LOCAL_INLINE(BOOL) is_repeat_guarded(RE_SafeState* safe_state, size_t index, - Py_ssize_t text_pos, RE_STATUS_T guard_type) { - RE_State* state; - RE_GuardList* guard_list; - - state = safe_state->re_state; - - /* Is a guard active here? */ - if (!(state->pattern->repeat_info[index].status & guard_type)) - return FALSE; - - /* Which guard list? */ - if (guard_type == RE_STATUS_BODY) - guard_list = &state->repeats[index].body_guard_list; - else - guard_list = &state->repeats[index].tail_guard_list; - - return is_guarded(guard_list, text_pos); -} - -/* Builds a Unicode string. */ -Py_LOCAL_INLINE(PyObject*) build_unicode_value(void* buffer, Py_ssize_t start, - Py_ssize_t end, Py_ssize_t buffer_charsize) { - Py_ssize_t len; - - buffer = (void*)((RE_UINT8*)buffer + start * buffer_charsize); - len = end - start; - - return PyUnicode_FromUnicode(buffer, len); -} - -/* Builds a bytestring. Returns NULL if any member is too wide. 
*/ -Py_LOCAL_INLINE(PyObject*) build_bytes_value(void* buffer, Py_ssize_t start, - Py_ssize_t end, Py_ssize_t buffer_charsize) { - Py_ssize_t len; - Py_UCS1* byte_buffer; - Py_ssize_t i; - PyObject* result; - - buffer = (void*)((RE_UINT8*)buffer + start * buffer_charsize); - len = end - start; - - if (buffer_charsize == 1) - return Py_BuildValue("s#", buffer, len); - - byte_buffer = re_alloc((size_t)len); - if (!byte_buffer) - return NULL; - - for (i = 0; i < len; i++) { - Py_UCS2 c = ((Py_UCS2*)buffer)[i]; - if (c > 0xFF) - goto too_wide; - - byte_buffer[i] = (Py_UCS1)c; - } - - result = Py_BuildValue("s#", byte_buffer, len); - - re_dealloc(byte_buffer); - - return result; - -too_wide: - re_dealloc(byte_buffer); - - return NULL; -} - -/* Looks for a string in a string set. */ -Py_LOCAL_INLINE(int) string_set_contains(RE_State* state, PyObject* string_set, - Py_ssize_t first, Py_ssize_t last) { - PyObject* string; - int status; - - if (state->is_unicode) - string = build_unicode_value(state->text, first, last, - state->charsize); - else - string = build_bytes_value(state->text, first, last, state->charsize); - if (!string) - return RE_ERROR_INTERNAL; - - status = PySet_Contains(string_set, string); - Py_DECREF(string); - - return status; -} - -/* Looks for a string in a string set, ignoring case. 
*/ -Py_LOCAL_INLINE(int) string_set_contains_ign(RE_State* state, PyObject* - string_set, void* buffer, Py_ssize_t first, Py_ssize_t last, Py_ssize_t - index, Py_ssize_t buffer_charsize) { - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - BOOL (*possible_turkic)(RE_LocaleInfo* locale_info, Py_UCS4 ch); - Py_UCS4 codepoints[4]; - - switch (buffer_charsize) { - case 1: - char_at = bytes1_char_at; - set_char_at = bytes1_set_char_at; - break; - case 2: - char_at = bytes2_char_at; - set_char_at = bytes2_set_char_at; - break; - case 4: - char_at = bytes4_char_at; - set_char_at = bytes4_set_char_at; - break; - default: - char_at = bytes1_char_at; - set_char_at = bytes1_set_char_at; - break; - } - - encoding = state->encoding; - locale_info = state->locale_info; - possible_turkic = encoding->possible_turkic; - - /* Look for a possible Turkic 'I'. */ - while (index < last && !possible_turkic(locale_info, char_at(buffer, - index))) - ++index; - - if (index < last) { - /* Possible Turkic 'I'. */ - int count; - int i; - - /* Try all the alternatives to the 'I'. */ - count = encoding->all_turkic_i(locale_info, char_at(buffer, index), - codepoints); - - for (i = 0; i < count; i++) { - int status; - - set_char_at(buffer, index, codepoints[i]); - - /* Recurse for the remainder of the string. */ - status = string_set_contains_ign(state, string_set, buffer, first, - last, index + 1, buffer_charsize); - if (status != 0) - return status; - } - - return 0; - } else { - /* No Turkic 'I'. 
*/ - PyObject* string; - int status; - - if (state->is_unicode) - string = build_unicode_value(buffer, first, last, buffer_charsize); - else - string = build_bytes_value(buffer, first, last, buffer_charsize); - if (!string) - return RE_ERROR_MEMORY; - - status = PySet_Contains(string_set, string); - Py_DECREF(string); - - return status; - } -} - -/* Creates a partial string set for truncation at the left or right side. */ -Py_LOCAL_INLINE(int) make_partial_string_set(RE_State* state, RE_Node* node) { - PatternObject* pattern; - int partial_side; - PyObject* string_set; - PyObject* partial_set; - PyObject* iter = NULL; - PyObject* item = NULL; - PyObject* slice = NULL; - - pattern = state->pattern; - partial_side = state->partial_side; - if (partial_side != RE_PARTIAL_LEFT && partial_side != RE_PARTIAL_RIGHT) - return RE_ERROR_INTERNAL; - - /* Fetch the full string set. PyList_GET_ITEM borrows a reference. */ - string_set = PyList_GET_ITEM(pattern->named_list_indexes, node->values[0]); - if (!string_set) - return RE_ERROR_INTERNAL; - - /* Gets the list of partial string sets. */ - if (!pattern->partial_named_lists[partial_side]) { - size_t size; - - size = pattern->named_lists_count * sizeof(PyObject*); - pattern->partial_named_lists[partial_side] = re_alloc(size); - if (!pattern->partial_named_lists[partial_side]) - return RE_ERROR_INTERNAL; - - memset(pattern->partial_named_lists[partial_side], 0, size); - } - - /* Get the partial string set. */ - partial_set = pattern->partial_named_lists[partial_side][node->values[0]]; - if (partial_set) - return 1; - - /* Build the partial string set. 
*/ - partial_set = PySet_New(NULL); - if (!partial_set) - return RE_ERROR_INTERNAL; - - iter = PyObject_GetIter(string_set); - if (!iter) - goto error; - - item = PyIter_Next(iter); - - while (item) { - Py_ssize_t len; - Py_ssize_t first; - Py_ssize_t last; - - len = PySequence_Length(item); - if (len == -1) - goto error; - - first = 0; - last = len; - - while (last - first > 1) { - int status; - - /* Shorten the entry. */ - if (partial_side == RE_PARTIAL_LEFT) - ++first; - else - --last; - - slice = PySequence_GetSlice(item, first, last); - if (!slice) - goto error; - - status = PySet_Add(partial_set, slice); - Py_DECREF(slice); - if (status < 0) - goto error; - } - - Py_DECREF(item); - item = PyIter_Next(iter); - } - - if (PyErr_Occurred()) - goto error; - - Py_DECREF(iter); - - pattern->partial_named_lists[partial_side][node->values[0]] = partial_set; - - return 1; - -error: - Py_XDECREF(item); - Py_XDECREF(iter); - Py_DECREF(partial_set); - - return RE_ERROR_INTERNAL; -} - -/* Tries to match a string at the current position with a member of a string - * set, forwards or backwards. - */ -Py_LOCAL_INLINE(int) string_set_match_fwdrev(RE_SafeState* safe_state, RE_Node* - node, BOOL reverse) { - RE_State* state; - Py_ssize_t min_len; - Py_ssize_t max_len; - Py_ssize_t text_available; - Py_ssize_t slice_available; - int partial_side; - Py_ssize_t len; - Py_ssize_t first; - Py_ssize_t last; - int status; - PyObject* string_set; - - state = safe_state->re_state; - - min_len = (Py_ssize_t)node->values[1]; - max_len = (Py_ssize_t)node->values[2]; - - acquire_GIL(safe_state); - - if (reverse) { - text_available = state->text_pos; - slice_available = state->text_pos - state->slice_start; - partial_side = RE_PARTIAL_LEFT; - } else { - text_available = state->text_length - state->text_pos; - slice_available = state->slice_end - state->text_pos; - partial_side = RE_PARTIAL_RIGHT; - } - - /* Get as many characters as we need for the longest possible match. 
*/ - len = min_ssize_t(max_len, slice_available); - - if (reverse) { - first = state->text_pos - len; - last = state->text_pos; - } else { - first = state->text_pos; - last = state->text_pos + len; - } - - /* If we didn't get all of the characters we need, is a partial match - * allowed? - */ - if (len < max_len && len == text_available && state->partial_side == - partial_side) { - if (len == 0) { - /* An empty string is always a possible partial match. */ - status = RE_ERROR_PARTIAL; - goto finished; - } - - /* Make a set of the possible partial matches. */ - status = make_partial_string_set(state, node); - if (status < 0) - goto finished; - - /* Fetch the partial string set. */ - string_set = - state->pattern->partial_named_lists[partial_side][node->values[0]]; - - /* Is the text we have a partial match? */ - status = string_set_contains(state, string_set, first, last); - if (status < 0) - goto finished; - - if (status == 1) { - /* Advance past the match. */ - if (reverse) - state->text_pos -= len; - else - state->text_pos += len; - - status = RE_ERROR_PARTIAL; - goto finished; - } - } - - /* Fetch the string set. PyList_GET_ITEM borrows a reference. */ - string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, - node->values[0]); - if (!string_set) { - status = RE_ERROR_INTERNAL; - goto finished; - } - - /* We've already looked for a partial match (if allowed), but what about a - * complete match? - */ - while (len >= min_len) { - status = string_set_contains(state, string_set, first, last); - - if (status == 1) { - /* Advance past the match. */ - if (reverse) - state->text_pos -= len; - else - state->text_pos += len; - - status = 1; - goto finished; - } - - /* Look for a shorter match. */ - --len; - if (reverse) - ++first; - else - --last; - } - - /* No match. 
*/ - status = 0; - -finished: - release_GIL(safe_state); - - return status; -} - -/* Tries to match a string at the current position with a member of a string - * set, ignoring case, forwards or backwards. - */ -Py_LOCAL_INLINE(int) string_set_match_fld_fwdrev(RE_SafeState* safe_state, - RE_Node* node, BOOL reverse) { - RE_State* state; - int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* - folded); - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - Py_ssize_t folded_charsize; - void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); - Py_ssize_t min_len; - Py_ssize_t max_len; - Py_ssize_t buf_len; - void* folded; - int status; - BOOL* end_of_fold = NULL; - Py_ssize_t text_available; - Py_ssize_t slice_available; - Py_ssize_t t_pos; - Py_ssize_t f_pos; - int step; - int partial_side; - Py_ssize_t len; - Py_ssize_t consumed; - Py_UCS4 codepoints[RE_MAX_FOLDED]; - Py_ssize_t first; - Py_ssize_t last; - PyObject* string_set; - - state = safe_state->re_state; - full_case_fold = state->encoding->full_case_fold; - char_at = state->char_at; - - /* The folded string will have the same width as the original string. */ - folded_charsize = state->charsize; - - switch (folded_charsize) { - case 1: - set_char_at = bytes1_set_char_at; - break; - case 2: - set_char_at = bytes2_set_char_at; - break; - case 4: - set_char_at = bytes4_set_char_at; - break; - default: - return RE_ERROR_INTERNAL; - } - - min_len = (Py_ssize_t)node->values[1]; - max_len = (Py_ssize_t)node->values[2]; - - acquire_GIL(safe_state); - - /* Allocate a buffer for the folded string. 
*/ - buf_len = max_len + RE_MAX_FOLDED; - folded = re_alloc((size_t)(buf_len * folded_charsize)); - if (!folded) { - status = RE_ERROR_MEMORY; - goto finished; - } - - end_of_fold = re_alloc((size_t)buf_len * sizeof(BOOL)); - if (!end_of_fold) { - status = RE_ERROR_MEMORY; - goto finished; - } - - memset(end_of_fold, 0, (size_t)buf_len * sizeof(BOOL)); - - if (reverse) { - text_available = state->text_pos; - slice_available = state->text_pos - state->slice_start; - t_pos = state->text_pos - 1; - f_pos = buf_len; - step = -1; - partial_side = RE_PARTIAL_LEFT; - } else { - text_available = state->text_length - state->text_pos; - slice_available = state->slice_end - state->text_pos; - t_pos = state->text_pos; - f_pos = 0; - step = 1; - partial_side = RE_PARTIAL_RIGHT; - } - - /* We can stop getting characters as soon as the case-folded string is long - * enough (each codepoint from the text can expand to more than one folded - * codepoint). - */ - len = 0; - end_of_fold[len] = TRUE; - - consumed = 0; - while (len < max_len && consumed < slice_available) { - int count; - int j; - - count = full_case_fold(state->locale_info, char_at(state->text, t_pos), - codepoints); - - if (reverse) - f_pos -= count; - - for (j = 0; j < count; j++) - set_char_at(folded, f_pos + j, codepoints[j]); - - if (!reverse) - f_pos += count; - - len += count; - end_of_fold[len] = TRUE; - ++consumed; - t_pos += step; - } - - if (reverse) { - first = f_pos; - last = buf_len; - } else { - first = 0; - last = f_pos; - } - - /* If we didn't get all of the characters we need, is a partial match - * allowed? - */ - if (len < max_len && len == text_available && state->partial_side == - partial_side) { - if (len == 0) { - /* An empty string is always a possible partial match. */ - status = RE_ERROR_PARTIAL; - goto finished; - } - - /* Make a set of the possible partial matches. */ - status = make_partial_string_set(state, node); - if (status < 0) - goto finished; - - /* Fetch the partial string set. 
*/ - string_set = - state->pattern->partial_named_lists[partial_side][node->values[0]]; - - /* Is the text we have a partial match? */ - status = string_set_contains_ign(state, string_set, folded, first, - last, first, folded_charsize); - if (status < 0) - goto finished; - - if (status == 1) { - /* Advance past the match. */ - if (reverse) - state->text_pos -= consumed; - else - state->text_pos += consumed; - - status = RE_ERROR_PARTIAL; - goto finished; - } - } - - /* Fetch the string set. PyList_GET_ITEM borrows a reference. */ - string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, - node->values[0]); - if (!string_set) { - status = RE_ERROR_INTERNAL; - goto finished; - } - - /* We've already looked for a partial match (if allowed), but what about a - * complete match? - */ - while (len >= min_len) { - if (end_of_fold[len]) { - status = string_set_contains_ign(state, string_set, folded, first, - last, first, folded_charsize); - - if (status == 1) { - /* Advance past the match. */ - if (reverse) - state->text_pos -= consumed; - else - state->text_pos += consumed; - - status = 1; - goto finished; - } - - --consumed; - } - - /* Look for a shorter match. */ - --len; - if (reverse) - ++first; - else - --last; - } - - /* No match. */ - status = 0; - -finished: - re_dealloc(end_of_fold); - re_dealloc(folded); - - release_GIL(safe_state); - - return status; -} - -/* Tries to match a string at the current position with a member of a string - * set, ignoring case, forwards or backwards. 
- */ -Py_LOCAL_INLINE(int) string_set_match_ign_fwdrev(RE_SafeState* safe_state, - RE_Node* node, BOOL reverse) { - RE_State* state; - Py_UCS4 (*simple_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch); - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - Py_ssize_t folded_charsize; - void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); - Py_ssize_t min_len; - Py_ssize_t max_len; - void* folded; - int status; - Py_ssize_t text_available; - Py_ssize_t slice_available; - Py_ssize_t t_pos; - Py_ssize_t f_pos; - int step; - int partial_side; - Py_ssize_t len; - Py_ssize_t i; - Py_ssize_t first; - Py_ssize_t last; - PyObject* string_set; - - state = safe_state->re_state; - simple_case_fold = state->encoding->simple_case_fold; - char_at = state->char_at; - - /* The folded string will have the same width as the original string. */ - folded_charsize = state->charsize; - - switch (folded_charsize) { - case 1: - set_char_at = bytes1_set_char_at; - break; - case 2: - set_char_at = bytes2_set_char_at; - break; - case 4: - set_char_at = bytes4_set_char_at; - break; - default: - return RE_ERROR_INTERNAL; - } - - min_len = (Py_ssize_t)node->values[1]; - max_len = (Py_ssize_t)node->values[2]; - - acquire_GIL(safe_state); - - /* Allocate a buffer for the folded string. */ - folded = re_alloc((size_t)(max_len * folded_charsize)); - if (!folded) { - status = RE_ERROR_MEMORY; - goto finished; - } - - if (reverse) { - text_available = state->text_pos; - slice_available = state->text_pos - state->slice_start; - t_pos = state->text_pos - 1; - f_pos = max_len - 1; - step = -1; - partial_side = RE_PARTIAL_LEFT; - } else { - text_available = state->text_length - state->text_pos; - slice_available = state->slice_end - state->text_pos; - t_pos = state->text_pos; - f_pos = 0; - step = 1; - partial_side = RE_PARTIAL_RIGHT; - } - - /* Get as many characters as we need for the longest possible match. 
*/ - len = min_ssize_t(max_len, slice_available); - - for (i = 0; i < len; i ++) { - Py_UCS4 ch; - - ch = simple_case_fold(state->locale_info, char_at(state->text, t_pos)); - set_char_at(folded, f_pos, ch); - t_pos += step; - f_pos += step; - } - - if (reverse) { - first = f_pos + 1; - last = max_len; - } else { - first = 0; - last = f_pos; - } - - /* If we didn't get all of the characters we need, is a partial match - * allowed? - */ - if (len < max_len && len == text_available && state->partial_side == - partial_side) { - if (len == 0) { - /* An empty string is always a possible partial match. */ - status = RE_ERROR_PARTIAL; - goto finished; - } - - /* Make a set of the possible partial matches. */ - status = make_partial_string_set(state, node); - if (status < 0) - goto finished; - - /* Fetch the partial string set. */ - string_set = - state->pattern->partial_named_lists[partial_side][node->values[0]]; - - /* Is the text we have a partial match? */ - status = string_set_contains_ign(state, string_set, folded, first, - last, first, folded_charsize); - if (status < 0) - goto finished; - - if (status == 1) { - /* Advance past the match. */ - if (reverse) - state->text_pos -= len; - else - state->text_pos += len; - - status = RE_ERROR_PARTIAL; - goto finished; - } - } - - /* Fetch the string set. PyList_GET_ITEM borrows a reference. */ - string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, - node->values[0]); - if (!string_set) { - status = RE_ERROR_INTERNAL; - goto finished; - } - - /* We've already looked for a partial match (if allowed), but what about a - * complete match? - */ - while (len >= min_len) { - status = string_set_contains_ign(state, string_set, folded, first, - last, first, folded_charsize); - - if (status == 1) { - /* Advance past the match. */ - if (reverse) - state->text_pos -= len; - else - state->text_pos += len; - - status = 1; - goto finished; - } - - /* Look for a shorter match. 
*/ - --len; - if (reverse) - ++first; - else - --last; - } - - /* No match. */ - status = 0; - -finished: - re_dealloc(folded); - - release_GIL(safe_state); - - return status; -} - -/* Checks whether any additional fuzzy error is permitted. */ -Py_LOCAL_INLINE(BOOL) any_error_permitted(RE_State* state) { - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - return fuzzy_info->total_cost <= values[RE_FUZZY_VAL_MAX_COST] && - fuzzy_info->counts[RE_FUZZY_ERR] < values[RE_FUZZY_VAL_MAX_ERR] && - state->total_errors < state->max_errors; -} - -/* Checks whether this additional fuzzy error is permitted. */ -Py_LOCAL_INLINE(BOOL) this_error_permitted(RE_State* state, int fuzzy_type) { - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - return fuzzy_info->total_cost + values[RE_FUZZY_VAL_COST_BASE + fuzzy_type] - <= values[RE_FUZZY_VAL_MAX_COST] && fuzzy_info->counts[fuzzy_type] < - values[RE_FUZZY_VAL_MAX_BASE + fuzzy_type] && state->total_errors < - state->max_errors; -} - -/* Checks whether we've reachsd the end of the text during a fuzzy partial - * match. - */ -Py_LOCAL_INLINE(int) check_fuzzy_partial(RE_State* state, Py_ssize_t text_pos) - { - switch (state->partial_side) { - case RE_PARTIAL_LEFT: - if (text_pos < 0) - return RE_ERROR_PARTIAL; - break; - case RE_PARTIAL_RIGHT: - if (text_pos > state->text_length) - return RE_ERROR_PARTIAL; - break; - } - - return RE_ERROR_FAILURE; -} - -/* Checks a fuzzy match of an item. */ -Py_LOCAL_INLINE(int) next_fuzzy_match_item(RE_State* state, RE_FuzzyData* data, - BOOL is_string, int step) { - Py_ssize_t new_pos; - - if (this_error_permitted(state, data->fuzzy_type)) { - switch (data->fuzzy_type) { - case RE_FUZZY_DEL: - /* Could a character at text_pos have been deleted? 
*/ - if (step == 0) - return RE_ERROR_FAILURE; - - if (is_string) - data->new_string_pos += step; - else - data->new_node = data->new_node->next_1.node; - return RE_ERROR_SUCCESS; - case RE_FUZZY_INS: - /* Could the character at text_pos have been inserted? */ - if (!data->permit_insertion) - return RE_ERROR_FAILURE; - - if (step == 0) - new_pos = data->new_text_pos + data->step; - else - new_pos = data->new_text_pos + step; - if (state->slice_start <= new_pos && new_pos <= state->slice_end) { - data->new_text_pos = new_pos; - return RE_ERROR_SUCCESS; - } - - return check_fuzzy_partial(state, new_pos); - case RE_FUZZY_SUB: - /* Could the character at text_pos have been substituted? */ - if (step == 0) - return RE_ERROR_FAILURE; - - new_pos = data->new_text_pos + step; - if (state->slice_start <= new_pos && new_pos <= state->slice_end) { - data->new_text_pos = new_pos; - if (is_string) - data->new_string_pos += step; - else - data->new_node = data->new_node->next_1.node; - return RE_ERROR_SUCCESS; - } - - return check_fuzzy_partial(state, new_pos); - } - } - - return RE_ERROR_FAILURE; -} - -/* Tries a fuzzy match of an item of width 0 or 1. */ -Py_LOCAL_INLINE(int) fuzzy_match_item(RE_SafeState* safe_state, BOOL search, - Py_ssize_t* text_pos, RE_Node** node, int step) { - RE_State* state; - RE_FuzzyData data; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - RE_BacktrackData* bt_data; - - state = safe_state->re_state; - - if (!any_error_permitted(state)) { - *node = NULL; - return RE_ERROR_SUCCESS; - } - - data.new_text_pos = *text_pos; - data.new_node = *node; - - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - if (step == 0) { - if (data.new_node->status & RE_STATUS_REVERSE) { - data.step = -1; - data.limit = state->slice_start; - } else { - data.step = 1; - data.limit = state->slice_end; - } - } else - data.step = step; - - /* Permit insertion except initially when searching (it's better just to - * start searching one character later). 
- */ - data.permit_insertion = !search || data.new_text_pos != - state->search_anchor; - - for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; - data.fuzzy_type++) { - int status; - - status = next_fuzzy_match_item(state, &data, FALSE, step); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - goto found; - } - - *node = NULL; - return RE_ERROR_SUCCESS; - -found: - if (!add_backtrack(safe_state, (*node)->op)) - return RE_ERROR_FAILURE; - bt_data = state->backtrack; - bt_data->fuzzy_item.position.text_pos = *text_pos; - bt_data->fuzzy_item.position.node = *node; - bt_data->fuzzy_item.fuzzy_type = (RE_INT8)data.fuzzy_type; - bt_data->fuzzy_item.step = (RE_INT8)step; - - ++fuzzy_info->counts[data.fuzzy_type]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - ++state->total_errors; - ++state->capture_change; - - *text_pos = data.new_text_pos; - *node = data.new_node; - - return RE_ERROR_SUCCESS; -} - -/* Retries a fuzzy match of a item of width 0 or 1. 
*/ -Py_LOCAL_INLINE(int) retry_fuzzy_match_item(RE_SafeState* safe_state, BOOL - search, Py_ssize_t* text_pos, RE_Node** node, BOOL advance) { - RE_State* state; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - RE_BacktrackData* bt_data; - RE_FuzzyData data; - int step; - - state = safe_state->re_state; - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - bt_data = state->backtrack; - data.new_text_pos = bt_data->fuzzy_item.position.text_pos; - data.new_node = bt_data->fuzzy_item.position.node; - data.fuzzy_type = bt_data->fuzzy_item.fuzzy_type; - data.step = bt_data->fuzzy_item.step; - - if (data.fuzzy_type >= 0) { - --fuzzy_info->counts[data.fuzzy_type]; - --fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost -= values[RE_FUZZY_VAL_COST_BASE + - data.fuzzy_type]; - --state->total_errors; - } - - /* Permit insertion except initially when searching (it's better just to - * start searching one character later). - */ - data.permit_insertion = !search || data.new_text_pos != - state->search_anchor; - - step = advance ? data.step : 0; - - for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; - data.fuzzy_type++) { - int status; - - status = next_fuzzy_match_item(state, &data, FALSE, step); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - goto found; - } - - discard_backtrack(state); - *node = NULL; - return RE_ERROR_SUCCESS; - -found: - bt_data->fuzzy_item.fuzzy_type = (RE_INT8)data.fuzzy_type; - - ++fuzzy_info->counts[data.fuzzy_type]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - ++state->total_errors; - ++state->capture_change; - - *text_pos = data.new_text_pos; - *node = data.new_node; - - return RE_ERROR_SUCCESS; -} - -/* Tries a fuzzy insertion. 
*/ -Py_LOCAL_INLINE(int) fuzzy_insert(RE_SafeState* safe_state, Py_ssize_t - text_pos, RE_Node* node) { - RE_State* state; - RE_BacktrackData* bt_data; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - - state = safe_state->re_state; - - /* No insertion or deletion. */ - if (!add_backtrack(safe_state, node->op)) - return RE_ERROR_FAILURE; - bt_data = state->backtrack; - bt_data->fuzzy_insert.position.text_pos = text_pos; - bt_data->fuzzy_insert.position.node = node; - bt_data->fuzzy_insert.count = 0; - bt_data->fuzzy_insert.too_few_errors = state->too_few_errors; - bt_data->fuzzy_insert.fuzzy_node = node; /* END_FUZZY node. */ - - /* Check whether there are too few errors. */ - fuzzy_info = &state->fuzzy_info; - - /* The node in this case is the END_FUZZY node. */ - values = node->values; - - if (fuzzy_info->counts[RE_FUZZY_DEL] < values[RE_FUZZY_VAL_MIN_DEL] || - fuzzy_info->counts[RE_FUZZY_INS] < values[RE_FUZZY_VAL_MIN_INS] || - fuzzy_info->counts[RE_FUZZY_SUB] < values[RE_FUZZY_VAL_MIN_SUB] || - fuzzy_info->counts[RE_FUZZY_ERR] < values[RE_FUZZY_VAL_MIN_ERR]) - state->too_few_errors = RE_ERROR_SUCCESS; - - return RE_ERROR_SUCCESS; -} - -/* Retries a fuzzy insertion. */ -Py_LOCAL_INLINE(int) retry_fuzzy_insert(RE_SafeState* safe_state, Py_ssize_t* - text_pos, RE_Node** node) { - RE_State* state; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - RE_BacktrackData* bt_data; - Py_ssize_t new_text_pos; - RE_Node* new_node; - int step; - Py_ssize_t limit; - RE_Node* fuzzy_node; - - state = safe_state->re_state; - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - bt_data = state->backtrack; - new_text_pos = bt_data->fuzzy_insert.position.text_pos; - new_node = bt_data->fuzzy_insert.position.node; - - if (new_node->status & RE_STATUS_REVERSE) { - step = -1; - limit = state->slice_start; - } else { - step = 1; - limit = state->slice_end; - } - - /* Could the character at text_pos have been inserted? 
*/ - if (!this_error_permitted(state, RE_FUZZY_INS) || new_text_pos == limit) { - size_t count; - - count = bt_data->fuzzy_insert.count; - - fuzzy_info->counts[RE_FUZZY_INS] -= count; - fuzzy_info->counts[RE_FUZZY_ERR] -= count; - fuzzy_info->total_cost -= values[RE_FUZZY_VAL_INS_COST] * count; - state->total_errors -= count; - state->too_few_errors = bt_data->fuzzy_insert.too_few_errors; - - discard_backtrack(state); - *node = NULL; - return RE_ERROR_SUCCESS; - } - - ++bt_data->fuzzy_insert.count; - - ++fuzzy_info->counts[RE_FUZZY_INS]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_INS_COST]; - ++state->total_errors; - ++state->capture_change; - - /* Check whether there are too few errors. */ - state->too_few_errors = bt_data->fuzzy_insert.too_few_errors; - fuzzy_node = bt_data->fuzzy_insert.fuzzy_node; /* END_FUZZY node. */ - values = fuzzy_node->values; - if (fuzzy_info->counts[RE_FUZZY_DEL] < values[RE_FUZZY_VAL_MIN_DEL] || - fuzzy_info->counts[RE_FUZZY_INS] < values[RE_FUZZY_VAL_MIN_INS] || - fuzzy_info->counts[RE_FUZZY_SUB] < values[RE_FUZZY_VAL_MIN_SUB] || - fuzzy_info->counts[RE_FUZZY_ERR] < values[RE_FUZZY_VAL_MIN_ERR]) - state->too_few_errors = RE_ERROR_SUCCESS; - - *text_pos = new_text_pos + step * (Py_ssize_t)bt_data->fuzzy_insert.count; - *node = new_node; - - return RE_ERROR_SUCCESS; -} - -/* Tries a fuzzy match of a string. 
*/ -Py_LOCAL_INLINE(int) fuzzy_match_string(RE_SafeState* safe_state, BOOL search, - Py_ssize_t* text_pos, RE_Node* node, Py_ssize_t* string_pos, BOOL* matched, - int step) { - RE_State* state; - RE_FuzzyData data; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - RE_BacktrackData* bt_data; - - state = safe_state->re_state; - - if (!any_error_permitted(state)) { - *matched = FALSE; - return RE_ERROR_SUCCESS; - } - - data.new_text_pos = *text_pos; - data.new_string_pos = *string_pos; - data.step = step; - - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - /* Permit insertion except initially when searching (it's better just to - * start searching one character later). - */ - data.permit_insertion = !search || data.new_text_pos != - state->search_anchor; - - for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; - data.fuzzy_type++) { - int status; - - status = next_fuzzy_match_item(state, &data, TRUE, data.step); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - goto found; - } - - *matched = FALSE; - return RE_ERROR_SUCCESS; - -found: - if (!add_backtrack(safe_state, node->op)) - return RE_ERROR_FAILURE; - bt_data = state->backtrack; - bt_data->fuzzy_string.position.text_pos = *text_pos; - bt_data->fuzzy_string.position.node = node; - bt_data->fuzzy_string.string_pos = *string_pos; - bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; - bt_data->fuzzy_string.step = (RE_INT8)step; - - ++fuzzy_info->counts[data.fuzzy_type]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - ++state->total_errors; - ++state->capture_change; - - *text_pos = data.new_text_pos; - *string_pos = data.new_string_pos; - *matched = TRUE; - - return RE_ERROR_SUCCESS; -} - -/* Retries a fuzzy match of a string. 
*/ -Py_LOCAL_INLINE(int) retry_fuzzy_match_string(RE_SafeState* safe_state, BOOL - search, Py_ssize_t* text_pos, RE_Node** node, Py_ssize_t* string_pos, BOOL* - matched) { - RE_State* state; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - RE_BacktrackData* bt_data; - RE_FuzzyData data; - RE_Node* new_node; - - state = safe_state->re_state; - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - bt_data = state->backtrack; - data.new_text_pos = bt_data->fuzzy_string.position.text_pos; - new_node = bt_data->fuzzy_string.position.node; - data.new_string_pos = bt_data->fuzzy_string.string_pos; - data.fuzzy_type = bt_data->fuzzy_string.fuzzy_type; - data.step = bt_data->fuzzy_string.step; - - --fuzzy_info->counts[data.fuzzy_type]; - --fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - --state->total_errors; - - /* Permit insertion except initially when searching (it's better just to - * start searching one character later). - */ - data.permit_insertion = !search || data.new_text_pos != - state->search_anchor; - - for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; - data.fuzzy_type++) { - int status; - - status = next_fuzzy_match_item(state, &data, TRUE, data.step); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - goto found; - } - - discard_backtrack(state); - *matched = FALSE; - return RE_ERROR_SUCCESS; - -found: - bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; - - ++fuzzy_info->counts[data.fuzzy_type]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - ++state->total_errors; - ++state->capture_change; - - *text_pos = data.new_text_pos; - *node = new_node; - *string_pos = data.new_string_pos; - *matched = TRUE; - - return RE_ERROR_SUCCESS; -} - -/* Checks a fuzzy match of a atring. 
*/ -Py_LOCAL_INLINE(int) next_fuzzy_match_string_fld(RE_State* state, RE_FuzzyData* - data) { - int new_pos; - - if (this_error_permitted(state, data->fuzzy_type)) { - switch (data->fuzzy_type) { - case RE_FUZZY_DEL: - /* Could a character at text_pos have been deleted? */ - data->new_string_pos += data->step; - return RE_ERROR_SUCCESS; - case RE_FUZZY_INS: - /* Could the character at text_pos have been inserted? */ - if (!data->permit_insertion) - return RE_ERROR_FAILURE; - - new_pos = data->new_folded_pos + data->step; - if (0 <= new_pos && new_pos <= data->folded_len) { - data->new_folded_pos = new_pos; - return RE_ERROR_SUCCESS; - } - - return check_fuzzy_partial(state, new_pos); - case RE_FUZZY_SUB: - /* Could the character at text_pos have been substituted? */ - new_pos = data->new_folded_pos + data->step; - if (0 <= new_pos && new_pos <= data->folded_len) { - data->new_folded_pos = new_pos; - data->new_string_pos += data->step; - return RE_ERROR_SUCCESS; - } - - return check_fuzzy_partial(state, new_pos); - } - } - - return RE_ERROR_FAILURE; -} - -/* Tries a fuzzy match of a string, ignoring case. */ -Py_LOCAL_INLINE(int) fuzzy_match_string_fld(RE_SafeState* safe_state, BOOL - search, Py_ssize_t* text_pos, RE_Node* node, Py_ssize_t* string_pos, int* - folded_pos, int folded_len, BOOL* matched, int step) { - RE_State* state; - Py_ssize_t new_text_pos; - RE_FuzzyData data; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - RE_BacktrackData* bt_data; - - state = safe_state->re_state; - - if (!any_error_permitted(state)) { - *matched = FALSE; - return RE_ERROR_SUCCESS; - } - - new_text_pos = *text_pos; - data.new_string_pos = *string_pos; - data.new_folded_pos = *folded_pos; - data.folded_len = folded_len; - data.step = step; - - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - /* Permit insertion except initially when searching (it's better just to - * start searching one character later). 
- */ - data.permit_insertion = !search || new_text_pos != state->search_anchor; - if (step > 0) { - if (data.new_folded_pos != 0) - data.permit_insertion = RE_ERROR_SUCCESS; - } else { - if (data.new_folded_pos != folded_len) - data.permit_insertion = RE_ERROR_SUCCESS; - } - - for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; - data.fuzzy_type++) { - int status; - - status = next_fuzzy_match_string_fld(state, &data); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - goto found; - } - - *matched = FALSE; - return RE_ERROR_SUCCESS; - -found: - if (!add_backtrack(safe_state, node->op)) - return RE_ERROR_FAILURE; - bt_data = state->backtrack; - bt_data->fuzzy_string.position.text_pos = *text_pos; - bt_data->fuzzy_string.position.node = node; - bt_data->fuzzy_string.string_pos = *string_pos; - bt_data->fuzzy_string.folded_pos = (RE_INT8)(*folded_pos); - bt_data->fuzzy_string.folded_len = (RE_INT8)folded_len; - bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; - bt_data->fuzzy_string.step = (RE_INT8)step; - - ++fuzzy_info->counts[data.fuzzy_type]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - ++state->total_errors; - ++state->capture_change; - - *text_pos = new_text_pos; - *string_pos = data.new_string_pos; - *folded_pos = data.new_folded_pos; - *matched = TRUE; - - return RE_ERROR_SUCCESS; -} - -/* Retries a fuzzy match of a string, ignoring case. 
*/ -Py_LOCAL_INLINE(int) retry_fuzzy_match_string_fld(RE_SafeState* safe_state, - BOOL search, Py_ssize_t* text_pos, RE_Node** node, Py_ssize_t* string_pos, - int* folded_pos, BOOL* matched) { - RE_State* state; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - RE_BacktrackData* bt_data; - Py_ssize_t new_text_pos; - RE_Node* new_node; - RE_FuzzyData data; - - state = safe_state->re_state; - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - bt_data = state->backtrack; - new_text_pos = bt_data->fuzzy_string.position.text_pos; - new_node = bt_data->fuzzy_string.position.node; - data.new_string_pos = bt_data->fuzzy_string.string_pos; - data.new_folded_pos = bt_data->fuzzy_string.folded_pos; - data.folded_len = bt_data->fuzzy_string.folded_len; - data.fuzzy_type = bt_data->fuzzy_string.fuzzy_type; - data.step = bt_data->fuzzy_string.step; - - --fuzzy_info->counts[data.fuzzy_type]; - --fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - --state->total_errors; - - /* Permit insertion except initially when searching (it's better just to - * start searching one character later). 
- */ - data.permit_insertion = !search || new_text_pos != state->search_anchor; - if (data.step > 0) { - if (data.new_folded_pos != 0) - data.permit_insertion = RE_ERROR_SUCCESS; - } else { - if (data.new_folded_pos != bt_data->fuzzy_string.folded_len) - data.permit_insertion = RE_ERROR_SUCCESS; - } - - for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; - data.fuzzy_type++) { - int status; - - status = next_fuzzy_match_string_fld(state, &data); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - goto found; - } - - discard_backtrack(state); - *matched = FALSE; - return RE_ERROR_SUCCESS; - -found: - bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; - - ++fuzzy_info->counts[data.fuzzy_type]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - ++state->total_errors; - ++state->capture_change; - - *text_pos = new_text_pos; - *node = new_node; - *string_pos = data.new_string_pos; - *folded_pos = data.new_folded_pos; - *matched = TRUE; - - return RE_ERROR_SUCCESS; -} - -/* Checks a fuzzy match of a atring. */ -Py_LOCAL_INLINE(int) next_fuzzy_match_group_fld(RE_State* state, RE_FuzzyData* - data) { - int new_pos; - - if (this_error_permitted(state, data->fuzzy_type)) { - switch (data->fuzzy_type) { - case RE_FUZZY_DEL: - /* Could a character at text_pos have been deleted? */ - data->new_gfolded_pos += data->step; - return RE_ERROR_SUCCESS; - case RE_FUZZY_INS: - /* Could the character at text_pos have been inserted? */ - if (!data->permit_insertion) - return RE_ERROR_FAILURE; - - new_pos = data->new_folded_pos + data->step; - if (0 <= new_pos && new_pos <= data->folded_len) { - data->new_folded_pos = new_pos; - return RE_ERROR_SUCCESS; - } - - return check_fuzzy_partial(state, new_pos); - case RE_FUZZY_SUB: - /* Could the character at text_pos have been substituted? 
*/ - new_pos = data->new_folded_pos + data->step; - if (0 <= new_pos && new_pos <= data->folded_len) { - data->new_folded_pos = new_pos; - data->new_gfolded_pos += data->step; - return RE_ERROR_SUCCESS; - } - - return check_fuzzy_partial(state, new_pos); - } - } - - return RE_ERROR_FAILURE; -} - -/* Tries a fuzzy match of a group reference, ignoring case. */ -Py_LOCAL_INLINE(int) fuzzy_match_group_fld(RE_SafeState* safe_state, BOOL - search, Py_ssize_t* text_pos, RE_Node* node, int* folded_pos, int folded_len, - Py_ssize_t* group_pos, int* gfolded_pos, int gfolded_len, BOOL* matched, int - step) { - RE_State* state; - Py_ssize_t new_text_pos; - RE_FuzzyData data; - Py_ssize_t new_group_pos; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - RE_BacktrackData* bt_data; - - state = safe_state->re_state; - - if (!any_error_permitted(state)) { - *matched = FALSE; - return RE_ERROR_SUCCESS; - } - - new_text_pos = *text_pos; - data.new_folded_pos = *folded_pos; - data.folded_len = folded_len; - new_group_pos = *group_pos; - data.new_gfolded_pos = *gfolded_pos; - data.step = step; - - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - /* Permit insertion except initially when searching (it's better just to - * start searching one character later). 
- */ - data.permit_insertion = !search || new_text_pos != state->search_anchor; - if (data.step > 0) { - if (data.new_folded_pos != 0) - data.permit_insertion = RE_ERROR_SUCCESS; - } else { - if (data.new_folded_pos != folded_len) - data.permit_insertion = RE_ERROR_SUCCESS; - } - - for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; - data.fuzzy_type++) { - int status; - - status = next_fuzzy_match_group_fld(state, &data); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - goto found; - } - - *matched = FALSE; - return RE_ERROR_SUCCESS; - -found: - if (!add_backtrack(safe_state, node->op)) - return RE_ERROR_FAILURE; - bt_data = state->backtrack; - bt_data->fuzzy_string.position.text_pos = *text_pos; - bt_data->fuzzy_string.position.node = node; - bt_data->fuzzy_string.string_pos = *group_pos; - bt_data->fuzzy_string.folded_pos = (RE_INT8)(*folded_pos); - bt_data->fuzzy_string.folded_len = (RE_INT8)folded_len; - bt_data->fuzzy_string.gfolded_pos = (RE_INT8)(*gfolded_pos); - bt_data->fuzzy_string.gfolded_len = (RE_INT8)gfolded_len; - bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; - bt_data->fuzzy_string.step = (RE_INT8)step; - - ++fuzzy_info->counts[data.fuzzy_type]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - ++state->total_errors; - ++state->capture_change; - - *text_pos = new_text_pos; - *group_pos = new_group_pos; - *folded_pos = data.new_folded_pos; - *gfolded_pos = data.new_gfolded_pos; - *matched = TRUE; - - return RE_ERROR_SUCCESS; -} - -/* Retries a fuzzy match of a group reference, ignoring case. 
*/ -Py_LOCAL_INLINE(int) retry_fuzzy_match_group_fld(RE_SafeState* safe_state, BOOL - search, Py_ssize_t* text_pos, RE_Node** node, int* folded_pos, Py_ssize_t* - group_pos, int* gfolded_pos, BOOL* matched) { - RE_State* state; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - RE_BacktrackData* bt_data; - Py_ssize_t new_text_pos; - RE_Node* new_node; - Py_ssize_t new_group_pos; - RE_FuzzyData data; - - state = safe_state->re_state; - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - bt_data = state->backtrack; - new_text_pos = bt_data->fuzzy_string.position.text_pos; - new_node = bt_data->fuzzy_string.position.node; - new_group_pos = bt_data->fuzzy_string.string_pos; - data.new_folded_pos = bt_data->fuzzy_string.folded_pos; - data.folded_len = bt_data->fuzzy_string.folded_len; - data.new_gfolded_pos = bt_data->fuzzy_string.gfolded_pos; - data.fuzzy_type = bt_data->fuzzy_string.fuzzy_type; - data.step = bt_data->fuzzy_string.step; - - --fuzzy_info->counts[data.fuzzy_type]; - --fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - --state->total_errors; - - /* Permit insertion except initially when searching (it's better just to - * start searching one character later). 
- */ - data.permit_insertion = !search || new_text_pos != state->search_anchor || - data.new_folded_pos != bt_data->fuzzy_string.folded_len; - - for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; - data.fuzzy_type++) { - int status; - - status = next_fuzzy_match_group_fld(state, &data); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - goto found; - } - - discard_backtrack(state); - *matched = FALSE; - return RE_ERROR_SUCCESS; - -found: - bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; - - ++fuzzy_info->counts[data.fuzzy_type]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - ++state->total_errors; - ++state->capture_change; - - *text_pos = new_text_pos; - *node = new_node; - *group_pos = new_group_pos; - *folded_pos = data.new_folded_pos; - *gfolded_pos = data.new_gfolded_pos; - *matched = TRUE; - - return RE_ERROR_SUCCESS; -} - -/* Locates the required string, if there's one. */ -Py_LOCAL_INLINE(Py_ssize_t) locate_required_string(RE_SafeState* safe_state, - BOOL search) { - RE_State* state; - PatternObject* pattern; - Py_ssize_t found_pos; - Py_ssize_t end_pos; - - state = safe_state->re_state; - pattern = state->pattern; - - if (!pattern->req_string) - /* There isn't a required string, so start matching from the current - * position. - */ - return state->text_pos; - - /* Search for the required string and calculate where to start matching. */ - switch (pattern->req_string->op) { - case RE_OP_STRING: - { - BOOL is_partial; - Py_ssize_t limit; - - if (search || pattern->req_offset < 0) - limit = state->slice_end; - else { - limit = state->slice_start + pattern->req_offset + - (Py_ssize_t)pattern->req_string->value_count; - if (limit > state->slice_end || limit < 0) - limit = state->slice_end; - } - - if (state->req_pos < 0 || state->text_pos > state->req_pos) - /* First time or already passed it. 
*/ - found_pos = string_search(safe_state, pattern->req_string, - state->text_pos, limit, &is_partial); - else { - found_pos = state->req_pos; - end_pos = state->req_end; - is_partial = FALSE; - } - - if (found_pos < 0) - /* The required string wasn't found. */ - return -1; - - if (!is_partial) { - /* Record where the required string matched. */ - state->req_pos = found_pos; - state->req_end = found_pos + - (Py_ssize_t)pattern->req_string->value_count; - } - - if (pattern->req_offset >= 0) { - /* Step back from the required string to where we should start - * matching. - */ - found_pos -= pattern->req_offset; - if (found_pos >= state->text_pos) - return found_pos; - } - break; - } - case RE_OP_STRING_FLD: - { - BOOL is_partial; - Py_ssize_t limit; - - if (search || pattern->req_offset < 0) - limit = state->slice_end; - else { - limit = state->slice_start + pattern->req_offset + - (Py_ssize_t)pattern->req_string->value_count; - if (limit > state->slice_end || limit < 0) - limit = state->slice_end; - } - - if (state->req_pos < 0 || state->text_pos > state->req_pos) - /* First time or already passed it. */ - found_pos = string_search_fld(safe_state, pattern->req_string, - state->text_pos, limit, &end_pos, &is_partial); - else { - found_pos = state->req_pos; - end_pos = state->req_end; - is_partial = FALSE; - } - - if (found_pos < 0) - /* The required string wasn't found. */ - return -1; - - if (!is_partial) { - /* Record where the required string matched. */ - state->req_pos = found_pos; - state->req_end = end_pos; - } - - if (pattern->req_offset >= 0) { - /* Step back from the required string to where we should start - * matching. 
- */ - found_pos -= pattern->req_offset; - if (found_pos >= state->text_pos) - return found_pos; - } - break; - } - case RE_OP_STRING_FLD_REV: - { - BOOL is_partial; - Py_ssize_t limit; - - if (search || pattern->req_offset < 0) - limit = state->slice_start; - else { - limit = state->slice_end - pattern->req_offset - - (Py_ssize_t)pattern->req_string->value_count; - if (limit < state->slice_start) - limit = state->slice_start; - } - - if (state->req_pos < 0 || state->text_pos < state->req_pos) - /* First time or already passed it. */ - found_pos = string_search_fld_rev(safe_state, pattern->req_string, - state->text_pos, limit, &end_pos, &is_partial); - else { - found_pos = state->req_pos; - end_pos = state->req_end; - is_partial = FALSE; - } - - if (found_pos < 0) - /* The required string wasn't found. */ - return -1; - - if (!is_partial) { - /* Record where the required string matched. */ - state->req_pos = found_pos; - state->req_end = end_pos; - } - - if (pattern->req_offset >= 0) { - /* Step back from the required string to where we should start - * matching. - */ - found_pos += pattern->req_offset; - if (found_pos <= state->text_pos) - return found_pos; - } - break; - } - case RE_OP_STRING_IGN: - { - BOOL is_partial; - Py_ssize_t limit; - - if (search || pattern->req_offset < 0) - limit = state->slice_end; - else { - limit = state->slice_start + pattern->req_offset + - (Py_ssize_t)pattern->req_string->value_count; - if (limit > state->slice_end || limit < 0) - limit = state->slice_end; - } - - if (state->req_pos < 0 || state->text_pos > state->req_pos) - /* First time or already passed it. */ - found_pos = string_search_ign(safe_state, pattern->req_string, - state->text_pos, limit, &is_partial); - else { - found_pos = state->req_pos; - end_pos = state->req_end; - is_partial = FALSE; - } - - if (found_pos < 0) - /* The required string wasn't found. */ - return -1; - - if (!is_partial) { - /* Record where the required string matched. 
*/ - state->req_pos = found_pos; - state->req_end = found_pos + - (Py_ssize_t)pattern->req_string->value_count; - } - - if (pattern->req_offset >= 0) { - /* Step back from the required string to where we should start - * matching. - */ - found_pos -= pattern->req_offset; - if (found_pos >= state->text_pos) - return found_pos; - } - break; - } - case RE_OP_STRING_IGN_REV: - { - BOOL is_partial; - Py_ssize_t limit; - - if (search || pattern->req_offset < 0) - limit = state->slice_start; - else { - limit = state->slice_end - pattern->req_offset - - (Py_ssize_t)pattern->req_string->value_count; - if (limit < state->slice_start) - limit = state->slice_start; - } - - if (state->req_pos < 0 || state->text_pos < state->req_pos) - /* First time or already passed it. */ - found_pos = string_search_ign_rev(safe_state, pattern->req_string, - state->text_pos, limit, &is_partial); - else { - found_pos = state->req_pos; - end_pos = state->req_end; - is_partial = FALSE; - } - - if (found_pos < 0) - /* The required string wasn't found. */ - return -1; - - if (!is_partial) { - /* Record where the required string matched. */ - state->req_pos = found_pos; - state->req_end = found_pos - - (Py_ssize_t)pattern->req_string->value_count; - } - - if (pattern->req_offset >= 0) { - /* Step back from the required string to where we should start - * matching. - */ - found_pos += pattern->req_offset; - if (found_pos <= state->text_pos) - return found_pos; - } - break; - } - case RE_OP_STRING_REV: - { - BOOL is_partial; - Py_ssize_t limit; - - if (search || pattern->req_offset < 0) - limit = state->slice_start; - else { - limit = state->slice_end - pattern->req_offset - - (Py_ssize_t)pattern->req_string->value_count; - if (limit < state->slice_start) - limit = state->slice_start; - } - - if (state->req_pos < 0 || state->text_pos < state->req_pos) - /* First time or already passed it. 
*/ - found_pos = string_search_rev(safe_state, pattern->req_string, - state->text_pos, limit, &is_partial); - else { - found_pos = state->req_pos; - end_pos = state->req_end; - is_partial = FALSE; - } - - if (found_pos < 0) - /* The required string wasn't found. */ - return -1; - - if (!is_partial) { - /* Record where the required string matched. */ - state->req_pos = found_pos; - state->req_end = found_pos - - (Py_ssize_t)pattern->req_string->value_count; - } - - if (pattern->req_offset >= 0) { - /* Step back from the required string to where we should start - * matching. - */ - found_pos += pattern->req_offset; - if (found_pos <= state->text_pos) - return found_pos; - } - break; - } - } - - /* Start matching from the current position. */ - return state->text_pos; -} - -/* Tries to match a character pattern. */ -Py_LOCAL_INLINE(int) match_one(RE_State* state, RE_Node* node, Py_ssize_t - text_pos) { - switch (node->op) { - case RE_OP_ANY: - return try_match_ANY(state, node, text_pos); - case RE_OP_ANY_ALL: - return try_match_ANY_ALL(state, node, text_pos); - case RE_OP_ANY_ALL_REV: - return try_match_ANY_ALL_REV(state, node, text_pos); - case RE_OP_ANY_REV: - return try_match_ANY_REV(state, node, text_pos); - case RE_OP_ANY_U: - return try_match_ANY_U(state, node, text_pos); - case RE_OP_ANY_U_REV: - return try_match_ANY_U_REV(state, node, text_pos); - case RE_OP_CHARACTER: - return try_match_CHARACTER(state, node, text_pos); - case RE_OP_CHARACTER_IGN: - return try_match_CHARACTER_IGN(state, node, text_pos); - case RE_OP_CHARACTER_IGN_REV: - return try_match_CHARACTER_IGN_REV(state, node, text_pos); - case RE_OP_CHARACTER_REV: - return try_match_CHARACTER_REV(state, node, text_pos); - case RE_OP_PROPERTY: - return try_match_PROPERTY(state, node, text_pos); - case RE_OP_PROPERTY_IGN: - return try_match_PROPERTY_IGN(state, node, text_pos); - case RE_OP_PROPERTY_IGN_REV: - return try_match_PROPERTY_IGN_REV(state, node, text_pos); - case RE_OP_PROPERTY_REV: - return 
try_match_PROPERTY_REV(state, node, text_pos); - case RE_OP_RANGE: - return try_match_RANGE(state, node, text_pos); - case RE_OP_RANGE_IGN: - return try_match_RANGE_IGN(state, node, text_pos); - case RE_OP_RANGE_IGN_REV: - return try_match_RANGE_IGN_REV(state, node, text_pos); - case RE_OP_RANGE_REV: - return try_match_RANGE_REV(state, node, text_pos); - case RE_OP_SET_DIFF: - case RE_OP_SET_INTER: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_UNION: - return try_match_SET(state, node, text_pos); - case RE_OP_SET_DIFF_IGN: - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_UNION_IGN: - return try_match_SET_IGN(state, node, text_pos); - case RE_OP_SET_DIFF_IGN_REV: - case RE_OP_SET_INTER_IGN_REV: - case RE_OP_SET_SYM_DIFF_IGN_REV: - case RE_OP_SET_UNION_IGN_REV: - return try_match_SET_IGN_REV(state, node, text_pos); - case RE_OP_SET_DIFF_REV: - case RE_OP_SET_INTER_REV: - case RE_OP_SET_SYM_DIFF_REV: - case RE_OP_SET_UNION_REV: - return try_match_SET_REV(state, node, text_pos); - } - - return FALSE; -} - -/* Tests whether 2 nodes contains the same values. */ -Py_LOCAL_INLINE(BOOL) same_values(RE_Node* node_1, RE_Node* node_2) { - size_t i; - - if (node_1->value_count != node_2->value_count) - return FALSE; - - for (i = 0; i < node_1->value_count; i++) { - if (node_1->values[i] != node_2->values[i]) - return FALSE; - } - - return TRUE; -} - -/* Tests whether 2 nodes are equivalent (both string-like in the same way). 
*/ -Py_LOCAL_INLINE(BOOL) equivalent_nodes(RE_Node* node_1, RE_Node* node_2) { - switch (node_1->op) { - case RE_OP_CHARACTER: - case RE_OP_STRING: - switch (node_2->op) { - case RE_OP_CHARACTER: - case RE_OP_STRING: - return same_values(node_1, node_2); - } - break; - case RE_OP_CHARACTER_IGN: - case RE_OP_STRING_IGN: - switch (node_2->op) { - case RE_OP_CHARACTER_IGN: - case RE_OP_STRING_IGN: - return same_values(node_1, node_2); - } - break; - case RE_OP_CHARACTER_IGN_REV: - case RE_OP_STRING_IGN_REV: - switch (node_2->op) { - case RE_OP_CHARACTER_IGN_REV: - case RE_OP_STRING_IGN_REV: - return same_values(node_1, node_2); - } - break; - case RE_OP_CHARACTER_REV: - case RE_OP_STRING_REV: - switch (node_2->op) { - case RE_OP_CHARACTER_REV: - case RE_OP_STRING_REV: - return same_values(node_1, node_2); - } - break; - } - - return FALSE; -} - -/* Prunes the backtracking. */ -Py_LOCAL_INLINE(void) prune_backtracking(RE_State* state) { - RE_AtomicBlock* current; - - current = state->current_atomic_block; - if (current && current->count > 0) { - /* In an atomic group or a lookaround. */ - RE_AtomicData* atomic; - - /* Discard any backtracking info from inside the atomic group or - * lookaround. - */ - atomic = ¤t->items[current->count - 1]; - state->current_backtrack_block = atomic->current_backtrack_block; - state->current_backtrack_block->count = atomic->backtrack_count; - } else { - /* In the outermost pattern. */ - while (state->current_backtrack_block->previous) - state->current_backtrack_block = - state->current_backtrack_block->previous; - - /* Keep the bottom FAILURE on the backtracking stack. */ - state->current_backtrack_block->count = 1; - } -} - -/* Saves the match as the best POSIX match (leftmost longest) found so far. 
*/ -Py_LOCAL_INLINE(BOOL) save_best_match(RE_SafeState* safe_state) { - RE_State* state; - size_t group_count; - size_t g; - - state = safe_state->re_state; - - state->best_match_pos = state->match_pos; - state->best_text_pos = state->text_pos; - state->found_match = TRUE; - - memmove(state->best_fuzzy_counts, state->total_fuzzy_counts, - sizeof(state->total_fuzzy_counts)); - - group_count = state->pattern->true_group_count; - if (group_count == 0) - return TRUE; - - acquire_GIL(safe_state); - - if (!state->best_match_groups) { - /* Allocate storage for the groups of the best match. */ - state->best_match_groups = (RE_GroupData*)re_alloc(group_count * - sizeof(RE_GroupData)); - if (!state->best_match_groups) - goto error; - - memset(state->best_match_groups, 0, group_count * - sizeof(RE_GroupData)); - - for (g = 0; g < group_count; g++) { - RE_GroupData* best; - RE_GroupData* group; - - best = &state->best_match_groups[g]; - group = &state->groups[g]; - - best->capture_capacity = group->capture_capacity; - best->captures = (RE_GroupSpan*)re_alloc(best->capture_capacity * - sizeof(RE_GroupSpan)); - if (!best->captures) - goto error; - } - } - - /* Copy the group spans and captures. */ - for (g = 0; g < group_count; g++) { - RE_GroupData* best; - RE_GroupData* group; - - best = &state->best_match_groups[g]; - group = &state->groups[g]; - - best->span = group->span; - best->capture_count = group->capture_count; - - if (best->capture_count < best->capture_capacity) { - /* We need more space for the captures. */ - re_dealloc(best->captures); - best->captures = (RE_GroupSpan*)re_alloc(best->capture_capacity * - sizeof(RE_GroupSpan)); - if (!best->captures) - goto error; - } - - /* Copy the captures for this group. 
*/ - memmove(best->captures, group->captures, group->capture_count * - sizeof(RE_GroupSpan)); - } - - release_GIL(safe_state); - - return TRUE; - -error: - release_GIL(safe_state); - - return FALSE; -} - -/* Restores the best match for a POSIX match (leftmost longest). */ -Py_LOCAL_INLINE(void) restore_best_match(RE_SafeState* safe_state) { - RE_State* state; - size_t group_count; - size_t g; - - state = safe_state->re_state; - - if (!state->found_match) - return; - - state->match_pos = state->best_match_pos; - state->text_pos = state->best_text_pos; - - memmove(state->total_fuzzy_counts, state->best_fuzzy_counts, - sizeof(state->total_fuzzy_counts)); - - group_count = state->pattern->true_group_count; - if (group_count == 0) - return; - - /* Copy the group spans and captures. */ - for (g = 0; g < group_count; g++) { - RE_GroupData* group; - RE_GroupData* best; - - group = &state->groups[g]; - best = &state->best_match_groups[g]; - - group->span = best->span; - group->capture_count = best->capture_count; - - /* Copy the captures for this group. */ - memmove(group->captures, best->captures, best->capture_count * - sizeof(RE_GroupSpan)); - } -} - -/* Checks whether the new match is better than the current match for a POSIX - * match (leftmost longest) and saves it if it is. - */ -Py_LOCAL_INLINE(BOOL) check_posix_match(RE_SafeState* safe_state) { - RE_State* state; - Py_ssize_t best_length; - Py_ssize_t new_length; - - state = safe_state->re_state; - - if (!state->found_match) - return save_best_match(safe_state); - - /* Check the overall match. */ - if (state->reverse) { - /* We're searching backwards. */ - best_length = state->match_pos - state->best_text_pos; - new_length = state->match_pos - state->text_pos; - } else { - /* We're searching forwards. */ - best_length = state->best_text_pos - state->match_pos; - new_length = state->text_pos - state->match_pos; - } - - if (new_length > best_length) - /* It's a longer match. 
*/ - return save_best_match(safe_state); - - return TRUE; -} - -/* Performs a depth-first match or search from the context. */ -Py_LOCAL_INLINE(int) basic_match(RE_SafeState* safe_state, BOOL search) { - RE_State* state; - RE_EncodingTable* encoding; - RE_LocaleInfo* locale_info; - PatternObject* pattern; - RE_Node* start_node; - RE_NextNode start_pair; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - Py_ssize_t pattern_step; /* The overall step of the pattern (forwards or backwards). */ - Py_ssize_t string_pos; - BOOL do_search_start; - Py_ssize_t found_pos; - int status; - RE_Node* node; - int folded_pos; - int gfolded_pos; - TRACE(("<>\n")) - - state = safe_state->re_state; - encoding = state->encoding; - locale_info = state->locale_info; - pattern = state->pattern; - start_node = pattern->start_node; - - /* Look beyond any initial group node. */ - start_pair.node = start_node; - start_pair.test = pattern->start_test; - - /* Is the pattern anchored to the start or end of the string? */ - switch (start_pair.test->op) { - case RE_OP_END_OF_STRING: - if (state->reverse) { - /* Searching backwards. */ - if (state->text_pos != state->text_length) - return RE_ERROR_FAILURE; - - /* Don't bother to search further because it's anchored. */ - search = FALSE; - } - break; - case RE_OP_START_OF_STRING: - if (!state->reverse) { - /* Searching forwards. */ - if (state->text_pos != 0) - return RE_ERROR_FAILURE; - - /* Don't bother to search further because it's anchored. */ - search = FALSE; - } - break; - } - - char_at = state->char_at; - pattern_step = state->reverse ? -1 : 1; - string_pos = -1; - do_search_start = pattern->do_search_start; - state->fewest_errors = state->max_errors; - - if (do_search_start && pattern->req_string && - equivalent_nodes(start_pair.test, pattern->req_string)) - do_search_start = FALSE; - - /* Add a backtrack entry for failure. 
*/ - if (!add_backtrack(safe_state, RE_OP_FAILURE)) - return RE_ERROR_BACKTRACKING; - -start_match: - /* If we're searching, advance along the string until there could be a - * match. - */ - if (pattern->pattern_call_ref >= 0) { - RE_GuardList* guard_list; - - guard_list = &state->group_call_guard_list[pattern->pattern_call_ref]; - guard_list->count = 0; - guard_list->last_text_pos = -1; - } - - /* Locate the required string, if there's one, unless this is a recursive - * call of 'basic_match'. - */ - if (!pattern->req_string || state->text_pos < state->req_pos) - found_pos = state->text_pos; - else { - found_pos = locate_required_string(safe_state, search); - if (found_pos < 0) - return RE_ERROR_FAILURE; - } - - if (search) { - state->text_pos = found_pos; - - if (do_search_start) { - RE_Position new_position; - -next_match_1: - /* 'search_start' will clear 'do_search_start' if it can't perform - * a fast search for the next possible match. This enables us to - * avoid the overhead of the call subsequently. - */ - status = search_start(safe_state, &start_pair, &new_position, 0); - if (status == RE_ERROR_PARTIAL) { - state->match_pos = new_position.text_pos; - return status; - } else if (status != RE_ERROR_SUCCESS) - return status; - - node = new_position.node; - state->text_pos = new_position.text_pos; - - if (node->op == RE_OP_SUCCESS) { - /* Must the match advance past its start? */ - if (state->text_pos != state->search_anchor || - !state->must_advance) - return RE_ERROR_SUCCESS; - - state->text_pos = state->match_pos + pattern_step; - goto next_match_1; - } - - /* 'do_search_start' may have been cleared. */ - do_search_start = pattern->do_search_start; - } else { - /* Avoiding 'search_start', which we've found can't perform a fast - * search for the next possible match. 
- */ - node = start_node; - -next_match_2: - if (state->reverse) { - if (state->text_pos < state->slice_start) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - } else { - if (state->text_pos > state->slice_end) { - if (state-> partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - } - - state->match_pos = state->text_pos; - - if (node->op == RE_OP_SUCCESS) { - /* Must the match advance past its start? */ - if (state->text_pos != state->search_anchor || - !state->must_advance) { - BOOL success; - - if (state->match_all) { - /* We want to match all of the slice. */ - if (state->reverse) - success = state->text_pos == state->slice_start; - else - success = state->text_pos == state->slice_end; - } else - success = TRUE; - - if (success) - return RE_ERROR_SUCCESS; - } - - state->text_pos = state->match_pos + pattern_step; - goto next_match_2; - } - } - } else { - /* The start position is anchored to the current position. */ - if (found_pos != state->text_pos) - return RE_ERROR_FAILURE; - - node = start_node; - } - -advance: - /* The main matching loop. */ - for (;;) { - TRACE(("%d|", state->text_pos)) - - /* Should we abort the matching? */ - ++state->iterations; - - if (state->iterations == 0 && safe_check_signals(safe_state)) - return RE_ERROR_INTERRUPTED; - - switch (node->op) { - case RE_OP_ANY: /* Any character except a newline. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_ANY(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) { - ++state->text_pos; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_ANY_ALL: /* Any character at all. 
*/ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_ANY_ALL(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) { - ++state->text_pos; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_ANY_ALL_REV: /* Any character at all, backwards. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_ANY_ALL_REV(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) { - --state->text_pos; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_ANY_REV: /* Any character except a newline, backwards. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_ANY_REV(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) { - --state->text_pos; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_ANY_U: /* Any character except a line separator. 
*/ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_ANY_U(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) { - ++state->text_pos; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_ANY_U_REV: /* Any character except a line separator, backwards. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_ANY_U_REV(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) { - --state->text_pos; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_ATOMIC: /* Start of an atomic group. */ - { - RE_AtomicData* atomic; - TRACE(("%s\n", re_op_text[node->op])) - - if (!add_backtrack(safe_state, RE_OP_ATOMIC)) - return RE_ERROR_BACKTRACKING; - state->backtrack->atomic.too_few_errors = state->too_few_errors; - state->backtrack->atomic.capture_change = state->capture_change; - - atomic = push_atomic(safe_state); - if (!atomic) - return RE_ERROR_MEMORY; - atomic->backtrack_count = state->current_backtrack_block->count; - atomic->current_backtrack_block = state->current_backtrack_block; - atomic->is_lookaround = FALSE; - atomic->has_groups = (node->status & RE_STATUS_HAS_GROUPS) != 0; - atomic->has_repeats = (node->status & RE_STATUS_HAS_REPEATS) != 0; - atomic->call_frame = state->current_group_call_frame; - - /* Save the groups and repeats. 
*/ - if (atomic->has_groups && !push_groups(safe_state)) - return RE_ERROR_MEMORY; - - if (atomic->has_repeats && !push_repeats(safe_state)) - return RE_ERROR_MEMORY; - - node = node->next_1.node; - break; - } - case RE_OP_BOUNDARY: /* On a word boundary. */ - TRACE(("%s %d\n", re_op_text[node->op], node->match)) - - status = try_match_BOUNDARY(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_BRANCH: /* 2-way branch. */ - { - RE_Position next_position; - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match(state, &node->next_1, state->text_pos, - &next_position); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) { - if (!add_backtrack(safe_state, RE_OP_BRANCH)) - return RE_ERROR_BACKTRACKING; - state->backtrack->branch.position.node = - node->nonstring.next_2.node; - state->backtrack->branch.position.text_pos = state->text_pos; - - node = next_position.node; - state->text_pos = next_position.text_pos; - } else - node = node->nonstring.next_2.node; - break; - } - case RE_OP_CALL_REF: /* A group call reference. */ - { - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - if (!push_group_return(safe_state, NULL)) - return RE_ERROR_MEMORY; - - if (!add_backtrack(safe_state, RE_OP_CALL_REF)) - return RE_ERROR_BACKTRACKING; - - node = node->next_1.node; - break; - } - case RE_OP_CHARACTER: /* A character. 
*/ - TRACE(("%s %d %d\n", re_op_text[node->op], node->match, - node->values[0])) - - if (state->text_pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && - matches_CHARACTER(encoding, locale_info, node, - char_at(state->text, state->text_pos)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_CHARACTER_IGN: /* A character, ignoring case. */ - TRACE(("%s %d %d\n", re_op_text[node->op], node->match, - node->values[0])) - - if (state->text_pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && - matches_CHARACTER_IGN(encoding, locale_info, node, - char_at(state->text, state->text_pos)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_CHARACTER_IGN_REV: /* A character, backwards, ignoring case. 
*/ - TRACE(("%s %d %d\n", re_op_text[node->op], node->match, - node->values[0])) - - if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && - matches_CHARACTER_IGN(encoding, locale_info, node, - char_at(state->text, state->text_pos - 1)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_CHARACTER_REV: /* A character, backwards. */ - TRACE(("%s %d %d\n", re_op_text[node->op], node->match, - node->values[0])) - - if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && - matches_CHARACTER(encoding, locale_info, node, - char_at(state->text, state->text_pos - 1)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_CONDITIONAL: /* Start of a conditional subpattern. 
*/ - { - RE_AtomicData* conditional; - TRACE(("%s %d\n", re_op_text[node->op], node->match)) - - if (!add_backtrack(safe_state, RE_OP_CONDITIONAL)) - return RE_ERROR_BACKTRACKING; - state->backtrack->lookaround.too_few_errors = - state->too_few_errors; - state->backtrack->lookaround.capture_change = - state->capture_change; - state->backtrack->lookaround.inside = TRUE; - state->backtrack->lookaround.node = node; - - conditional = push_atomic(safe_state); - if (!conditional) - return RE_ERROR_MEMORY; - conditional->backtrack_count = - state->current_backtrack_block->count; - conditional->current_backtrack_block = - state->current_backtrack_block; - conditional->slice_start = state->slice_start; - conditional->slice_end = state->slice_end; - conditional->text_pos = state->text_pos; - conditional->node = node; - conditional->backtrack = state->backtrack; - conditional->is_lookaround = TRUE; - conditional->has_groups = (node->status & RE_STATUS_HAS_GROUPS) != - 0; - conditional->has_repeats = (node->status & RE_STATUS_HAS_REPEATS) - != 0; - - /* Save the groups and repeats. */ - if (conditional->has_groups && !push_groups(safe_state)) - return RE_ERROR_MEMORY; - - if (conditional->has_repeats && !push_repeats(safe_state)) - return RE_ERROR_MEMORY; - - conditional->saved_groups = state->current_saved_groups; - conditional->saved_repeats = state->current_saved_repeats; - - state->slice_start = 0; - state->slice_end = state->text_length; - - node = node->next_1.node; - break; - } - case RE_OP_DEFAULT_BOUNDARY: /* On a default word boundary. 
*/ - TRACE(("%s %d\n", re_op_text[node->op], node->match)) - - status = try_match_DEFAULT_BOUNDARY(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_DEFAULT_END_OF_WORD: /* At the default end of a word. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_DEFAULT_END_OF_WORD(state, node, - state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_DEFAULT_START_OF_WORD: /* At the default start of a word. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_DEFAULT_START_OF_WORD(state, node, - state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_END_ATOMIC: /* End of an atomic group. */ - { - RE_AtomicData* atomic; - TRACE(("%s\n", re_op_text[node->op])) - - /* Discard any backtracking info from inside the atomic group. */ - atomic = top_atomic(safe_state); - state->current_backtrack_block = atomic->current_backtrack_block; - state->current_backtrack_block->count = atomic->backtrack_count; - - node = node->next_1.node; - break; - } - case RE_OP_END_CONDITIONAL: /* End of a conditional subpattern. 
*/ - { - RE_AtomicData* conditional; - TRACE(("%s\n", re_op_text[node->op])) - - conditional = pop_atomic(safe_state); - while (!conditional->is_lookaround) { - if (conditional->has_repeats) - drop_repeats(state); - - if (conditional->has_groups) - drop_groups(state); - - conditional = pop_atomic(safe_state); - } - state->text_pos = conditional->text_pos; - state->slice_end = conditional->slice_end; - state->slice_start = conditional->slice_start; - - /* Discard any backtracking info from inside the lookaround. */ - state->current_backtrack_block = - conditional->current_backtrack_block; - state->current_backtrack_block->count = - conditional->backtrack_count; - state->current_saved_groups = conditional->saved_groups; - state->current_saved_repeats = conditional->saved_repeats; - - /* It's a positive lookaround that's succeeded. We're now going to - * leave the lookaround. - */ - conditional->backtrack->lookaround.inside = FALSE; - - if (conditional->node->match) { - /* It's a positive lookaround that's succeeded. - * - * Go to the 'true' branch. - */ - node = node->next_1.node; - } else { - /* It's a negative lookaround that's succeeded. - * - * Go to the 'false' branch. - */ - node = node->nonstring.next_2.node; - } - break; - } - case RE_OP_END_FUZZY: /* End of fuzzy matching. */ - TRACE(("%s\n", re_op_text[node->op])) - - if (!fuzzy_insert(safe_state, state->text_pos, node)) - return RE_ERROR_BACKTRACKING; - - /* If there were too few errors, in the fuzzy section, try again. - */ - if (state->too_few_errors) { - state->too_few_errors = FALSE; - goto backtrack; - } - - state->total_fuzzy_counts[RE_FUZZY_SUB] += - state->fuzzy_info.counts[RE_FUZZY_SUB]; - state->total_fuzzy_counts[RE_FUZZY_INS] += - state->fuzzy_info.counts[RE_FUZZY_INS]; - state->total_fuzzy_counts[RE_FUZZY_DEL] += - state->fuzzy_info.counts[RE_FUZZY_DEL]; - - node = node->next_1.node; - break; - case RE_OP_END_GREEDY_REPEAT: /* End of a greedy repeat. 
*/ - { - RE_CODE index; - RE_RepeatData* rp_data; - BOOL changed; - BOOL try_body; - int body_status; - RE_Position next_body_position; - BOOL try_tail; - int tail_status; - RE_Position next_tail_position; - RE_BacktrackData* bt_data; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Repeat indexes are 0-based. */ - index = node->values[0]; - rp_data = &state->repeats[index]; - - /* The body has matched successfully at this position. */ - if (!guard_repeat(safe_state, index, rp_data->start, - RE_STATUS_BODY, FALSE)) - return RE_ERROR_MEMORY; - - ++rp_data->count; - - /* Have we advanced through the text or has a capture group change? - */ - changed = rp_data->capture_change != state->capture_change || - state->text_pos != rp_data->start; - - /* Additional checks are needed if there's fuzzy matching. */ - if (changed && state->pattern->is_fuzzy && rp_data->count >= - node->values[1]) - changed = !(node->step == 1 ? state->text_pos >= - state->slice_end : state->text_pos <= state->slice_start); - - /* The counts are of type size_t, so the format needs to specify - * that. - */ - TRACE(("min is %" PY_FORMAT_SIZE_T "u, max is %" PY_FORMAT_SIZE_T - "u, count is %" PY_FORMAT_SIZE_T "u\n", node->values[1], - node->values[2], rp_data->count)) - - /* Could the body or tail match? 
*/ - try_body = changed && (rp_data->count < node->values[2] || - ~node->values[2] == 0) && !is_repeat_guarded(safe_state, index, - state->text_pos, RE_STATUS_BODY); - if (try_body) { - body_status = try_match(state, &node->next_1, state->text_pos, - &next_body_position); - if (body_status < 0) - return body_status; - - if (body_status == RE_ERROR_FAILURE) - try_body = FALSE; - } else - body_status = RE_ERROR_FAILURE; - - try_tail = (!changed || rp_data->count >= node->values[1]) && - !is_repeat_guarded(safe_state, index, state->text_pos, - RE_STATUS_TAIL); - if (try_tail) { - tail_status = try_match(state, &node->nonstring.next_2, - state->text_pos, &next_tail_position); - if (tail_status < 0) - return tail_status; - - if (tail_status == RE_ERROR_FAILURE) - try_tail = FALSE; - } else - tail_status = RE_ERROR_FAILURE; - - if (!try_body && !try_tail) { - /* Neither the body nor the tail could match. */ - --rp_data->count; - goto backtrack; - } - - if (body_status < 0 || (body_status == 0 && tail_status < 0)) - return RE_ERROR_PARTIAL; - - /* Record info in case we backtrack into the body. */ - if (!add_backtrack(safe_state, RE_OP_BODY_END)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.index = index; - bt_data->repeat.count = rp_data->count - 1; - bt_data->repeat.start = rp_data->start; - bt_data->repeat.capture_change = rp_data->capture_change; - - if (try_body) { - /* Both the body and the tail could match. */ - if (try_tail) { - /* The body takes precedence. If the body fails to match - * then we want to try the tail before backtracking - * further. - */ - - /* Record backtracking info for matching the tail. 
*/ - if (!add_backtrack(safe_state, RE_OP_MATCH_TAIL)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.position = next_tail_position; - bt_data->repeat.index = index; - bt_data->repeat.count = rp_data->count; - bt_data->repeat.start = rp_data->start; - bt_data->repeat.capture_change = rp_data->capture_change; - bt_data->repeat.text_pos = state->text_pos; - } - - /* Record backtracking info in case the body fails to match. */ - if (!add_backtrack(safe_state, RE_OP_BODY_START)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.index = index; - bt_data->repeat.text_pos = state->text_pos; - - rp_data->capture_change = state->capture_change; - rp_data->start = state->text_pos; - - /* Advance into the body. */ - node = next_body_position.node; - state->text_pos = next_body_position.text_pos; - } else { - /* Only the tail could match. */ - - /* Advance into the tail. */ - node = next_tail_position.node; - state->text_pos = next_tail_position.text_pos; - } - break; - } - case RE_OP_END_GROUP: /* End of a capture group. */ - { - RE_CODE private_index; - RE_CODE public_index; - RE_GroupData* group; - RE_BacktrackData* bt_data; - TRACE(("%s %d\n", re_op_text[node->op], node->values[1])) - - /* Capture group indexes are 1-based (excluding group 0, which is - * the entire matched string). 
- */ - private_index = node->values[0]; - public_index = node->values[1]; - group = &state->groups[private_index - 1]; - - if (!add_backtrack(safe_state, RE_OP_END_GROUP)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->group.private_index = private_index; - bt_data->group.public_index = public_index; - bt_data->group.text_pos = group->span.end; - bt_data->group.capture = (BOOL)node->values[2]; - bt_data->group.current_capture = group->current_capture; - - if (pattern->group_info[private_index - 1].referenced && - group->span.end != state->text_pos) - ++state->capture_change; - group->span.end = state->text_pos; - - /* Save the capture? */ - if (node->values[2]) { - group->current_capture = (Py_ssize_t)group->capture_count; - if (!save_capture(safe_state, private_index, public_index)) - return RE_ERROR_MEMORY; - } - - node = node->next_1.node; - break; - } - case RE_OP_END_LAZY_REPEAT: /* End of a lazy repeat. */ - { - RE_CODE index; - RE_RepeatData* rp_data; - BOOL changed; - BOOL try_body; - int body_status; - RE_Position next_body_position; - BOOL try_tail; - int tail_status; - RE_Position next_tail_position; - RE_BacktrackData* bt_data; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Repeat indexes are 0-based. */ - index = node->values[0]; - rp_data = &state->repeats[index]; - - /* The body has matched successfully at this position. */ - if (!guard_repeat(safe_state, index, rp_data->start, - RE_STATUS_BODY, FALSE)) - return RE_ERROR_MEMORY; - - ++rp_data->count; - - /* Have we advanced through the text or has a capture group change? - */ - changed = rp_data->capture_change != state->capture_change || - state->text_pos != rp_data->start; - - /* Additional checks are needed if there's fuzzy matching. */ - if (changed && state->pattern->is_fuzzy && rp_data->count >= - node->values[1]) - changed = !(node->step == 1 ? 
state->text_pos >= - state->slice_end : state->text_pos <= state->slice_start); - - /* The counts are of type size_t, so the format needs to specify - * that. - */ - TRACE(("min is %" PY_FORMAT_SIZE_T "u, max is %" PY_FORMAT_SIZE_T - "u, count is %" PY_FORMAT_SIZE_T "u\n", node->values[1], - node->values[2], rp_data->count)) - - /* Could the body or tail match? */ - try_body = changed && (rp_data->count < node->values[2] || - ~node->values[2] == 0) && !is_repeat_guarded(safe_state, index, - state->text_pos, RE_STATUS_BODY); - if (try_body) { - body_status = try_match(state, &node->next_1, state->text_pos, - &next_body_position); - if (body_status < 0) - return body_status; - - if (body_status == RE_ERROR_FAILURE) - try_body = FALSE; - } else - body_status = RE_ERROR_FAILURE; - - try_tail = (!changed || rp_data->count >= node->values[1]); - if (try_tail) { - tail_status = try_match(state, &node->nonstring.next_2, - state->text_pos, &next_tail_position); - if (tail_status < 0) - return tail_status; - - if (tail_status == RE_ERROR_FAILURE) - try_tail = FALSE; - } else - tail_status = RE_ERROR_FAILURE; - - if (!try_body && !try_tail) { - /* Neither the body nor the tail could match. */ - --rp_data->count; - goto backtrack; - } - - if (body_status < 0 || (body_status == 0 && tail_status < 0)) - return RE_ERROR_PARTIAL; - - /* Record info in case we backtrack into the body. */ - if (!add_backtrack(safe_state, RE_OP_BODY_END)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.index = index; - bt_data->repeat.count = rp_data->count - 1; - bt_data->repeat.start = rp_data->start; - bt_data->repeat.capture_change = rp_data->capture_change; - - if (try_body) { - /* Both the body and the tail could match. */ - if (try_tail) { - /* The tail takes precedence. If the tail fails to match - * then we want to try the body before backtracking - * further. - */ - - /* Record backtracking info for matching the body. 
*/ - if (!add_backtrack(safe_state, RE_OP_MATCH_BODY)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.position = next_body_position; - bt_data->repeat.index = index; - bt_data->repeat.count = rp_data->count; - bt_data->repeat.start = rp_data->start; - bt_data->repeat.capture_change = rp_data->capture_change; - bt_data->repeat.text_pos = state->text_pos; - - /* Advance into the tail. */ - node = next_tail_position.node; - state->text_pos = next_tail_position.text_pos; - } else { - /* Only the body could match. */ - - /* Record backtracking info in case the body fails to - * match. - */ - if (!add_backtrack(safe_state, RE_OP_BODY_START)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.index = index; - bt_data->repeat.text_pos = state->text_pos; - - rp_data->capture_change = state->capture_change; - rp_data->start = state->text_pos; - - /* Advance into the body. */ - node = next_body_position.node; - state->text_pos = next_body_position.text_pos; - } - } else { - /* Only the tail could match. */ - - /* Advance into the tail. */ - node = next_tail_position.node; - state->text_pos = next_tail_position.text_pos; - } - break; - } - case RE_OP_END_LOOKAROUND: /* End of a lookaround subpattern. */ - { - RE_AtomicData* lookaround; - - lookaround = pop_atomic(safe_state); - while (!lookaround->is_lookaround) { - if (lookaround->has_repeats) - drop_repeats(state); - - if (lookaround->has_groups) - drop_groups(state); - - lookaround = pop_atomic(safe_state); - } - state->text_pos = lookaround->text_pos; - state->slice_end = lookaround->slice_end; - state->slice_start = lookaround->slice_start; - - /* Discard any backtracking info from inside the lookaround. 
*/ - state->current_backtrack_block = - lookaround->current_backtrack_block; - state->current_backtrack_block->count = - lookaround->backtrack_count; - state->current_saved_groups = lookaround->saved_groups; - state->current_saved_repeats = lookaround->saved_repeats; - - if (lookaround->node->match) { - /* It's a positive lookaround that's succeeded. We're now going - * to leave the lookaround. - */ - lookaround->backtrack->lookaround.inside = FALSE; - - node = node->next_1.node; - } else { - /* It's a negative lookaround that's succeeded. The groups and - * certain flags may have changed. We need to restore them and - * then backtrack. - */ - if (lookaround->has_repeats) - pop_repeats(state); - - if (lookaround->has_groups) - pop_groups(state); - - state->too_few_errors = - lookaround->backtrack->lookaround.too_few_errors; - state->capture_change = - lookaround->backtrack->lookaround.capture_change; - - discard_backtrack(state); - goto backtrack; - } - break; - } - case RE_OP_END_OF_LINE: /* At the end of a line. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_END_OF_LINE(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_END_OF_LINE_U: /* At the end of a line. 
*/ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_END_OF_LINE_U(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_END_OF_STRING: /* At the end of the string. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_END_OF_STRING(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_END_OF_STRING_LINE: /* At end of string or final newline. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_END_OF_STRING_LINE(state, node, - state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_END_OF_STRING_LINE_U: /* At end of string or final newline. 
*/ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_END_OF_STRING_LINE_U(state, node, - state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_END_OF_WORD: /* At the end of a word. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_END_OF_WORD(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_FAILURE: /* Failure. */ - goto backtrack; - case RE_OP_FUZZY: /* Fuzzy matching. */ - { - RE_FuzzyInfo* fuzzy_info; - RE_BacktrackData* bt_data; - TRACE(("%s\n", re_op_text[node->op])) - - fuzzy_info = &state->fuzzy_info; - - /* Save the current fuzzy info. */ - if (!add_backtrack(safe_state, RE_OP_FUZZY)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - memmove(&bt_data->fuzzy.fuzzy_info, fuzzy_info, - sizeof(RE_FuzzyInfo)); - bt_data->fuzzy.index = node->values[0]; - bt_data->fuzzy.text_pos = state->text_pos; - - /* Initialise the new fuzzy info. */ - memset(fuzzy_info->counts, 0, 4 * sizeof(fuzzy_info->counts[0])); - fuzzy_info->total_cost = 0; - fuzzy_info->node = node; - - node = node->next_1.node; - break; - } - case RE_OP_GRAPHEME_BOUNDARY: /* On a grapheme boundary. 
*/ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_GRAPHEME_BOUNDARY(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_GREEDY_REPEAT: /* Greedy repeat. */ - { - RE_CODE index; - RE_RepeatData* rp_data; - RE_BacktrackData* bt_data; - BOOL try_body; - int body_status; - RE_Position next_body_position; - BOOL try_tail; - int tail_status; - RE_Position next_tail_position; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Repeat indexes are 0-based. */ - index = node->values[0]; - rp_data = &state->repeats[index]; - - /* We might need to backtrack into the head, so save the current - * repeat. - */ - if (!add_backtrack(safe_state, RE_OP_GREEDY_REPEAT)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.index = index; - bt_data->repeat.count = rp_data->count; - bt_data->repeat.start = rp_data->start; - bt_data->repeat.capture_change = rp_data->capture_change; - bt_data->repeat.text_pos = state->text_pos; - - /* Initialise the new repeat. */ - rp_data->count = 0; - rp_data->start = state->text_pos; - rp_data->capture_change = state->capture_change; - - /* Could the body or tail match? 
*/ - try_body = node->values[2] > 0 && !is_repeat_guarded(safe_state, - index, state->text_pos, RE_STATUS_BODY); - if (try_body) { - body_status = try_match(state, &node->next_1, state->text_pos, - &next_body_position); - if (body_status < 0) - return body_status; - - if (body_status == RE_ERROR_FAILURE) - try_body = FALSE; - } else - body_status = RE_ERROR_FAILURE; - - try_tail = node->values[1] == 0; - if (try_tail) { - tail_status = try_match(state, &node->nonstring.next_2, - state->text_pos, &next_tail_position); - if (tail_status < 0) - return tail_status; - - if (tail_status == RE_ERROR_FAILURE) - try_tail = FALSE; - } else - tail_status = RE_ERROR_FAILURE; - if (!try_body && !try_tail) - /* Neither the body nor the tail could match. */ - goto backtrack; - - if (body_status < 0 || (body_status == 0 && tail_status < 0)) - return RE_ERROR_PARTIAL; - - if (try_body) { - if (try_tail) { - /* Both the body and the tail could match, but the body - * takes precedence. If the body fails to match then we - * want to try the tail before backtracking further. - */ - - /* Record backtracking info for matching the tail. */ - if (!add_backtrack(safe_state, RE_OP_MATCH_TAIL)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.position = next_tail_position; - bt_data->repeat.index = index; - bt_data->repeat.count = rp_data->count; - bt_data->repeat.start = rp_data->start; - bt_data->repeat.capture_change = rp_data->capture_change; - bt_data->repeat.text_pos = state->text_pos; - } - - /* Advance into the body. */ - node = next_body_position.node; - state->text_pos = next_body_position.text_pos; - } else { - /* Only the tail could match. */ - - /* Advance into the tail. */ - node = next_tail_position.node; - state->text_pos = next_tail_position.text_pos; - } - break; - } - case RE_OP_GREEDY_REPEAT_ONE: /* Greedy repeat for one character. 
*/ - { - RE_CODE index; - RE_RepeatData* rp_data; - size_t count; - BOOL is_partial; - BOOL match; - RE_BacktrackData* bt_data; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Repeat indexes are 0-based. */ - index = node->values[0]; - rp_data = &state->repeats[index]; - - if (is_repeat_guarded(safe_state, index, state->text_pos, - RE_STATUS_BODY)) - goto backtrack; - - /* Count how many times the character repeats, up to the maximum. - */ - count = count_one(state, node->nonstring.next_2.node, - state->text_pos, node->values[2], &is_partial); - if (is_partial) { - state->text_pos += (Py_ssize_t)count * node->step; - return RE_ERROR_PARTIAL; - } - - /* Unmatch until it's not guarded. */ - match = FALSE; - for (;;) { - if (count < node->values[1]) - /* The number of repeats is below the minimum. */ - break; - - if (!is_repeat_guarded(safe_state, index, state->text_pos + - (Py_ssize_t)count * node->step, RE_STATUS_TAIL)) { - /* It's not guarded at this position. */ - match = TRUE; - break; - } - - if (count == 0) - break; - - --count; - } - - if (!match) { - /* The repeat has failed to match at this position. */ - if (!guard_repeat(safe_state, index, state->text_pos, - RE_STATUS_BODY, TRUE)) - return RE_ERROR_MEMORY; - goto backtrack; - } - - if (count > node->values[1]) { - /* Record the backtracking info. */ - if (!add_backtrack(safe_state, RE_OP_GREEDY_REPEAT_ONE)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.position.node = node; - bt_data->repeat.index = index; - bt_data->repeat.text_pos = rp_data->start; - bt_data->repeat.count = rp_data->count; - - rp_data->start = state->text_pos; - rp_data->count = count; - } - - /* Advance into the tail. */ - state->text_pos += (Py_ssize_t)count * node->step; - node = node->next_1.node; - break; - } - case RE_OP_GROUP_CALL: /* Group call. 
*/ - { - size_t index; - size_t g; - size_t r; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - index = node->values[0]; - - /* Save the capture groups and repeat guards. */ - if (!push_group_return(safe_state, node->next_1.node)) - return RE_ERROR_MEMORY; - - /* Clear the capture groups for the group call. They'll be restored - * on return. - */ - for (g = 0; g < state->pattern->true_group_count; g++) { - RE_GroupData* group; - - group = &state->groups[g]; - group->span.start = -1; - group->span.end = -1; - group->current_capture = -1; - } - - /* Clear the repeat guards for the group call. They'll be restored - * on return. - */ - for (r = 0; r < state->pattern->repeat_count; r++) { - RE_RepeatData* repeat; - - repeat = &state->repeats[r]; - repeat->body_guard_list.count = 0; - repeat->body_guard_list.last_text_pos = -1; - repeat->tail_guard_list.count = 0; - repeat->tail_guard_list.last_text_pos = -1; - } - - /* Call a group, skipping its CALL_REF node. */ - node = pattern->call_ref_info[index].node->next_1.node; - - if (!add_backtrack(safe_state, RE_OP_GROUP_CALL)) - return RE_ERROR_BACKTRACKING; - break; - } - case RE_OP_GROUP_EXISTS: /* Capture group exists. */ - { - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Capture group indexes are 1-based (excluding group 0, which is - * the entire matched string). - * - * Check whether the captured text, if any, exists at this position - * in the string. - * - * A group index of 0, however, means that it's a DEFINE, which we - * should skip. - */ - if (node->values[0] == 0) - /* Skip past the body. */ - node = node->nonstring.next_2.node; - else { - RE_GroupData* group; - - group = &state->groups[node->values[0] - 1]; - if (group->current_capture >= 0) - /* The 'true' branch. */ - node = node->next_1.node; - else - /* The 'false' branch. */ - node = node->nonstring.next_2.node; - } - break; - } - case RE_OP_GROUP_RETURN: /* Group return. 
*/ - { - RE_Node* return_node; - RE_BacktrackData* bt_data; - TRACE(("%s\n", re_op_text[node->op])) - - return_node = top_group_return(state); - - if (!add_backtrack(safe_state, RE_OP_GROUP_RETURN)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->group_call.node = return_node; - bt_data->group_call.capture_change = state->capture_change; - - if (return_node) { - /* The group was called. */ - node = return_node; - - /* Save the groups. */ - if (!push_groups(safe_state)) - return RE_ERROR_MEMORY; - - /* Save the repeats. */ - if (!push_repeats(safe_state)) - return RE_ERROR_MEMORY; - } else - /* The group was not called. */ - node = node->next_1.node; - - pop_group_return(state); - break; - } - case RE_OP_KEEP: /* Keep. */ - { - RE_BacktrackData* bt_data; - TRACE(("%s\n", re_op_text[node->op])) - - if (!add_backtrack(safe_state, RE_OP_KEEP)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->keep.match_pos = state->match_pos; - state->match_pos = state->text_pos; - node = node->next_1.node; - break; - } - case RE_OP_LAZY_REPEAT: /* Lazy repeat. */ - { - RE_CODE index; - RE_RepeatData* rp_data; - RE_BacktrackData* bt_data; - BOOL try_body; - int body_status; - RE_Position next_body_position; - BOOL try_tail; - int tail_status; - RE_Position next_tail_position; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Repeat indexes are 0-based. */ - index = node->values[0]; - rp_data = &state->repeats[index]; - - /* We might need to backtrack into the head, so save the current - * repeat. - */ - if (!add_backtrack(safe_state, RE_OP_LAZY_REPEAT)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.index = index; - bt_data->repeat.count = rp_data->count; - bt_data->repeat.start = rp_data->start; - bt_data->repeat.capture_change = rp_data->capture_change; - bt_data->repeat.text_pos = state->text_pos; - - /* Initialise the new repeat. 
*/ - rp_data->count = 0; - rp_data->start = state->text_pos; - rp_data->capture_change = state->capture_change; - - /* Could the body or tail match? */ - try_body = node->values[2] > 0 && !is_repeat_guarded(safe_state, - index, state->text_pos, RE_STATUS_BODY); - if (try_body) { - body_status = try_match(state, &node->next_1, state->text_pos, - &next_body_position); - if (body_status < 0) - return body_status; - - if (body_status == RE_ERROR_FAILURE) - try_body = FALSE; - } else - body_status = RE_ERROR_FAILURE; - - try_tail = node->values[1] == 0; - if (try_tail) { - tail_status = try_match(state, &node->nonstring.next_2, - state->text_pos, &next_tail_position); - if (tail_status < 0) - return tail_status; - - if (tail_status == RE_ERROR_FAILURE) - try_tail = FALSE; - } else - tail_status = RE_ERROR_FAILURE; - - if (!try_body && !try_tail) - /* Neither the body nor the tail could match. */ - goto backtrack; - - if (body_status < 0 || (body_status == 0 && tail_status < 0)) - return RE_ERROR_PARTIAL; - - if (try_body) { - if (try_tail) { - /* Both the body and the tail could match, but the tail - * takes precedence. If the tail fails to match then we - * want to try the body before backtracking further. - */ - - /* Record backtracking info for matching the tail. */ - if (!add_backtrack(safe_state, RE_OP_MATCH_BODY)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.position = next_body_position; - bt_data->repeat.index = index; - bt_data->repeat.count = rp_data->count; - bt_data->repeat.start = rp_data->start; - bt_data->repeat.capture_change = rp_data->capture_change; - bt_data->repeat.text_pos = state->text_pos; - - /* Advance into the tail. */ - node = next_tail_position.node; - state->text_pos = next_tail_position.text_pos; - } else { - /* Advance into the body. */ - node = next_body_position.node; - state->text_pos = next_body_position.text_pos; - } - } else { - /* Only the tail could match. */ - - /* Advance into the tail. 
*/ - node = next_tail_position.node; - state->text_pos = next_tail_position.text_pos; - } - break; - } - case RE_OP_LAZY_REPEAT_ONE: /* Lazy repeat for one character. */ - { - RE_CODE index; - RE_RepeatData* rp_data; - size_t count; - BOOL is_partial; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Repeat indexes are 0-based. */ - index = node->values[0]; - rp_data = &state->repeats[index]; - - if (is_repeat_guarded(safe_state, index, state->text_pos, - RE_STATUS_BODY)) - goto backtrack; - - /* Count how many times the character repeats, up to the minimum. - */ - count = count_one(state, node->nonstring.next_2.node, - state->text_pos, node->values[1], &is_partial); - if (is_partial) { - state->text_pos += (Py_ssize_t)count * node->step; - return RE_ERROR_PARTIAL; - } - - /* Have we matched at least the minimum? */ - if (count < node->values[1]) { - /* The repeat has failed to match at this position. */ - if (!guard_repeat(safe_state, index, state->text_pos, - RE_STATUS_BODY, TRUE)) - return RE_ERROR_MEMORY; - goto backtrack; - } - - if (count < node->values[2]) { - /* The match is shorter than the maximum, so we might need to - * backtrack the repeat to consume more. - */ - RE_BacktrackData* bt_data; - - /* Get the offset to the repeat values in the context. */ - rp_data = &state->repeats[index]; - if (!add_backtrack(safe_state, RE_OP_LAZY_REPEAT_ONE)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.position.node = node; - bt_data->repeat.index = index; - bt_data->repeat.text_pos = rp_data->start; - bt_data->repeat.count = rp_data->count; - - rp_data->start = state->text_pos; - rp_data->count = count; - } - - /* Advance into the tail. */ - state->text_pos += (Py_ssize_t)count * node->step; - node = node->next_1.node; - break; - } - case RE_OP_LOOKAROUND: /* Start of a lookaround subpattern. 
*/ - { - RE_AtomicData* lookaround; - TRACE(("%s %d\n", re_op_text[node->op], node->match)) - - if (!add_backtrack(safe_state, RE_OP_LOOKAROUND)) - return RE_ERROR_BACKTRACKING; - state->backtrack->lookaround.too_few_errors = - state->too_few_errors; - state->backtrack->lookaround.capture_change = - state->capture_change; - state->backtrack->lookaround.inside = TRUE; - state->backtrack->lookaround.node = node; - - lookaround = push_atomic(safe_state); - if (!lookaround) - return RE_ERROR_MEMORY; - lookaround->backtrack_count = - state->current_backtrack_block->count; - lookaround->current_backtrack_block = - state->current_backtrack_block; - lookaround->slice_start = state->slice_start; - lookaround->slice_end = state->slice_end; - lookaround->text_pos = state->text_pos; - lookaround->node = node; - lookaround->backtrack = state->backtrack; - lookaround->is_lookaround = TRUE; - lookaround->has_groups = (node->status & RE_STATUS_HAS_GROUPS) != - 0; - lookaround->has_repeats = (node->status & RE_STATUS_HAS_REPEATS) != - 0; - - /* Save the groups and repeats. */ - if (lookaround->has_groups && !push_groups(safe_state)) - return RE_ERROR_MEMORY; - - if (lookaround->has_repeats && !push_repeats(safe_state)) - return RE_ERROR_MEMORY; - - lookaround->saved_groups = state->current_saved_groups; - lookaround->saved_repeats = state->current_saved_repeats; - - state->slice_start = 0; - state->slice_end = state->text_length; - - node = node->next_1.node; - break; - } - case RE_OP_PROPERTY: /* A property. 
*/ - TRACE(("%s %d %d\n", re_op_text[node->op], node->match, - node->values[0])) - - if (state->text_pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && - matches_PROPERTY(encoding, locale_info, node, - char_at(state->text, state->text_pos)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_PROPERTY_IGN: /* A property, ignoring case. */ - TRACE(("%s %d %d\n", re_op_text[node->op], node->match, - node->values[0])) - - if (state->text_pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && - matches_PROPERTY_IGN(encoding, locale_info, node, - char_at(state->text, state->text_pos)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_PROPERTY_IGN_REV: /* A property, backwards, ignoring case. 
*/ - TRACE(("%s %d %d\n", re_op_text[node->op], node->match, - node->values[0])) - - if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && - matches_PROPERTY_IGN(encoding, locale_info, node, - char_at(state->text, state->text_pos - 1)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_PROPERTY_REV: /* A property, backwards. */ - TRACE(("%s %d %d\n", re_op_text[node->op], node->match, - node->values[0])) - - if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && - matches_PROPERTY(encoding, locale_info, node, - char_at(state->text, state->text_pos - 1)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_PRUNE: /* Prune the backtracking. */ - TRACE(("%s\n", re_op_text[node->op])) - - prune_backtracking(state); - - node = node->next_1.node; - break; - case RE_OP_RANGE: /* A range. 
*/ - TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, - node->values[0], node->values[1])) - - if (state->text_pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && matches_RANGE(encoding, - locale_info, node, char_at(state->text, state->text_pos)) == - node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_RANGE_IGN: /* A range, ignoring case. */ - TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, - node->values[0], node->values[1])) - - if (state->text_pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && - matches_RANGE_IGN(encoding, locale_info, node, - char_at(state->text, state->text_pos)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_RANGE_IGN_REV: /* A range, backwards, ignoring case. 
*/ - TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, - node->values[0], node->values[1])) - - if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && - matches_RANGE_IGN(encoding, locale_info, node, - char_at(state->text, state->text_pos - 1)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_RANGE_REV: /* A range, backwards. */ - TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, - node->values[0], node->values[1])) - - if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && matches_RANGE(encoding, - locale_info, node, char_at(state->text, state->text_pos - 1)) == - node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_REF_GROUP: /* Reference to a capture group. */ - { - RE_GroupData* group; - RE_GroupSpan* span; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Capture group indexes are 1-based (excluding group 0, which is - * the entire matched string). - * - * Check whether the captured text, if any, exists at this position - * in the string. - */ - - /* Did the group capture anything? 
*/ - group = &state->groups[node->values[0] - 1]; - if (group->current_capture < 0) - goto backtrack; - - span = &group->captures[group->current_capture]; - - if (string_pos < 0) - string_pos = span->start; - - /* Try comparing. */ - while (string_pos < span->end) { - if (state->text_pos >= state->text_length && - state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && - same_char(char_at(state->text, state->text_pos), - char_at(state->text, string_pos))) { - ++string_pos; - ++state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string(safe_state, search, - &state->text_pos, node, &string_pos, &matched, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - } - - string_pos = -1; - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_REF_GROUP_FLD: /* Reference to a capture group, ignoring case. */ - { - RE_GroupData* group; - RE_GroupSpan* span; - int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, - Py_UCS4* folded); - int folded_len; - int gfolded_len; - Py_UCS4 folded[RE_MAX_FOLDED]; - Py_UCS4 gfolded[RE_MAX_FOLDED]; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Capture group indexes are 1-based (excluding group 0, which is - * the entire matched string). - * - * Check whether the captured text, if any, exists at this position - * in the string. - */ - - /* Did the group capture anything? 
*/ - group = &state->groups[node->values[0] - 1]; - if (group->current_capture < 0) - goto backtrack; - - span = &group->captures[group->current_capture]; - - full_case_fold = encoding->full_case_fold; - - if (string_pos < 0) { - string_pos = span->start; - folded_pos = 0; - folded_len = 0; - gfolded_pos = 0; - gfolded_len = 0; - } else { - folded_len = full_case_fold(locale_info, char_at(state->text, - state->text_pos), folded); - gfolded_len = full_case_fold(locale_info, char_at(state->text, - string_pos), gfolded); - } - - /* Try comparing. */ - while (string_pos < span->end) { - /* Case-fold at current position in text. */ - if (folded_pos >= folded_len) { - if (state->text_pos >= state->text_length && - state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end) - folded_len = full_case_fold(locale_info, - char_at(state->text, state->text_pos), folded); - else - folded_len = 0; - - folded_pos = 0; - } - - /* Case-fold at current position in group. */ - if (gfolded_pos >= gfolded_len) { - gfolded_len = full_case_fold(locale_info, - char_at(state->text, string_pos), gfolded); - gfolded_pos = 0; - } - - if (folded_pos < folded_len && same_char_ign(encoding, - locale_info, - folded[folded_pos], - gfolded[gfolded_pos])) { - ++folded_pos; - ++gfolded_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_group_fld(safe_state, search, - &state->text_pos, node, &folded_pos, folded_len, - &string_pos, &gfolded_pos, gfolded_len, &matched, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - - if (folded_pos >= folded_len && folded_len > 0) - ++state->text_pos; - - if (gfolded_pos >= gfolded_len) - ++string_pos; - } - - string_pos = -1; - - if (folded_pos < folded_len || gfolded_pos < gfolded_len) - goto backtrack; - - /* Successful match. 
*/ - node = node->next_1.node; - break; - } - case RE_OP_REF_GROUP_FLD_REV: /* Reference to a capture group, ignoring case. */ - { - RE_GroupData* group; - RE_GroupSpan* span; - int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, - Py_UCS4* folded); - int folded_len; - int gfolded_len; - Py_UCS4 folded[RE_MAX_FOLDED]; - Py_UCS4 gfolded[RE_MAX_FOLDED]; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Capture group indexes are 1-based (excluding group 0, which is - * the entire matched string). - * - * Check whether the captured text, if any, exists at this position - * in the string. - */ - - /* Did the group capture anything? */ - group = &state->groups[node->values[0] - 1]; - if (group->current_capture < 0) - goto backtrack; - - span = &group->captures[group->current_capture]; - - full_case_fold = encoding->full_case_fold; - - if (string_pos < 0) { - string_pos = span->end; - folded_pos = 0; - folded_len = 0; - gfolded_pos = 0; - gfolded_len = 0; - } else { - folded_len = full_case_fold(locale_info, char_at(state->text, - state->text_pos - 1), folded); - gfolded_len = full_case_fold(locale_info, char_at(state->text, - string_pos - 1), gfolded); - } - - /* Try comparing. */ - while (string_pos > span->start) { - /* Case-fold at current position in text. */ - if (folded_pos <= 0) { - if (state->text_pos <= 0 && state->partial_side == - RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start) - folded_len = full_case_fold(locale_info, - char_at(state->text, state->text_pos - 1), folded); - else - folded_len = 0; - - folded_pos = folded_len; - } - - /* Case-fold at current position in group. 
*/ - if (gfolded_pos <= 0) { - gfolded_len = full_case_fold(locale_info, - char_at(state->text, string_pos - 1), gfolded); - gfolded_pos = gfolded_len; - } - - if (folded_pos > 0 && same_char_ign(encoding, locale_info, - folded[folded_pos - 1], - gfolded[gfolded_pos - 1])) { - --folded_pos; - --gfolded_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_group_fld(safe_state, search, - &state->text_pos, node, &folded_pos, folded_len, - &string_pos, &gfolded_pos, gfolded_len, &matched, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - - if (folded_pos <= 0 && folded_len > 0) - --state->text_pos; - - if (gfolded_pos <= 0) - --string_pos; - } - - string_pos = -1; - - if (folded_pos > 0 || gfolded_pos > 0) - goto backtrack; - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_REF_GROUP_IGN: /* Reference to a capture group, ignoring case. */ - { - RE_GroupData* group; - RE_GroupSpan* span; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Capture group indexes are 1-based (excluding group 0, which is - * the entire matched string). - * - * Check whether the captured text, if any, exists at this position - * in the string. - */ - - /* Did the group capture anything? */ - group = &state->groups[node->values[0] - 1]; - if (group->current_capture < 0) - goto backtrack; - - span = &group->captures[group->current_capture]; - - if (string_pos < 0) - string_pos = span->start; - - /* Try comparing. 
*/ - while (string_pos < span->end) { - if (state->text_pos >= state->text_length && - state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && - same_char_ign(encoding, locale_info, char_at(state->text, - state->text_pos), char_at(state->text, string_pos))) { - ++string_pos; - ++state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string(safe_state, search, - &state->text_pos, node, &string_pos, &matched, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - } - - string_pos = -1; - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_REF_GROUP_IGN_REV: /* Reference to a capture group, ignoring case. */ - { - RE_GroupData* group; - RE_GroupSpan* span; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Capture group indexes are 1-based (excluding group 0, which is - * the entire matched string). - * - * Check whether the captured text, if any, exists at this position - * in the string. - */ - - /* Did the group capture anything? */ - group = &state->groups[node->values[0] - 1]; - if (group->current_capture < 0) - goto backtrack; - - span = &group->captures[group->current_capture]; - - if (string_pos < 0) - string_pos = span->end; - - /* Try comparing. 
*/ - while (string_pos > span->start) { - if (state->text_pos <= 0 && state->partial_side == - RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && - same_char_ign(encoding, locale_info, char_at(state->text, - state->text_pos - 1), char_at(state->text, string_pos - 1))) - { - --string_pos; - --state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string(safe_state, search, - &state->text_pos, node, &string_pos, &matched, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - } - - string_pos = -1; - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_REF_GROUP_REV: /* Reference to a capture group. */ - { - RE_GroupData* group; - RE_GroupSpan* span; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Capture group indexes are 1-based (excluding group 0, which is - * the entire matched string). - * - * Check whether the captured text, if any, exists at this position - * in the string. - */ - - /* Did the group capture anything? */ - group = &state->groups[node->values[0] - 1]; - if (group->current_capture < 0) - goto backtrack; - - span = &group->captures[group->current_capture]; - - if (string_pos < 0) - string_pos = span->end; - - /* Try comparing. 
*/ - while (string_pos > span->start) { - if (state->text_pos <= 0 && state->partial_side == - RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && - same_char(char_at(state->text, state->text_pos - 1), - char_at(state->text, string_pos - 1))) { - --string_pos; - --state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string(safe_state, search, - &state->text_pos, node, &string_pos, &matched, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - } - - string_pos = -1; - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_SEARCH_ANCHOR: /* At the start of the search. */ - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - if (state->text_pos == state->search_anchor) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_SET_DIFF: /* Character set. */ - case RE_OP_SET_INTER: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_UNION: - TRACE(("%s %d\n", re_op_text[node->op], node->match)) - - if (state->text_pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && matches_SET(encoding, - locale_info, node, char_at(state->text, state->text_pos)) == - node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_SET_DIFF_IGN: /* Character set, ignoring case. 
*/ - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_UNION_IGN: - TRACE(("%s %d\n", re_op_text[node->op], node->match)) - - if (state->text_pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && matches_SET_IGN(encoding, - locale_info, node, char_at(state->text, state->text_pos)) == - node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_SET_DIFF_IGN_REV: /* Character set, ignoring case. */ - case RE_OP_SET_INTER_IGN_REV: - case RE_OP_SET_SYM_DIFF_IGN_REV: - case RE_OP_SET_UNION_IGN_REV: - TRACE(("%s %d\n", re_op_text[node->op], node->match)) - - if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && - matches_SET_IGN(encoding, locale_info, node, char_at(state->text, - state->text_pos - 1)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_SET_DIFF_REV: /* Character set. 
*/ - case RE_OP_SET_INTER_REV: - case RE_OP_SET_SYM_DIFF_REV: - case RE_OP_SET_UNION_REV: - TRACE(("%s %d\n", re_op_text[node->op], node->match)) - - if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && matches_SET(encoding, - locale_info, node, char_at(state->text, state->text_pos - 1)) == - node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_SKIP: /* Skip the part of the text already matched. */ - TRACE(("%s\n", re_op_text[node->op])) - - if (node->status & RE_STATUS_REVERSE) - state->slice_end = state->text_pos; - else - state->slice_start = state->text_pos; - - prune_backtracking(state); - node = node->next_1.node; - break; - case RE_OP_START_GROUP: /* Start of a capture group. */ - { - RE_CODE private_index; - RE_CODE public_index; - RE_GroupData* group; - RE_BacktrackData* bt_data; - TRACE(("%s %d\n", re_op_text[node->op], node->values[1])) - - /* Capture group indexes are 1-based (excluding group 0, which is - * the entire matched string). 
- */ - private_index = node->values[0]; - public_index = node->values[1]; - group = &state->groups[private_index - 1]; - - if (!add_backtrack(safe_state, RE_OP_START_GROUP)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->group.private_index = private_index; - bt_data->group.public_index = public_index; - bt_data->group.text_pos = group->span.start; - bt_data->group.capture = (BOOL)node->values[2]; - bt_data->group.current_capture = group->current_capture; - - if (pattern->group_info[private_index - 1].referenced && - group->span.start != state->text_pos) - ++state->capture_change; - group->span.start = state->text_pos; - - /* Save the capture? */ - if (node->values[2]) { - group->current_capture = (Py_ssize_t)group->capture_count; - if (!save_capture(safe_state, private_index, public_index)) - return RE_ERROR_MEMORY; - } - - node = node->next_1.node; - break; - } - case RE_OP_START_OF_LINE: /* At the start of a line. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_START_OF_LINE(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_START_OF_LINE_U: /* At the start of a line. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_START_OF_LINE_U(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_START_OF_STRING: /* At the start of the string. 
*/ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_START_OF_STRING(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_START_OF_WORD: /* At the start of a word. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_START_OF_WORD(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_STRING: /* A string. */ - { - Py_ssize_t length; - RE_CODE* values; - TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) - - if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == - state->req_pos && string_pos < 0) - state->text_pos = state->req_end; - else { - length = (Py_ssize_t)node->value_count; - - if (string_pos < 0) - string_pos = 0; - - values = node->values; - - /* Try comparing. 
*/ - while (string_pos < length) { - if (state->text_pos >= state->text_length && - state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && - same_char(char_at(state->text, state->text_pos), - values[string_pos])) { - ++string_pos; - ++state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string(safe_state, search, - &state->text_pos, node, &string_pos, &matched, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - } - } - - string_pos = -1; - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_STRING_FLD: /* A string, ignoring case. */ - { - Py_ssize_t length; - int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, - Py_UCS4* folded); - RE_CODE* values; - int folded_len; - Py_UCS4 folded[RE_MAX_FOLDED]; - TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) - - if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == - state->req_pos && string_pos < 0) - state->text_pos = state->req_end; - else { - length = (Py_ssize_t)node->value_count; - - full_case_fold = encoding->full_case_fold; - - if (string_pos < 0) { - string_pos = 0; - folded_pos = 0; - folded_len = 0; - } else { - folded_len = full_case_fold(locale_info, - char_at(state->text, state->text_pos), folded); - if (folded_pos >= folded_len) { - if (state->text_pos >= state->slice_end) - goto backtrack; - - ++state->text_pos; - folded_pos = 0; - folded_len = 0; - } - } - - values = node->values; - - /* Try comparing. 
*/ - while (string_pos < length) { - if (folded_pos >= folded_len) { - if (state->text_pos >= state->text_length && - state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end) - folded_len = full_case_fold(locale_info, - char_at(state->text, state->text_pos), folded); - else - folded_len = 0; - - folded_pos = 0; - } - - if (folded_pos < folded_len && same_char_ign(encoding, - locale_info, folded[folded_pos], values[string_pos])) { - ++string_pos; - ++folded_pos; - - if (folded_pos >= folded_len) - ++state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string_fld(safe_state, search, - &state->text_pos, node, &string_pos, &folded_pos, - folded_len, &matched, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - - if (folded_pos >= folded_len && folded_len > 0) - ++state->text_pos; - } else { - string_pos = -1; - goto backtrack; - } - } - - if (node->status & RE_STATUS_FUZZY) { - while (folded_pos < folded_len) { - BOOL matched; - - if (!fuzzy_match_string_fld(safe_state, search, - &state->text_pos, node, &string_pos, &folded_pos, - folded_len, &matched, 1)) - return RE_ERROR_BACKTRACKING; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - - if (folded_pos >= folded_len && folded_len > 0) - ++state->text_pos; - } - } - - string_pos = -1; - - if (folded_pos < folded_len) - goto backtrack; - } - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_STRING_FLD_REV: /* A string, ignoring case. 
*/ - { - Py_ssize_t length; - int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, - Py_UCS4* folded); - RE_CODE* values; - int folded_len; - Py_UCS4 folded[RE_MAX_FOLDED]; - TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) - - if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == - state->req_pos && string_pos < 0) - state->text_pos = state->req_end; - else { - length = (Py_ssize_t)node->value_count; - - full_case_fold = encoding->full_case_fold; - - if (string_pos < 0) { - string_pos = length; - folded_pos = 0; - folded_len = 0; - } else { - folded_len = full_case_fold(locale_info, - char_at(state->text, state->text_pos - 1), folded); - if (folded_pos <= 0) { - if (state->text_pos <= state->slice_start) - goto backtrack; - - --state->text_pos; - folded_pos = 0; - folded_len = 0; - } - } - - values = node->values; - - /* Try comparing. */ - while (string_pos > 0) { - if (folded_pos <= 0) { - if (state->text_pos <= 0 && state->partial_side == - RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start) - folded_len = full_case_fold(locale_info, - char_at(state->text, state->text_pos - 1), - folded); - else - folded_len = 0; - - folded_pos = folded_len; - } - - if (folded_pos > 0 && same_char_ign(encoding, locale_info, - folded[folded_pos - 1], values[string_pos - 1])) { - --string_pos; - --folded_pos; - - if (folded_pos <= 0) - --state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string_fld(safe_state, search, - &state->text_pos, node, &string_pos, &folded_pos, - folded_len, &matched, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - - if (folded_pos <= 0 && folded_len > 0) - --state->text_pos; - } else { - string_pos = -1; - goto backtrack; - } - } - - if (node->status & RE_STATUS_FUZZY) { - while (folded_pos > 0) { - BOOL matched; - - if (!fuzzy_match_string_fld(safe_state, search, - 
&state->text_pos, node, &string_pos, &folded_pos, - folded_len, &matched, -1)) - return RE_ERROR_BACKTRACKING; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - - if (folded_pos <= 0 && folded_len > 0) - --state->text_pos; - } - } - - string_pos = -1; - - if (folded_pos > 0) - goto backtrack; - } - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_STRING_IGN: /* A string, ignoring case. */ - { - Py_ssize_t length; - RE_CODE* values; - TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) - - if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == - state->req_pos && string_pos < 0) - state->text_pos = state->req_end; - else { - length = (Py_ssize_t)node->value_count; - - if (string_pos < 0) - string_pos = 0; - - values = node->values; - - /* Try comparing. */ - while (string_pos < length) { - if (state->text_pos >= state->text_length && - state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && - same_char_ign(encoding, locale_info, char_at(state->text, - state->text_pos), values[string_pos])) { - ++string_pos; - ++state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string(safe_state, search, - &state->text_pos, node, &string_pos, &matched, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - } - } - - string_pos = -1; - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_STRING_IGN_REV: /* A string, ignoring case. 
*/ - { - Py_ssize_t length; - RE_CODE* values; - TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) - - if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == - state->req_pos && string_pos < 0) - state->text_pos = state->req_end; - else { - length = (Py_ssize_t)node->value_count; - - if (string_pos < 0) - string_pos = length; - - values = node->values; - - /* Try comparing. */ - while (string_pos > 0) { - if (state->text_pos <= 0 && state->partial_side == - RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && - same_char_ign(encoding, locale_info, char_at(state->text, - state->text_pos - 1), values[string_pos - 1])) { - --string_pos; - --state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string(safe_state, search, - &state->text_pos, node, &string_pos, &matched, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - } - } - - string_pos = -1; - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_STRING_REV: /* A string. */ - { - Py_ssize_t length; - RE_CODE* values; - TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) - - if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == - state->req_pos && string_pos < 0) - state->text_pos = state->req_end; - else { - length = (Py_ssize_t)node->value_count; - - if (string_pos < 0) - string_pos = length; - - values = node->values; - - /* Try comparing. 
*/ - while (string_pos > 0) { - if (state->text_pos <= 0 && state->partial_side == - RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && - same_char(char_at(state->text, state->text_pos - 1), - values[string_pos - 1])) { - --string_pos; - --state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string(safe_state, search, - &state->text_pos, node, &string_pos, &matched, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - } - } - - string_pos = -1; - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_STRING_SET: /* Member of a string set. */ - { - int status; - TRACE(("%s\n", re_op_text[node->op])) - - status = string_set_match_fwdrev(safe_state, node, FALSE); - if (status < 0) - return status; - if (status == 0) - goto backtrack; - node = node->next_1.node; - break; - } - case RE_OP_STRING_SET_FLD: /* Member of a string set, ignoring case. */ - { - int status; - TRACE(("%s\n", re_op_text[node->op])) - - status = string_set_match_fld_fwdrev(safe_state, node, FALSE); - if (status < 0) - return status; - if (status == 0) - goto backtrack; - node = node->next_1.node; - break; - } - case RE_OP_STRING_SET_FLD_REV: /* Member of a string set, ignoring case. */ - { - int status; - TRACE(("%s\n", re_op_text[node->op])) - - status = string_set_match_fld_fwdrev(safe_state, node, TRUE); - if (status < 0) - return status; - if (status == 0) - goto backtrack; - node = node->next_1.node; - break; - } - case RE_OP_STRING_SET_IGN: /* Member of a string set, ignoring case. 
*/ - { - int status; - TRACE(("%s\n", re_op_text[node->op])) - - status = string_set_match_ign_fwdrev(safe_state, node, FALSE); - if (status < 0) - return status; - if (status == 0) - goto backtrack; - node = node->next_1.node; - break; - } - case RE_OP_STRING_SET_IGN_REV: /* Member of a string set, ignoring case. */ - { - int status; - TRACE(("%s\n", re_op_text[node->op])) - - status = string_set_match_ign_fwdrev(safe_state, node, TRUE); - if (status < 0) - return status; - if (status == 0) - goto backtrack; - node = node->next_1.node; - break; - } - case RE_OP_STRING_SET_REV: /* Member of a string set. */ - { - int status; - TRACE(("%s\n", re_op_text[node->op])) - - status = string_set_match_fwdrev(safe_state, node, TRUE); - if (status < 0) - return status; - if (status == 0) - goto backtrack; - node = node->next_1.node; - break; - } - case RE_OP_SUCCESS: /* Success. */ - /* Must the match advance past its start? */ - TRACE(("%s\n", re_op_text[node->op])) - - if (state->text_pos == state->search_anchor && state->must_advance) - goto backtrack; - - if (state->match_all) { - /* We want to match all of the slice. */ - if (state->reverse) { - if (state->text_pos != state->slice_start) - goto backtrack; - } else { - if (state->text_pos != state->slice_end) - goto backtrack; - } - } - - if (state->pattern->flags & RE_FLAG_POSIX) { - /* If we're looking for a POSIX match, check whether this one - * is better and then keep looking. - */ - if (!check_posix_match(safe_state)) - return RE_ERROR_MEMORY; - - goto backtrack; - } - - return RE_ERROR_SUCCESS; - default: /* Illegal opcode! */ - TRACE(("UNKNOWN OP %d\n", node->op)) - return RE_ERROR_ILLEGAL; - } - } - -backtrack: - for (;;) { - RE_BacktrackData* bt_data; - TRACE(("BACKTRACK ")) - - /* Should we abort the matching? 
*/ - ++state->iterations; - - if (state->iterations == 0 && safe_check_signals(safe_state)) - return RE_ERROR_INTERRUPTED; - - bt_data = last_backtrack(state); - - switch (bt_data->op) { - case RE_OP_ANY: /* Any character except a newline. */ - case RE_OP_ANY_ALL: /* Any character at all. */ - case RE_OP_ANY_ALL_REV: /* Any character at all, backwards. */ - case RE_OP_ANY_REV: /* Any character except a newline, backwards. */ - case RE_OP_ANY_U: /* Any character except a line separator. */ - case RE_OP_ANY_U_REV: /* Any character except a line separator, backwards. */ - case RE_OP_CHARACTER: /* A character. */ - case RE_OP_CHARACTER_IGN: /* A character, ignoring case. */ - case RE_OP_CHARACTER_IGN_REV: /* A character, ignoring case, backwards. */ - case RE_OP_CHARACTER_REV: /* A character, backwards. */ - case RE_OP_PROPERTY: /* A property. */ - case RE_OP_PROPERTY_IGN: /* A property, ignoring case. */ - case RE_OP_PROPERTY_IGN_REV: /* A property, ignoring case, backwards. */ - case RE_OP_PROPERTY_REV: /* A property, backwards. */ - case RE_OP_RANGE: /* A range. */ - case RE_OP_RANGE_IGN: /* A range, ignoring case. */ - case RE_OP_RANGE_IGN_REV: /* A range, ignoring case, backwards. */ - case RE_OP_RANGE_REV: /* A range, backwards. */ - case RE_OP_SET_DIFF: /* Set difference. */ - case RE_OP_SET_DIFF_IGN: /* Set difference, ignoring case. */ - case RE_OP_SET_DIFF_IGN_REV: /* Set difference, ignoring case, backwards. */ - case RE_OP_SET_DIFF_REV: /* Set difference, backwards. */ - case RE_OP_SET_INTER: /* Set intersection. */ - case RE_OP_SET_INTER_IGN: /* Set intersection, ignoring case. */ - case RE_OP_SET_INTER_IGN_REV: /* Set intersection, ignoring case, backwards. */ - case RE_OP_SET_INTER_REV: /* Set intersection, backwards. */ - case RE_OP_SET_SYM_DIFF: /* Set symmetric difference. */ - case RE_OP_SET_SYM_DIFF_IGN: /* Set symmetric difference, ignoring case. */ - case RE_OP_SET_SYM_DIFF_IGN_REV: /* Set symmetric difference, ignoring case, backwards. 
*/ - case RE_OP_SET_SYM_DIFF_REV: /* Set symmetric difference, backwards. */ - case RE_OP_SET_UNION: /* Set union. */ - case RE_OP_SET_UNION_IGN: /* Set union, ignoring case. */ - case RE_OP_SET_UNION_IGN_REV: /* Set union, ignoring case, backwards. */ - case RE_OP_SET_UNION_REV: /* Set union, backwards. */ - TRACE(("%s\n", re_op_text[bt_data->op])) - - status = retry_fuzzy_match_item(safe_state, search, - &state->text_pos, &node, TRUE); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (node) - goto advance; - break; - case RE_OP_ATOMIC: /* Start of an atomic group. */ - { - RE_AtomicData* atomic; - TRACE(("%s\n", re_op_text[bt_data->op])) - - /* Backtrack to the start of an atomic group. */ - atomic = pop_atomic(safe_state); - - if (atomic->has_repeats) - pop_repeats(state); - - if (atomic->has_groups) - pop_groups(state); - - state->too_few_errors = bt_data->atomic.too_few_errors; - state->capture_change = bt_data->atomic.capture_change; - state->current_group_call_frame = atomic->call_frame; - - discard_backtrack(state); - break; - } - case RE_OP_BODY_END: - { - RE_RepeatData* rp_data; - TRACE(("%s %d\n", re_op_text[bt_data->op], bt_data->repeat.index)) - - /* We're backtracking into the body. */ - rp_data = &state->repeats[bt_data->repeat.index]; - - /* Restore the repeat info. */ - rp_data->count = bt_data->repeat.count; - rp_data->start = bt_data->repeat.start; - rp_data->capture_change = bt_data->repeat.capture_change; - - discard_backtrack(state); - break; - } - case RE_OP_BODY_START: - { - TRACE(("%s %d\n", re_op_text[bt_data->op], bt_data->repeat.index)) - - /* The body may have failed to match at this position. */ - if (!guard_repeat(safe_state, bt_data->repeat.index, - bt_data->repeat.text_pos, RE_STATUS_BODY, TRUE)) - return RE_ERROR_MEMORY; - - discard_backtrack(state); - break; - } - case RE_OP_BOUNDARY: /* On a word boundary. */ - case RE_OP_DEFAULT_BOUNDARY: /* On a default word boundary. 
*/ - case RE_OP_DEFAULT_END_OF_WORD: /* At a default end of a word. */ - case RE_OP_DEFAULT_START_OF_WORD: /* At a default start of a word. */ - case RE_OP_END_OF_LINE: /* At the end of a line. */ - case RE_OP_END_OF_LINE_U: /* At the end of a line. */ - case RE_OP_END_OF_STRING: /* At the end of the string. */ - case RE_OP_END_OF_STRING_LINE: /* At end of string or final newline. */ - case RE_OP_END_OF_STRING_LINE_U: /* At end of string or final newline. */ - case RE_OP_END_OF_WORD: /* At end of a word. */ - case RE_OP_GRAPHEME_BOUNDARY: /* On a grapheme boundary. */ - case RE_OP_SEARCH_ANCHOR: /* At the start of the search. */ - case RE_OP_START_OF_LINE: /* At the start of a line. */ - case RE_OP_START_OF_LINE_U: /* At the start of a line. */ - case RE_OP_START_OF_STRING: /* At the start of the string. */ - case RE_OP_START_OF_WORD: /* At start of a word. */ - TRACE(("%s\n", re_op_text[bt_data->op])) - - status = retry_fuzzy_match_item(safe_state, search, - &state->text_pos, &node, FALSE); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (node) - goto advance; - break; - case RE_OP_BRANCH: /* 2-way branch. */ - TRACE(("%s\n", re_op_text[bt_data->op])) - - node = bt_data->branch.position.node; - state->text_pos = bt_data->branch.position.text_pos; - discard_backtrack(state); - goto advance; - case RE_OP_CALL_REF: /* A group call ref. */ - case RE_OP_GROUP_CALL: /* Group call. */ - TRACE(("%s\n", re_op_text[bt_data->op])) - - pop_group_return(state); - discard_backtrack(state); - break; - case RE_OP_CONDITIONAL: /* Conditional subpattern. */ - { - TRACE(("%s\n", re_op_text[bt_data->op])) - - if (bt_data->lookaround.inside) { - /* Backtracked to the start of a lookaround. 
*/ - RE_AtomicData* conditional; - - conditional = pop_atomic(safe_state); - state->text_pos = conditional->text_pos; - state->slice_end = conditional->slice_end; - state->slice_start = conditional->slice_start; - state->current_backtrack_block = - conditional->current_backtrack_block; - state->current_backtrack_block->count = - conditional->backtrack_count; - - /* Restore the groups and repeats and certain flags. */ - if (conditional->has_repeats) - pop_repeats(state); - - if (conditional->has_groups) - pop_groups(state); - - state->too_few_errors = bt_data->lookaround.too_few_errors; - state->capture_change = bt_data->lookaround.capture_change; - - if (bt_data->lookaround.node->match) { - /* It's a positive lookaround that's failed. - * - * Go to the 'false' branch. - */ - node = bt_data->lookaround.node->nonstring.next_2.node; - } else { - /* It's a negative lookaround that's failed. - * - * Go to the 'true' branch. - */ - node = bt_data->lookaround.node->nonstring.next_2.node; - } - - discard_backtrack(state); - - goto advance; - } else { - /* Backtracked to a lookaround. If it's a positive lookaround - * that succeeded, we need to restore the groups; if it's a - * negative lookaround that failed, it would have completely - * backtracked inside and already restored the groups. We also - * need to restore certain flags. - */ - RE_Node* node; - - node = bt_data->lookaround.node; - if (node->match && (node->status & RE_STATUS_HAS_GROUPS)) - pop_groups(state); - - state->too_few_errors = bt_data->lookaround.too_few_errors; - state->capture_change = bt_data->lookaround.capture_change; - - discard_backtrack(state); - } - break; - } - case RE_OP_END_FUZZY: /* End of fuzzy matching. 
*/ - TRACE(("%s\n", re_op_text[bt_data->op])) - - state->total_fuzzy_counts[RE_FUZZY_SUB] -= - state->fuzzy_info.counts[RE_FUZZY_SUB]; - state->total_fuzzy_counts[RE_FUZZY_INS] -= - state->fuzzy_info.counts[RE_FUZZY_INS]; - state->total_fuzzy_counts[RE_FUZZY_DEL] -= - state->fuzzy_info.counts[RE_FUZZY_DEL]; - - /* We need to retry the fuzzy match. */ - status = retry_fuzzy_insert(safe_state, &state->text_pos, &node); - if (status < 0) - return RE_ERROR_PARTIAL; - - /* If there were too few errors, in the fuzzy section, try again. - */ - if (state->too_few_errors) { - state->too_few_errors = FALSE; - goto backtrack; - } - - if (node) { - state->total_fuzzy_counts[RE_FUZZY_SUB] += - state->fuzzy_info.counts[RE_FUZZY_SUB]; - state->total_fuzzy_counts[RE_FUZZY_INS] += - state->fuzzy_info.counts[RE_FUZZY_INS]; - state->total_fuzzy_counts[RE_FUZZY_DEL] += - state->fuzzy_info.counts[RE_FUZZY_DEL]; - - node = node->next_1.node; - goto advance; - } - break; - case RE_OP_END_GROUP: /* End of a capture group. */ - { - RE_CODE private_index; - RE_GroupData* group; - TRACE(("%s %d\n", re_op_text[bt_data->op], - bt_data->group.public_index)) - - private_index = bt_data->group.private_index; - group = &state->groups[private_index - 1]; - - /* Unsave the capture? */ - if (bt_data->group.capture) - unsave_capture(state, bt_data->group.private_index, - bt_data->group.public_index); - - if (pattern->group_info[private_index - 1].referenced && - group->span.end != bt_data->group.text_pos) - --state->capture_change; - group->span.end = bt_data->group.text_pos; - group->current_capture = bt_data->group.current_capture; - - discard_backtrack(state); - break; - } - case RE_OP_FAILURE: /* Failure. */ - { - TRACE(("%s\n", re_op_text[bt_data->op])) - - /* Have we been looking for a POSIX match? */ - if (state->found_match) { - restore_best_match(safe_state); - return RE_OP_SUCCESS; - } - - /* Do we have to advance? */ - if (!search) - return RE_ERROR_FAILURE; - - /* Can we advance? 
*/ - state->text_pos = state->match_pos; - - if (state->reverse) { - if (state->text_pos <= state->slice_start) - return RE_ERROR_FAILURE; - } else { - if (state->text_pos >= state->slice_end) - return RE_ERROR_FAILURE; - } - - /* Skip over any repeated leading characters. */ - switch (start_node->op) { - case RE_OP_GREEDY_REPEAT_ONE: - case RE_OP_LAZY_REPEAT_ONE: - { - size_t count; - BOOL is_partial; - - /* How many characters did the repeat actually match? */ - count = count_one(state, start_node->nonstring.next_2.node, - state->text_pos, start_node->values[2], &is_partial); - - /* If it's fewer than the maximum then skip over those - * characters. - */ - if (count < start_node->values[2]) - state->text_pos += (Py_ssize_t)count * pattern_step; - break; - } - } - - /* Advance and try to match again. e also need to check whether we - * need to skip. - */ - if (state->reverse) { - if (state->text_pos > state->slice_end) - state->text_pos = state->slice_end; - else - --state->text_pos; - } else { - if (state->text_pos < state->slice_start) - state->text_pos = state->slice_start; - else - ++state->text_pos; - } - - /* Clear the groups. */ - clear_groups(state); - - /* Reset the guards. */ - reset_guards(state); - - goto start_match; - } - case RE_OP_FUZZY: /* Fuzzy matching. */ - { - RE_FuzzyInfo* fuzzy_info; - TRACE(("%s\n", re_op_text[bt_data->op])) - - /* Restore the previous fuzzy info. */ - fuzzy_info = &state->fuzzy_info; - memmove(fuzzy_info, &bt_data->fuzzy.fuzzy_info, - sizeof(RE_FuzzyInfo)); - - discard_backtrack(state); - break; - } - case RE_OP_GREEDY_REPEAT: /* Greedy repeat. */ - case RE_OP_LAZY_REPEAT: /* Lazy repeat. */ - { - RE_RepeatData* rp_data; - TRACE(("%s\n", re_op_text[bt_data->op])) - - /* The repeat failed to match. */ - rp_data = &state->repeats[bt_data->repeat.index]; - - /* The body may have failed to match at this position. 
*/ - if (!guard_repeat(safe_state, bt_data->repeat.index, - bt_data->repeat.text_pos, RE_STATUS_BODY, TRUE)) - return RE_ERROR_MEMORY; - - /* Restore the previous repeat. */ - rp_data->count = bt_data->repeat.count; - rp_data->start = bt_data->repeat.start; - rp_data->capture_change = bt_data->repeat.capture_change; - - discard_backtrack(state); - break; - } - case RE_OP_GREEDY_REPEAT_ONE: /* Greedy repeat for one character. */ - { - RE_RepeatData* rp_data; - size_t count; - Py_ssize_t step; - Py_ssize_t pos; - Py_ssize_t limit; - RE_Node* test; - BOOL match; - BOOL m; - size_t index; - TRACE(("%s\n", re_op_text[bt_data->op])) - - node = bt_data->repeat.position.node; - - rp_data = &state->repeats[bt_data->repeat.index]; - - /* Unmatch one character at a time until the tail could match or we - * have reached the minimum. - */ - state->text_pos = rp_data->start; - - count = rp_data->count; - step = node->step; - pos = state->text_pos + (Py_ssize_t)count * step; - limit = state->text_pos + (Py_ssize_t)node->values[1] * step; - - /* The tail failed to match at this position. */ - if (!guard_repeat(safe_state, bt_data->repeat.index, pos, - RE_STATUS_TAIL, TRUE)) - return RE_ERROR_MEMORY; - - /* A (*SKIP) might have change the size of the slice. */ - if (step > 0) { - if (limit < state->slice_start) - limit = state->slice_start; - } else { - if (limit > state->slice_end) - limit = state->slice_end; - } - - if (pos == limit) { - /* We've backtracked the repeat as far as we can. 
*/ - rp_data->start = bt_data->repeat.text_pos; - rp_data->count = bt_data->repeat.count; - discard_backtrack(state); - break; - } - - test = node->next_1.test; - - m = test->match; - index = node->values[0]; - - match = FALSE; - - if (test->status & RE_STATUS_FUZZY) { - for (;;) { - int status; - RE_Position next_position; - - pos -= step; - - status = try_match(state, &node->next_1, pos, - &next_position); - if (status < 0) - return status; - - if (status != RE_ERROR_FAILURE && - !is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - if (pos == limit) - break; - } - } else { - /* A repeated single-character match is often followed by a - * literal, so checking specially for it can be a good - * optimisation when working with long strings. - */ - switch (test->op) { - case RE_OP_CHARACTER: - { - Py_UCS4 ch; - - ch = test->values[0]; - - for (;;) { - --pos; - - if (same_char(char_at(state->text, pos), ch) == m && - !is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - if (pos == limit) - break; - - } - break; - } - case RE_OP_CHARACTER_IGN: - { - Py_UCS4 ch; - - ch = test->values[0]; - - for (;;) { - --pos; - - if (same_char_ign(encoding, locale_info, - char_at(state->text, pos), ch) == m && - !is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - if (pos == limit) - break; - - } - break; - } - case RE_OP_CHARACTER_IGN_REV: - { - Py_UCS4 ch; - - ch = test->values[0]; - - for (;;) { - ++pos; - - if (same_char_ign(encoding, locale_info, - char_at(state->text, pos - 1), ch) == m && - !is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - if (pos == limit) - break; - - } - break; - } - case RE_OP_CHARACTER_REV: - { - Py_UCS4 ch; - - ch = test->values[0]; - - for (;;) { - ++pos; - - if (same_char(char_at(state->text, pos - 1), ch) == m - && !is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { 
- match = TRUE; - break; - } - - if (pos == limit) - break; - - } - break; - } - case RE_OP_STRING: - { - Py_ssize_t length; - - length = (Py_ssize_t)test->value_count; - - /* The tail is a string. We don't want to go off the end of - * the slice. - */ - pos = min_ssize_t(pos - 1, state->slice_end - length); - - for (;;) { - Py_ssize_t found; - BOOL is_partial; - - if (pos < limit) - break; - - found = string_search_rev(safe_state, test, pos + - length, limit, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - pos = found - length; - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - --pos; - } - break; - } - case RE_OP_STRING_FLD: - { - int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 - ch, Py_UCS4* folded); - Py_ssize_t folded_length; - size_t i; - Py_UCS4 folded[RE_MAX_FOLDED]; - - full_case_fold = encoding->full_case_fold; - - folded_length = 0; - for (i = 0; i < test->value_count; i++) - folded_length += full_case_fold(locale_info, - test->values[i], folded); - - /* The tail is a string. We don't want to go off the end of - * the slice. 
- */ - pos = min_ssize_t(pos - 1, state->slice_end - - folded_length); - - for (;;) { - Py_ssize_t found; - Py_ssize_t new_pos; - BOOL is_partial; - - if (pos < limit) - break; - - found = string_search_fld_rev(safe_state, test, pos + - folded_length, limit, &new_pos, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - pos = found - folded_length; - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - --pos; - } - break; - } - case RE_OP_STRING_FLD_REV: - { - int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 - ch, Py_UCS4* folded); - Py_ssize_t folded_length; - size_t i; - Py_UCS4 folded[RE_MAX_FOLDED]; - - full_case_fold = encoding->full_case_fold; - - folded_length = 0; - for (i = 0; i < test->value_count; i++) - folded_length += full_case_fold(locale_info, - test->values[i], folded); - - /* The tail is a string. We don't want to go off the end of - * the slice. - */ - pos = max_ssize_t(pos + 1, state->slice_start + - folded_length); - - for (;;) { - Py_ssize_t found; - Py_ssize_t new_pos; - BOOL is_partial; - - if (pos > limit) - break; - - found = string_search_fld(safe_state, test, pos - - folded_length, limit, &new_pos, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - pos = found + folded_length; - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - ++pos; - } - break; - } - case RE_OP_STRING_IGN: - { - Py_ssize_t length; - - length = (Py_ssize_t)test->value_count; - - /* The tail is a string. We don't want to go off the end of - * the slice. 
- */ - pos = min_ssize_t(pos - 1, state->slice_end - length); - - for (;;) { - Py_ssize_t found; - BOOL is_partial; - - if (pos < limit) - break; - - found = string_search_ign_rev(safe_state, test, pos + - length, limit, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - pos = found - length; - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - --pos; - } - break; - } - case RE_OP_STRING_IGN_REV: - { - Py_ssize_t length; - - length = (Py_ssize_t)test->value_count; - - /* The tail is a string. We don't want to go off the end of - * the slice. - */ - pos = max_ssize_t(pos + 1, state->slice_start + length); - - for (;;) { - Py_ssize_t found; - BOOL is_partial; - - if (pos > limit) - break; - - found = string_search_ign(safe_state, test, pos - - length, limit, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - pos = found + length; - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - ++pos; - } - break; - } - case RE_OP_STRING_REV: - { - Py_ssize_t length; - - length = (Py_ssize_t)test->value_count; - - /* The tail is a string. We don't want to go off the end of - * the slice. 
- */ - pos = max_ssize_t(pos + 1, state->slice_start + length); - - for (;;) { - Py_ssize_t found; - BOOL is_partial; - - if (pos > limit) - break; - - found = string_search(safe_state, test, pos - length, - limit, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - pos = found + length; - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - ++pos; - } - break; - } - default: - for (;;) { - RE_Position next_position; - - pos -= step; - - status = try_match(state, &node->next_1, pos, - &next_position); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS && - !is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - if (pos == limit) - break; - } - break; - } - } - - if (match) { - count = (size_t)abs_ssize_t(pos - state->text_pos); - - /* The tail could match. */ - if (count > node->values[1]) - /* The match is longer than the minimum, so we might need - * to backtrack the repeat again to consume less. - */ - rp_data->count = count; - else { - /* We've reached or passed the minimum, so we won't need to - * backtrack the repeat again. - */ - rp_data->start = bt_data->repeat.text_pos; - rp_data->count = bt_data->repeat.count; - discard_backtrack(state); - - /* Have we passed the minimum? */ - if (count < node->values[1]) - goto backtrack; - } - - node = node->next_1.node; - state->text_pos = pos; - goto advance; - } else { - /* Don't try this repeated match again. */ - if (step > 0) { - if (!guard_repeat_range(safe_state, bt_data->repeat.index, - limit, pos, RE_STATUS_BODY, TRUE)) - return RE_ERROR_MEMORY; - } else if (step < 0) { - if (!guard_repeat_range(safe_state, bt_data->repeat.index, - pos, limit, RE_STATUS_BODY, TRUE)) - return RE_ERROR_MEMORY; - } - - /* We've backtracked the repeat as far as we can. 
*/ - rp_data->start = bt_data->repeat.text_pos; - rp_data->count = bt_data->repeat.count; - discard_backtrack(state); - } - break; - } - case RE_OP_GROUP_RETURN: /* Group return. */ - { - RE_Node* return_node; - TRACE(("%s\n", re_op_text[bt_data->op])) - - return_node = bt_data->group_call.node; - - push_group_return(safe_state, return_node); - - if (return_node) { - /* Restore the groups. */ - pop_groups(state); - state->capture_change = bt_data->group_call.capture_change; - - /* Restore the repeats. */ - pop_repeats(state); - } - - discard_backtrack(state); - break; - } - case RE_OP_KEEP: /* Keep. */ - { - state->match_pos = bt_data->keep.match_pos; - discard_backtrack(state); - break; - } - case RE_OP_LAZY_REPEAT_ONE: /* Lazy repeat for one character. */ - { - RE_RepeatData* rp_data; - size_t count; - Py_ssize_t step; - Py_ssize_t pos; - Py_ssize_t available; - size_t max_count; - Py_ssize_t limit; - RE_Node* repeated; - RE_Node* test; - BOOL match; - BOOL m; - size_t index; - TRACE(("%s\n", re_op_text[bt_data->op])) - - node = bt_data->repeat.position.node; - - rp_data = &state->repeats[bt_data->repeat.index]; - - /* Match one character at a time until the tail could match or we - * have reached the maximum. - */ - state->text_pos = rp_data->start; - count = rp_data->count; - - step = node->step; - pos = state->text_pos + (Py_ssize_t)count * step; - available = step > 0 ? 
state->slice_end - state->text_pos : - state->text_pos - state->slice_start; - max_count = min_size_t((size_t)available, node->values[2]); - limit = state->text_pos + (Py_ssize_t)max_count * step; - - repeated = node->nonstring.next_2.node; - - test = node->next_1.test; - - m = test->match; - index = node->values[0]; - - match = FALSE; - - if (test->status & RE_STATUS_FUZZY) { - for (;;) { - RE_Position next_position; - - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - pos += step; - - status = try_match(state, &node->next_1, pos, - &next_position); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS && - !is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - if (pos == limit) - break; - } - } else { - /* A repeated single-character match is often followed by a - * literal, so checking specially for it can be a good - * optimisation when working with long strings. - */ - switch (test->op) { - case RE_OP_CHARACTER: - { - Py_UCS4 ch; - - ch = test->values[0]; - - /* The tail is a character. We don't want to go off the end - * of the slice. - */ - limit = min_ssize_t(limit, state->slice_end - 1); - - for (;;) { - if (pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (pos >= limit) - break; - - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - ++pos; - - if (same_char(char_at(state->text, pos), ch) == m && - !is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - case RE_OP_CHARACTER_IGN: - { - Py_UCS4 ch; - - ch = test->values[0]; - - /* The tail is a character. We don't want to go off the end - * of the slice. 
- */ - limit = min_ssize_t(limit, state->slice_end - 1); - - for (;;) { - if (pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (pos >= limit) - break; - - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - ++pos; - - if (same_char_ign(encoding, locale_info, - char_at(state->text, pos), ch) == m && - !is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - case RE_OP_CHARACTER_IGN_REV: - { - Py_UCS4 ch; - - ch = test->values[0]; - - /* The tail is a character. We don't want to go off the end - * of the slice. - */ - limit = max_ssize_t(limit, state->slice_start + 1); - - for (;;) { - if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (pos <= limit) - break; - - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - --pos; - - if (same_char_ign(encoding, locale_info, - char_at(state->text, pos - 1), ch) == m && - !is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - case RE_OP_CHARACTER_REV: - { - Py_UCS4 ch; - - ch = test->values[0]; - - /* The tail is a character. We don't want to go off the end - * of the slice. 
- */ - limit = max_ssize_t(limit, state->slice_start + 1); - - for (;;) { - if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (pos <= limit) - break; - - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - --pos; - - if (same_char(char_at(state->text, pos - 1), ch) == m - && !is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - case RE_OP_STRING: - { - Py_ssize_t length; - - length = (Py_ssize_t)test->value_count; - - /* The tail is a string. We don't want to go off the end of - * the slice. - */ - limit = min_ssize_t(limit, state->slice_end - length); - - for (;;) { - Py_ssize_t found; - BOOL is_partial; - - if (pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (pos >= limit) - break; - - /* Look for the tail string. */ - found = string_search(safe_state, test, pos + 1, limit - + length, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - if (repeated->op == RE_OP_ANY_ALL) - /* Anything can precede the tail. */ - pos = found; - else { - /* Check that what precedes the tail will match. */ - while (pos != found) { - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - ++pos; - } - - if (pos != found) - /* Something preceding the tail didn't match. - */ - break; - } - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - case RE_OP_STRING_FLD: - { - /* The tail is a string. We don't want to go off the end of - * the slice. 
- */ - limit = min_ssize_t(limit, state->slice_end); - - for (;;) { - Py_ssize_t found; - Py_ssize_t new_pos; - BOOL is_partial; - - if (pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (pos >= limit) - break; - - /* Look for the tail string. */ - found = string_search_fld(safe_state, test, pos + 1, - limit, &new_pos, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - if (repeated->op == RE_OP_ANY_ALL) - /* Anything can precede the tail. */ - pos = found; - else { - /* Check that what precedes the tail will match. */ - while (pos != found) { - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - ++pos; - } - - if (pos != found) - /* Something preceding the tail didn't match. - */ - break; - } - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - case RE_OP_STRING_FLD_REV: - { - /* The tail is a string. We don't want to go off the end of - * the slice. - */ - limit = max_ssize_t(limit, state->slice_start); - - for (;;) { - Py_ssize_t found; - Py_ssize_t new_pos; - BOOL is_partial; - - if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (pos <= limit) - break; - - /* Look for the tail string. */ - found = string_search_fld_rev(safe_state, test, pos - - 1, limit, &new_pos, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - if (repeated->op == RE_OP_ANY_ALL) - /* Anything can precede the tail. */ - pos = found; - else { - /* Check that what precedes the tail will match. */ - while (pos != found) { - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - --pos; - } - - if (pos != found) - /* Something preceding the tail didn't match. 
- */ - break; - } - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - case RE_OP_STRING_IGN: - { - Py_ssize_t length; - - length = (Py_ssize_t)test->value_count; - - /* The tail is a string. We don't want to go off the end of - * the slice. - */ - limit = min_ssize_t(limit, state->slice_end - length); - - for (;;) { - Py_ssize_t found; - BOOL is_partial; - - if (pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (pos >= limit) - break; - - /* Look for the tail string. */ - found = string_search_ign(safe_state, test, pos + 1, - limit + length, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - if (repeated->op == RE_OP_ANY_ALL) - /* Anything can precede the tail. */ - pos = found; - else { - /* Check that what precedes the tail will match. */ - while (pos != found) { - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - ++pos; - } - - if (pos != found) - /* Something preceding the tail didn't match. - */ - break; - } - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - case RE_OP_STRING_IGN_REV: - { - Py_ssize_t length; - - length = (Py_ssize_t)test->value_count; - - /* The tail is a string. We don't want to go off the end of - * the slice. - */ - limit = max_ssize_t(limit, state->slice_start + length); - - for (;;) { - Py_ssize_t found; - BOOL is_partial; - - if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (pos <= limit) - break; - - /* Look for the tail string. */ - found = string_search_ign_rev(safe_state, test, pos - - 1, limit - length, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - if (repeated->op == RE_OP_ANY_ALL) - /* Anything can precede the tail. 
*/ - pos = found; - else { - /* Check that what precedes the tail will match. */ - while (pos != found) { - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - --pos; - } - - if (pos != found) - /* Something preceding the tail didn't match. - */ - break; - } - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - case RE_OP_STRING_REV: - { - Py_ssize_t length; - - length = (Py_ssize_t)test->value_count; - - /* The tail is a string. We don't want to go off the end of - * the slice. - */ - limit = max_ssize_t(limit, state->slice_start + length); - - for (;;) { - Py_ssize_t found; - BOOL is_partial; - - if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (pos <= limit) - break; - - /* Look for the tail string. */ - found = string_search_rev(safe_state, test, pos - 1, - limit - length, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - if (repeated->op == RE_OP_ANY_ALL) - /* Anything can precede the tail. */ - pos = found; - else { - /* Check that what precedes the tail will match. */ - while (pos != found) { - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - --pos; - } - - if (pos != found) - /* Something preceding the tail didn't match. 
- */ - break; - } - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - default: - for (;;) { - RE_Position next_position; - - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - pos += step; - - status = try_match(state, &node->next_1, pos, - &next_position); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (status == RE_ERROR_SUCCESS && - !is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - if (pos == limit) - break; - } - break; - } - } - - if (match) { - /* The tail could match. */ - count = (size_t)abs_ssize_t(pos - state->text_pos); - state->text_pos = pos; - - if (count < max_count) { - /* The match is shorter than the maximum, so we might need - * to backtrack the repeat again to consume more. - */ - rp_data->count = count; - } else { - /* We've reached or passed the maximum, so we won't need to - * backtrack the repeat again. - */ - rp_data->start = bt_data->repeat.text_pos; - rp_data->count = bt_data->repeat.count; - discard_backtrack(state); - - /* Have we passed the maximum? */ - if (count > max_count) - goto backtrack; - } - - node = node->next_1.node; - goto advance; - } else { - /* The tail couldn't match. */ - rp_data->start = bt_data->repeat.text_pos; - rp_data->count = bt_data->repeat.count; - discard_backtrack(state); - } - break; - } - case RE_OP_LOOKAROUND: /* Lookaround subpattern. */ - { - TRACE(("%s\n", re_op_text[bt_data->op])) - - if (bt_data->lookaround.inside) { - /* Backtracked to the start of a lookaround. 
*/ - RE_AtomicData* lookaround; - - lookaround = pop_atomic(safe_state); - state->text_pos = lookaround->text_pos; - state->slice_end = lookaround->slice_end; - state->slice_start = lookaround->slice_start; - state->current_backtrack_block = - lookaround->current_backtrack_block; - state->current_backtrack_block->count = - lookaround->backtrack_count; - - /* Restore the groups and repeats and certain flags. */ - if (lookaround->has_repeats) - pop_repeats(state); - - if (lookaround->has_groups) - pop_groups(state); - - state->too_few_errors = bt_data->lookaround.too_few_errors; - state->capture_change = bt_data->lookaround.capture_change; - - if (bt_data->lookaround.node->match) { - /* It's a positive lookaround that's failed. */ - discard_backtrack(state); - } else { - /* It's a negative lookaround that's failed. Record that - * we've now left the lookaround and continue to the - * following node. - */ - bt_data->lookaround.inside = FALSE; - node = bt_data->lookaround.node->nonstring.next_2.node; - goto advance; - } - } else { - /* Backtracked to a lookaround. If it's a positive lookaround - * that succeeded, we need to restore the groups; if it's a - * negative lookaround that failed, it would have completely - * backtracked inside and already restored the groups. We also - * need to restore certain flags. - */ - if (bt_data->lookaround.node->match && - (bt_data->lookaround.node->status & RE_STATUS_HAS_GROUPS)) - pop_groups(state); - - state->too_few_errors = bt_data->lookaround.too_few_errors; - state->capture_change = bt_data->lookaround.capture_change; - - discard_backtrack(state); - } - break; - } - case RE_OP_MATCH_BODY: - { - RE_RepeatData* rp_data; - TRACE(("%s %d\n", re_op_text[bt_data->op], bt_data->repeat.index)) - - /* We want to match the body. */ - rp_data = &state->repeats[bt_data->repeat.index]; - - /* Restore the repeat info. 
*/ - rp_data->count = bt_data->repeat.count; - rp_data->start = bt_data->repeat.start; - rp_data->capture_change = bt_data->repeat.capture_change; - - /* Record backtracking info in case the body fails to match. */ - bt_data->op = RE_OP_BODY_START; - - /* Advance into the body. */ - node = bt_data->repeat.position.node; - state->text_pos = bt_data->repeat.position.text_pos; - goto advance; - } - case RE_OP_MATCH_TAIL: - { - RE_RepeatData* rp_data; - TRACE(("%s %d\n", re_op_text[bt_data->op], bt_data->repeat.index)) - - /* We want to match the tail. */ - rp_data = &state->repeats[bt_data->repeat.index]; - - /* Restore the repeat info. */ - rp_data->count = bt_data->repeat.count; - rp_data->start = bt_data->repeat.start; - rp_data->capture_change = bt_data->repeat.capture_change; - - /* Advance into the tail. */ - node = bt_data->repeat.position.node; - state->text_pos = bt_data->repeat.position.text_pos; - - discard_backtrack(state); - goto advance; - } - case RE_OP_REF_GROUP: /* Reference to a capture group. */ - case RE_OP_REF_GROUP_IGN: /* Reference to a capture group, ignoring case. */ - case RE_OP_REF_GROUP_IGN_REV: /* Reference to a capture group, backwards, ignoring case. */ - case RE_OP_REF_GROUP_REV: /* Reference to a capture group, backwards. */ - case RE_OP_STRING: /* A string. */ - case RE_OP_STRING_IGN: /* A string, ignoring case. */ - case RE_OP_STRING_IGN_REV: /* A string, backwards, ignoring case. */ - case RE_OP_STRING_REV: /* A string, backwards. */ - { - BOOL matched; - TRACE(("%s\n", re_op_text[bt_data->op])) - - status = retry_fuzzy_match_string(safe_state, search, - &state->text_pos, &node, &string_pos, &matched); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (matched) - goto advance; - - string_pos = -1; - break; - } - case RE_OP_REF_GROUP_FLD: /* Reference to a capture group, ignoring case. */ - case RE_OP_REF_GROUP_FLD_REV: /* Reference to a capture group, backwards, ignoring case. 
*/ - { - BOOL matched; - TRACE(("%s\n", re_op_text[bt_data->op])) - - status = retry_fuzzy_match_group_fld(safe_state, search, - &state->text_pos, &node, &folded_pos, &string_pos, &gfolded_pos, - &matched); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (matched) - goto advance; - - string_pos = -1; - break; - } - case RE_OP_START_GROUP: /* Start of a capture group. */ - { - RE_CODE private_index; - RE_GroupData* group; - TRACE(("%s %d\n", re_op_text[bt_data->op], - bt_data->group.public_index)) - - private_index = bt_data->group.private_index; - group = &state->groups[private_index - 1]; - - /* Unsave the capture? */ - if (bt_data->group.capture) - unsave_capture(state, bt_data->group.private_index, - bt_data->group.public_index); - - if (pattern->group_info[private_index - 1].referenced && - group->span.start != bt_data->group.text_pos) - --state->capture_change; - group->span.start = bt_data->group.text_pos; - group->current_capture = bt_data->group.current_capture; - - discard_backtrack(state); - break; - } - case RE_OP_STRING_FLD: /* A string, ignoring case. */ - case RE_OP_STRING_FLD_REV: /* A string, backwards, ignoring case. */ - { - BOOL matched; - TRACE(("%s\n", re_op_text[bt_data->op])) - - status = retry_fuzzy_match_string_fld(safe_state, search, - &state->text_pos, &node, &string_pos, &folded_pos, &matched); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (matched) - goto advance; - - string_pos = -1; - break; - } - default: - TRACE(("UNKNOWN OP %d\n", bt_data->op)) - return RE_ERROR_ILLEGAL; - } - } -} - -/* Saves group data for fuzzy matching. */ -Py_LOCAL_INLINE(RE_GroupData*) save_groups(RE_SafeState* safe_state, - RE_GroupData* saved_groups) { - RE_State* state; - PatternObject* pattern; - size_t g; - - /* Re-acquire the GIL. 
*/ - acquire_GIL(safe_state); - - state = safe_state->re_state; - pattern = state->pattern; - - if (!saved_groups) { - saved_groups = (RE_GroupData*)re_alloc(pattern->true_group_count * - sizeof(RE_GroupData)); - if (!saved_groups) - goto error; - - memset(saved_groups, 0, pattern->true_group_count * - sizeof(RE_GroupData)); - } - - for (g = 0; g < pattern->true_group_count; g++) { - RE_GroupData* orig; - RE_GroupData* copy; - - orig = &state->groups[g]; - copy = &saved_groups[g]; - - copy->span = orig->span; - - if (orig->capture_count > copy->capture_capacity) { - RE_GroupSpan* cap_copy; - - cap_copy = (RE_GroupSpan*)re_realloc(copy->captures, - orig->capture_count * sizeof(RE_GroupSpan)); - if (!cap_copy) - goto error; - - copy->capture_capacity = orig->capture_count; - copy->captures = cap_copy; - } - - copy->capture_count = orig->capture_count; - Py_MEMCPY(copy->captures, orig->captures, orig->capture_count * - sizeof(RE_GroupSpan)); - } - - /* Release the GIL. */ - release_GIL(safe_state); - - return saved_groups; - -error: - if (saved_groups) { - for (g = 0; g < pattern->true_group_count; g++) - re_dealloc(saved_groups[g].captures); - - re_dealloc(saved_groups); - } - - /* Release the GIL. */ - release_GIL(safe_state); - - return NULL; -} - -/* Restores group data for fuzzy matching. */ -Py_LOCAL_INLINE(void) restore_groups(RE_SafeState* safe_state, RE_GroupData* - saved_groups) { - RE_State* state; - PatternObject* pattern; - size_t g; - - /* Re-acquire the GIL. 
*/ - acquire_GIL(safe_state); - - state = safe_state->re_state; - pattern = state->pattern; - - for (g = 0; g < pattern->true_group_count; g++) { - RE_GroupData* group; - RE_GroupData* saved; - - group = &state->groups[g]; - saved = &saved_groups[g]; - - group->span = saved->span; - - group->capture_count = saved->capture_count; - Py_MEMCPY(group->captures, saved->captures, saved->capture_count * - sizeof(RE_GroupSpan)); - - re_dealloc(saved->captures); - } - - re_dealloc(saved_groups); - - /* Release the GIL. */ - release_GIL(safe_state); -} - -/* Discards group data for fuzzy matching. */ -Py_LOCAL_INLINE(void) discard_groups(RE_SafeState* safe_state, RE_GroupData* - saved_groups) { - RE_State* state; - PatternObject* pattern; - size_t g; - - /* Re-acquire the GIL. */ - acquire_GIL(safe_state); - - state = safe_state->re_state; - pattern = state->pattern; - - for (g = 0; g < pattern->true_group_count; g++) - re_dealloc(saved_groups[g].captures); - - re_dealloc(saved_groups); - - /* Release the GIL. */ - release_GIL(safe_state); -} - -/* Saves the fuzzy info. */ -Py_LOCAL_INLINE(void) save_fuzzy_counts(RE_State* state, size_t* fuzzy_counts) - { - Py_MEMCPY(fuzzy_counts, state->total_fuzzy_counts, - sizeof(state->total_fuzzy_counts)); -} - -/* Restores the fuzzy info. */ -Py_LOCAL_INLINE(void) restore_fuzzy_counts(RE_State* state, size_t* - fuzzy_counts) { - Py_MEMCPY(state->total_fuzzy_counts, fuzzy_counts, - sizeof(state->total_fuzzy_counts)); -} - -/* Makes the list of best matches found so far. */ -Py_LOCAL_INLINE(void) make_best_list(RE_BestList* best_list) { - best_list->capacity = 0; - best_list->count = 0; - best_list->entries = NULL; -} - -/* Clears the list of best matches found so far. */ -Py_LOCAL_INLINE(void) clear_best_list(RE_BestList* best_list) { - best_list->count = 0; -} - -/* Adds a new entry to the list of best matches found so far. 
*/ -Py_LOCAL_INLINE(BOOL) add_to_best_list(RE_SafeState* safe_state, RE_BestList* - best_list, Py_ssize_t match_pos, Py_ssize_t text_pos) { - RE_BestEntry* entry; - - if (best_list->count >= best_list->capacity) { - RE_BestEntry* new_entries; - - best_list->capacity = best_list->capacity == 0 ? 16 : - best_list->capacity * 2; - new_entries = safe_realloc(safe_state, best_list->entries, - best_list->capacity * sizeof(RE_BestEntry)); - if (!new_entries) - return FALSE; - - best_list->entries = new_entries; - } - - entry = &best_list->entries[best_list->count++]; - entry->match_pos = match_pos; - entry->text_pos = text_pos; - - return TRUE; -} - -/* Destroy the list of best matches found so far. */ -Py_LOCAL_INLINE(void) destroy_best_list(RE_SafeState* safe_state, RE_BestList* - best_list) { - if (best_list->entries) - safe_dealloc(safe_state, best_list->entries); -} - -/* Performs a match or search from the current text position for a best fuzzy - * match. - */ -Py_LOCAL_INLINE(int) do_best_fuzzy_match(RE_SafeState* safe_state, BOOL search) - { - RE_State* state; - Py_ssize_t available; - int step; - size_t fewest_errors; - BOOL must_advance; - BOOL found_match; - RE_BestList best_list; - Py_ssize_t start_pos; - int status; - TRACE(("<>\n")) - - state = safe_state->re_state; - - if (state->reverse) { - available = state->text_pos - state->slice_start; - step = -1; - } else { - available = state->slice_end - state->text_pos; - step = 1; - } - - /* The maximum permitted cost. */ - state->max_errors = PY_SSIZE_T_MAX; - fewest_errors = PY_SSIZE_T_MAX; - - state->best_text_pos = state->reverse ? state->slice_start : - state->slice_end; - - must_advance = state->must_advance; - found_match = FALSE; - - make_best_list(&best_list); - - /* Search the text for the best match. 
*/ - start_pos = state->text_pos; - while (state->slice_start <= start_pos && start_pos <= state->slice_end) { - state->text_pos = start_pos; - state->must_advance = must_advance; - - /* Initialise the state. */ - init_match(state); - - status = RE_ERROR_SUCCESS; - if (state->max_errors == 0 && state->partial_side == RE_PARTIAL_NONE) { - /* An exact match, and partial matches not permitted. */ - if (available < state->min_width || (available == 0 && - state->must_advance)) - status = RE_ERROR_FAILURE; - } - - if (status == RE_ERROR_SUCCESS) - status = basic_match(safe_state, search); - - /* Has an error occurred, or is it a partial match? */ - if (status < 0) - break; - - if (status == RE_ERROR_SUCCESS) { - /* It was a successful match. */ - found_match = TRUE; - - if (state->total_errors < fewest_errors) { - /* This match was better than any of the previous ones. */ - fewest_errors = state->total_errors; - - if (state->total_errors == 0) - /* It was a perfect match. */ - break; - - /* Forget all the previous worse matches and remember this one. - */ - clear_best_list(&best_list); - if (!add_to_best_list(safe_state, &best_list, state->match_pos, - state->text_pos)) - return RE_ERROR_MEMORY; - } else if (state->total_errors == fewest_errors) - /* This match was as good as the previous matches. Remember - * this one. - */ - add_to_best_list(safe_state, &best_list, state->match_pos, - state->text_pos); - } else - start_pos = state->match_pos + step; - - /* Should we keep searching? */ - if (!search) - break; - - state->max_errors = fewest_errors - 1; - } - - if (found_match) { - /* We found a match. */ - if (fewest_errors > 0) { - /* It doesn't look like a perfect match. 
*/ - int i; - Py_ssize_t slice_start; - Py_ssize_t slice_end; - size_t error_limit; - size_t best_fuzzy_counts[RE_FUZZY_COUNT]; - RE_GroupData* best_groups; - Py_ssize_t best_match_pos; - Py_ssize_t best_text_pos; - - slice_start = state->slice_start; - slice_end = state->slice_end; - - error_limit = fewest_errors; - - if (error_limit > RE_MAX_ERRORS) - error_limit = RE_MAX_ERRORS; - - best_groups = NULL; - - /* Look again at the best of the matches that we've seen. */ - for (i = 0; i < best_list.count; i++) { - RE_BestEntry* entry; - Py_ssize_t max_offset; - Py_ssize_t offset; - - /* Look for the best fit at this position. */ - entry = &best_list.entries[i]; - - if (search) { - max_offset = state->reverse ? entry->match_pos - - state->slice_start : state->slice_end - entry->match_pos; - - if (max_offset > (Py_ssize_t)fewest_errors) - max_offset = (Py_ssize_t)fewest_errors; - - if (max_offset > (Py_ssize_t)error_limit) - max_offset = (Py_ssize_t)error_limit; - } else - max_offset = 0; - - start_pos = entry->match_pos; - offset = 0; - - while (offset <= max_offset) { - state->max_errors = 1; - - while (state->max_errors <= error_limit) { - state->text_pos = start_pos; - init_match(state); - status = basic_match(safe_state, FALSE); - - if (status == RE_ERROR_SUCCESS) { - BOOL better; - - if (state->total_errors < error_limit || i == 0 && - offset == 0) - better = TRUE; - else if (state->total_errors == error_limit) - /* The cost is as low as the current best, but - * is it earlier? - */ - better = state->reverse ? 
state->match_pos > - best_match_pos : state->match_pos < - best_match_pos; - - if (better) { - save_fuzzy_counts(state, best_fuzzy_counts); - - best_groups = save_groups(safe_state, - best_groups); - if (!best_groups) { - destroy_best_list(safe_state, &best_list); - return RE_ERROR_MEMORY; - } - - best_match_pos = state->match_pos; - best_text_pos = state->text_pos; - error_limit = state->total_errors; - } - - break; - } - - ++state->max_errors; - } - - start_pos += step; - ++offset; - } - - if (status == RE_ERROR_SUCCESS && state->total_errors == 0) - break; - } - - if (best_groups) { - status = RE_ERROR_SUCCESS; - state->match_pos = best_match_pos; - state->text_pos = best_text_pos; - - restore_groups(safe_state, best_groups); - restore_fuzzy_counts(state, best_fuzzy_counts); - } else { - /* None of the "best" matches could be improved on, so pick the - * first. - */ - RE_BestEntry* entry; - - /* Look at only the part of the string around the match. */ - entry = &best_list.entries[0]; - - if (state->reverse) { - state->slice_start = entry->text_pos; - state->slice_end = entry->match_pos; - } else { - state->slice_start = entry->match_pos; - state->slice_end = entry->text_pos; - } - - /* We'll expand the part that we're looking at to take to - * compensate for any matching errors that have occurred. 
- */ - if (state->slice_start - slice_start >= - (Py_ssize_t)fewest_errors) - state->slice_start -= (Py_ssize_t)fewest_errors; - else - state->slice_start = slice_start; - - if (slice_end - state->slice_end >= (Py_ssize_t)fewest_errors) - state->slice_end += (Py_ssize_t)fewest_errors; - else - state->slice_end = slice_end; - - state->max_errors = fewest_errors; - state->text_pos = entry->match_pos; - init_match(state); - status = basic_match(safe_state, search); - } - - state->slice_start = slice_start; - state->slice_end = slice_end; - } - } - - destroy_best_list(safe_state, &best_list); - - return status; -} - -/* Performs a match or search from the current text position for an enhanced - * fuzzy match. - */ -Py_LOCAL_INLINE(int) do_enhanced_fuzzy_match(RE_SafeState* safe_state, BOOL - search) { - RE_State* state; - PatternObject* pattern; - Py_ssize_t available; - size_t fewest_errors; - RE_GroupData* best_groups; - Py_ssize_t best_match_pos; - BOOL must_advance; - Py_ssize_t slice_start; - Py_ssize_t slice_end; - int status; - size_t best_fuzzy_counts[RE_FUZZY_COUNT]; - Py_ssize_t best_text_pos = 0; /* Initialise to stop compiler warning. */ - TRACE(("<>\n")) - - state = safe_state->re_state; - pattern = state->pattern; - - if (state->reverse) - available = state->text_pos - state->slice_start; - else - available = state->slice_end - state->text_pos; - - /* The maximum permitted cost. */ - state->max_errors = PY_SSIZE_T_MAX; - fewest_errors = PY_SSIZE_T_MAX; - - best_groups = NULL; - - state->best_match_pos = state->text_pos; - state->best_text_pos = state->reverse ? state->slice_start : - state->slice_end; - - best_match_pos = state->text_pos; - must_advance = state->must_advance; - - slice_start = state->slice_start; - slice_end = state->slice_end; - - for (;;) { - /* If there's a better match, it won't start earlier in the string than - * the current best match, so there's no need to start earlier than - * that match. 
- */ - state->must_advance = must_advance; - - /* Initialise the state. */ - init_match(state); - - status = RE_ERROR_SUCCESS; - if (state->max_errors == 0 && state->partial_side == RE_PARTIAL_NONE) { - /* An exact match, and partial matches not permitted. */ - if (available < state->min_width || (available == 0 && - state->must_advance)) - status = RE_ERROR_FAILURE; - } - - if (status == RE_ERROR_SUCCESS) - status = basic_match(safe_state, search); - - /* Has an error occurred, or is it a partial match? */ - if (status < 0) - break; - - if (status == RE_ERROR_SUCCESS) { - BOOL better; - - better = state->total_errors < fewest_errors; - - if (better) { - BOOL same_match; - - fewest_errors = state->total_errors; - state->max_errors = fewest_errors; - - save_fuzzy_counts(state, best_fuzzy_counts); - - same_match = state->match_pos == best_match_pos && - state->text_pos == best_text_pos; - same_match = FALSE; - - if (best_groups) { - size_t g; - - /* Did we get the same match as the best so far? */ - for (g = 0; same_match && g < pattern->public_group_count; - g++) { - same_match = state->groups[g].span.start == - best_groups[g].span.start && - state->groups[g].span.end == best_groups[g].span.end; - } - } - - /* Save the best result so far. 
*/ - best_groups = save_groups(safe_state, best_groups); - if (!best_groups) { - status = RE_ERROR_MEMORY; - break; - } - - best_match_pos = state->match_pos; - best_text_pos = state->text_pos; - - if (same_match || state->total_errors == 0) - break; - - state->max_errors = state->total_errors; - if (state->max_errors < RE_MAX_ERRORS) - --state->max_errors; - } else - break; - - if (state->reverse) { - state->slice_start = state->text_pos; - state->slice_end = state->match_pos; - } else { - state->slice_start = state->match_pos; - state->slice_end = state->text_pos; - } - - state->text_pos = state->match_pos; - - if (state->max_errors == PY_SSIZE_T_MAX) - state->max_errors = 0; - } else - break; - } - - state->slice_start = slice_start; - state->slice_end = slice_end; - - if (best_groups) { - if (status == RE_ERROR_SUCCESS && state->total_errors == 0) - /* We have a perfect match, so the previous best match. */ - discard_groups(safe_state, best_groups); - else { - /* Restore the previous best match. */ - status = RE_ERROR_SUCCESS; - - state->match_pos = best_match_pos; - state->text_pos = best_text_pos; - - restore_groups(safe_state, best_groups); - restore_fuzzy_counts(state, best_fuzzy_counts); - } - } - - return status; -} - -/* Performs a match or search from the current text position for a simple fuzzy - * match. - */ -Py_LOCAL_INLINE(int) do_simple_fuzzy_match(RE_SafeState* safe_state, BOOL - search) { - RE_State* state; - Py_ssize_t available; - int status; - TRACE(("<>\n")) - - state = safe_state->re_state; - - if (state->reverse) - available = state->text_pos - state->slice_start; - else - available = state->slice_end - state->text_pos; - - /* The maximum permitted cost. */ - state->max_errors = PY_SSIZE_T_MAX; - - state->best_match_pos = state->text_pos; - state->best_text_pos = state->reverse ? state->slice_start : - state->slice_end; - - /* Initialise the state. 
*/ - init_match(state); - - status = RE_ERROR_SUCCESS; - if (state->max_errors == 0 && state->partial_side == RE_PARTIAL_NONE) { - /* An exact match, and partial matches not permitted. */ - if (available < state->min_width || (available == 0 && - state->must_advance)) - status = RE_ERROR_FAILURE; - } - - if (status == RE_ERROR_SUCCESS) - status = basic_match(safe_state, search); - - return status; -} - -/* Performs a match or search from the current text position for an exact - * match. - */ -Py_LOCAL_INLINE(int) do_exact_match(RE_SafeState* safe_state, BOOL search) { - RE_State* state; - Py_ssize_t available; - int status; - TRACE(("<>\n")) - - state = safe_state->re_state; - - if (state->reverse) - available = state->text_pos - state->slice_start; - else - available = state->slice_end - state->text_pos; - - /* The maximum permitted cost. */ - state->max_errors = 0; - - state->best_match_pos = state->text_pos; - state->best_text_pos = state->reverse ? state->slice_start : - state->slice_end; - - /* Initialise the state. */ - init_match(state); - - status = RE_ERROR_SUCCESS; - if (state->max_errors == 0 && state->partial_side == RE_PARTIAL_NONE) { - /* An exact match, and partial matches not permitted. */ - if (available < state->min_width || (available == 0 && - state->must_advance)) - status = RE_ERROR_FAILURE; - } - - if (status == RE_ERROR_SUCCESS) - status = basic_match(safe_state, search); - - return status; -} - -/* Performs a match or search from the current text position. - * - * The state can sometimes be shared across threads. In such instances there's - * a lock (mutex) on it. The lock is held for the duration of matching. - */ -Py_LOCAL_INLINE(int) do_match(RE_SafeState* safe_state, BOOL search) { - RE_State* state; - PatternObject* pattern; - int status; - TRACE(("<>\n")) - - state = safe_state->re_state; - pattern = state->pattern; - - /* Is there enough to search? 
*/ - if (state->reverse) { - if (state->text_pos < state->slice_start) - return FALSE; - } else { - if (state->text_pos > state->slice_end) - return FALSE; - } - - /* Release the GIL. */ - release_GIL(safe_state); - - if (pattern->is_fuzzy) { - if (pattern->flags & RE_FLAG_BESTMATCH) - status = do_best_fuzzy_match(safe_state, search); - else if (pattern->flags & RE_FLAG_ENHANCEMATCH) - status = do_enhanced_fuzzy_match(safe_state, search); - else - status = do_simple_fuzzy_match(safe_state, search); - } else - status = do_exact_match(safe_state, search); - - if (status == RE_ERROR_SUCCESS || status == RE_ERROR_PARTIAL) { - Py_ssize_t max_end_index; - RE_GroupInfo* group_info; - size_t g; - - /* Store the results. */ - state->lastindex = -1; - state->lastgroup = -1; - max_end_index = -1; - - if (status == RE_ERROR_PARTIAL) { - /* We've matched up to the limit of the slice. */ - if (state->reverse) - state->text_pos = state->slice_start; - else - state->text_pos = state->slice_end; - } - - /* Store the capture groups. */ - group_info = pattern->group_info; - - for (g = 0; g < pattern->public_group_count; g++) { - RE_GroupSpan* span; - - span = &state->groups[g].span; - /* The string positions are of type Py_ssize_t, so the format needs - * to specify that. - */ - TRACE(("group %d from %" PY_FORMAT_SIZE_T "d to %" PY_FORMAT_SIZE_T - "d\n", g + 1, span->start, span->end)) - - if (span->start >= 0 && span->end >= 0 && group_info[g].end_index > - max_end_index) { - max_end_index = group_info[g].end_index; - state->lastindex = (Py_ssize_t)g + 1; - if (group_info[g].has_name) - state->lastgroup = (Py_ssize_t)g + 1; - } - } - } - - /* Re-acquire the GIL. */ - acquire_GIL(safe_state); - - if (status < 0 && status != RE_ERROR_PARTIAL && !PyErr_Occurred()) - set_error(status, NULL); - - return status; -} - -/* Gets a string from a Python object. 
- * - * If the function returns true and str_info->should_release is true then it's - * the responsibility of the caller to release the buffer when it's no longer - * needed. - */ -Py_LOCAL_INLINE(BOOL) get_string(PyObject* string, RE_StringInfo* str_info) { - /* Given a Python object, return a data pointer, a length (in characters), - * and a character size. Return FALSE if the object is not a string (or not - * compatible). - */ - PyBufferProcs* buffer; - Py_ssize_t bytes; - Py_ssize_t size; - - /* Unicode objects do not support the buffer API. So, get the data directly - * instead. - */ - if (PyUnicode_Check(string)) { - /* Unicode strings don't always support the buffer interface. */ - str_info->characters = (void*)PyUnicode_AS_DATA(string); - str_info->length = PyUnicode_GET_SIZE(string); - str_info->charsize = sizeof(Py_UNICODE); - str_info->is_unicode = TRUE; - str_info->should_release = FALSE; - return TRUE; - } - -#if defined(PYPY_VERSION) - if (PyString_Check(string)) { - /* Bytestrings don't always support the buffer interface. */ - str_info->characters = (void*)PyString_AS_STRING(string); - str_info->length = PyString_GET_SIZE(string); - str_info->charsize = 1; - str_info->is_unicode = FALSE; - str_info->should_release = FALSE; - return TRUE; - } - -#endif -#if defined(PYPY_VERSION) - /* Get pointer to string buffer. */ - if (PyObject_GetBuffer(string, &str_info->view, PyBUF_SIMPLE) != 0) { - printf("PyObject_GetBuffer failed!\n"); - PyErr_SetString(PyExc_TypeError, "expected string or buffer"); - return FALSE; - } - - if (!str_info->view.buf) { - PyBuffer_Release(&str_info->view); - PyErr_SetString(PyExc_ValueError, "buffer is NULL"); - return FALSE; - } - - str_info->should_release = TRUE; - - str_info->characters = str_info->view.buf; - str_info->length = str_info->view.len; - str_info->charsize = 1; - str_info->is_unicode = FALSE; -#else - /* Get pointer to string buffer. 
*/ -#if PY_VERSION_HEX >= 0x02060000 - buffer = Py_TYPE(string)->tp_as_buffer; - str_info->view.len = -1; -#else - buffer = string->ob_type->tp_as_buffer; -#endif - - if (!buffer) { - PyErr_SetString(PyExc_TypeError, "expected string or buffer"); - return FALSE; - } - -#if PY_VERSION_HEX >= 0x02060000 - if (buffer->bf_getbuffer && (*buffer->bf_getbuffer)(string, - &str_info->view, PyBUF_SIMPLE) >= 0) - /* It's a new-style buffer. */ - str_info->should_release = TRUE; - else -#endif - if (buffer->bf_getreadbuffer && buffer->bf_getsegcount && - buffer->bf_getsegcount(string, NULL) == 1) - /* It's an old-style buffer. */ - str_info->should_release = FALSE; - else { - PyErr_SetString(PyExc_TypeError, "expected string or buffer"); - return FALSE; - } - - /* Determine buffer size. */ -#if PY_VERSION_HEX >= 0x02060000 - if (str_info->should_release) { - /* It's a new-style buffer. */ - bytes = str_info->view.len; - str_info->characters = str_info->view.buf; - - if (str_info->characters == NULL) { - PyBuffer_Release(&str_info->view); - PyErr_SetString(PyExc_ValueError, "buffer is NULL"); - return FALSE; - } - } else -#endif - /* It's an old-style buffer. */ - bytes = buffer->bf_getreadbuffer(string, 0, &str_info->characters); - - if (bytes < 0) { -#if PY_VERSION_HEX >= 0x02060000 - if (str_info->should_release) - PyBuffer_Release(&str_info->view); -#endif - PyErr_SetString(PyExc_TypeError, "buffer has negative size"); - return FALSE; - } - - /* Determine character size. */ - size = PyObject_Size(string); - - if (PyString_Check(string) || bytes == size) - str_info->charsize = 1; - else { -#if PY_VERSION_HEX >= 0x02060000 - if (str_info->should_release) - PyBuffer_Release(&str_info->view); -#endif - PyErr_SetString(PyExc_TypeError, "buffer size mismatch"); - return FALSE; - } - - str_info->length = size; - str_info->is_unicode = FALSE; -#endif - - return TRUE; -} - -/* Deallocates the groups storage. 
*/ -Py_LOCAL_INLINE(void) dealloc_groups(RE_GroupData* groups, size_t group_count) - { - size_t g; - - if (!groups) - return; - - for (g = 0; g < group_count; g++) - re_dealloc(groups[g].captures); - - re_dealloc(groups); -} - -/* Initialises a state object. */ -Py_LOCAL_INLINE(BOOL) state_init_2(RE_State* state, PatternObject* pattern, - PyObject* string, RE_StringInfo* str_info, Py_ssize_t start, Py_ssize_t end, - BOOL overlapped, int concurrent, BOOL partial, BOOL use_lock, BOOL - visible_captures, BOOL match_all) { - int i; - Py_ssize_t final_pos; - - state->groups = NULL; - state->best_match_groups = NULL; - state->repeats = NULL; - state->visible_captures = visible_captures; - state->match_all = match_all; - state->backtrack_block.previous = NULL; - state->backtrack_block.next = NULL; - state->backtrack_block.capacity = RE_BACKTRACK_BLOCK_SIZE; - state->backtrack_allocated = RE_BACKTRACK_BLOCK_SIZE; - state->current_atomic_block = NULL; - state->first_saved_groups = NULL; - state->current_saved_groups = NULL; - state->first_saved_repeats = NULL; - state->current_saved_repeats = NULL; - state->lock = NULL; - state->fuzzy_guards = NULL; - state->first_group_call_frame = NULL; - state->current_group_call_frame = NULL; - state->group_call_guard_list = NULL; - state->req_pos = -1; - - /* The call guards used by recursive patterns. */ - if (pattern->call_ref_info_count > 0) { - state->group_call_guard_list = - (RE_GuardList*)re_alloc(pattern->call_ref_info_count * - sizeof(RE_GuardList)); - if (!state->group_call_guard_list) - goto error; - memset(state->group_call_guard_list, 0, pattern->call_ref_info_count * - sizeof(RE_GuardList)); - } - - /* The capture groups. 
*/ - if (pattern->true_group_count) { - size_t g; - - if (pattern->groups_storage) { - state->groups = pattern->groups_storage; - pattern->groups_storage = NULL; - } else { - state->groups = (RE_GroupData*)re_alloc(pattern->true_group_count * - sizeof(RE_GroupData)); - if (!state->groups) - goto error; - memset(state->groups, 0, pattern->true_group_count * - sizeof(RE_GroupData)); - - for (g = 0; g < pattern->true_group_count; g++) { - RE_GroupSpan* captures; - - captures = (RE_GroupSpan*)re_alloc(sizeof(RE_GroupSpan)); - if (!captures) { - size_t i; - - for (i = 0; i < g; i++) - re_dealloc(state->groups[i].captures); - - goto error; - } - - state->groups[g].captures = captures; - state->groups[g].capture_capacity = 1; - } - } - } - - /* Adjust boundaries. */ - if (start < 0) - start += str_info->length; - if (start < 0) - start = 0; - else if (start > str_info->length) - start = str_info->length; - - if (end < 0) - end += str_info->length; - if (end < 0) - end = 0; - else if (end > str_info->length) - end = str_info->length; - - state->overlapped = overlapped; - state->min_width = pattern->min_width; - - /* Initialise the getters and setters for the character size. */ - state->charsize = str_info->charsize; - state->is_unicode = str_info->is_unicode; - -#if PY_VERSION_HEX >= 0x02060000 - /* Are we using a buffer object? If so, we need to copy the info. 
*/ - state->should_release = str_info->should_release; - if (state->should_release) - state->view = str_info->view; - -#endif - switch (state->charsize) { - case 1: - state->char_at = bytes1_char_at; - state->set_char_at = bytes1_set_char_at; - state->point_to = bytes1_point_to; - break; - case 2: - state->char_at = bytes2_char_at; - state->set_char_at = bytes2_set_char_at; - state->point_to = bytes2_point_to; - break; - case 4: - state->char_at = bytes4_char_at; - state->set_char_at = bytes4_set_char_at; - state->point_to = bytes4_point_to; - break; - default: - goto error; - } - - state->encoding = pattern->encoding; - state->locale_info = pattern->locale_info; - - /* The state object contains a reference to the string and also a pointer - * to its contents. - * - * The documentation says that the end of the slice behaves like the end of - * the string. - */ - state->text = str_info->characters; - state->text_length = end; - - state->reverse = (pattern->flags & RE_FLAG_REVERSE) != 0; - if (partial) - state->partial_side = state->reverse ? RE_PARTIAL_LEFT : - RE_PARTIAL_RIGHT; - else - state->partial_side = RE_PARTIAL_NONE; - - state->slice_start = start; - state->slice_end = state->text_length; - state->text_pos = state->reverse ? state->slice_end : state->slice_start; - - /* Point to the final newline and line separator if it's at the end of the - * string, otherwise just -1. - */ - state->final_newline = -1; - state->final_line_sep = -1; - final_pos = state->text_length - 1; - if (final_pos >= 0) { - Py_UCS4 ch; - - ch = state->char_at(state->text, final_pos); - if (ch == 0x0A) { - /* The string ends with LF. */ - state->final_newline = final_pos; - state->final_line_sep = final_pos; - - /* Does the string end with CR/LF? */ - --final_pos; - if (final_pos >= 0 && state->char_at(state->text, final_pos) == - 0x0D) - state->final_line_sep = final_pos; - } else { - /* The string doesn't end with LF, but it could be another kind of - * line separator. 
- */ - if (state->encoding->is_line_sep(ch)) - state->final_line_sep = final_pos; - } - } - - /* If the 'new' behaviour is enabled then split correctly on zero-width - * matches. - */ - state->version_0 = (pattern->flags & RE_FLAG_VERSION1) == 0; - state->must_advance = FALSE; - - state->pattern = pattern; - state->string = string; - - if (pattern->repeat_count) { - if (pattern->repeats_storage) { - state->repeats = pattern->repeats_storage; - pattern->repeats_storage = NULL; - } else { - state->repeats = (RE_RepeatData*)re_alloc(pattern->repeat_count * - sizeof(RE_RepeatData)); - if (!state->repeats) - goto error; - memset(state->repeats, 0, pattern->repeat_count * - sizeof(RE_RepeatData)); - } - } - - if (pattern->fuzzy_count) { - state->fuzzy_guards = (RE_FuzzyGuards*)re_alloc(pattern->fuzzy_count * - sizeof(RE_FuzzyGuards)); - if (!state->fuzzy_guards) - goto error; - memset(state->fuzzy_guards, 0, pattern->fuzzy_count * - sizeof(RE_FuzzyGuards)); - } - - Py_INCREF(state->pattern); - Py_INCREF(state->string); - - /* Multithreading is allowed during matching when explicitly enabled or on - * immutable strings. - */ - switch (concurrent) { - case RE_CONC_NO: - state->is_multithreaded = FALSE; - break; - case RE_CONC_YES: - state->is_multithreaded = TRUE; - break; - default: - state->is_multithreaded = PyUnicode_Check(string) || - PyString_Check(string); - break; - } - - /* A state struct can sometimes be shared across threads. In such - * instances, if multithreading is enabled we need to protect the state - * with a lock (mutex) during matching. 
- */ - if (state->is_multithreaded && use_lock) - state->lock = PyThread_allocate_lock(); - - for (i = 0; i < MAX_SEARCH_POSITIONS; i++) - state->search_positions[i].start_pos = -1; - - return TRUE; - -error: - re_dealloc(state->group_call_guard_list); - re_dealloc(state->repeats); - dealloc_groups(state->groups, pattern->true_group_count); - re_dealloc(state->fuzzy_guards); - state->repeats = NULL; - state->groups = NULL; - state->fuzzy_guards = NULL; - return FALSE; -} - -#if PY_VERSION_HEX >= 0x02060000 -/* Releases the string's buffer, if necessary. */ -Py_LOCAL_INLINE(void) release_buffer(RE_StringInfo* str_info) { - if (str_info->should_release) - PyBuffer_Release(&str_info->view); -} - -#endif -/* Initialises a state object. */ -Py_LOCAL_INLINE(BOOL) state_init(RE_State* state, PatternObject* pattern, - PyObject* string, Py_ssize_t start, Py_ssize_t end, BOOL overlapped, int - concurrent, BOOL partial, BOOL use_lock, BOOL visible_captures, BOOL - match_all) { - RE_StringInfo str_info; - - /* Get the string to search or match. */ - if (!get_string(string, &str_info)) - return FALSE; - - /* If we fail to initialise the state then we need to release the buffer if - * the string is a buffer object. - */ - if (!state_init_2(state, pattern, string, &str_info, start, end, - overlapped, concurrent, partial, use_lock, visible_captures, match_all)) - { -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); - -#endif - return FALSE; - } - - /* The state has been initialised successfully, so now the state has the - * responsibility of releasing the buffer if the string is a buffer object. - */ - return TRUE; -} - -/* Deallocates repeat data. 
*/ -Py_LOCAL_INLINE(void) dealloc_repeats(RE_RepeatData* repeats, size_t - repeat_count) { - size_t i; - - if (!repeats) - return; - - for (i = 0; i < repeat_count; i++) { - re_dealloc(repeats[i].body_guard_list.spans); - re_dealloc(repeats[i].tail_guard_list.spans); - } - - re_dealloc(repeats); -} - -/* Deallocates fuzzy guards. */ -Py_LOCAL_INLINE(void) dealloc_fuzzy_guards(RE_FuzzyGuards* guards, size_t - fuzzy_count) { - size_t i; - - if (!guards) - return; - - for (i = 0; i < fuzzy_count; i++) { - re_dealloc(guards[i].body_guard_list.spans); - re_dealloc(guards[i].tail_guard_list.spans); - } - - re_dealloc(guards); -} - -/* Finalises a state object, discarding its contents. */ -Py_LOCAL_INLINE(void) state_fini(RE_State* state) { - RE_BacktrackBlock* current_backtrack; - RE_AtomicBlock* current_atomic; - PatternObject* pattern; - RE_SavedGroups* saved_groups; - RE_SavedRepeats* saved_repeats; - RE_GroupCallFrame* frame; - size_t i; - - /* Discard the lock (mutex) if there's one. */ - if (state->lock) - PyThread_free_lock(state->lock); - - /* Deallocate the backtrack blocks. */ - current_backtrack = state->backtrack_block.next; - while (current_backtrack) { - RE_BacktrackBlock* next; - - next = current_backtrack->next; - re_dealloc(current_backtrack); - state->backtrack_allocated -= RE_BACKTRACK_BLOCK_SIZE; - current_backtrack = next; - } - - /* Deallocate the atomic blocks. 
*/ - current_atomic = state->current_atomic_block; - while (current_atomic) { - RE_AtomicBlock* next; - - next = current_atomic->next; - re_dealloc(current_atomic); - current_atomic = next; - } - - state->current_atomic_block = NULL; - - pattern = state->pattern; - - saved_groups = state->first_saved_groups; - while (saved_groups) { - RE_SavedGroups* next; - - next = saved_groups->next; - re_dealloc(saved_groups->spans); - re_dealloc(saved_groups->counts); - re_dealloc(saved_groups); - saved_groups = next; - } - - saved_repeats = state->first_saved_repeats; - while (saved_repeats) { - RE_SavedRepeats* next; - - next = saved_repeats->next; - - dealloc_repeats(saved_repeats->repeats, pattern->repeat_count); - - re_dealloc(saved_repeats); - saved_repeats = next; - } - - if (state->best_match_groups) - dealloc_groups(state->best_match_groups, pattern->true_group_count); - - if (pattern->groups_storage) - dealloc_groups(state->groups, pattern->true_group_count); - else - pattern->groups_storage = state->groups; - - if (pattern->repeats_storage) - dealloc_repeats(state->repeats, pattern->repeat_count); - else - pattern->repeats_storage = state->repeats; - - frame = state->first_group_call_frame; - while (frame) { - RE_GroupCallFrame* next; - - next = frame->next; - - dealloc_groups(frame->groups, pattern->true_group_count); - dealloc_repeats(frame->repeats, pattern->repeat_count); - - re_dealloc(frame); - frame = next; - } - - for (i = 0; i < pattern->call_ref_info_count; i++) - re_dealloc(state->group_call_guard_list[i].spans); - - if (state->group_call_guard_list) - re_dealloc(state->group_call_guard_list); - - if (state->fuzzy_guards) - dealloc_fuzzy_guards(state->fuzzy_guards, pattern->fuzzy_count); - - Py_DECREF(state->pattern); - Py_DECREF(state->string); -#if PY_VERSION_HEX >= 0x02060000 - - if (state->should_release) - PyBuffer_Release(&state->view); -#endif -} - -/* Converts a string index to an integer. 
- * - * If the index is None then the default will be returned. - */ -Py_LOCAL_INLINE(Py_ssize_t) as_string_index(PyObject* obj, Py_ssize_t def) { - Py_ssize_t value; - - if (obj == Py_None) - return def; - - value = PyInt_AsSsize_t(obj); - if (!(value == -1 && PyErr_Occurred())) - return value; - - PyErr_Clear(); - - value = PyLong_AsLong(obj); - if (!(value == -1 && PyErr_Occurred())) - return value; - - set_error(RE_ERROR_INDEX, NULL); - return 0; -} - -/* Deallocates a MatchObject. */ -static void match_dealloc(PyObject* self_) { - MatchObject* self; - - self = (MatchObject*)self_; - - Py_XDECREF(self->string); - Py_XDECREF(self->substring); - Py_DECREF(self->pattern); - if (self->groups) - re_dealloc(self->groups); - Py_XDECREF(self->regs); - PyObject_DEL(self); -} - -/* Restricts a value to a range. */ -Py_LOCAL_INLINE(Py_ssize_t) limited_range(Py_ssize_t value, Py_ssize_t lower, - Py_ssize_t upper) { - if (value < lower) - return lower; - - if (value > upper) - return upper; - - return value; -} - -/* Gets a slice from a Unicode string. */ -Py_LOCAL_INLINE(PyObject*) unicode_slice(PyObject* string, Py_ssize_t start, - Py_ssize_t end) { - Py_ssize_t length; - Py_UNICODE* buffer; - - length = PyUnicode_GET_SIZE(string); - start = limited_range(start, 0, length); - end = limited_range(end, 0, length); - - buffer = PyUnicode_AsUnicode(string); - - return PyUnicode_FromUnicode(buffer + start, end - start); -} - -/* Gets a slice from a bytestring. */ -Py_LOCAL_INLINE(PyObject*) bytes_slice(PyObject* string, Py_ssize_t start, - Py_ssize_t end) { - Py_ssize_t length; - char* buffer; - - length = PyString_GET_SIZE(string); - start = limited_range(start, 0, length); - end = limited_range(end, 0, length); - - buffer = PyString_AsString(string); - - return PyString_FromStringAndSize(buffer + start, end - start); -} - -/* Gets a slice from a string, returning either a Unicode string or a - * bytestring. 
- */ -Py_LOCAL_INLINE(PyObject*) get_slice(PyObject* string, Py_ssize_t start, - Py_ssize_t end) { - if (PyUnicode_Check(string)) - return unicode_slice(string, start, end); - - if (PyString_Check(string)) - return bytes_slice(string, start, end); - - return PySequence_GetSlice(string, start, end); -} - -/* Gets a MatchObject's group by integer index. */ -static PyObject* match_get_group_by_index(MatchObject* self, Py_ssize_t index, - PyObject* def) { - RE_GroupSpan* span; - - if (index < 0 || (size_t)index > self->group_count) { - /* Raise error if we were given a bad group number. */ - set_error(RE_ERROR_NO_SUCH_GROUP, NULL); - return NULL; - } - - if (index == 0) - return get_slice(self->substring, self->match_start - - self->substring_offset, self->match_end - self->substring_offset); - - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - span = &self->groups[index - 1].span; - - if (span->start < 0 || span->end < 0) { - /* Return default value if the string or group is undefined. */ - Py_INCREF(def); - return def; - } - - return get_slice(self->substring, span->start - self->substring_offset, - span->end - self->substring_offset); -} - -/* Gets a MatchObject's start by integer index. */ -static PyObject* match_get_start_by_index(MatchObject* self, Py_ssize_t index) - { - RE_GroupSpan* span; - - if (index < 0 || (size_t)index > self->group_count) { - /* Raise error if we were given a bad group number. */ - set_error(RE_ERROR_NO_SUCH_GROUP, NULL); - return NULL; - } - - if (index == 0) - return Py_BuildValue("n", self->match_start); - - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - span = &self->groups[index - 1].span; - return Py_BuildValue("n", span->start); -} - -/* Gets a MatchObject's starts by integer index. 
*/ -static PyObject* match_get_starts_by_index(MatchObject* self, Py_ssize_t index) - { - RE_GroupData* group; - PyObject* result; - PyObject* item; - size_t i; - - if (index < 0 || (size_t)index > self->group_count) { - /* Raise error if we were given a bad group number. */ - set_error(RE_ERROR_NO_SUCH_GROUP, NULL); - return NULL; - } - - if (index == 0) { - result = PyList_New(1); - if (!result) - return NULL; - - item = Py_BuildValue("n", self->match_start); - if (!item) - goto error; - - /* PyList_SET_ITEM borrows the reference. */ - PyList_SET_ITEM(result, 0, item); - - return result; - } - - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - group = &self->groups[index - 1]; - - result = PyList_New((Py_ssize_t)group->capture_count); - if (!result) - return NULL; - - for (i = 0; i < group->capture_count; i++) { - item = Py_BuildValue("n", group->captures[i].start); - if (!item) - goto error; - - /* PyList_SET_ITEM borrows the reference. */ - PyList_SET_ITEM(result, i, item); - } - - return result; - -error: - Py_DECREF(result); - return NULL; -} - -/* Gets a MatchObject's end by integer index. */ -static PyObject* match_get_end_by_index(MatchObject* self, Py_ssize_t index) { - RE_GroupSpan* span; - - if (index < 0 || (size_t)index > self->group_count) { - /* Raise error if we were given a bad group number. */ - set_error(RE_ERROR_NO_SUCH_GROUP, NULL); - return NULL; - } - - if (index == 0) - return Py_BuildValue("n", self->match_end); - - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - span = &self->groups[index - 1].span; - return Py_BuildValue("n", span->end); -} - -/* Gets a MatchObject's ends by integer index. 
*/ -static PyObject* match_get_ends_by_index(MatchObject* self, Py_ssize_t index) { - RE_GroupData* group; - PyObject* result; - PyObject* item; - size_t i; - - if (index < 0 || (size_t)index > self->group_count) { - /* Raise error if we were given a bad group number. */ - set_error(RE_ERROR_NO_SUCH_GROUP, NULL); - return NULL; - } - - if (index == 0) { - result = PyList_New(1); - if (!result) - return NULL; - - item = Py_BuildValue("n", self->match_end); - if (!item) - goto error; - - /* PyList_SET_ITEM borrows the reference. */ - PyList_SET_ITEM(result, 0, item); - - return result; - } - - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - group = &self->groups[index - 1]; - - result = PyList_New((Py_ssize_t)group->capture_count); - if (!result) - return NULL; - - for (i = 0; i < group->capture_count; i++) { - item = Py_BuildValue("n", group->captures[i].end); - if (!item) - goto error; - - /* PyList_SET_ITEM borrows the reference. */ - PyList_SET_ITEM(result, i, item); - } - - return result; - -error: - Py_DECREF(result); - return NULL; -} - -/* Gets a MatchObject's span by integer index. */ -static PyObject* match_get_span_by_index(MatchObject* self, Py_ssize_t index) { - RE_GroupSpan* span; - - if (index < 0 || (size_t)index > self->group_count) { - /* Raise error if we were given a bad group number. */ - set_error(RE_ERROR_NO_SUCH_GROUP, NULL); - return NULL; - } - - if (index == 0) - return Py_BuildValue("nn", self->match_start, self->match_end); - - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - span = &self->groups[index - 1].span; - return Py_BuildValue("nn", span->start, span->end); -} - -/* Gets a MatchObject's spans by integer index. 
*/ -static PyObject* match_get_spans_by_index(MatchObject* self, Py_ssize_t index) - { - PyObject* result; - PyObject* item; - RE_GroupData* group; - size_t i; - - if (index < 0 || (size_t)index > self->group_count) { - /* Raise error if we were given a bad group number. */ - set_error(RE_ERROR_NO_SUCH_GROUP, NULL); - return NULL; - } - - if (index == 0) { - /* Group 0 is the whole match, so the list has exactly one span. */ - result = PyList_New(1); - if (!result) - return NULL; - - item = Py_BuildValue("nn", self->match_start, self->match_end); - if (!item) - goto error; - - /* PyList_SET_ITEM steals the reference to item. */ - PyList_SET_ITEM(result, 0, item); - - return result; - } - - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - group = &self->groups[index - 1]; - - result = PyList_New((Py_ssize_t)group->capture_count); - if (!result) - return NULL; - - for (i = 0; i < group->capture_count; i++) { - item = Py_BuildValue("nn", group->captures[i].start, - group->captures[i].end); - if (!item) - goto error; - - /* PyList_SET_ITEM steals the reference to item. */ - PyList_SET_ITEM(result, i, item); - } - - return result; - -error: - Py_DECREF(result); - return NULL; -} - -/* Gets a MatchObject's captures by integer index. - * - * Returns a list of the substrings captured by group 'index' (0 is the whole - * match), or sets RE_ERROR_NO_SUCH_GROUP and returns NULL if the index is out - * of range. - */ -static PyObject* match_get_captures_by_index(MatchObject* self, Py_ssize_t - index) { - PyObject* result; - PyObject* slice; - RE_GroupData* group; - size_t i; - - if (index < 0 || (size_t)index > self->group_count) { - /* Raise error if we were given a bad group number. */ - set_error(RE_ERROR_NO_SUCH_GROUP, NULL); - return NULL; - } - - if (index == 0) { - result = PyList_New(1); - if (!result) - return NULL; - - /* Positions are relative to the original string, so shift them by - * substring_offset before slicing the stored substring. - */ - slice = get_slice(self->substring, self->match_start - - self->substring_offset, self->match_end - self->substring_offset); - if (!slice) - goto error; - - /* PyList_SET_ITEM steals the reference to slice. 
*/ - PyList_SET_ITEM(result, 0, slice); - - return result; - } - - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - group = &self->groups[index - 1]; - - result = PyList_New((Py_ssize_t)group->capture_count); - if (!result) - return NULL; - - for (i = 0; i < group->capture_count; i++) { - slice = get_slice(self->substring, group->captures[i].start - - self->substring_offset, group->captures[i].end - - self->substring_offset); - if (!slice) - goto error; - - /* PyList_SET_ITEM steals the reference to slice. */ - PyList_SET_ITEM(result, i, slice); - } - - return result; - -error: - Py_DECREF(result); - return NULL; -} - -/* Converts a group index to an integer. - * - * Tries PyInt_AsSsize_t first and then PyLong_AsLong. If both fail it sets - * RE_ERROR_INDEX and returns -1, so callers must use PyErr_Occurred() to tell - * an error apart from a genuine index of -1. - */ -Py_LOCAL_INLINE(Py_ssize_t) as_group_index(PyObject* obj) { - Py_ssize_t value; - - value = PyInt_AsSsize_t(obj); - if (!(value == -1 && PyErr_Occurred())) - return value; - - PyErr_Clear(); - - value = PyLong_AsLong(obj); - if (!(value == -1 && PyErr_Occurred())) - return value; - - set_error(RE_ERROR_INDEX, NULL); - return -1; -} - -/* Gets a MatchObject's group index. - * - * The supplied index can be an integer or a string (group name) object. - * - * Returns -1, with no exception set, if the index can't be resolved. - */ -Py_LOCAL_INLINE(Py_ssize_t) match_get_group_index(MatchObject* self, PyObject* - index, BOOL allow_neg) { - Py_ssize_t group; - - /* Is the index an integer? */ - group = as_group_index(index); - if (!(group == -1 && PyErr_Occurred())) { - Py_ssize_t min_group = 0; - - /* Adjust negative indices where valid and allowed. */ - if (group < 0 && allow_neg) { - group += (Py_ssize_t)self->group_count + 1; - min_group = 1; - } - - if (min_group <= group && (size_t)group <= self->group_count) - return group; - - return -1; - } - - PyErr_Clear(); - - /* The index might be a group name. */ - if (self->pattern->groupindex) { - /* Look up the name. */ - index = PyObject_GetItem(self->pattern->groupindex, index); - if (index) { - /* Check that we have an integer. 
*/ - group = as_group_index(index); - Py_DECREF(index); - if (!(group == -1 && PyErr_Occurred())) - return group; - } - } - - /* Unknown name or non-integer value: report "no such group" as -1. */ - PyErr_Clear(); - return -1; -} - -/* Gets a MatchObject's group by object index. - * - * 'def' is passed on to match_get_group_by_index. Sets - * RE_ERROR_GROUP_INDEX_TYPE and returns NULL if the index is neither an - * integer nor a string. - */ -Py_LOCAL_INLINE(PyObject*) match_get_group(MatchObject* self, PyObject* index, - PyObject* def, BOOL allow_neg) { - /* Check that the index is an integer or a string. */ - if (PyInt_Check(index) || PyLong_Check(index) || PyUnicode_Check(index) || - PyString_Check(index)) - return match_get_group_by_index(self, match_get_group_index(self, - index, allow_neg), def); - - set_error(RE_ERROR_GROUP_INDEX_TYPE, index); - return NULL; -} - -/* Gets info from a MatchObject by object index. - * - * Resolves the index (negative indices are not allowed here) and hands it to - * the supplied get_by_index function. - */ -Py_LOCAL_INLINE(PyObject*) get_by_arg(MatchObject* self, PyObject* index, - RE_GetByIndexFunc get_by_index) { - /* Check that the index is an integer or a string. */ - if (PyInt_Check(index) || PyLong_Check(index) || PyUnicode_Check(index) || - PyString_Check(index)) - return get_by_index(self, match_get_group_index(self, index, FALSE)); - - set_error(RE_ERROR_GROUP_INDEX_TYPE, index); - return NULL; -} - -/* MatchObject's 'group' method. */ -static PyObject* match_group(MatchObject* self, PyObject* args) { - Py_ssize_t size; - PyObject* result; - Py_ssize_t i; - - size = PyTuple_GET_SIZE(args); - - switch (size) { - case 0: - /* group() */ - result = match_get_group_by_index(self, 0, Py_None); - break; - case 1: - /* group(x). PyTuple_GET_ITEM returns a borrowed reference. */ - result = match_get_group(self, PyTuple_GET_ITEM(args, 0), Py_None, - FALSE); - break; - default: - /* group(x, y, z, ...) */ - /* Fetch multiple items. */ - result = PyTuple_New(size); - if (!result) - return NULL; - - for (i = 0; i < size; i++) { - PyObject* item; - - /* PyTuple_GET_ITEM returns a borrowed reference. */ - item = match_get_group(self, PyTuple_GET_ITEM(args, i), Py_None, - FALSE); - if (!item) { - Py_DECREF(result); - return NULL; - } - - /* PyTuple_SET_ITEM steals the reference to item. 
*/ - PyTuple_SET_ITEM(result, i, item); - } - break; - } - - return result; -} - -/* Generic method for getting info from a MatchObject. - * - * With no arguments it queries group 0, with one argument it returns that - * group's info, and with several arguments it returns a tuple with one item - * per argument. - */ -Py_LOCAL_INLINE(PyObject*) get_from_match(MatchObject* self, PyObject* args, - RE_GetByIndexFunc get_by_index) { - Py_ssize_t size; - PyObject* result; - Py_ssize_t i; - - size = PyTuple_GET_SIZE(args); - - switch (size) { - case 0: - /* get() */ - result = get_by_index(self, 0); - break; - case 1: - /* get(x). PyTuple_GET_ITEM returns a borrowed reference. */ - result = get_by_arg(self, PyTuple_GET_ITEM(args, 0), get_by_index); - break; - default: - /* get(x, y, z, ...) */ - /* Fetch multiple items. */ - result = PyTuple_New(size); - if (!result) - return NULL; - - for (i = 0; i < size; i++) { - PyObject* item; - - /* PyTuple_GET_ITEM returns a borrowed reference. */ - item = get_by_arg(self, PyTuple_GET_ITEM(args, i), get_by_index); - if (!item) { - Py_DECREF(result); - return NULL; - } - - /* PyTuple_SET_ITEM steals the reference to item. */ - PyTuple_SET_ITEM(result, i, item); - } - break; - } - - return result; -} - -/* MatchObject's 'start' method. */ -static PyObject* match_start(MatchObject* self, PyObject* args) { - return get_from_match(self, args, match_get_start_by_index); -} - -/* MatchObject's 'starts' method. */ -static PyObject* match_starts(MatchObject* self, PyObject* args) { - return get_from_match(self, args, match_get_starts_by_index); -} - -/* MatchObject's 'end' method. */ -static PyObject* match_end(MatchObject* self, PyObject* args) { - return get_from_match(self, args, match_get_end_by_index); -} - -/* MatchObject's 'ends' method. */ -static PyObject* match_ends(MatchObject* self, PyObject* args) { - return get_from_match(self, args, match_get_ends_by_index); -} - -/* MatchObject's 'span' method. */ -static PyObject* match_span(MatchObject* self, PyObject* args) { - return get_from_match(self, args, match_get_span_by_index); -} - -/* MatchObject's 'spans' method. 
*/ -static PyObject* match_spans(MatchObject* self, PyObject* args) { - return get_from_match(self, args, match_get_spans_by_index); -} - -/* MatchObject's 'captures' method. */ -static PyObject* match_captures(MatchObject* self, PyObject* args) { - return get_from_match(self, args, match_get_captures_by_index); -} - -/* MatchObject's 'groups' method. - * - * Returns a tuple with one item per capture group (group 0 is not included); - * the optional 'default' argument is passed on for each group. - */ -static PyObject* match_groups(MatchObject* self, PyObject* args, PyObject* - kwargs) { - PyObject* result; - size_t g; - - PyObject* def = Py_None; - static char* kwlist[] = { "default", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O:groups", kwlist, &def)) - return NULL; - - result = PyTuple_New((Py_ssize_t)self->group_count); - if (!result) - return NULL; - - /* Group 0 is the entire matched portion of the string, so start at group - * 1. - */ - for (g = 0; g < self->group_count; g++) { - PyObject* item; - - item = match_get_group_by_index(self, (Py_ssize_t)g + 1, def); - if (!item) - goto error; - - /* PyTuple_SET_ITEM steals the reference to item. */ - PyTuple_SET_ITEM(result, g, item); - } - - return result; - -error: - Py_DECREF(result); - return NULL; -} - -/* MatchObject's 'groupdict' method. - * - * Returns a dict mapping each key of the pattern's groupindex to that group's - * value; it is empty when the pattern has no groupindex. - */ -static PyObject* match_groupdict(MatchObject* self, PyObject* args, PyObject* - kwargs) { - PyObject* result; - PyObject* keys; - Py_ssize_t g; - - PyObject* def = Py_None; - static char* kwlist[] = { "default", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O:groupdict", kwlist, - &def)) - return NULL; - - result = PyDict_New(); - if (!result || !self->pattern->groupindex) - return result; - - keys = PyMapping_Keys(self->pattern->groupindex); - if (!keys) - goto failed; - - for (g = 0; g < PyList_GET_SIZE(keys); g++) { - PyObject* key; - PyObject* value; - int status; - - /* PyList_GET_ITEM borrows a reference. 
*/ - key = PyList_GET_ITEM(keys, g); - if (!key) - goto failed; - - value = match_get_group(self, key, def, FALSE); - if (!value) - goto failed; - - /* PyDict_SetItem takes its own reference, so release ours. */ - status = PyDict_SetItem(result, key, value); - Py_DECREF(value); - if (status < 0) - goto failed; - } - - Py_DECREF(keys); - - return result; - -failed: - Py_XDECREF(keys); - Py_DECREF(result); - return NULL; -} - -/* MatchObject's 'capturesdict' method. - * - * Returns a dict mapping each key of the pattern's groupindex to the list of - * that group's captures; it is empty when the pattern has no groupindex. - */ -static PyObject* match_capturesdict(MatchObject* self) { - PyObject* result; - PyObject* keys; - Py_ssize_t g; - - result = PyDict_New(); - if (!result || !self->pattern->groupindex) - return result; - - keys = PyMapping_Keys(self->pattern->groupindex); - if (!keys) - goto failed; - - for (g = 0; g < PyList_GET_SIZE(keys); g++) { - PyObject* key; - Py_ssize_t group; - PyObject* captures; - int status; - - /* PyList_GET_ITEM borrows a reference. */ - key = PyList_GET_ITEM(keys, g); - if (!key) - goto failed; - - group = match_get_group_index(self, key, FALSE); - if (group < 0) - goto failed; - - captures = match_get_captures_by_index(self, group); - if (!captures) - goto failed; - - /* PyDict_SetItem takes its own reference, so release ours. */ - status = PyDict_SetItem(result, key, captures); - Py_DECREF(captures); - if (status < 0) - goto failed; - } - - Py_DECREF(keys); - - return result; - -failed: - Py_XDECREF(keys); - Py_DECREF(result); - return NULL; -} - -/* Gets a Python object by name from a named module. - * - * Imports the module and returns a new reference to the named attribute, or - * NULL if the import or the attribute lookup fails. - */ -Py_LOCAL_INLINE(PyObject*) get_object(char* module_name, char* object_name) { - PyObject* module; - PyObject* object; - - module = PyImport_ImportModule(module_name); - if (!module) - return NULL; - - object = PyObject_GetAttrString(module, object_name); - Py_DECREF(module); - - return object; -} - -/* Calls a function in a module. 
*/ -Py_LOCAL_INLINE(PyObject*) call(char* module_name, char* function_name, - PyObject* args) { - PyObject* function; - PyObject* result; - - if (!args) - return NULL; - - function = get_object(module_name, function_name); - if (!function) - return NULL; - - result = PyObject_CallObject(function, args); - Py_DECREF(function); - Py_DECREF(args); - - return result; -} - -/* Gets a replacement item from the replacement list. - * - * The replacement item could be a string literal or a group. - */ -Py_LOCAL_INLINE(PyObject*) get_match_replacement(MatchObject* self, PyObject* - item, size_t group_count) { - Py_ssize_t index; - - if (PyUnicode_Check(item) || PyString_Check(item)) { - /* It's a literal, which can be added directly to the list. */ - Py_INCREF(item); - return item; - } - - /* Is it a group reference? */ - index = as_group_index(item); - if (index == -1 && PyErr_Occurred()) { - /* Not a group either! */ - set_error(RE_ERROR_REPLACEMENT, NULL); - return NULL; - } - - if (index == 0) { - /* The entire matched portion of the string. */ - return get_slice(self->substring, self->match_start - - self->substring_offset, self->match_end - self->substring_offset); - } else if (index >= 1 && (size_t)index <= group_count) { - /* A group. If it didn't match then return None instead. */ - RE_GroupData* group; - - group = &self->groups[index - 1]; - - if (group->capture_count > 0) - return get_slice(self->substring, group->span.start - - self->substring_offset, group->span.end - - self->substring_offset); - else { - Py_INCREF(Py_None); - return Py_None; - } - } else { - /* No such group. */ - set_error(RE_ERROR_NO_SUCH_GROUP, NULL); - return NULL; - } -} - -/* Initialises the join list. */ -Py_LOCAL_INLINE(void) init_join_list(RE_JoinInfo* join_info, BOOL reversed, - BOOL is_unicode) { - join_info->list = NULL; - join_info->item = NULL; - join_info->reversed = reversed; - join_info->is_unicode = is_unicode; -} - -/* Adds an item to the join list. 
*/ -Py_LOCAL_INLINE(int) add_to_join_list(RE_JoinInfo* join_info, PyObject* item) { - PyObject* new_item; - int status; - - /* Get a new reference to the item, converting it first if it isn't - * already of the expected string type. - */ - if (join_info->is_unicode) { - if (PyUnicode_Check(item)) { - new_item = item; - Py_INCREF(new_item); - } else { - new_item = PyUnicode_FromObject(item); - if (!new_item) { - set_error(RE_ERROR_NOT_UNICODE, item); - return RE_ERROR_NOT_UNICODE; - } - } - } else { - if (PyString_Check(item)) { - new_item = item; - Py_INCREF(new_item); - } else { - new_item = PyUnicode_FromObject(item); - if (!new_item) { - set_error(RE_ERROR_NOT_STRING, item); - return RE_ERROR_NOT_STRING; - } - } - } - - /* If the list already exists then just add the item to it. */ - if (join_info->list) { - status = PyList_Append(join_info->list, new_item); - if (status < 0) - goto error; - - /* PyList_Append took its own reference, so release ours. */ - Py_DECREF(new_item); - return status; - } - - /* If we already have an item then we now have 2(!) and we need to put them - * into a list. - */ - if (join_info->item) { - join_info->list = PyList_New(2); - if (!join_info->list) { - status = RE_ERROR_MEMORY; - goto error; - } - - /* PyList_SET_ITEM steals the reference. */ - PyList_SET_ITEM(join_info->list, 0, join_info->item); - join_info->item = NULL; - - /* PyList_SET_ITEM steals the reference. */ - PyList_SET_ITEM(join_info->list, 1, new_item); - return 0; - } - - /* This is the first item. */ - join_info->item = new_item; - - return 0; - -error: - Py_DECREF(new_item); - set_error(status, NULL); - return status; -} - -/* Clears the join list. - * - * Releases both the list and the single pending item, if present. - */ -Py_LOCAL_INLINE(void) clear_join_list(RE_JoinInfo* join_info) { - Py_XDECREF(join_info->list); - Py_XDECREF(join_info->item); -} - -/* Joins together a list of strings for pattern_subx. */ -Py_LOCAL_INLINE(PyObject*) join_list_info(RE_JoinInfo* join_info) { - /* If the list already exists then just do the join. */ - if (join_info->list) { - PyObject* joiner; - PyObject* result; - - if (join_info->reversed) - /* The list needs to be reversed before being joined. 
*/ - PyList_Reverse(join_info->list); - - if (join_info->is_unicode) { - /* Concatenate the Unicode strings. */ - joiner = PyUnicode_FromUnicode(NULL, 0); - if (!joiner) { - clear_join_list(join_info); - return NULL; - } - - result = PyUnicode_Join(joiner, join_info->list); - } else { - joiner = PyString_FromString(""); - if (!joiner) { - clear_join_list(join_info); - return NULL; - } - - /* Concatenate the bytestrings. */ - result = _PyString_Join(joiner, join_info->list); - } - - Py_DECREF(joiner); - clear_join_list(join_info); - - return result; - } - - /* If we have only 1 item, we'll just return it; the caller takes over the - * reference that was held in join_info->item. - */ - if (join_info->item) - return join_info->item; - - /* There are no items, so return an empty string. */ - if (join_info->is_unicode) - return PyUnicode_FromUnicode(NULL, 0); - else - return PyString_FromString(""); -} - -/* Checks whether a string replacement is a literal. - * - * To keep it simple we'll say that a literal is a string which can be used - * as-is, i.e. one which doesn't contain special_char. - * - * Returns its length if it is a literal, otherwise -1. - */ -Py_LOCAL_INLINE(Py_ssize_t) check_replacement_string(PyObject* str_replacement, - unsigned char special_char) { - RE_StringInfo str_info; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - Py_ssize_t pos; - - if (!get_string(str_replacement, &str_info)) - return -1; - - /* Pick the character accessor which matches the string's character - * width. - */ - switch (str_info.charsize) { - case 1: - char_at = bytes1_char_at; - break; - case 2: - char_at = bytes2_char_at; - break; - case 4: - char_at = bytes4_char_at; - break; - default: -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); -#endif - return -1; - } - - for (pos = 0; pos < str_info.length; pos++) { - if (char_at(str_info.characters, pos) == special_char) { -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); - -#endif - return -1; - } - } - -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); - -#endif - return str_info.length; -} - -/* MatchObject's 'expand' method. 
*/ -static PyObject* match_expand(MatchObject* self, PyObject* str_template) { - Py_ssize_t literal_length; - PyObject* replacement; - RE_JoinInfo join_info; - Py_ssize_t size; - Py_ssize_t i; - - /* Is the template just a literal (i.e. it contains no backslash)? */ - literal_length = check_replacement_string(str_template, '\\'); - if (literal_length >= 0) { - /* It's a literal. */ - Py_INCREF(str_template); - return str_template; - } - - /* Hand the template to the template compiler. */ - replacement = call(RE_MODULE, "_compile_replacement_helper", - PyTuple_Pack(2, self->pattern, str_template)); - if (!replacement) - return NULL; - - init_join_list(&join_info, FALSE, PyUnicode_Check(self->string)); - - /* Add each part of the template to the list. */ - size = PyList_GET_SIZE(replacement); - for (i = 0; i < size; i++) { - PyObject* item; - PyObject* str_item; - - /* PyList_GET_ITEM borrows a reference. */ - item = PyList_GET_ITEM(replacement, i); - str_item = get_match_replacement(self, item, self->group_count); - if (!str_item) - goto error; - - /* Add to the list, skipping groups which didn't match (None). */ - if (str_item == Py_None) - Py_DECREF(str_item); - else { - int status; - - status = add_to_join_list(&join_info, str_item); - Py_DECREF(str_item); - if (status < 0) - goto error; - } - } - - Py_DECREF(replacement); - - /* Convert the list to a single string (also cleans up join_info). */ - return join_list_info(&join_info); - -error: - clear_join_list(&join_info); - Py_DECREF(replacement); - return NULL; -} - -#if PY_VERSION_HEX >= 0x02060000 -/* Gets a MatchObject's group dictionary. - * - * Maps each key of the pattern's groupindex to that group's value, using None - * as the default. - */ -Py_LOCAL_INLINE(PyObject*) match_get_group_dict(MatchObject* self) { - PyObject* result; - PyObject* keys; - Py_ssize_t g; - - result = PyDict_New(); - if (!result || !self->pattern->groupindex) - return result; - - keys = PyMapping_Keys(self->pattern->groupindex); - if (!keys) - goto failed; - - for (g = 0; g < PyList_GET_SIZE(keys); g++) { - PyObject* key; - PyObject* value; - int status; - - /* PyList_GET_ITEM borrows a reference. 
*/ - key = PyList_GET_ITEM(keys, g); - if (!key) - goto failed; - - value = match_get_group(self, key, Py_None, FALSE); - if (!value) - goto failed; - - /* PyDict_SetItem takes its own reference, so release ours. */ - status = PyDict_SetItem(result, key, value); - Py_DECREF(value); - if (status < 0) - goto failed; - } - - Py_DECREF(keys); - - return result; - -failed: - Py_XDECREF(keys); - Py_DECREF(result); - return NULL; -} - -/* The type of a CaptureObject; only the leading fields are initialised here. - * NOTE(review): the basic size is given as sizeof(MatchObject) rather than - * sizeof(CaptureObject) - confirm that this is intentional. - */ -static PyTypeObject Capture_Type = { - PyObject_HEAD_INIT(NULL) - 0, - "_" RE_MODULE "." "Capture", - sizeof(MatchObject) -}; - -/* Creates a new CaptureObject. - * - * The object stores the group index and a pointer to the caller's MatchObject - * pointer; no reference to the match is taken here. - */ -Py_LOCAL_INLINE(PyObject*) make_capture_object(MatchObject** match_indirect, - Py_ssize_t index) { - CaptureObject* capture; - - capture = PyObject_NEW(CaptureObject, &Capture_Type); - if (!capture) - return NULL; - - capture->group_index = index; - capture->match_indirect = match_indirect; - - return (PyObject*)capture; -} - -#if PY_VERSION_HEX >= 0x02060000 -/* Makes a MatchObject's capture dictionary. - * - * Maps each key of the pattern's groupindex to a new CaptureObject for the - * corresponding group number. - */ -Py_LOCAL_INLINE(PyObject*) make_capture_dict(MatchObject* match, MatchObject** - match_indirect) { - PyObject* result; - PyObject* keys; - PyObject* values = NULL; - Py_ssize_t g; - - result = PyDict_New(); - if (!result) - return result; - - keys = PyMapping_Keys(match->pattern->groupindex); - if (!keys) - goto failed; - - values = PyMapping_Values(match->pattern->groupindex); - if (!values) - goto failed; - - for (g = 0; g < PyList_GET_SIZE(keys); g++) { - PyObject* key; - PyObject* value; - Py_ssize_t v; - int status; - - /* PyList_GET_ITEM borrows a reference. */ - key = PyList_GET_ITEM(keys, g); - if (!key) - goto failed; - - /* PyList_GET_ITEM borrows a reference. 
*/ - value = PyList_GET_ITEM(values, g); - if (!value) - goto failed; - - v = PyLong_AsLong(value); - if (v == -1 && PyErr_Occurred()) - goto failed; - - value = make_capture_object(match_indirect, v); - if (!value) - goto failed; - - status = PyDict_SetItem(result, key, value); - Py_DECREF(value); - if (status < 0) - goto failed; - } - - Py_DECREF(values); - Py_DECREF(keys); - - return result; - -failed: - Py_XDECREF(values); - Py_XDECREF(keys); - Py_DECREF(result); - return NULL; -} -#endif - -/* MatchObject's 'expandf' method. */ -static PyObject* match_expandf(MatchObject* self, PyObject* str_template) { - PyObject* format_func; - PyObject* args = NULL; - size_t g; - PyObject* kwargs = NULL; - PyObject* result; - - format_func = PyObject_GetAttrString(str_template, "format"); - if (!format_func) - return NULL; - - args = PyTuple_New((Py_ssize_t)self->group_count + 1); - if (!args) - goto error; - - for (g = 0; g < self->group_count + 1; g++) - /* PyTuple_SetItem borrows the reference. */ - PyTuple_SetItem(args, (Py_ssize_t)g, make_capture_object(&self, - (Py_ssize_t)g)); - - kwargs = make_capture_dict(self, &self); - if (!kwargs) - goto error; - - result = PyObject_Call(format_func, args, kwargs); - - Py_DECREF(kwargs); - Py_DECREF(args); - Py_DECREF(format_func); - - return result; - -error: - Py_XDECREF(args); - Py_DECREF(format_func); - return NULL; -} - -#endif -Py_LOCAL_INLINE(PyObject*) make_match_copy(MatchObject* self); - -/* MatchObject's '__copy__' method. */ -static PyObject* match_copy(MatchObject* self, PyObject* unused) { - return make_match_copy(self); -} - -/* MatchObject's '__deepcopy__' method. */ -static PyObject* match_deepcopy(MatchObject* self, PyObject* memo) { - return make_match_copy(self); -} - -/* MatchObject's 'regs' attribute. 
*/ -static PyObject* match_regs(MatchObject* self) { - PyObject* regs; - PyObject* item; - size_t g; - - if (self->regs) { - Py_INCREF(self->regs); - - return self->regs; - } - - regs = PyTuple_New((Py_ssize_t)self->group_count + 1); - if (!regs) - return NULL; - - item = Py_BuildValue("nn", self->match_start, self->match_end); - if (!item) - goto error; - - /* PyTuple_SET_ITEM borrows the reference. */ - PyTuple_SET_ITEM(regs, 0, item); - - for (g = 0; g < self->group_count; g++) { - RE_GroupSpan* span; - - span = &self->groups[g].span; - item = Py_BuildValue("nn", span->start, span->end); - if (!item) - goto error; - - /* PyTuple_SET_ITEM borrows the reference. */ - PyTuple_SET_ITEM(regs, g + 1, item); - } - - self->regs = regs; - - Py_INCREF(self->regs); - - return self->regs; - -error: - Py_DECREF(regs); - return NULL; -} - -/* MatchObject's slice method. */ -Py_LOCAL_INLINE(PyObject*) match_get_group_slice(MatchObject* self, PyObject* - slice) { - Py_ssize_t start; - Py_ssize_t end; - Py_ssize_t step; - Py_ssize_t slice_length; - - if (PySlice_GetIndicesEx((PySliceObject*)slice, - (Py_ssize_t)self->group_count + 1, &start, &end, &step, &slice_length) < - 0) - return NULL; - - if (slice_length <= 0) - return PyTuple_New(0); - else { - PyObject* result; - Py_ssize_t cur; - Py_ssize_t i; - - result = PyTuple_New(slice_length); - if (!result) - return NULL; - - cur = start; - for (i = 0; i < slice_length; i++) { - /* PyTuple_SetItem borrows the reference. */ - PyTuple_SetItem(result, i, match_get_group_by_index(self, cur, - Py_None)); - cur += step; - } - - return result; - } -} - -/* MatchObject's length method. */ -Py_LOCAL_INLINE(Py_ssize_t) match_length(MatchObject* self) { - return (Py_ssize_t)self->group_count + 1; -} - -/* MatchObject's '__getitem__' method. 
*/ -static PyObject* match_getitem(MatchObject* self, PyObject* item) { - if (PySlice_Check(item)) - return match_get_group_slice(self, item); - - return match_get_group(self, item, Py_None, TRUE); -} - -/* Determines the portion of the target string which is covered by the group - * captures. - */ -Py_LOCAL_INLINE(void) determine_target_substring(MatchObject* match, - Py_ssize_t* slice_start, Py_ssize_t* slice_end) { - Py_ssize_t start; - Py_ssize_t end; - size_t g; - - start = match->pos; - end = match->endpos; - - for (g = 0; g < match->group_count; g++) { - RE_GroupSpan* span; - size_t c; - - span = &match->groups[g].span; - if (span->start >= 0 && span->start < start) - start = span->start; - if (span->end >= 0 && span->end > end) - end = span->end; - - for (c = 0; c < match->groups[g].capture_count; c++) { - RE_GroupSpan* span; - - span = match->groups[g].captures; - if (span->start >= 0 && span->start < start) - start = span->start; - if (span->end >= 0 && span->end > end) - end = span->end; - } - } - - *slice_start = start; - *slice_end = end; -} - -/* MatchObject's 'detach_string' method. */ -static PyObject* match_detach_string(MatchObject* self, PyObject* unused) { - if (self->string) { - Py_ssize_t start; - Py_ssize_t end; - PyObject* substring; - - determine_target_substring(self, &start, &end); - - substring = get_slice(self->string, start, end); - if (substring) { - Py_XDECREF(self->substring); - self->substring = substring; - self->substring_offset = start; - - Py_DECREF(self->string); - self->string = NULL; - } - } - - Py_INCREF(Py_None); - return Py_None; -} - -/* The documentation of a MatchObject. */ -PyDoc_STRVAR(match_group_doc, - "group([group1, ...]) --> string or tuple of strings.\n\ - Return one or more subgroups of the match. 
If there is a single argument,\n\ - the result is a single string, or None if the group did not contribute to\n\ - the match; if there are multiple arguments, the result is a tuple with one\n\ - item per argument; if there are no arguments, the whole match is returned.\n\ - Group 0 is the whole match."); - -/* Docstring of the 'start' method. */ -PyDoc_STRVAR(match_start_doc, - "start([group1, ...]) --> int or tuple of ints.\n\ - Return the index of the start of one or more subgroups of the match. If\n\ - there is a single argument, the result is an index, or -1 if the group did\n\ - not contribute to the match; if there are multiple arguments, the result is\n\ - a tuple with one item per argument; if there are no arguments, the index of\n\ - the start of the whole match is returned. Group 0 is the whole match."); - -/* Docstring of the 'end' method. */ -PyDoc_STRVAR(match_end_doc, - "end([group1, ...]) --> int or tuple of ints.\n\ - Return the index of the end of one or more subgroups of the match. If there\n\ - is a single argument, the result is an index, or -1 if the group did not\n\ - contribute to the match; if there are multiple arguments, the result is a\n\ - tuple with one item per argument; if there are no arguments, the index of\n\ - the end of the whole match is returned. Group 0 is the whole match."); - -/* Docstring of the 'span' method. */ -PyDoc_STRVAR(match_span_doc, - "span([group1, ...]) --> 2-tuple of int or tuple of 2-tuple of ints.\n\ - Return the span (a 2-tuple of the indices of the start and end) of one or\n\ - more subgroups of the match. If there is a single argument, the result is a\n\ - span, or (-1, -1) if the group did not contribute to the match; if there are\n\ - multiple arguments, the result is a tuple with one item per argument; if\n\ - there are no arguments, the span of the whole match is returned. Group 0 is\n\ - the whole match."); - -/* Docstring of the 'groups' method. */ -PyDoc_STRVAR(match_groups_doc, - "groups(default=None) --> tuple of strings.\n\ - Return a tuple containing all the subgroups of the match. 
The argument is\n\ - the default for groups that did not participate in the match."); - -/* Docstring of the 'groupdict' method. */ -PyDoc_STRVAR(match_groupdict_doc, - "groupdict(default=None) --> dict.\n\ - Return a dictionary containing all the named subgroups of the match, keyed\n\ - by the subgroup name. The argument is the value to be given for groups that\n\ - did not participate in the match."); - -/* Docstring of the 'capturesdict' method. */ -PyDoc_STRVAR(match_capturesdict_doc, - "capturesdict() --> dict.\n\ - Return a dictionary containing the captures of all the named subgroups of the\n\ - match, keyed by the subgroup name."); - -/* Docstring of the 'expand' method. */ -PyDoc_STRVAR(match_expand_doc, - "expand(template) --> string.\n\ - Return the string obtained by doing backslash substitution on the template,\n\ - as done by the sub() method."); - -#if PY_VERSION_HEX >= 0x02060000 -/* Docstring of the 'expandf' method. */ -PyDoc_STRVAR(match_expandf_doc, - "expandf(format) --> string.\n\ - Return the string obtained by using the format, as done by the subf()\n\ - method."); - -#endif -/* Docstring of the 'captures' method. */ -PyDoc_STRVAR(match_captures_doc, - "captures([group1, ...]) --> list of strings or tuple of list of strings.\n\ - Return the captures of one or more subgroups of the match. If there is a\n\ - single argument, the result is a list of strings; if there are multiple\n\ - arguments, the result is a tuple of lists with one item per argument; if\n\ - there are no arguments, the captures of the whole match is returned. Group\n\ - 0 is the whole match."); - -/* Docstring of the 'starts' method. */ -PyDoc_STRVAR(match_starts_doc, - "starts([group1, ...]) --> list of ints or tuple of list of ints.\n\ - Return the indices of the starts of the captures of one or more subgroups of\n\ - the match. If there is a single argument, the result is a list of indices;\n\ - if there are multiple arguments, the result is a tuple of lists with one\n\ - item per argument; if there are no arguments, the indices of the starts of\n\ - the captures of the whole match is returned. 
Group 0 is the whole match."); - -/* Docstring of the 'ends' method. */ -PyDoc_STRVAR(match_ends_doc, - "ends([group1, ...]) --> list of ints or tuple of list of ints.\n\ - Return the indices of the ends of the captures of one or more subgroups of\n\ - the match. If there is a single argument, the result is a list of indices;\n\ - if there are multiple arguments, the result is a tuple of lists with one\n\ - item per argument; if there are no arguments, the indices of the ends of the\n\ - captures of the whole match is returned. Group 0 is the whole match."); - -/* Docstring of the 'spans' method. */ -PyDoc_STRVAR(match_spans_doc, - "spans([group1, ...]) --> list of 2-tuple of ints or tuple of list of 2-tuple of ints.\n\ - Return the spans (a 2-tuple of the indices of the start and end) of the\n\ - captures of one or more subgroups of the match. If there is a single\n\ - argument, the result is a list of spans; if there are multiple arguments,\n\ - the result is a tuple of lists with one item per argument; if there are no\n\ - arguments, the spans of the captures of the whole match is returned. Group\n\ - 0 is the whole match."); - -/* Docstring of the 'detach_string' method. */ -PyDoc_STRVAR(match_detach_string_doc, - "detach_string()\n\ - Detaches the target string from the match object. The 'string' attribute\n\ - will become None."); - -/* MatchObject's methods. 
*/ -static PyMethodDef match_methods[] = { - {"group", (PyCFunction)match_group, METH_VARARGS, match_group_doc}, - {"start", (PyCFunction)match_start, METH_VARARGS, match_start_doc}, - {"end", (PyCFunction)match_end, METH_VARARGS, match_end_doc}, - {"span", (PyCFunction)match_span, METH_VARARGS, match_span_doc}, - {"groups", (PyCFunction)match_groups, METH_VARARGS|METH_KEYWORDS, - match_groups_doc}, - {"groupdict", (PyCFunction)match_groupdict, METH_VARARGS|METH_KEYWORDS, - match_groupdict_doc}, - {"capturesdict", (PyCFunction)match_capturesdict, METH_NOARGS, - match_capturesdict_doc}, - {"expand", (PyCFunction)match_expand, METH_O, match_expand_doc}, -#if PY_VERSION_HEX >= 0x02060000 - {"expandf", (PyCFunction)match_expandf, METH_O, match_expandf_doc}, -#endif - {"captures", (PyCFunction)match_captures, METH_VARARGS, - match_captures_doc}, - {"starts", (PyCFunction)match_starts, METH_VARARGS, match_starts_doc}, - {"ends", (PyCFunction)match_ends, METH_VARARGS, match_ends_doc}, - {"spans", (PyCFunction)match_spans, METH_VARARGS, match_spans_doc}, - {"detach_string", (PyCFunction)match_detach_string, METH_NOARGS, - match_detach_string_doc}, - {"__copy__", (PyCFunction)match_copy, METH_NOARGS}, - {"__deepcopy__", (PyCFunction)match_deepcopy, METH_O}, - {"__getitem__", (PyCFunction)match_getitem, METH_O|METH_COEXIST}, - {NULL, NULL} /* Sentinel */ -}; - -/* The docstring of the Match type itself. */ -PyDoc_STRVAR(match_doc, "Match object"); - -/* MatchObject's 'lastindex' attribute. - * - * Returns lastindex as an int, or None if it is negative. - */ -static PyObject* match_lastindex(PyObject* self_) { - MatchObject* self; - - self = (MatchObject*)self_; - - if (self->lastindex >= 0) - return Py_BuildValue("n", self->lastindex); - - Py_INCREF(Py_None); - return Py_None; -} - -/* MatchObject's 'lastgroup' attribute. 
*/ -static PyObject* match_lastgroup(PyObject* self_) { - MatchObject* self; - - self = (MatchObject*)self_; - - if (self->pattern->indexgroup && self->lastgroup >= 0) { - PyObject* index; - PyObject* result; - - index = Py_BuildValue("n", self->lastgroup); - - /* PyDict_GetItem returns borrows a reference. */ - result = PyDict_GetItem(self->pattern->indexgroup, index); - Py_DECREF(index); - if (result) { - Py_INCREF(result); - return result; - } - PyErr_Clear(); - } - - Py_INCREF(Py_None); - return Py_None; -} - -/* MatchObject's 'string' attribute. */ -static PyObject* match_string(PyObject* self_) { - MatchObject* self; - - self = (MatchObject*)self_; - - if (self->string) { - Py_INCREF(self->string); - return self->string; - } else { - Py_INCREF(Py_None); - return Py_None; - } -} -#if PY_VERSION_HEX < 0x02060000 - -/* MatchObject's 'partial' attribute. */ -static PyObject* match_partial(PyObject* self_) { - MatchObject* self; - PyObject* result; - - self = (MatchObject*)self_; - - result = self->partial ? Py_True : Py_False; - Py_INCREF(result); - - return result; -} -#endif - -/* MatchObject's 'fuzzy_counts' attribute. 
*/ -static PyObject* match_fuzzy_counts(PyObject* self_) { - MatchObject* self; - - self = (MatchObject*)self_; - - return Py_BuildValue("nnn", self->fuzzy_counts[RE_FUZZY_SUB], - self->fuzzy_counts[RE_FUZZY_INS], self->fuzzy_counts[RE_FUZZY_DEL]); -} - -static PyGetSetDef match_getset[] = { - {"lastindex", (getter)match_lastindex, (setter)NULL, - "The group number of the last matched capturing group, or None."}, - {"lastgroup", (getter)match_lastgroup, (setter)NULL, - "The name of the last matched capturing group, or None."}, - {"regs", (getter)match_regs, (setter)NULL, - "A tuple of the spans of the capturing groups."}, - {"string", (getter)match_string, (setter)NULL, - "The string that was searched, or None if it has been detached."}, -#if PY_VERSION_HEX < 0x02060000 - {"partial", (getter)match_partial, (setter)NULL, - "Whether it's a partial match."}, -#endif - {"fuzzy_counts", (getter)match_fuzzy_counts, (setter)NULL, - "A tuple of the number of substitutions, insertions and deletions."}, - {NULL} /* Sentinel */ -}; - -static PyMemberDef match_members[] = { - {"re", T_OBJECT, offsetof(MatchObject, pattern), READONLY, - "The regex object that produced this match object."}, - {"pos", T_PYSSIZET, offsetof(MatchObject, pos), READONLY, - "The position at which the regex engine starting searching."}, - {"endpos", T_PYSSIZET, offsetof(MatchObject, endpos), READONLY, - "The final position beyond which the regex engine won't search."}, -#if PY_VERSION_HEX >= 0x02060000 - {"partial", T_BOOL, offsetof(MatchObject, partial), READONLY, - "Whether it's a partial match."}, -#endif - {NULL} /* Sentinel */ -}; - -static PyMappingMethods match_as_mapping = { - (lenfunc)match_length, /* mp_length */ - (binaryfunc)match_getitem, /* mp_subscript */ - 0, /* mp_ass_subscript */ -}; - -static PyTypeObject Match_Type = { - PyObject_HEAD_INIT(NULL) - 0, - "_" RE_MODULE "." "Match", - sizeof(MatchObject) -}; - -/* Copies the groups. 
*/ -Py_LOCAL_INLINE(RE_GroupData*) copy_groups(RE_GroupData* groups, size_t - group_count) { - size_t span_count; - size_t g; - RE_GroupData* groups_copy; - RE_GroupSpan* spans_copy; - size_t offset; - - /* Calculate the total size of the group info. */ - span_count = 0; - for (g = 0; g < group_count; g++) - span_count += groups[g].capture_count; - - /* Allocate the storage for the group info in a single block. */ - groups_copy = (RE_GroupData*)re_alloc(group_count * sizeof(RE_GroupData) + - span_count * sizeof(RE_GroupSpan)); - if (!groups_copy) - return NULL; - - /* The storage for the spans comes after the other group info. */ - spans_copy = (RE_GroupSpan*)&groups_copy[group_count]; - - /* There's no need to initialise the spans info. */ - memset(groups_copy, 0, group_count * sizeof(RE_GroupData)); - - offset = 0; - for (g = 0; g < group_count; g++) { - RE_GroupData* orig; - RE_GroupData* copy; - - orig = &groups[g]; - copy = &groups_copy[g]; - copy->span = orig->span; - - copy->captures = &spans_copy[offset]; - offset += orig->capture_count; - - if (orig->capture_count > 0) { - Py_MEMCPY(copy->captures, orig->captures, orig->capture_count * - sizeof(RE_GroupSpan)); - copy->capture_capacity = orig->capture_count; - copy->capture_count = orig->capture_count; - } - } - - return groups_copy; -} - -/* Makes a copy of a MatchObject. */ -Py_LOCAL_INLINE(PyObject*) make_match_copy(MatchObject* self) { - MatchObject* match; - - if (!self->string) { - /* The target string has been detached, so the MatchObject is now - * immutable. - */ - Py_INCREF(self); - return (PyObject*)self; - } - - /* Create a MatchObject. 
*/ - match = PyObject_NEW(MatchObject, &Match_Type); - if (!match) - return NULL; - - match->string = self->string; - match->substring = self->substring; - match->substring_offset = self->substring_offset; - match->pattern = self->pattern; - match->pos = self->pos; - match->endpos = self->endpos; - match->match_start = self->match_start; - match->match_end = self->match_end; - match->lastindex = self->lastindex; - match->lastgroup = self->lastgroup; - match->group_count = self->group_count; - match->groups = NULL; /* Copy them later. */ - match->regs = self->regs; - Py_MEMCPY(match->fuzzy_counts, self->fuzzy_counts, - sizeof(self->fuzzy_counts)); - match->partial = self->partial; - Py_INCREF(match->string); - Py_INCREF(match->substring); - Py_INCREF(match->pattern); - Py_XINCREF(match->regs); - - /* Copy the groups to the MatchObject. */ - if (self->group_count > 0) { - match->groups = copy_groups(self->groups, self->group_count); - if (!match->groups) { - Py_DECREF(match); - return NULL; - } - } - - return (PyObject*)match; -} - -/* Creates a new MatchObject. */ -Py_LOCAL_INLINE(PyObject*) pattern_new_match(PatternObject* pattern, RE_State* - state, int status) { - /* Create MatchObject (from state object). */ - if (status > 0 || status == RE_ERROR_PARTIAL) { - MatchObject* match; - - /* Create a MatchObject. 
*/ - match = PyObject_NEW(MatchObject, &Match_Type); - if (!match) - return NULL; - - match->string = state->string; - match->substring = state->string; - match->substring_offset = 0; - match->pattern = pattern; - match->regs = NULL; - - if (pattern->is_fuzzy) { - match->fuzzy_counts[RE_FUZZY_SUB] = - state->total_fuzzy_counts[RE_FUZZY_SUB]; - match->fuzzy_counts[RE_FUZZY_INS] = - state->total_fuzzy_counts[RE_FUZZY_INS]; - match->fuzzy_counts[RE_FUZZY_DEL] = - state->total_fuzzy_counts[RE_FUZZY_DEL]; - } else - memset(match->fuzzy_counts, 0, sizeof(match->fuzzy_counts)); - - match->partial = status == RE_ERROR_PARTIAL; - Py_INCREF(match->string); - Py_INCREF(match->substring); - Py_INCREF(match->pattern); - - /* Copy the groups to the MatchObject. */ - if (pattern->public_group_count > 0) { - match->groups = copy_groups(state->groups, - pattern->public_group_count); - if (!match->groups) { - Py_DECREF(match); - return NULL; - } - } else - match->groups = NULL; - - match->group_count = pattern->public_group_count; - - match->pos = state->slice_start; - match->endpos = state->slice_end; - - if (state->reverse) { - match->match_start = state->text_pos; - match->match_end = state->match_pos; - } else { - match->match_start = state->match_pos; - match->match_end = state->text_pos; - } - - match->lastindex = state->lastindex; - match->lastgroup = state->lastgroup; - - return (PyObject*)match; - } else if (status == 0) { - /* No match. */ - Py_INCREF(Py_None); - return Py_None; - } else { - /* Internal error. */ - set_error(status, NULL); - return NULL; - } -} - -/* Gets the text of a capture group from a state. 
*/ -Py_LOCAL_INLINE(PyObject*) state_get_group(RE_State* state, Py_ssize_t index, - PyObject* string, BOOL empty) { - RE_GroupData* group; - Py_ssize_t start; - Py_ssize_t end; - - group = &state->groups[index - 1]; - - if (string != Py_None && index >= 1 && (size_t)index <= - state->pattern->public_group_count && group->capture_count > 0) { - start = group->span.start; - end = group->span.end; - } else { - if (empty) - /* Want an empty string. */ - start = end = 0; - else { - Py_INCREF(Py_None); - return Py_None; - } - } - - return get_slice(string, start, end); -} - -/* Acquires the lock (mutex) on the state if there's one. - * - * It also increments the owner's refcount just to ensure that it won't be - * destroyed by another thread. - */ -Py_LOCAL_INLINE(void) acquire_state_lock(PyObject* owner, RE_SafeState* - safe_state) { - RE_State* state; - - state = safe_state->re_state; - - if (state->lock) { - /* In order to avoid deadlock we need to release the GIL while trying - * to acquire the lock. - */ - Py_INCREF(owner); - if (!PyThread_acquire_lock(state->lock, 0)) { - release_GIL(safe_state); - PyThread_acquire_lock(state->lock, 1); - acquire_GIL(safe_state); - } - } -} - -/* Releases the lock (mutex) on the state if there's one. - * - * It also decrements the owner's refcount, which was incremented when the lock - * was acquired. - */ -Py_LOCAL_INLINE(void) release_state_lock(PyObject* owner, RE_SafeState* - safe_state) { - RE_State* state; - - state = safe_state->re_state; - - if (state->lock) { - PyThread_release_lock(state->lock); - Py_DECREF(owner); - } -} - -/* Implements the functionality of ScanObject's search and match methods. */ -Py_LOCAL_INLINE(PyObject*) scanner_search_or_match(ScannerObject* self, BOOL - search) { - RE_State* state; - RE_SafeState safe_state; - PyObject* match; - - state = &self->state; - - /* Initialise the "safe state" structure. 
*/ - safe_state.re_state = state; - safe_state.thread_state = NULL; - - /* Acquire the state lock in case we're sharing the scanner object across - * threads. - */ - acquire_state_lock((PyObject*)self, &safe_state); - - if (self->status == RE_ERROR_FAILURE || self->status == RE_ERROR_PARTIAL) { - /* No or partial match. */ - release_state_lock((PyObject*)self, &safe_state); - Py_INCREF(Py_None); - return Py_None; - } else if (self->status < 0) { - /* Internal error. */ - release_state_lock((PyObject*)self, &safe_state); - set_error(self->status, NULL); - return NULL; - } - - /* Look for another match. */ - self->status = do_match(&safe_state, search); - if (self->status >= 0 || self->status == RE_ERROR_PARTIAL) { - /* Create the match object. */ - match = pattern_new_match(self->pattern, state, self->status); - - if (search && state->overlapped) { - /* Advance one character. */ - Py_ssize_t step; - - step = state->reverse ? -1 : 1; - state->text_pos = state->match_pos + step; - state->must_advance = FALSE; - } else - /* Continue from where we left off, but don't allow 2 contiguous - * zero-width matches. - */ - state->must_advance = state->text_pos == state->match_pos; - } else - /* Internal error. */ - match = NULL; - - /* Release the state lock. */ - release_state_lock((PyObject*)self, &safe_state); - - return match; -} - -/* ScannerObject's 'match' method. */ -static PyObject* scanner_match(ScannerObject* self, PyObject* unused) { - return scanner_search_or_match(self, FALSE); -} - -/* ScannerObject's 'search' method. */ -static PyObject* scanner_search(ScannerObject* self, PyObject* unused) { - return scanner_search_or_match(self, TRUE); -} - -/* ScannerObject's 'next' method. */ -static PyObject* scanner_next(PyObject* self) { - PyObject* match; - - match = scanner_search((ScannerObject*)self, NULL); - - if (match == Py_None) { - /* No match. 
*/ - Py_DECREF(Py_None); - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - - return match; -} - -/* Returns an iterator for a ScannerObject. - * - * The iterator is actually the ScannerObject itself. - */ -static PyObject* scanner_iter(PyObject* self) { - Py_INCREF(self); - return self; -} - -/* Gets the next result from a scanner iterator. */ -static PyObject* scanner_iternext(PyObject* self) { - PyObject* match; - - match = scanner_search((ScannerObject*)self, NULL); - - if (match == Py_None) { - /* No match. */ - Py_DECREF(match); - return NULL; - } - - return match; -} - -/* Makes a copy of a ScannerObject. - * - * It actually doesn't make a copy, just returns the original object. - */ -Py_LOCAL_INLINE(PyObject*) make_scanner_copy(ScannerObject* self) { - Py_INCREF(self); - return (PyObject*)self; -} - -/* ScannerObject's '__copy__' method. */ -static PyObject* scanner_copy(ScannerObject* self, PyObject* unused) { - return make_scanner_copy(self); -} - -/* ScannerObject's '__deepcopy__' method. */ -static PyObject* scanner_deepcopy(ScannerObject* self, PyObject* memo) { - return make_scanner_copy(self); -} - -/* The documentation of a ScannerObject. */ -PyDoc_STRVAR(scanner_match_doc, - "match() --> MatchObject or None.\n\ - Match at the current position in the string."); - -PyDoc_STRVAR(scanner_search_doc, - "search() --> MatchObject or None.\n\ - Search from the current position in the string."); - -/* ScannerObject's methods. */ -static PyMethodDef scanner_methods[] = { - {"next", (PyCFunction)scanner_next, METH_NOARGS}, - {"match", (PyCFunction)scanner_match, METH_NOARGS, scanner_match_doc}, - {"search", (PyCFunction)scanner_search, METH_NOARGS, scanner_search_doc}, - {"__copy__", (PyCFunction)scanner_copy, METH_NOARGS}, - {"__deepcopy__", (PyCFunction)scanner_deepcopy, METH_O}, - {NULL, NULL} -}; - -PyDoc_STRVAR(scanner_doc, "Scanner object"); - -/* Deallocates a ScannerObject. 
*/ -static void scanner_dealloc(PyObject* self_) { - ScannerObject* self; - - self = (ScannerObject*)self_; - - if (self->status != RE_ERROR_INITIALISING) - state_fini(&self->state); - Py_DECREF(self->pattern); - PyObject_DEL(self); -} - -static PyMemberDef scanner_members[] = { - {"pattern", T_OBJECT, offsetof(ScannerObject, pattern), READONLY, - "The regex object that produced this scanner object."}, - {NULL} /* Sentinel */ -}; - -static PyTypeObject Scanner_Type = { - PyObject_HEAD_INIT(NULL) - 0, - "_" RE_MODULE "." "Scanner", - sizeof(ScannerObject) -}; - -/* Decodes a 'concurrent' argument. */ -Py_LOCAL_INLINE(int) decode_concurrent(PyObject* concurrent) { - Py_ssize_t value; - - if (concurrent == Py_None) - return RE_CONC_DEFAULT; - - value = PyLong_AsLong(concurrent); - if (value == -1 && PyErr_Occurred()) { - set_error(RE_ERROR_CONCURRENT, NULL); - return -1; - } - - return value ? RE_CONC_YES : RE_CONC_NO; -} - -/* Decodes a 'partial' argument. */ -Py_LOCAL_INLINE(BOOL) decode_partial(PyObject* partial) { - Py_ssize_t value; - - if (partial == Py_False) - return FALSE; - - if (partial == Py_True) - return TRUE; - - value = PyLong_AsLong(partial); - if (value == -1 && PyErr_Occurred()) { - PyErr_Clear(); - return TRUE; - } - - return value != 0; -} - -/* Creates a new ScannerObject. */ -static PyObject* pattern_scanner(PatternObject* pattern, PyObject* args, - PyObject* kwargs) { - /* Create search state object. 
*/ - ScannerObject* self; - Py_ssize_t start; - Py_ssize_t end; - int conc; - BOOL part; - - PyObject* string; - PyObject* pos = Py_None; - PyObject* endpos = Py_None; - Py_ssize_t overlapped = FALSE; - PyObject* concurrent = Py_None; - PyObject* partial = Py_False; - static char* kwlist[] = { "string", "pos", "endpos", "overlapped", - "concurrent", "partial", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOnOO:scanner", kwlist, - &string, &pos, &endpos, &overlapped, &concurrent, &partial)) - return NULL; - - start = as_string_index(pos, 0); - if (start == -1 && PyErr_Occurred()) - return NULL; - - end = as_string_index(endpos, PY_SSIZE_T_MAX); - if (end == -1 && PyErr_Occurred()) - return NULL; - - conc = decode_concurrent(concurrent); - if (conc < 0) - return NULL; - - part = decode_partial(partial); - - /* Create a scanner object. */ - self = PyObject_NEW(ScannerObject, &Scanner_Type); - if (!self) - return NULL; - - self->pattern = pattern; - Py_INCREF(self->pattern); - self->status = RE_ERROR_INITIALISING; - - /* The MatchObject, and therefore repeated captures, will be visible. */ - if (!state_init(&self->state, pattern, string, start, end, overlapped != 0, - conc, part, TRUE, TRUE, FALSE)) { - Py_DECREF(self); - return NULL; - } - - self->status = RE_ERROR_SUCCESS; - - return (PyObject*) self; -} - -/* Performs the split for the SplitterObject. */ -Py_LOCAL_INLINE(PyObject*) next_split_part(SplitterObject* self) { - RE_State* state; - RE_SafeState safe_state; - PyObject* result = NULL; /* Initialise to stop compiler warning. */ - - state = &self->state; - - /* Initialise the "safe state" structure. */ - safe_state.re_state = state; - safe_state.thread_state = NULL; - - /* Acquire the state lock in case we're sharing the splitter object across - * threads. - */ - acquire_state_lock((PyObject*)self, &safe_state); - - if (self->status == RE_ERROR_FAILURE || self->status == RE_ERROR_PARTIAL) { - /* Finished. 
*/ - release_state_lock((PyObject*)self, &safe_state); - result = Py_False; - Py_INCREF(result); - return result; - } else if (self->status < 0) { - /* Internal error. */ - release_state_lock((PyObject*)self, &safe_state); - set_error(self->status, NULL); - return NULL; - } - - if (self->index == 0) { - if (self->split_count < self->maxsplit) { - Py_ssize_t step; - Py_ssize_t end_pos; - - if (state->reverse) { - step = -1; - end_pos = state->slice_start; - } else { - step = 1; - end_pos = state->slice_end; - } - -retry: - self->status = do_match(&safe_state, TRUE); - if (self->status < 0) - goto error; - - if (self->status == RE_ERROR_SUCCESS) { - if (state->version_0) { - /* Version 0 behaviour is to advance one character if the - * split was zero-width. Unfortunately, this can give an - * incorrect result. GvR wants this behaviour to be - * retained so as not to break any existing software which - * might rely on it. - */ - if (state->text_pos == state->match_pos) { - if (self->last_pos == end_pos) - goto no_match; - - /* Advance one character. */ - state->text_pos += step; - state->must_advance = FALSE; - goto retry; - } - } - - ++self->split_count; - - /* Get segment before this match. */ - if (state->reverse) - result = get_slice(state->string, state->match_pos, - self->last_pos); - else - result = get_slice(state->string, self->last_pos, - state->match_pos); - if (!result) - goto error; - - self->last_pos = state->text_pos; - - /* Version 0 behaviour is to advance one character if the match - * was zero-width. Unfortunately, this can give an incorrect - * result. GvR wants this behaviour to be retained so as not to - * break any existing software which might rely on it. - */ - if (state->version_0) { - if (state->text_pos == state->match_pos) - /* Advance one character. */ - state->text_pos += step; - - state->must_advance = FALSE; - } else - /* Continue from where we left off, but don't allow a - * contiguous zero-width match. 
- */ - state->must_advance = TRUE; - } - } else - goto no_match; - - if (self->status == RE_ERROR_FAILURE || self->status == - RE_ERROR_PARTIAL) { -no_match: - /* Get segment following last match (even if empty). */ - if (state->reverse) - result = get_slice(state->string, 0, self->last_pos); - else - result = get_slice(state->string, self->last_pos, - state->text_length); - if (!result) - goto error; - } - } else { - /* Add group. */ - result = state_get_group(state, self->index, state->string, FALSE); - if (!result) - goto error; - } - - ++self->index; - if ((size_t)self->index > state->pattern->public_group_count) - self->index = 0; - - /* Release the state lock. */ - release_state_lock((PyObject*)self, &safe_state); - - return result; - -error: - /* Release the state lock. */ - release_state_lock((PyObject*)self, &safe_state); - - return NULL; -} - -/* SplitterObject's 'split' method. */ -static PyObject* splitter_split(SplitterObject* self, PyObject* unused) { - PyObject* result; - - result = next_split_part(self); - - if (result == Py_False) { - /* The sentinel. */ - Py_DECREF(Py_False); - Py_INCREF(Py_None); - return Py_None; - } - - return result; -} - -/* SplitterObject's 'next' method. */ -static PyObject* splitter_next(PyObject* self) { - PyObject* result; - - result = next_split_part((SplitterObject*)self); - - if (result == Py_False) { - /* No match. */ - Py_DECREF(Py_False); - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - - return result; -} - -/* Returns an iterator for a SplitterObject. - * - * The iterator is actually the SplitterObject itself. - */ -static PyObject* splitter_iter(PyObject* self) { - Py_INCREF(self); - return self; -} - -/* Gets the next result from a splitter iterator. */ -static PyObject* splitter_iternext(PyObject* self) { - PyObject* result; - - result = next_split_part((SplitterObject*)self); - - if (result == Py_False) { - /* No match. 
*/ - Py_DECREF(result); - return NULL; - } - - return result; -} - -/* Makes a copy of a SplitterObject. - * - * It actually doesn't make a copy, just returns the original object. - */ -Py_LOCAL_INLINE(PyObject*) make_splitter_copy(SplitterObject* self) { - Py_INCREF(self); - return (PyObject*)self; -} - -/* SplitterObject's '__copy__' method. */ -static PyObject* splitter_copy(SplitterObject* self, PyObject* unused) { - return make_splitter_copy(self); -} - -/* SplitterObject's '__deepcopy__' method. */ -static PyObject* splitter_deepcopy(SplitterObject* self, PyObject* memo) { - return make_splitter_copy(self); -} - -/* The documentation of a SplitterObject. */ -PyDoc_STRVAR(splitter_split_doc, - "split() --> string or None.\n\ - Return the next part of the split string."); - -/* SplitterObject's methods. */ -static PyMethodDef splitter_methods[] = { - {"next", (PyCFunction)splitter_next, METH_NOARGS}, - {"split", (PyCFunction)splitter_split, METH_NOARGS, splitter_split_doc}, - {"__copy__", (PyCFunction)splitter_copy, METH_NOARGS}, - {"__deepcopy__", (PyCFunction)splitter_deepcopy, METH_O}, - {NULL, NULL} -}; - -PyDoc_STRVAR(splitter_doc, "Splitter object"); - -/* Deallocates a SplitterObject. */ -static void splitter_dealloc(PyObject* self_) { - SplitterObject* self; - - self = (SplitterObject*)self_; - - if (self->status != RE_ERROR_INITIALISING) - state_fini(&self->state); - Py_DECREF(self->pattern); - PyObject_DEL(self); -} -#if PY_VERSION_HEX >= 0x02060000 - -/* Converts a captures index to an integer. - * - * A negative capture index in 'expandf' and 'subf' is passed as a string - * because negative indexes are not supported by 'str.format'. 
- */ -Py_LOCAL_INLINE(Py_ssize_t) index_to_integer(PyObject* item) { - Py_ssize_t value; - - value = PyInt_AsSsize_t(item); - if (!(value == -1 && PyErr_Occurred())) - return value; - - PyErr_Clear(); - - value = PyLong_AsLong(item); - if (!(value == -1 && PyErr_Occurred())) - return value; - - PyErr_Clear(); - - /* Is the index a string representation of an integer? */ - if (PyUnicode_Check(item)) { - PyObject* int_obj; - Py_UNICODE* characters; - Py_ssize_t length; - - characters = (Py_UNICODE*)PyUnicode_AS_DATA(item); - length = PyUnicode_GET_SIZE(item); - int_obj = PyLong_FromUnicode(characters, length, 0); - if (!int_obj) - goto error; - - value = PyLong_AsLong(int_obj); - Py_DECREF(int_obj); - if (!PyErr_Occurred()) - return value; - } else if (PyString_Check(item)) { - char* characters; - PyObject* int_obj; - - characters = PyString_AsString(item); - int_obj = PyLong_FromString(characters, NULL, 0); - if (!int_obj) - goto error; - - value = PyLong_AsLong(int_obj); - Py_DECREF(int_obj); - if (!PyErr_Occurred()) - return value; - } - -error: - PyErr_Clear(); - PyErr_Format(PyExc_TypeError, "list indices must be integers, not %.200s", - item->ob_type->tp_name); - - return -1; -} - -/* CaptureObject's length method. */ -Py_LOCAL_INLINE(Py_ssize_t) capture_length(CaptureObject* self) { - MatchObject* match; - RE_GroupData* group; - - if (self->group_index == 0) - return 1; - - match = *self->match_indirect; - group = &match->groups[self->group_index - 1]; - - return (Py_ssize_t)group->capture_count; -} - -/* CaptureObject's '__getitem__' method. 
*/ -static PyObject* capture_getitem(CaptureObject* self, PyObject* item) { - Py_ssize_t index; - MatchObject* match; - Py_ssize_t start; - Py_ssize_t end; - - index = index_to_integer(item); - if (index == -1 && PyErr_Occurred()) - return NULL; - - match = *self->match_indirect; - - if (self->group_index == 0) { - if (index < 0) - index += 1; - - if (index != 0) { - PyErr_SetString(PyExc_IndexError, "list index out of range"); - return NULL; - } - - start = match->match_start; - end = match->match_end; - } else { - RE_GroupData* group; - RE_GroupSpan* span; - - group = &match->groups[self->group_index - 1]; - - if (index < 0) - index += group->capture_count; - - if (index < 0 || index >= (Py_ssize_t)group->capture_count) { - PyErr_SetString(PyExc_IndexError, "list index out of range"); - return NULL; - } - - span = &group->captures[index]; - - start = span->start; - end = span->end; - } - - return get_slice(match->substring, start - match->substring_offset, end - - match->substring_offset); -} - -static PyMappingMethods capture_as_mapping = { - (lenfunc)capture_length, /* mp_length */ - (binaryfunc)capture_getitem, /* mp_subscript */ - 0, /* mp_ass_subscript */ -}; - -/* CaptureObject's methods. */ -static PyMethodDef capture_methods[] = { - {"__getitem__", (PyCFunction)capture_getitem, METH_O|METH_COEXIST}, - {NULL, NULL} -}; - -/* Deallocates a CaptureObject. */ -static void capture_dealloc(PyObject* self_) { - CaptureObject* self; - - self = (CaptureObject*)self_; - PyObject_DEL(self); -} - -/* CaptureObject's 'str' method. 
*/ -static PyObject* capture_str(PyObject* self_) { - CaptureObject* self; - MatchObject* match; - - self = (CaptureObject*)self_; - match = *self->match_indirect; - - return match_get_group_by_index(match, self->group_index, Py_None); -} -#endif - -static PyMemberDef splitter_members[] = { - {"pattern", T_OBJECT, offsetof(SplitterObject, pattern), READONLY, - "The regex object that produced this splitter object."}, - {NULL} /* Sentinel */ -}; - -static PyTypeObject Splitter_Type = { - PyObject_HEAD_INIT(NULL) - 0, - "_" RE_MODULE "." "Splitter", - sizeof(SplitterObject) -}; - -/* Creates a new SplitterObject. */ -Py_LOCAL_INLINE(PyObject*) pattern_splitter(PatternObject* pattern, PyObject* - args, PyObject* kwargs) { - /* Create split state object. */ - int conc; - SplitterObject* self; - - PyObject* string; - Py_ssize_t maxsplit = 0; - PyObject* concurrent = Py_None; - static char* kwlist[] = { "string", "maxsplit", "concurrent", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nO:splitter", kwlist, - &string, &maxsplit, &concurrent)) - return NULL; - - conc = decode_concurrent(concurrent); - if (conc < 0) - return NULL; - - /* Create a splitter object. */ - self = PyObject_NEW(SplitterObject, &Splitter_Type); - if (!self) - return NULL; - - self->pattern = pattern; - Py_INCREF(self->pattern); - self->status = RE_ERROR_INITIALISING; - - if (maxsplit == 0) - maxsplit = PY_SSIZE_T_MAX; - - /* The MatchObject, and therefore repeated captures, will not be visible. - */ - if (!state_init(&self->state, pattern, string, 0, PY_SSIZE_T_MAX, FALSE, - conc, FALSE, TRUE, FALSE, FALSE)) { - Py_DECREF(self); - return NULL; - } - - self->maxsplit = maxsplit; - self->last_pos = self->state.reverse ? self->state.text_length : 0; - self->split_count = 0; - self->index = 0; - self->status = RE_ERROR_SUCCESS; - - return (PyObject*) self; -} - -/* Implements the functionality of PatternObject's search and match methods. 
*/ -Py_LOCAL_INLINE(PyObject*) pattern_search_or_match(PatternObject* self, - PyObject* args, PyObject* kwargs, char* args_desc, BOOL search, BOOL - match_all) { - Py_ssize_t start; - Py_ssize_t end; - int conc; - BOOL part; - RE_State state; - RE_SafeState safe_state; - int status; - PyObject* match; - - PyObject* string; - PyObject* pos = Py_None; - PyObject* endpos = Py_None; - PyObject* concurrent = Py_None; - PyObject* partial = Py_False; - static char* kwlist[] = { "string", "pos", "endpos", "concurrent", - "partial", NULL }; - /* When working with a short string, such as a line from a file, the - * relative cost of PyArg_ParseTupleAndKeywords can be significant, and - * it's worth not using it when there are only positional arguments. - */ - Py_ssize_t arg_count; - if (args && !kwargs && PyTuple_CheckExact(args)) - arg_count = PyTuple_GET_SIZE(args); - else - arg_count = -1; - - if (1 <= arg_count && arg_count <= 5) { - /* PyTuple_GET_ITEM borrows the reference. */ - string = PyTuple_GET_ITEM(args, 0); - if (arg_count >= 2) - pos = PyTuple_GET_ITEM(args, 1); - if (arg_count >= 3) - endpos = PyTuple_GET_ITEM(args, 2); - if (arg_count >= 4) - concurrent = PyTuple_GET_ITEM(args, 3); - if (arg_count >= 5) - partial = PyTuple_GET_ITEM(args, 4); - } else if (!PyArg_ParseTupleAndKeywords(args, kwargs, args_desc, kwlist, - &string, &pos, &endpos, &concurrent, &partial)) - return NULL; - - start = as_string_index(pos, 0); - if (start == -1 && PyErr_Occurred()) - return NULL; - - end = as_string_index(endpos, PY_SSIZE_T_MAX); - if (end == -1 && PyErr_Occurred()) - return NULL; - - conc = decode_concurrent(concurrent); - if (conc < 0) - return NULL; - - part = decode_partial(partial); - - /* The MatchObject, and therefore repeated captures, will be visible. */ - if (!state_init(&state, self, string, start, end, FALSE, conc, part, FALSE, - TRUE, match_all)) - return NULL; - - /* Initialise the "safe state" structure. 
*/ - safe_state.re_state = &state; - safe_state.thread_state = NULL; - - status = do_match(&safe_state, search); - - if (status >= 0 || status == RE_ERROR_PARTIAL) - /* Create the match object. */ - match = pattern_new_match(self, &state, status); - else - match = NULL; - - state_fini(&state); - - return match; -} - -/* PatternObject's 'match' method. */ -static PyObject* pattern_match(PatternObject* self, PyObject* args, PyObject* - kwargs) { - return pattern_search_or_match(self, args, kwargs, "O|OOOO:match", FALSE, - FALSE); -} - -/* PatternObject's 'fullmatch' method. */ -static PyObject* pattern_fullmatch(PatternObject* self, PyObject* args, - PyObject* kwargs) { - return pattern_search_or_match(self, args, kwargs, "O|OOOO:fullmatch", - FALSE, TRUE); -} - -/* PatternObject's 'search' method. */ -static PyObject* pattern_search(PatternObject* self, PyObject* args, PyObject* - kwargs) { - return pattern_search_or_match(self, args, kwargs, "O|OOOO:search", TRUE, - FALSE); -} - -/* Gets the limits of the matching. */ -Py_LOCAL_INLINE(BOOL) get_limits(PyObject* pos, PyObject* endpos, Py_ssize_t - length, Py_ssize_t* start, Py_ssize_t* end) { - Py_ssize_t s; - Py_ssize_t e; - - s = as_string_index(pos, 0); - if (s == -1 && PyErr_Occurred()) - return FALSE; - - e = as_string_index(endpos, PY_SSIZE_T_MAX); - if (e == -1 && PyErr_Occurred()) - return FALSE; - - /* Adjust boundaries. */ - if (s < 0) - s += length; - if (s < 0) - s = 0; - else if (s > length) - s = length; - - if (e < 0) - e += length; - if (e < 0) - e = 0; - else if (e > length) - e = length; - - *start = s; - *end = e; - - return TRUE; -} - -/* Gets a replacement item from the replacement list. - * - * The replacement item could be a string literal or a group. - * - * It can return None to represent an empty string. 
- */ -Py_LOCAL_INLINE(PyObject*) get_sub_replacement(PyObject* item, PyObject* - string, RE_State* state, size_t group_count) { - Py_ssize_t index; - - if (PyUnicode_CheckExact(item) || PyString_CheckExact(item)) { - /* It's a literal, which can be added directly to the list. */ - Py_INCREF(item); - return item; - } - - /* Is it a group reference? */ - index = as_group_index(item); - if (index == -1 && PyErr_Occurred()) { - /* Not a group either! */ - set_error(RE_ERROR_REPLACEMENT, NULL); - return NULL; - } - - if (index == 0) { - /* The entire matched portion of the string. */ - if (state->match_pos == state->text_pos) { - /* Return None for "". */ - Py_INCREF(Py_None); - return Py_None; - } - - if (state->reverse) - return get_slice(string, state->text_pos, state->match_pos); - else - return get_slice(string, state->match_pos, state->text_pos); - } else if (1 <= index && (size_t)index <= group_count) { - /* A group. */ - RE_GroupData* group; - - group = &state->groups[index - 1]; - - if (group->capture_count == 0 && group->span.start != group->span.end) - { - /* The group didn't match or is "", so return None for "". */ - Py_INCREF(Py_None); - return Py_None; - } - - return get_slice(string, group->span.start, group->span.end); - } else { - /* No such group. */ - set_error(RE_ERROR_INVALID_GROUP_REF, NULL); - return NULL; - } -} - -/* PatternObject's 'subx' method. 
*/ -Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject* - str_template, PyObject* string, Py_ssize_t maxsub, int sub_type, PyObject* - pos, PyObject* endpos, int concurrent) { - RE_StringInfo str_info; - Py_ssize_t start; - Py_ssize_t end; - BOOL is_callable = FALSE; - PyObject* replacement = NULL; - BOOL is_literal = FALSE; -#if PY_VERSION_HEX >= 0x02060000 - BOOL is_format = FALSE; -#endif - BOOL is_template = FALSE; - RE_State state; - RE_SafeState safe_state; - RE_JoinInfo join_info; - Py_ssize_t sub_count; - Py_ssize_t last_pos; - Py_ssize_t step; - PyObject* item; - MatchObject* match; -#if PY_VERSION_HEX >= 0x02060000 - BOOL built_capture = FALSE; -#endif - PyObject* args; - PyObject* kwargs; - Py_ssize_t end_pos; - - /* Get the string. */ - if (!get_string(string, &str_info)) - return NULL; - - /* Get the limits of the search. */ - if (!get_limits(pos, endpos, str_info.length, &start, &end)) { -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); - -#endif - return NULL; - } - - /* If the pattern is too long for the string, then take a shortcut, unless - * it's a fuzzy pattern. - */ - if (!self->is_fuzzy && self->min_width > end - start) { - PyObject* result; - - Py_INCREF(string); - - if (sub_type & RE_SUBN) - result = Py_BuildValue("Nn", string, 0); - else - result = string; - -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); - -#endif - return result; - } - - if (maxsub == 0) - maxsub = PY_SSIZE_T_MAX; - - /* sub/subn takes either a function or a string template. */ - if (PyCallable_Check(str_template)) { - /* It's callable. */ - is_callable = TRUE; - - replacement = str_template; - Py_INCREF(replacement); -#if PY_VERSION_HEX >= 0x02060000 - } else if (sub_type & RE_SUBF) { - /* Is it a literal format? - * - * To keep it simple we'll say that a literal is a string which can be - * used as-is, so no placeholders. 
- */ - Py_ssize_t literal_length; - - literal_length = check_replacement_string(str_template, '{'); - if (literal_length > 0) { - /* It's a literal. */ - is_literal = TRUE; - - replacement = str_template; - Py_INCREF(replacement); - } else if (literal_length < 0) { - /* It isn't a literal, so get the 'format' method. */ - is_format = TRUE; - - replacement = PyObject_GetAttrString(str_template, "format"); - if (!replacement) { - release_buffer(&str_info); - return NULL; - } - } -#endif - } else { - /* Is it a literal template? - * - * To keep it simple we'll say that a literal is a string which can be - * used as-is, so no backslashes. - */ - Py_ssize_t literal_length; - - literal_length = check_replacement_string(str_template, '\\'); - if (literal_length > 0) { - /* It's a literal. */ - is_literal = TRUE; - - replacement = str_template; - Py_INCREF(replacement); - } else if (literal_length < 0 ) { - /* It isn't a literal, so hand it over to the template compiler. */ - is_template = TRUE; - - replacement = call(RE_MODULE, "_compile_replacement_helper", - PyTuple_Pack(2, self, str_template)); - if (!replacement) { -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); - -#endif - return NULL; - } - } - } - - /* The MatchObject, and therefore repeated captures, will be visible only - * if the replacement is callable or subf is used. - */ -#if PY_VERSION_HEX >= 0x02060000 - if (!state_init_2(&state, self, string, &str_info, start, end, FALSE, - concurrent, FALSE, FALSE, is_callable || (sub_type & RE_SUBF) != 0, - FALSE)) { - release_buffer(&str_info); - -#else - if (!state_init_2(&state, self, string, &str_info, start, end, FALSE, - concurrent, FALSE, FALSE, is_callable, FALSE)) { -#endif - Py_XDECREF(replacement); - return NULL; - } - - /* Initialise the "safe state" structure. 
*/ - safe_state.re_state = &state; - safe_state.thread_state = NULL; - - init_join_list(&join_info, state.reverse, PyUnicode_Check(string)); - - sub_count = 0; - last_pos = state.reverse ? state.text_length : 0; - step = state.reverse ? -1 : 1; - while (sub_count < maxsub) { - int status; - - status = do_match(&safe_state, TRUE); - if (status < 0) - goto error; - - if (status == 0) - break; - - /* Append the segment before this match. */ - if (state.match_pos != last_pos) { - if (state.reverse) - item = get_slice(string, state.match_pos, last_pos); - else - item = get_slice(string, last_pos, state.match_pos); - if (!item) - goto error; - - /* Add to the list. */ - status = add_to_join_list(&join_info, item); - Py_DECREF(item); - if (status < 0) - goto error; - } - - /* Add this match. */ - if (is_literal) { - /* The replacement is a literal string. */ - status = add_to_join_list(&join_info, replacement); - if (status < 0) - goto error; -#if PY_VERSION_HEX >= 0x02060000 - } else if (is_format) { - /* The replacement is a format string. */ - size_t g; - - /* We need to create the arguments for the 'format' method. We'll - * start by creating a MatchObject. - */ - match = (MatchObject*)pattern_new_match(self, &state, 1); - if (!match) - goto error; - - /* We'll build the args and kwargs the first time. They'll be using - * capture objects which refer to the match object indirectly; this - * means that args and kwargs can be reused with different match - * objects. - */ - if (!built_capture) { - /* The args are a tuple of the capture group matches. */ - args = PyTuple_New(match->group_count + 1); - if (!args) { - Py_DECREF(match); - goto error; - } - - for (g = 0; g < match->group_count + 1; g++) - /* PyTuple_SetItem borrows the reference. */ - PyTuple_SetItem(args, (Py_ssize_t)g, - make_capture_object(&match, (Py_ssize_t)g)); - - /* The kwargs are a dict of the named capture group matches. 
*/ - kwargs = make_capture_dict(match, &match); - if (!kwargs) { - Py_DECREF(args); - Py_DECREF(match); - goto error; - } - - built_capture = TRUE; - } - - /* Call the 'format' method. */ - item = PyObject_Call(replacement, args, kwargs); - - Py_DECREF(match); - if (!item) - goto error; - - /* Add the result to the list. */ - status = add_to_join_list(&join_info, item); - Py_DECREF(item); - if (status < 0) - goto error; -#endif - } else if (is_template) { - /* The replacement is a list template. */ - Py_ssize_t count; - Py_ssize_t index; - Py_ssize_t step; - - /* Add each part of the template to the list. */ - count = PyList_GET_SIZE(replacement); - if (join_info.reversed) { - /* We're searching backwards, so we'll be reversing the list - * when it's complete. Therefore, we need to add the items of - * the template in reverse order for them to be in the correct - * order after the reversal. - */ - index = count - 1; - step = -1; - } else { - /* We're searching forwards. */ - index = 0; - step = 1; - } - - while (count > 0) { - PyObject* item; - PyObject* str_item; - - /* PyList_GET_ITEM borrows a reference. */ - item = PyList_GET_ITEM(replacement, index); - str_item = get_sub_replacement(item, string, &state, - self->public_group_count); - if (!str_item) - goto error; - - /* Add the result to the list. */ - if (str_item == Py_None) - /* None for "". */ - Py_DECREF(str_item); - else { - status = add_to_join_list(&join_info, str_item); - Py_DECREF(str_item); - if (status < 0) - goto error; - } - - --count; - index += step; - } - } else if (is_callable) { - /* Pass a MatchObject to the replacement function. */ - PyObject* match; - PyObject* args; - - /* We need to create a MatchObject to pass to the replacement - * function. - */ - match = pattern_new_match(self, &state, 1); - if (!match) - goto error; - - /* The args for the replacement function. 
*/ - args = PyTuple_Pack(1, match); - if (!args) { - Py_DECREF(match); - goto error; - } - - /* Call the replacement function. */ - item = PyObject_CallObject(replacement, args); - Py_DECREF(args); - Py_DECREF(match); - if (!item) - goto error; - - /* Add the result to the list. */ - status = add_to_join_list(&join_info, item); - Py_DECREF(item); - if (status < 0) - goto error; - } - - ++sub_count; - - last_pos = state.text_pos; - - if (state.version_0) { - /* Always advance after a zero-width match. */ - if (state.match_pos == state.text_pos) { - state.text_pos += step; - state.must_advance = FALSE; - } else - state.must_advance = TRUE; - } else - /* Continue from where we left off, but don't allow a contiguous - * zero-width match. - */ - state.must_advance = state.match_pos == state.text_pos; - } - - /* Get the segment following the last match. We use 'length' instead of - * 'text_length' because the latter is truncated to 'slice_end', a - * documented idiosyncracy of the 're' module. - */ - end_pos = state.reverse ? 0 : str_info.length; - if (last_pos != end_pos) { - int status; - - /* The segment is part of the original string. */ - if (state.reverse) - item = get_slice(string, 0, last_pos); - else - item = get_slice(string, last_pos, str_info.length); - if (!item) - goto error; - - status = add_to_join_list(&join_info, item); - Py_DECREF(item); - if (status < 0) - goto error; - } - - Py_XDECREF(replacement); - - /* Convert the list to a single string (also cleans up join_info). 
*/ - item = join_list_info(&join_info); - - state_fini(&state); - -#if PY_VERSION_HEX >= 0x02060000 - if (built_capture) { - Py_DECREF(kwargs); - Py_DECREF(args); - } - -#endif - if (!item) - return NULL; - - if (sub_type & RE_SUBN) - return Py_BuildValue("Nn", item, sub_count); - - return item; - -error: -#if PY_VERSION_HEX >= 0x02060000 - if (built_capture) { - Py_DECREF(kwargs); - Py_DECREF(args); - } - -#endif - clear_join_list(&join_info); - state_fini(&state); - Py_XDECREF(replacement); - return NULL; -} - -/* PatternObject's 'sub' method. */ -static PyObject* pattern_sub(PatternObject* self, PyObject* args, PyObject* - kwargs) { - int conc; - - PyObject* replacement; - PyObject* string; - Py_ssize_t count = 0; - PyObject* pos = Py_None; - PyObject* endpos = Py_None; - PyObject* concurrent = Py_None; - static char* kwlist[] = { "repl", "string", "count", "pos", "endpos", - "concurrent", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nOOO:sub", kwlist, - &replacement, &string, &count, &pos, &endpos, &concurrent)) - return NULL; - - conc = decode_concurrent(concurrent); - if (conc < 0) - return NULL; - - return pattern_subx(self, replacement, string, count, RE_SUB, pos, endpos, - conc); -} - -#if PY_VERSION_HEX >= 0x02060000 -/* PatternObject's 'subf' method. 
*/ -static PyObject* pattern_subf(PatternObject* self, PyObject* args, PyObject* - kwargs) { - int conc; - - PyObject* format; - PyObject* string; - Py_ssize_t count = 0; - PyObject* pos = Py_None; - PyObject* endpos = Py_None; - PyObject* concurrent = Py_None; - static char* kwlist[] = { "format", "string", "count", "pos", "endpos", - "concurrent", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nOOO:sub", kwlist, - &format, &string, &count, &pos, &endpos, &concurrent)) - return NULL; - - conc = decode_concurrent(concurrent); - if (conc < 0) - return NULL; - - return pattern_subx(self, format, string, count, RE_SUBF, pos, endpos, - conc); -} - -#endif -/* PatternObject's 'subn' method. */ -static PyObject* pattern_subn(PatternObject* self, PyObject* args, PyObject* - kwargs) { - int conc; - - PyObject* replacement; - PyObject* string; - Py_ssize_t count = 0; - PyObject* pos = Py_None; - PyObject* endpos = Py_None; - PyObject* concurrent = Py_None; - static char* kwlist[] = { "repl", "string", "count", "pos", "endpos", - "concurrent", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nOOO:subn", kwlist, - &replacement, &string, &count, &pos, &endpos, &concurrent)) - return NULL; - - conc = decode_concurrent(concurrent); - if (conc < 0) - return NULL; - - return pattern_subx(self, replacement, string, count, RE_SUBN, pos, endpos, - conc); -} - -#if PY_VERSION_HEX >= 0x02060000 -/* PatternObject's 'subfn' method. 
*/ -static PyObject* pattern_subfn(PatternObject* self, PyObject* args, PyObject* - kwargs) { - int conc; - - PyObject* format; - PyObject* string; - Py_ssize_t count = 0; - PyObject* pos = Py_None; - PyObject* endpos = Py_None; - PyObject* concurrent = Py_None; - static char* kwlist[] = { "format", "string", "count", "pos", "endpos", - "concurrent", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nOOO:subn", kwlist, - &format, &string, &count, &pos, &endpos, &concurrent)) - return NULL; - - conc = decode_concurrent(concurrent); - if (conc < 0) - return NULL; - - return pattern_subx(self, format, string, count, RE_SUBF | RE_SUBN, pos, - endpos, conc); -} - -#endif -/* PatternObject's 'split' method. */ -static PyObject* pattern_split(PatternObject* self, PyObject* args, PyObject* - kwargs) { - int conc; - - RE_State state; - RE_SafeState safe_state; - PyObject* list; - PyObject* item; - int status; - Py_ssize_t split_count; - size_t g; - Py_ssize_t start_pos; - Py_ssize_t end_pos; - Py_ssize_t step; - Py_ssize_t last_pos; - - PyObject* string; - Py_ssize_t maxsplit = 0; - PyObject* concurrent = Py_None; - static char* kwlist[] = { "string", "maxsplit", "concurrent", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nO:split", kwlist, - &string, &maxsplit, &concurrent)) - return NULL; - - if (maxsplit == 0) - maxsplit = PY_SSIZE_T_MAX; - - conc = decode_concurrent(concurrent); - if (conc < 0) - return NULL; - - /* The MatchObject, and therefore repeated captures, will not be visible. - */ - if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX, FALSE, conc, - FALSE, FALSE, FALSE, FALSE)) - return NULL; - - /* Initialise the "safe state" structure. 
*/ - safe_state.re_state = &state; - safe_state.thread_state = NULL; - - list = PyList_New(0); - if (!list) { - state_fini(&state); - return NULL; - } - - split_count = 0; - if (state.reverse) { - start_pos = state.text_length; - end_pos = 0; - step = -1; - } else { - start_pos = 0; - end_pos = state.text_length; - step = 1; - } - - last_pos = start_pos; - while (split_count < maxsplit) { - status = do_match(&safe_state, TRUE); - if (status < 0) - goto error; - - if (status == 0) - /* No more matches. */ - break; - - if (state.version_0) { - /* Version 0 behaviour is to advance one character if the split was - * zero-width. Unfortunately, this can give an incorrect result. - * GvR wants this behaviour to be retained so as not to break any - * existing software which might rely on it. - */ - if (state.text_pos == state.match_pos) { - if (last_pos == end_pos) - break; - - /* Advance one character. */ - state.text_pos += step; - state.must_advance = FALSE; - continue; - } - } - - /* Get segment before this match. */ - if (state.reverse) - item = get_slice(string, state.match_pos, last_pos); - else - item = get_slice(string, last_pos, state.match_pos); - if (!item) - goto error; - - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - goto error; - - /* Add groups (if any). */ - for (g = 1; g <= self->public_group_count; g++) { - item = state_get_group(&state, (Py_ssize_t)g, string, FALSE); - if (!item) - goto error; - - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - goto error; - } - - ++split_count; - last_pos = state.text_pos; - - /* Version 0 behaviour is to advance one character if the match was - * zero-width. Unfortunately, this can give an incorrect result. GvR - * wants this behaviour to be retained so as not to break any existing - * software which might rely on it. - */ - if (state.version_0) { - if (state.text_pos == state.match_pos) - /* Advance one character. 
*/ - state.text_pos += step; - - state.must_advance = FALSE; - } else - /* Continue from where we left off, but don't allow a contiguous - * zero-width match. - */ - state.must_advance = TRUE; - } - - /* Get segment following last match (even if empty). */ - if (state.reverse) - item = get_slice(string, 0, last_pos); - else - item = get_slice(string, last_pos, state.text_length); - if (!item) - goto error; - - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - goto error; - - state_fini(&state); - - return list; - -error: - Py_DECREF(list); - state_fini(&state); - return NULL; -} - -/* PatternObject's 'splititer' method. */ -static PyObject* pattern_splititer(PatternObject* pattern, PyObject* args, - PyObject* kwargs) { - return pattern_splitter(pattern, args, kwargs); -} - -/* PatternObject's 'findall' method. */ -static PyObject* pattern_findall(PatternObject* self, PyObject* args, PyObject* - kwargs) { - Py_ssize_t start; - Py_ssize_t end; - int conc; - RE_State state; - RE_SafeState safe_state; - PyObject* list; - Py_ssize_t step; - int status; - Py_ssize_t b; - Py_ssize_t e; - size_t g; - - PyObject* string; - PyObject* pos = Py_None; - PyObject* endpos = Py_None; - Py_ssize_t overlapped = FALSE; - PyObject* concurrent = Py_None; - static char* kwlist[] = { "string", "pos", "endpos", "overlapped", - "concurrent", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOnO:findall", kwlist, - &string, &pos, &endpos, &overlapped, &concurrent)) - return NULL; - - start = as_string_index(pos, 0); - if (start == -1 && PyErr_Occurred()) - return NULL; - - end = as_string_index(endpos, PY_SSIZE_T_MAX); - if (end == -1 && PyErr_Occurred()) - return NULL; - - conc = decode_concurrent(concurrent); - if (conc < 0) - return NULL; - - /* The MatchObject, and therefore repeated captures, will not be visible. 
- */ - if (!state_init(&state, self, string, start, end, overlapped != 0, conc, - FALSE, FALSE, FALSE, FALSE)) - return NULL; - - /* Initialise the "safe state" structure. */ - safe_state.re_state = &state; - safe_state.thread_state = NULL; - - list = PyList_New(0); - if (!list) { - state_fini(&state); - return NULL; - } - - step = state.reverse ? -1 : 1; - while (state.slice_start <= state.text_pos && state.text_pos <= - state.slice_end) { - PyObject* item; - - status = do_match(&safe_state, TRUE); - if (status < 0) - goto error; - - if (status == 0) - break; - - /* Don't bother to build a MatchObject. */ - switch (self->public_group_count) { - case 0: - if (state.reverse) { - b = state.text_pos; - e = state.match_pos; - } else { - b = state.match_pos; - e = state.text_pos; - } - item = get_slice(string, b, e); - if (!item) - goto error; - break; - case 1: - item = state_get_group(&state, 1, string, TRUE); - if (!item) - goto error; - break; - default: - item = PyTuple_New((Py_ssize_t)self->public_group_count); - if (!item) - goto error; - - for (g = 0; g < self->public_group_count; g++) { - PyObject* o; - - o = state_get_group(&state, (Py_ssize_t)g + 1, string, TRUE); - if (!o) { - Py_DECREF(item); - goto error; - } - - /* PyTuple_SET_ITEM borrows the reference. */ - PyTuple_SET_ITEM(item, g, o); - } - break; - } - - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - goto error; - - if (state.overlapped) { - /* Advance one character. */ - state.text_pos = state.match_pos + step; - state.must_advance = FALSE; - } else - /* Continue from where we left off, but don't allow 2 contiguous - * zero-width matches. - */ - state.must_advance = state.text_pos == state.match_pos; - } - - state_fini(&state); - - return list; - -error: - Py_DECREF(list); - state_fini(&state); - return NULL; -} - -/* PatternObject's 'finditer' method. 
*/ -static PyObject* pattern_finditer(PatternObject* pattern, PyObject* args, - PyObject* kwargs) { - return pattern_scanner(pattern, args, kwargs); -} - -/* Makes a copy of a PatternObject. - * - * It actually doesn't make a copy, just returns the original object. - */ -Py_LOCAL_INLINE(PyObject*) make_pattern_copy(PatternObject* self) { - Py_INCREF(self); - return (PyObject*)self; -} - -/* PatternObject's '__copy__' method. */ -static PyObject* pattern_copy(PatternObject* self, PyObject* unused) { - return make_pattern_copy(self); -} - -/* PatternObject's '__deepcopy__' method. */ -static PyObject* pattern_deepcopy(PatternObject* self, PyObject* memo) { - return make_pattern_copy(self); -} - -/* The documentation of a PatternObject. */ -PyDoc_STRVAR(pattern_match_doc, - "match(string, pos=None, endpos=None, concurrent=None) --> MatchObject or None.\n\ - Match zero or more characters at the beginning of the string."); - -PyDoc_STRVAR(pattern_fullmatch_doc, - "fullmatch(string, pos=None, endpos=None, concurrent=None) --> MatchObject or None.\n\ - Match zero or more characters against all of the string."); - -PyDoc_STRVAR(pattern_search_doc, - "search(string, pos=None, endpos=None, concurrent=None) --> MatchObject or None.\n\ - Search through string looking for a match, and return a corresponding\n\ - match object instance. 
Return None if no match is found."); - -PyDoc_STRVAR(pattern_sub_doc, - "sub(repl, string, count=0, flags=0, pos=None, endpos=None, concurrent=None) --> newstring\n\ - Return the string obtained by replacing the leftmost (or rightmost with a\n\ - reverse pattern) non-overlapping occurrences of pattern in string by the\n\ - replacement repl."); - -#if PY_VERSION_HEX >= 0x02060000 -PyDoc_STRVAR(pattern_subf_doc, - "subf(format, string, count=0, flags=0, pos=None, endpos=None, concurrent=None) --> newstring\n\ - Return the string obtained by replacing the leftmost (or rightmost with a\n\ - reverse pattern) non-overlapping occurrences of pattern in string by the\n\ - replacement format."); - -#endif -PyDoc_STRVAR(pattern_subn_doc, - "subn(repl, string, count=0, flags=0, pos=None, endpos=None, concurrent=None) --> (newstring, number of subs)\n\ - Return the tuple (new_string, number_of_subs_made) found by replacing the\n\ - leftmost (or rightmost with a reverse pattern) non-overlapping occurrences\n\ - of pattern with the replacement repl."); - -#if PY_VERSION_HEX >= 0x02060000 -PyDoc_STRVAR(pattern_subfn_doc, - "subfn(format, string, count=0, flags=0, pos=None, endpos=None, concurrent=None) --> (newstring, number of subs)\n\ - Return the tuple (new_string, number_of_subs_made) found by replacing the\n\ - leftmost (or rightmost with a reverse pattern) non-overlapping occurrences\n\ - of pattern with the replacement format."); - -#endif -PyDoc_STRVAR(pattern_split_doc, - "split(string, string, maxsplit=0, concurrent=None) --> list.\n\ - Split string by the occurrences of pattern."); - -PyDoc_STRVAR(pattern_splititer_doc, - "splititer(string, maxsplit=0, concurrent=None) --> iterator.\n\ - Return an iterator yielding the parts of a split string."); - -PyDoc_STRVAR(pattern_findall_doc, - "findall(string, pos=None, endpos=None, overlapped=False, concurrent=None) --> list.\n\ - Return a list of all matches of pattern in string. 
The matches may be\n\ - overlapped if overlapped is True."); - -PyDoc_STRVAR(pattern_finditer_doc, - "finditer(string, pos=None, endpos=None, overlapped=False, concurrent=None) --> iterator.\n\ - Return an iterator over all matches for the RE pattern in string. The\n\ - matches may be overlapped if overlapped is True. For each match, the\n\ - iterator returns a MatchObject."); - -PyDoc_STRVAR(pattern_scanner_doc, - "scanner(string, pos=None, endpos=None, overlapped=False, concurrent=None) --> scanner.\n\ - Return an scanner for the RE pattern in string. The matches may be overlapped\n\ - if overlapped is True."); - -/* The methods of a PatternObject. */ -static PyMethodDef pattern_methods[] = { - {"match", (PyCFunction)pattern_match, METH_VARARGS|METH_KEYWORDS, - pattern_match_doc}, - {"fullmatch", (PyCFunction)pattern_fullmatch, METH_VARARGS|METH_KEYWORDS, - pattern_fullmatch_doc}, - {"search", (PyCFunction)pattern_search, METH_VARARGS|METH_KEYWORDS, - pattern_search_doc}, - {"sub", (PyCFunction)pattern_sub, METH_VARARGS|METH_KEYWORDS, - pattern_sub_doc}, -#if PY_VERSION_HEX >= 0x02060000 - {"subf", (PyCFunction)pattern_subf, METH_VARARGS|METH_KEYWORDS, - pattern_subf_doc}, -#endif - {"subn", (PyCFunction)pattern_subn, METH_VARARGS|METH_KEYWORDS, - pattern_subn_doc}, -#if PY_VERSION_HEX >= 0x02060000 - {"subfn", (PyCFunction)pattern_subfn, METH_VARARGS|METH_KEYWORDS, - pattern_subfn_doc}, -#endif - {"split", (PyCFunction)pattern_split, METH_VARARGS|METH_KEYWORDS, - pattern_split_doc}, - {"splititer", (PyCFunction)pattern_splititer, METH_VARARGS|METH_KEYWORDS, - pattern_splititer_doc}, - {"findall", (PyCFunction)pattern_findall, METH_VARARGS|METH_KEYWORDS, - pattern_findall_doc}, - {"finditer", (PyCFunction)pattern_finditer, METH_VARARGS|METH_KEYWORDS, - pattern_finditer_doc}, - {"scanner", (PyCFunction)pattern_scanner, METH_VARARGS|METH_KEYWORDS, - pattern_scanner_doc}, - {"__copy__", (PyCFunction)pattern_copy, METH_NOARGS}, - {"__deepcopy__", 
(PyCFunction)pattern_deepcopy, METH_O}, - {NULL, NULL} -}; - -PyDoc_STRVAR(pattern_doc, "Compiled regex object"); - -/* Deallocates a PatternObject. */ -static void pattern_dealloc(PyObject* self_) { - PatternObject* self; - size_t i; - int partial_side; - - self = (PatternObject*)self_; - - /* Discard the nodes. */ - for (i = 0; i < self->node_count; i++) { - RE_Node* node; - - node = self->node_list[i]; - re_dealloc(node->values); - if (node->status & RE_STATUS_STRING) { - re_dealloc(node->string.bad_character_offset); - re_dealloc(node->string.good_suffix_offset); - } - re_dealloc(node); - } - re_dealloc(self->node_list); - - /* Discard the group info. */ - re_dealloc(self->group_info); - - /* Discard the call_ref info. */ - re_dealloc(self->call_ref_info); - - /* Discard the repeat info. */ - re_dealloc(self->repeat_info); - - dealloc_groups(self->groups_storage, self->true_group_count); - - dealloc_repeats(self->repeats_storage, self->repeat_count); - - if (self->weakreflist) - PyObject_ClearWeakRefs((PyObject*)self); - Py_XDECREF(self->pattern); - Py_XDECREF(self->groupindex); - Py_XDECREF(self->indexgroup); - - for (partial_side = 0; partial_side < 2; partial_side++) { - if (self->partial_named_lists[partial_side]) { - for (i = 0; i < self->named_lists_count; i++) - Py_XDECREF(self->partial_named_lists[partial_side][i]); - - re_dealloc(self->partial_named_lists[partial_side]); - } - } - - Py_DECREF(self->named_lists); - Py_DECREF(self->named_list_indexes); - Py_DECREF(self->required_chars); - re_dealloc(self->locale_info); - Py_DECREF(self->packed_code_list); - PyObject_DEL(self); -} - -/* Info about the various flags that can be passed in. */ -typedef struct RE_FlagName { - char* name; - int value; -} RE_FlagName; - -/* We won't bother about the A flag in Python 2. 
*/ -static RE_FlagName flag_names[] = { - {"B", RE_FLAG_BESTMATCH}, - {"D", RE_FLAG_DEBUG}, - {"S", RE_FLAG_DOTALL}, - {"F", RE_FLAG_FULLCASE}, - {"I", RE_FLAG_IGNORECASE}, - {"L", RE_FLAG_LOCALE}, - {"M", RE_FLAG_MULTILINE}, - {"P", RE_FLAG_POSIX}, - {"R", RE_FLAG_REVERSE}, - {"T", RE_FLAG_TEMPLATE}, - {"U", RE_FLAG_UNICODE}, - {"X", RE_FLAG_VERBOSE}, - {"V0", RE_FLAG_VERSION0}, - {"V1", RE_FLAG_VERSION1}, - {"W", RE_FLAG_WORD}, -}; - -/* Appends a string to a list. */ -Py_LOCAL_INLINE(BOOL) append_string(PyObject* list, char* string) { - PyObject* item; - int status; - - item = Py_BuildValue("s", string); - if (!item) - return FALSE; - - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - return FALSE; - - return TRUE; -} - -/* Appends a (decimal) integer to a list. */ -Py_LOCAL_INLINE(BOOL) append_integer(PyObject* list, Py_ssize_t value) { - PyObject* int_obj; - PyObject* repr_obj; - int status; - - int_obj = Py_BuildValue("n", value); - if (!int_obj) - return FALSE; - - repr_obj = PyObject_Repr(int_obj); - Py_DECREF(int_obj); - if (!repr_obj) - return FALSE; - - status = PyList_Append(list, repr_obj); - Py_DECREF(repr_obj); - if (status < 0) - return FALSE; - - return TRUE; -} - -/* Packs the code list that's needed for pickling. */ -Py_LOCAL_INLINE(PyObject*) pack_code_list(RE_CODE* code, Py_ssize_t code_len) { - Py_ssize_t max_size; - RE_UINT8* packed; - Py_ssize_t count; - RE_UINT32 value; - Py_ssize_t i; - PyObject* packed_code_list; - - /* What is the maximum number of bytes needed to store it? - * - * A 32-bit RE_CODE might need 5 bytes ((32 + 6) / 7). - */ - max_size = code_len * 5 + ((sizeof(Py_ssize_t) * 8) + 6) / 7; - - packed = (RE_UINT8*)re_alloc((size_t)max_size); - count = 0; - - /* Store the length of the code list. */ - value = (RE_UINT32)code_len; - - while (value >= 0x80) { - packed[count++] = 0x80 | (value & 0x7F); - value >>= 7; - } - - packed[count++] = value; - - /* Store each of the elements of the code list. 
*/ - for (i = 0; i < code_len; i++) { - value = (RE_UINT32)code[i]; - - while (value >= 0x80) { - packed[count++] = 0x80 | (value & 0x7F); - value >>= 7; - } - - packed[count++] = value; - } - - packed_code_list = PyString_FromStringAndSize((const char *)packed, count); - re_dealloc(packed); - - return packed_code_list; -} - -/* Unpacks the code list that's needed for pickling. */ -Py_LOCAL_INLINE(PyObject*) unpack_code_list(PyObject* packed) { - PyObject* code_list; - RE_UINT8* packed_data; - Py_ssize_t index; - RE_UINT32 value; - int shift; - size_t count; - - code_list = PyList_New(0); - if (!code_list) - return NULL; - - packed_data = (RE_UINT8*)PyString_AsString(packed); - index = 0; - - /* Unpack the length of the code list. */ - value = 0; - shift = 0; - - while (packed_data[index] >= 0x80) { - value |= (RE_UINT32)(packed_data[index++] & 0x7F) << shift; - shift += 7; - } - - value |= (RE_UINT32)packed_data[index++] << shift; - count = (size_t)value; - - /* Unpack each of the elements of the code list. */ - while (count > 0) { - PyObject* obj; - int status; - - value = 0; - shift = 0; - - while (packed_data[index] >= 0x80) { - value |= (RE_UINT32)(packed_data[index++] & 0x7F) << shift; - shift += 7; - } - - value |= (RE_UINT32)packed_data[index++] << shift; -#if PY_VERSION_HEX >= 0x02060000 - obj = PyLong_FromSize_t((size_t)value); -#else - obj = PyLong_FromUnsignedLongLong((size_t)value); -#endif - if (!obj) - goto error; - - status = PyList_Append(code_list, obj); - Py_DECREF(obj); - if (status == -1) - goto error; - - --count; - } - - return code_list; - -error: - Py_DECREF(code_list); - return NULL; -} - -/* MatchObject's '__repr__' method. 
*/ -static PyObject* match_repr(PyObject* self_) { - MatchObject* self; - PyObject* list; - PyObject* matched_substring; - PyObject* matched_repr; - int status; - PyObject* separator; - PyObject* result; - - self = (MatchObject*)self_; - - list = PyList_New(0); - if (!list) - return NULL; - - if (!append_string(list, "match_start)) - goto error; - - if (! append_string(list, ", ")) - goto error; - - if (!append_integer(list, self->match_end)) - goto error; - - if (!append_string(list, "), match=")) - goto error; - - matched_substring = get_slice(self->substring, self->match_start - - self->substring_offset, self->match_end - self->substring_offset); - if (!matched_substring) - goto error; - - matched_repr = PyObject_Repr(matched_substring); - Py_DECREF(matched_substring); - if (!matched_repr) - goto error; - - status = PyList_Append(list, matched_repr); - Py_DECREF(matched_repr); - if (status < 0) - goto error; - - if (self->fuzzy_counts[RE_FUZZY_SUB] != 0 || - self->fuzzy_counts[RE_FUZZY_INS] != 0 || self->fuzzy_counts[RE_FUZZY_DEL] - != 0) { - if (! append_string(list, ", fuzzy_counts=(")) - goto error; - - if (!append_integer(list, - (Py_ssize_t)self->fuzzy_counts[RE_FUZZY_SUB])) - goto error; - - if (! append_string(list, ", ")) - goto error; - - if (!append_integer(list, - (Py_ssize_t)self->fuzzy_counts[RE_FUZZY_INS])) - goto error; - - if (! append_string(list, ", ")) - goto error; - if (!append_integer(list, - (Py_ssize_t)self->fuzzy_counts[RE_FUZZY_DEL])) - goto error; - - if (! append_string(list, ")")) - goto error; - } - - if (self->partial) { - if (!append_string(list, ", partial=True")) - goto error; - } - - if (! append_string(list, ">")) - goto error; - - separator = Py_BuildValue("s", ""); - if (!separator) - goto error; - - result = PyUnicode_Join(separator, list); - Py_DECREF(separator); - Py_DECREF(list); - - return result; - -error: - Py_DECREF(list); - return NULL; -} - -/* PatternObject's '__repr__' method. 
*/ -static PyObject* pattern_repr(PyObject* self_) { - PatternObject* self; - PyObject* list; - PyObject* item; - int status; - int flag_count; - unsigned int i; - Py_ssize_t pos; - PyObject* key; - PyObject* value; - PyObject* separator; - PyObject* result; - - self = (PatternObject*)self_; - - list = PyList_New(0); - if (!list) - return NULL; - - if (!append_string(list, "regex.Regex(")) - goto error; - - item = PyObject_Repr(self->pattern); - if (!item) - goto error; - - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - goto error; - - flag_count = 0; - for (i = 0; i < sizeof(flag_names) / sizeof(flag_names[0]); i++) { - if (self->flags & flag_names[i].value) { - if (flag_count == 0) { - if (!append_string(list, ", flags=")) - goto error; - } else { - if (!append_string(list, " | ")) - goto error; - } - - if (!append_string(list, "regex.")) - goto error; - - if (!append_string(list, flag_names[i].name)) - goto error; - - ++flag_count; - } - } - - pos = 0; - /* PyDict_Next borrows references. */ - while (PyDict_Next(self->named_lists, &pos, &key, &value)) { - if (!append_string(list, ", ")) - goto error; - - status = PyList_Append(list, key); - if (status < 0) - goto error; - - if (!append_string(list, "=")) - goto error; - - item = PyObject_Repr(value); - if (!item) - goto error; - - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - goto error; - } - - if (!append_string(list, ")")) - goto error; - - separator = Py_BuildValue("s", ""); - if (!separator) - goto error; - - result = PyUnicode_Join(separator, list); - Py_DECREF(separator); - Py_DECREF(list); - - return result; - -error: - Py_DECREF(list); - return NULL; -} - -/* PatternObject's 'groupindex' method. */ -static PyObject* pattern_groupindex(PyObject* self_) { - PatternObject* self; - - self = (PatternObject*)self_; - - return PyDict_Copy(self->groupindex); -} - -/* PatternObject's '_pickled_data' method. 
*/ -static PyObject* pattern_pickled_data(PyObject* self_) { - PatternObject* self; - PyObject* pickled_data; - - self = (PatternObject*)self_; - - /* Build the data needed for picking. */ - pickled_data = Py_BuildValue("OnOOOOOnOnn", self->pattern, self->flags, - self->packed_code_list, self->groupindex, self->indexgroup, - self->named_lists, self->named_list_indexes, self->req_offset, - self->required_chars, self->req_flags, self->public_group_count); - - return pickled_data; -} - -static PyGetSetDef pattern_getset[] = { - {"groupindex", (getter)pattern_groupindex, (setter)NULL, - "A dictionary mapping group names to group numbers."}, - {"_pickled_data", (getter)pattern_pickled_data, (setter)NULL, - "Data used for pickling."}, - {NULL} /* Sentinel */ -}; - -static PyMemberDef pattern_members[] = { - {"pattern", T_OBJECT, offsetof(PatternObject, pattern), READONLY, - "The pattern string from which the regex object was compiled."}, - {"flags", T_PYSSIZET, offsetof(PatternObject, flags), READONLY, - "The regex matching flags."}, - {"groups", T_PYSSIZET, offsetof(PatternObject, public_group_count), - READONLY, "The number of capturing groups in the pattern."}, - {"named_lists", T_OBJECT, offsetof(PatternObject, named_lists), READONLY, - "The named lists used by the regex."}, - {NULL} /* Sentinel */ -}; - -static PyTypeObject Pattern_Type = { - PyObject_HEAD_INIT(NULL) - 0, - "_" RE_MODULE "." "Pattern", - sizeof(PatternObject) -}; - -/* Building the nodes is made simpler by allowing branches to have a single - * exit. These need to be removed. - */ -Py_LOCAL_INLINE(void) skip_one_way_branches(PatternObject* pattern) { - BOOL modified; - - /* If a node refers to a 1-way branch then make the former refer to the - * latter's destination. Repeat until they're all done. - */ - do { - size_t i; - - modified = FALSE; - - for (i = 0; i < pattern->node_count; i++) { - RE_Node* node; - RE_Node* next; - - node = pattern->node_list[i]; - - /* Check the first destination. 
*/ - next = node->next_1.node; - if (next && next->op == RE_OP_BRANCH && - !next->nonstring.next_2.node) { - node->next_1.node = next->next_1.node; - modified = TRUE; - } - - /* Check the second destination. */ - next = node->nonstring.next_2.node; - if (next && next->op == RE_OP_BRANCH && - !next->nonstring.next_2.node) { - node->nonstring.next_2.node = next->next_1.node; - modified = TRUE; - } - } - } while (modified); - - /* The start node might be a 1-way branch. Skip over it because it'll be - * removed. It might even be the first in a chain. - */ - while (pattern->start_node->op == RE_OP_BRANCH && - !pattern->start_node->nonstring.next_2.node) - pattern->start_node = pattern->start_node->next_1.node; -} - -/* Initialises a check stack. */ -Py_LOCAL_INLINE(void) CheckStack_init(RE_CheckStack* stack) { - stack->capacity = 0; - stack->count = 0; - stack->items = NULL; -} - -/* Finalises a check stack. */ -Py_LOCAL_INLINE(void) CheckStack_fini(RE_CheckStack* stack) { - PyMem_Free(stack->items); - stack->capacity = 0; - stack->count = 0; - stack->items = NULL; -} - -/* Pushes an item onto a check stack. */ -Py_LOCAL_INLINE(BOOL) CheckStack_push(RE_CheckStack* stack, RE_Node* node, - RE_STATUS_T result) { - RE_Check* check; - - if (stack->count >= stack->capacity) { - Py_ssize_t new_capacity; - RE_Check* new_items; - - new_capacity = stack->capacity * 2; - if (new_capacity == 0) - new_capacity = 16; - - new_items = (RE_Check*)PyMem_Realloc(stack->items, new_capacity * - sizeof(RE_Check)); - if (!new_items) - return FALSE; - - stack->capacity = new_capacity; - stack->items = new_items; - } - - check = &stack->items[stack->count++]; - check->node = node; - check->result = result; - - return TRUE; -} - -/* Pops an item off a check stack. Returns NULL if the stack is empty. */ -Py_LOCAL_INLINE(RE_Check*) CheckStack_pop(RE_CheckStack* stack) { - return stack->count > 0 ? 
&stack->items[--stack->count] : NULL; -} - -/* Adds guards to repeats which are followed by a reference to a group. */ -Py_LOCAL_INLINE(RE_STATUS_T) add_repeat_guards(PatternObject* pattern, RE_Node* - start_node) { - RE_CheckStack stack; - - CheckStack_init(&stack); - - CheckStack_push(&stack, start_node, RE_STATUS_NEITHER); - - for (;;) { - RE_Check* check; - RE_Node* node; - RE_STATUS_T result; - - check = CheckStack_pop(&stack); - - if (!check) - break; - - node = check->node; - result = check->result; - - if (!(node->status & RE_STATUS_VISITED_AG)) { - switch (check->node->op) { - case RE_OP_BRANCH: - { - RE_Node* branch_1; - RE_Node* branch_2; - BOOL visited_branch_1; - BOOL visited_branch_2; - - branch_1 = node->next_1.node; - branch_2 = node->nonstring.next_2.node; - visited_branch_1 = (branch_1->status & RE_STATUS_VISITED_AG); - visited_branch_2 = (branch_2->status & RE_STATUS_VISITED_AG); - - if (visited_branch_1 && visited_branch_2) { - RE_STATUS_T branch_1_result; - RE_STATUS_T branch_2_result; - - branch_1_result = branch_1->status & (RE_STATUS_REPEAT | - RE_STATUS_REF); - branch_2_result = branch_2->status & (RE_STATUS_REPEAT | - RE_STATUS_REF); - - node->status |= RE_STATUS_VISITED_AG | max_status_3(result, - branch_1_result, branch_2_result); - } else { - CheckStack_push(&stack, node, result); - if (!visited_branch_2) - CheckStack_push(&stack, branch_2, RE_STATUS_NEITHER); - if (!visited_branch_1) - CheckStack_push(&stack, branch_1, RE_STATUS_NEITHER); - } - break; - } - case RE_OP_END_GREEDY_REPEAT: - case RE_OP_END_LAZY_REPEAT: - node->status |= RE_STATUS_VISITED_AG; - break; - case RE_OP_GREEDY_REPEAT: - case RE_OP_LAZY_REPEAT: - { - BOOL limited; - RE_Node* body; - RE_Node* tail; - BOOL visited_body; - BOOL visited_tail; - - limited = ~node->values[2] != 0; - - body = node->next_1.node; - tail = node->nonstring.next_2.node; - visited_body = (body->status & RE_STATUS_VISITED_AG); - visited_tail = (tail->status & RE_STATUS_VISITED_AG); - - if 
(visited_body && visited_tail) { - RE_STATUS_T body_result; - RE_STATUS_T tail_result; - RE_RepeatInfo* repeat_info; - - body_result = body->status & (RE_STATUS_REPEAT | - RE_STATUS_REF); - tail_result = tail->status & (RE_STATUS_REPEAT | - RE_STATUS_REF); - - repeat_info = &pattern->repeat_info[node->values[0]]; - if (body_result != RE_STATUS_REF) - repeat_info->status |= RE_STATUS_BODY; - if (tail_result != RE_STATUS_REF) - repeat_info->status |= RE_STATUS_TAIL; - - if (limited) - result = max_status_2(result, RE_STATUS_LIMITED); - else - result = max_status_2(result, RE_STATUS_REPEAT); - node->status |= RE_STATUS_VISITED_AG | max_status_3(result, - body_result, tail_result); - } else { - CheckStack_push(&stack, node, result); - if (!visited_tail) - CheckStack_push(&stack, tail, RE_STATUS_NEITHER); - if (!visited_body) { - if (limited) - body->status |= RE_STATUS_VISITED_AG | - RE_STATUS_LIMITED; - else - CheckStack_push(&stack, body, RE_STATUS_NEITHER); - } - } - break; - } - case RE_OP_GREEDY_REPEAT_ONE: - case RE_OP_LAZY_REPEAT_ONE: - { - RE_Node* tail; - BOOL visited_tail; - - tail = node->next_1.node; - visited_tail = (tail->status & RE_STATUS_VISITED_AG); - - if (visited_tail) { - BOOL limited; - RE_STATUS_T tail_result; - RE_RepeatInfo* repeat_info; - - limited = ~node->values[2] != 0; - - tail_result = tail->status & (RE_STATUS_REPEAT | - RE_STATUS_REF); - - repeat_info = &pattern->repeat_info[node->values[0]]; - repeat_info->status |= RE_STATUS_BODY; - - if (tail_result != RE_STATUS_REF) - repeat_info->status |= RE_STATUS_TAIL; - - if (limited) - result = max_status_2(result, RE_STATUS_LIMITED); - else - result = max_status_2(result, RE_STATUS_REPEAT); - node->status |= RE_STATUS_VISITED_AG | max_status_3(result, - RE_STATUS_REPEAT, tail_result); - } else { - CheckStack_push(&stack, node, result); - CheckStack_push(&stack, tail, RE_STATUS_NEITHER); - } - break; - } - case RE_OP_GROUP_EXISTS: - { - RE_Node* branch_1; - RE_Node* branch_2; - BOOL 
visited_branch_1; - BOOL visited_branch_2; - - branch_1 = node->next_1.node; - branch_2 = node->nonstring.next_2.node; - visited_branch_1 = (branch_1->status & RE_STATUS_VISITED_AG); - visited_branch_2 = (branch_2->status & RE_STATUS_VISITED_AG); - - if (visited_branch_1 && visited_branch_2) { - RE_STATUS_T branch_1_result; - RE_STATUS_T branch_2_result; - - branch_1_result = branch_1->status & (RE_STATUS_REPEAT | - RE_STATUS_REF); - branch_2_result = branch_2->status & (RE_STATUS_REPEAT | - RE_STATUS_REF); - - node->status |= RE_STATUS_VISITED_AG | max_status_4(result, - branch_1_result, branch_2_result, RE_STATUS_REF); - } else { - CheckStack_push(&stack, node, result); - if (!visited_branch_2) - CheckStack_push(&stack, branch_2, RE_STATUS_NEITHER); - if (!visited_branch_1) - CheckStack_push(&stack, branch_1, RE_STATUS_NEITHER); - } - break; - } - case RE_OP_REF_GROUP: - case RE_OP_REF_GROUP_FLD: - case RE_OP_REF_GROUP_FLD_REV: - case RE_OP_REF_GROUP_IGN: - case RE_OP_REF_GROUP_IGN_REV: - case RE_OP_REF_GROUP_REV: - { - RE_Node* tail; - BOOL visited_tail; - - tail = node->next_1.node; - visited_tail = (tail->status & RE_STATUS_VISITED_AG); - - if (visited_tail) - node->status |= RE_STATUS_VISITED_AG | RE_STATUS_REF; - else { - CheckStack_push(&stack, node, result); - CheckStack_push(&stack, tail, RE_STATUS_NEITHER); - } - break; - } - case RE_OP_SUCCESS: - node->status |= RE_STATUS_VISITED_AG | result; - break; - default: - { - RE_Node* tail; - BOOL visited_tail; - RE_STATUS_T tail_result; - - tail = node->next_1.node; - visited_tail = (tail->status & RE_STATUS_VISITED_AG); - - if (visited_tail) { - tail_result = tail->status & (RE_STATUS_REPEAT | - RE_STATUS_REF); - node->status |= RE_STATUS_VISITED_AG | tail_result; - } else { - CheckStack_push(&stack, node, result); - CheckStack_push(&stack, node->next_1.node, result); - } - break; - } - } - } - } - - CheckStack_fini(&stack); - - return start_node->status & (RE_STATUS_REPEAT | RE_STATUS_REF); -} - -/* Adds an 
index to a node's values unless it's already present. - * - * 'offset' is the offset of the index count within the values. - */ -Py_LOCAL_INLINE(BOOL) add_index(RE_Node* node, size_t offset, size_t index) { - size_t index_count; - size_t first_index; - size_t i; - RE_CODE* new_values; - - if (!node) - return TRUE; - - index_count = node->values[offset]; - first_index = offset + 1; - - /* Is the index already present? */ - for (i = 0; i < index_count; i++) { - if (node->values[first_index + i] == index) - return TRUE; - } - - /* Allocate more space for the new index. */ - new_values = re_realloc(node->values, (node->value_count + 1) * - sizeof(RE_CODE)); - if (!new_values) - return FALSE; - - ++node->value_count; - node->values = new_values; - - node->values[first_index + node->values[offset]++] = (RE_CODE)index; - - return TRUE; -} - -/* Records the index of every repeat and fuzzy section within atomic - * subpatterns and lookarounds. - */ -Py_LOCAL_INLINE(BOOL) record_subpattern_repeats_and_fuzzy_sections(RE_Node* - parent_node, size_t offset, size_t repeat_count, RE_Node* node) { - while (node) { - if (node->status & RE_STATUS_VISITED_REP) - return TRUE; - - node->status |= RE_STATUS_VISITED_REP; - - switch (node->op) { - case RE_OP_BRANCH: - case RE_OP_GROUP_EXISTS: - if (!record_subpattern_repeats_and_fuzzy_sections(parent_node, - offset, repeat_count, node->next_1.node)) - return FALSE; - node = node->nonstring.next_2.node; - break; - case RE_OP_END_FUZZY: - node = node->next_1.node; - break; - case RE_OP_END_GREEDY_REPEAT: - case RE_OP_END_LAZY_REPEAT: - return TRUE; - case RE_OP_FUZZY: - /* Record the fuzzy index. */ - if (!add_index(parent_node, offset, repeat_count + - node->values[0])) - return FALSE; - node = node->next_1.node; - break; - case RE_OP_GREEDY_REPEAT: - case RE_OP_LAZY_REPEAT: - /* Record the repeat index. 
*/ - if (!add_index(parent_node, offset, node->values[0])) - return FALSE; - if (!record_subpattern_repeats_and_fuzzy_sections(parent_node, - offset, repeat_count, node->next_1.node)) - return FALSE; - node = node->nonstring.next_2.node; - break; - case RE_OP_GREEDY_REPEAT_ONE: - case RE_OP_LAZY_REPEAT_ONE: - /* Record the repeat index. */ - if (!add_index(parent_node, offset, node->values[0])) - return FALSE; - node = node->next_1.node; - break; - default: - node = node->next_1.node; - break; - } - } - - return TRUE; -} - -/* Initialises a node stack. */ -Py_LOCAL_INLINE(void) NodeStack_init(RE_NodeStack* stack) { - stack->capacity = 0; - stack->count = 0; - stack->items = NULL; -} - -/* Finalises a node stack. */ -Py_LOCAL_INLINE(void) NodeStack_fini(RE_NodeStack* stack) { - PyMem_Free(stack->items); - stack->capacity = 0; - stack->count = 0; - stack->items = NULL; -} - -/* Pushes an item onto a node stack. */ -Py_LOCAL_INLINE(BOOL) NodeStack_push(RE_NodeStack* stack, RE_Node* node) { - if (stack->count >= stack->capacity) { - Py_ssize_t new_capacity; - RE_Node** new_items; - - new_capacity = stack->capacity * 2; - if (new_capacity == 0) - new_capacity = 16; - - new_items = (RE_Node**)PyMem_Realloc(stack->items, new_capacity * - sizeof(RE_Node*)); - if (!new_items) - return FALSE; - - stack->capacity = new_capacity; - stack->items = new_items; - } - - stack->items[stack->count++] = node; - - return TRUE; -} - -/* Pops an item off a node stack. Returns NULL if the stack is empty. */ -Py_LOCAL_INLINE(RE_Node*) NodeStack_pop(RE_NodeStack* stack) { - return stack->count > 0 ? stack->items[--stack->count] : NULL; -} - -/* Marks nodes which are being used as used. 
*/ -Py_LOCAL_INLINE(void) use_nodes(RE_Node* node) { - RE_NodeStack stack; - - NodeStack_init(&stack); - - while (node) { - while (node && !(node->status & RE_STATUS_USED)) { - node->status |= RE_STATUS_USED; - if (!(node->status & RE_STATUS_STRING)) { - if (node->nonstring.next_2.node) - NodeStack_push(&stack, node->nonstring.next_2.node); - } - node = node->next_1.node; - } - node = NodeStack_pop(&stack); - } - - NodeStack_fini(&stack); -} - -/* Discards any unused nodes. - * - * Optimising the nodes might result in some nodes no longer being used. - */ -Py_LOCAL_INLINE(void) discard_unused_nodes(PatternObject* pattern) { - size_t i; - size_t new_count; - - /* Mark the nodes which are being used. */ - use_nodes(pattern->start_node); - - for (i = 0; i < pattern->call_ref_info_capacity; i++) - use_nodes(pattern->call_ref_info[i].node); - - new_count = 0; - for (i = 0; i < pattern->node_count; i++) { - RE_Node* node; - - node = pattern->node_list[i]; - if (node->status & RE_STATUS_USED) - pattern->node_list[new_count++] = node; - else { - re_dealloc(node->values); - if (node->status & RE_STATUS_STRING) { - re_dealloc(node->string.bad_character_offset); - re_dealloc(node->string.good_suffix_offset); - } - re_dealloc(node); - } - } - - pattern->node_count = new_count; -} - -/* Marks all the group which are named. Returns FALSE if there's an error. */ -Py_LOCAL_INLINE(BOOL) mark_named_groups(PatternObject* pattern) { - size_t i; - - for (i = 0; i < pattern->public_group_count; i++) { - RE_GroupInfo* group_info; - PyObject* index; - int status; - - group_info = &pattern->group_info[i]; - index = Py_BuildValue("n", i + 1); - if (!index) - return FALSE; - - status = PyDict_Contains(pattern->indexgroup, index); - Py_DECREF(index); - if (status < 0) - return FALSE; - - group_info->has_name = status == 1; - } - - return TRUE; -} - -/* Gets the test node. 
- * - * The test node lets the matcher look ahead in the pattern, allowing it to - * avoid the cost of housekeeping, only to find that what follows doesn't match - * anyway. - */ -Py_LOCAL_INLINE(void) set_test_node(RE_NextNode* next) { - RE_Node* node = next->node; - RE_Node* test; - - next->test = node; - next->match_next = node; - next->match_step = 0; - - if (!node) - return; - - test = node; - while (test->op == RE_OP_END_GROUP || test->op == RE_OP_START_GROUP) - test = test->next_1.node; - - next->test = test; - - if (test != node) - return; - - switch (test->op) { - case RE_OP_ANY: - case RE_OP_ANY_ALL: - case RE_OP_ANY_ALL_REV: - case RE_OP_ANY_REV: - case RE_OP_ANY_U: - case RE_OP_ANY_U_REV: - case RE_OP_BOUNDARY: - case RE_OP_CHARACTER: - case RE_OP_CHARACTER_IGN: - case RE_OP_CHARACTER_IGN_REV: - case RE_OP_CHARACTER_REV: - case RE_OP_DEFAULT_BOUNDARY: - case RE_OP_DEFAULT_END_OF_WORD: - case RE_OP_DEFAULT_START_OF_WORD: - case RE_OP_END_OF_LINE: - case RE_OP_END_OF_LINE_U: - case RE_OP_END_OF_STRING: - case RE_OP_END_OF_STRING_LINE: - case RE_OP_END_OF_STRING_LINE_U: - case RE_OP_END_OF_WORD: - case RE_OP_GRAPHEME_BOUNDARY: - case RE_OP_PROPERTY: - case RE_OP_PROPERTY_IGN: - case RE_OP_PROPERTY_IGN_REV: - case RE_OP_PROPERTY_REV: - case RE_OP_RANGE: - case RE_OP_RANGE_IGN: - case RE_OP_RANGE_IGN_REV: - case RE_OP_RANGE_REV: - case RE_OP_SEARCH_ANCHOR: - case RE_OP_SET_DIFF: - case RE_OP_SET_DIFF_IGN: - case RE_OP_SET_DIFF_IGN_REV: - case RE_OP_SET_DIFF_REV: - case RE_OP_SET_INTER: - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_INTER_IGN_REV: - case RE_OP_SET_INTER_REV: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_SYM_DIFF_IGN_REV: - case RE_OP_SET_SYM_DIFF_REV: - case RE_OP_SET_UNION: - case RE_OP_SET_UNION_IGN: - case RE_OP_SET_UNION_IGN_REV: - case RE_OP_SET_UNION_REV: - case RE_OP_START_OF_LINE: - case RE_OP_START_OF_LINE_U: - case RE_OP_START_OF_STRING: - case RE_OP_START_OF_WORD: - case RE_OP_STRING: - case 
RE_OP_STRING_FLD: - case RE_OP_STRING_FLD_REV: - case RE_OP_STRING_IGN: - case RE_OP_STRING_IGN_REV: - case RE_OP_STRING_REV: - next->match_next = test->next_1.node; - next->match_step = test->step; - break; - case RE_OP_GREEDY_REPEAT_ONE: - case RE_OP_LAZY_REPEAT_ONE: - if (test->values[1] > 0) - next->test = test; - break; - } -} - -/* Sets the test nodes. */ -Py_LOCAL_INLINE(void) set_test_nodes(PatternObject* pattern) { - RE_Node** node_list; - size_t i; - - node_list = pattern->node_list; - for (i = 0; i < pattern->node_count; i++) { - RE_Node* node; - - node = node_list[i]; - set_test_node(&node->next_1); - if (!(node->status & RE_STATUS_STRING)) - set_test_node(&node->nonstring.next_2); - } -} - -/* Optimises the pattern. */ -Py_LOCAL_INLINE(BOOL) optimise_pattern(PatternObject* pattern) { - size_t i; - - /* Building the nodes is made simpler by allowing branches to have a single - * exit. These need to be removed. - */ - skip_one_way_branches(pattern); - - /* Add position guards for repeat bodies containing a reference to a group - * or repeat tails followed at some point by a reference to a group. - */ - add_repeat_guards(pattern, pattern->start_node); - - /* Record the index of repeats and fuzzy sections within the body of atomic - * and lookaround nodes. - */ - if (!record_subpattern_repeats_and_fuzzy_sections(NULL, 0, - pattern->repeat_count, pattern->start_node)) - return FALSE; - - for (i = 0; i < pattern->call_ref_info_count; i++) { - RE_Node* node; - - node = pattern->call_ref_info[i].node; - if (!record_subpattern_repeats_and_fuzzy_sections(NULL, 0, - pattern->repeat_count, node)) - return FALSE; - } - - /* Discard any unused nodes. */ - discard_unused_nodes(pattern); - - /* Set the test nodes. */ - set_test_nodes(pattern); - - /* Mark all the group that are named. */ - if (!mark_named_groups(pattern)) - return FALSE; - - return TRUE; -} - -/* Creates a new pattern node. 
*/ -Py_LOCAL_INLINE(RE_Node*) create_node(PatternObject* pattern, RE_UINT8 op, - RE_CODE flags, Py_ssize_t step, size_t value_count) { - RE_Node* node; - - node = (RE_Node*)re_alloc(sizeof(*node)); - if (!node) - return NULL; - memset(node, 0, sizeof(RE_Node)); - - node->value_count = value_count; - if (node->value_count > 0) { - node->values = (RE_CODE*)re_alloc(node->value_count * sizeof(RE_CODE)); - if (!node->values) - goto error; - } else - node->values = NULL; - - node->op = op; - node->match = (flags & RE_POSITIVE_OP) != 0; - node->status = (RE_STATUS_T)(flags << RE_STATUS_SHIFT); - node->step = step; - - /* Ensure that there's enough storage to record the new node. */ - if (pattern->node_count >= pattern->node_capacity) { - RE_Node** new_node_list; - - pattern->node_capacity *= 2; - if (pattern->node_capacity == 0) - pattern->node_capacity = RE_INIT_NODE_LIST_SIZE; - new_node_list = (RE_Node**)re_realloc(pattern->node_list, - pattern->node_capacity * sizeof(RE_Node*)); - if (!new_node_list) - goto error; - pattern->node_list = new_node_list; - } - - /* Record the new node. */ - pattern->node_list[pattern->node_count++] = node; - - return node; - -error: - re_dealloc(node->values); - re_dealloc(node); - return NULL; -} - -/* Adds a node as a next node for another node. */ -Py_LOCAL_INLINE(void) add_node(RE_Node* node_1, RE_Node* node_2) { - if (!node_1->next_1.node) - node_1->next_1.node = node_2; - else - node_1->nonstring.next_2.node = node_2; -} - -/* Ensures that the entry for a group's details actually exists. */ -Py_LOCAL_INLINE(BOOL) ensure_group(PatternObject* pattern, size_t group) { - size_t old_capacity; - size_t new_capacity; - RE_GroupInfo* new_group_info; - - if (group <= pattern->true_group_count) - /* We already have an entry for the group. */ - return TRUE; - - /* Increase the storage capacity to include the new entry if it's - * insufficient. 
- */ - old_capacity = pattern->group_info_capacity; - new_capacity = pattern->group_info_capacity; - while (group > new_capacity) - new_capacity += RE_LIST_SIZE_INC; - - if (new_capacity > old_capacity) { - new_group_info = (RE_GroupInfo*)re_realloc(pattern->group_info, - new_capacity * sizeof(RE_GroupInfo)); - if (!new_group_info) - return FALSE; - memset(new_group_info + old_capacity, 0, (new_capacity - old_capacity) - * sizeof(RE_GroupInfo)); - - pattern->group_info = new_group_info; - pattern->group_info_capacity = new_capacity; - } - - pattern->true_group_count = group; - - return TRUE; -} - -/* Records that there's a reference to a group. */ -Py_LOCAL_INLINE(BOOL) record_ref_group(PatternObject* pattern, size_t group) { - if (!ensure_group(pattern, group)) - return FALSE; - - pattern->group_info[group - 1].referenced = TRUE; - - return TRUE; -} - -/* Records that there's a new group. */ -Py_LOCAL_INLINE(BOOL) record_group(PatternObject* pattern, size_t group, - RE_Node* node) { - if (!ensure_group(pattern, group)) - return FALSE; - - if (group >= 1) { - RE_GroupInfo* info; - - info = &pattern->group_info[group - 1]; - info->end_index = (Py_ssize_t)pattern->true_group_count; - info->node = node; - } - - return TRUE; -} - -/* Records that a group has closed. */ -Py_LOCAL_INLINE(void) record_group_end(PatternObject* pattern, size_t group) { - if (group >= 1) - pattern->group_info[group - 1].end_index = ++pattern->group_end_index; -} - -/* Ensures that the entry for a call_ref's details actually exists. */ -Py_LOCAL_INLINE(BOOL) ensure_call_ref(PatternObject* pattern, size_t call_ref) - { - size_t old_capacity; - size_t new_capacity; - RE_CallRefInfo* new_call_ref_info; - - if (call_ref < pattern->call_ref_info_count) - /* We already have an entry for the call_ref. */ - return TRUE; - - /* Increase the storage capacity to include the new entry if it's - * insufficient. 
- */ - old_capacity = pattern->call_ref_info_capacity; - new_capacity = pattern->call_ref_info_capacity; - while (call_ref >= new_capacity) - new_capacity += RE_LIST_SIZE_INC; - - if (new_capacity > old_capacity) { - new_call_ref_info = (RE_CallRefInfo*)re_realloc(pattern->call_ref_info, - new_capacity * sizeof(RE_CallRefInfo)); - if (!new_call_ref_info) - return FALSE; - memset(new_call_ref_info + old_capacity, 0, (new_capacity - - old_capacity) * sizeof(RE_CallRefInfo)); - - pattern->call_ref_info = new_call_ref_info; - pattern->call_ref_info_capacity = new_capacity; - } - - pattern->call_ref_info_count = 1 + call_ref; - - return TRUE; -} - -/* Records that a call_ref is defined. */ -Py_LOCAL_INLINE(BOOL) record_call_ref_defined(PatternObject* pattern, size_t - call_ref, RE_Node* node) { - if (!ensure_call_ref(pattern, call_ref)) - return FALSE; - - pattern->call_ref_info[call_ref].defined = TRUE; - pattern->call_ref_info[call_ref].node = node; - - return TRUE; -} - -/* Records that a call_ref is used. */ -Py_LOCAL_INLINE(BOOL) record_call_ref_used(PatternObject* pattern, size_t - call_ref) { - if (!ensure_call_ref(pattern, call_ref)) - return FALSE; - - pattern->call_ref_info[call_ref].used = TRUE; - - return TRUE; -} - -/* Checks whether a node matches one and only one character. */ -Py_LOCAL_INLINE(BOOL) sequence_matches_one(RE_Node* node) { - while (node->op == RE_OP_BRANCH && !node->nonstring.next_2.node) - node = node->next_1.node; - - if (node->next_1.node || (node->status & RE_STATUS_FUZZY)) - return FALSE; - - return node_matches_one_character(node); -} - -/* Records a repeat. */ -Py_LOCAL_INLINE(BOOL) record_repeat(PatternObject* pattern, size_t index, - size_t repeat_depth) { - size_t old_capacity; - size_t new_capacity; - - /* Increase the storage capacity to include the new entry if it's - * insufficient. 
- */ - old_capacity = pattern->repeat_info_capacity; - new_capacity = pattern->repeat_info_capacity; - while (index >= new_capacity) - new_capacity += RE_LIST_SIZE_INC; - - if (new_capacity > old_capacity) { - RE_RepeatInfo* new_repeat_info; - - new_repeat_info = (RE_RepeatInfo*)re_realloc(pattern->repeat_info, - new_capacity * sizeof(RE_RepeatInfo)); - if (!new_repeat_info) - return FALSE; - memset(new_repeat_info + old_capacity, 0, (new_capacity - old_capacity) - * sizeof(RE_RepeatInfo)); - - pattern->repeat_info = new_repeat_info; - pattern->repeat_info_capacity = new_capacity; - } - - if (index >= pattern->repeat_count) - pattern->repeat_count = index + 1; - - if (repeat_depth > 0) - pattern->repeat_info[index].status |= RE_STATUS_INNER; - - return TRUE; -} - -Py_LOCAL_INLINE(Py_ssize_t) get_step(RE_CODE op) { - switch (op) { - case RE_OP_ANY: - case RE_OP_ANY_ALL: - case RE_OP_ANY_U: - case RE_OP_CHARACTER: - case RE_OP_CHARACTER_IGN: - case RE_OP_PROPERTY: - case RE_OP_PROPERTY_IGN: - case RE_OP_RANGE: - case RE_OP_RANGE_IGN: - case RE_OP_SET_DIFF: - case RE_OP_SET_DIFF_IGN: - case RE_OP_SET_INTER: - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_UNION: - case RE_OP_SET_UNION_IGN: - case RE_OP_STRING: - case RE_OP_STRING_FLD: - case RE_OP_STRING_IGN: - return 1; - case RE_OP_ANY_ALL_REV: - case RE_OP_ANY_REV: - case RE_OP_ANY_U_REV: - case RE_OP_CHARACTER_IGN_REV: - case RE_OP_CHARACTER_REV: - case RE_OP_PROPERTY_IGN_REV: - case RE_OP_PROPERTY_REV: - case RE_OP_RANGE_IGN_REV: - case RE_OP_RANGE_REV: - case RE_OP_SET_DIFF_IGN_REV: - case RE_OP_SET_DIFF_REV: - case RE_OP_SET_INTER_IGN_REV: - case RE_OP_SET_INTER_REV: - case RE_OP_SET_SYM_DIFF_IGN_REV: - case RE_OP_SET_SYM_DIFF_REV: - case RE_OP_SET_UNION_IGN_REV: - case RE_OP_SET_UNION_REV: - case RE_OP_STRING_FLD_REV: - case RE_OP_STRING_IGN_REV: - case RE_OP_STRING_REV: - return -1; - } - - return 0; -} - -Py_LOCAL_INLINE(int) 
build_sequence(RE_CompileArgs* args); - -/* Builds an ANY node. */ -Py_LOCAL_INLINE(int) build_ANY(RE_CompileArgs* args) { - RE_UINT8 op; - RE_CODE flags; - Py_ssize_t step; - RE_Node* node; - - /* codes: opcode, flags. */ - if (args->code + 1 > args->end_code) - return RE_ERROR_ILLEGAL; - - op = (RE_UINT8)args->code[0]; - flags = args->code[1]; - - step = get_step(op); - - /* Create the node. */ - node = create_node(args->pattern, op, flags, step, 0); - if (!node) - return RE_ERROR_MEMORY; - - args->code += 2; - - /* Append the node. */ - add_node(args->end, node); - args->end = node; - - ++args->min_width; - - return RE_ERROR_SUCCESS; -} - -/* Builds a FUZZY node. */ -Py_LOCAL_INLINE(int) build_FUZZY(RE_CompileArgs* args) { - RE_CODE flags; - RE_Node* start_node; - RE_Node* end_node; - RE_CODE index; - RE_CompileArgs subargs; - int status; - - /* codes: opcode, flags, constraints, sequence, end. */ - if (args->code + 13 > args->end_code) - return RE_ERROR_ILLEGAL; - - flags = args->code[1]; - - /* Create nodes for the start and end of the fuzzy sequence. */ - start_node = create_node(args->pattern, RE_OP_FUZZY, flags, 0, 9); - end_node = create_node(args->pattern, RE_OP_END_FUZZY, flags, 0, 5); - if (!start_node || !end_node) - return RE_ERROR_MEMORY; - - index = (RE_CODE)args->pattern->fuzzy_count++; - start_node->values[0] = index; - end_node->values[0] = index; - - /* The constraints consist of 4 pairs of limits and the cost equation. */ - end_node->values[RE_FUZZY_VAL_MIN_DEL] = args->code[2]; /* Deletion minimum. */ - end_node->values[RE_FUZZY_VAL_MIN_INS] = args->code[4]; /* Insertion minimum. */ - end_node->values[RE_FUZZY_VAL_MIN_SUB] = args->code[6]; /* Substitution minimum. */ - end_node->values[RE_FUZZY_VAL_MIN_ERR] = args->code[8]; /* Error minimum. */ - - start_node->values[RE_FUZZY_VAL_MAX_DEL] = args->code[3]; /* Deletion maximum. */ - start_node->values[RE_FUZZY_VAL_MAX_INS] = args->code[5]; /* Insertion maximum. 
*/ - start_node->values[RE_FUZZY_VAL_MAX_SUB] = args->code[7]; /* Substitution maximum. */ - start_node->values[RE_FUZZY_VAL_MAX_ERR] = args->code[9]; /* Error maximum. */ - - start_node->values[RE_FUZZY_VAL_DEL_COST] = args->code[10]; /* Deletion cost. */ - start_node->values[RE_FUZZY_VAL_INS_COST] = args->code[11]; /* Insertion cost. */ - start_node->values[RE_FUZZY_VAL_SUB_COST] = args->code[12]; /* Substitution cost. */ - start_node->values[RE_FUZZY_VAL_MAX_COST] = args->code[13]; /* Total cost. */ - - args->code += 14; - - subargs = *args; - subargs.within_fuzzy = TRUE; - - /* Compile the sequence and check that we've reached the end of the - * subpattern. - */ - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - if (subargs.code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - args->code = subargs.code; - args->min_width += subargs.min_width; - args->has_captures |= subargs.has_captures; - args->is_fuzzy = TRUE; - args->has_groups |= subargs.has_groups; - args->has_repeats |= subargs.has_repeats; - - ++args->code; - - /* Append the fuzzy sequence. */ - add_node(args->end, start_node); - add_node(start_node, subargs.start); - add_node(subargs.end, end_node); - args->end = end_node; - - return RE_ERROR_SUCCESS; -} - -/* Builds an ATOMIC node. */ -Py_LOCAL_INLINE(int) build_ATOMIC(RE_CompileArgs* args) { - RE_Node* atomic_node; - RE_CompileArgs subargs; - int status; - RE_Node* end_node; - - /* codes: opcode, sequence, end. */ - if (args->code + 1 > args->end_code) - return RE_ERROR_ILLEGAL; - - atomic_node = create_node(args->pattern, RE_OP_ATOMIC, 0, 0, 0); - if (!atomic_node) - return RE_ERROR_MEMORY; - - ++args->code; - - /* Compile the sequence and check that we've reached the end of it. 
*/ - subargs = *args; - - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - if (subargs.code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - args->code = subargs.code; - ++args->code; - - /* Check the subpattern. */ - args->min_width += subargs.min_width; - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - args->has_groups |= subargs.has_groups; - args->has_repeats |= subargs.has_repeats; - - if (subargs.has_groups) - atomic_node->status |= RE_STATUS_HAS_GROUPS; - - if (subargs.has_repeats) - atomic_node->status |= RE_STATUS_HAS_REPEATS; - - /* Create the node to terminate the subpattern. */ - end_node = create_node(subargs.pattern, RE_OP_END_ATOMIC, 0, 0, 0); - if (!end_node) - return RE_ERROR_MEMORY; - - /* Append the new sequence. */ - add_node(args->end, atomic_node); - add_node(atomic_node, subargs.start); - add_node(subargs.end, end_node); - args->end = end_node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a BOUNDARY node. */ -Py_LOCAL_INLINE(int) build_BOUNDARY(RE_CompileArgs* args) { - RE_UINT8 op; - RE_CODE flags; - RE_Node* node; - - /* codes: opcode, flags. */ - if (args->code + 1 > args->end_code) - return RE_ERROR_ILLEGAL; - - op = (RE_UINT8)args->code[0]; - flags = args->code[1]; - - args->code += 2; - - /* Create the node. */ - node = create_node(args->pattern, op, flags, 0, 0); - if (!node) - return RE_ERROR_MEMORY; - - /* Append the node. */ - add_node(args->end, node); - args->end = node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a BRANCH node. */ -Py_LOCAL_INLINE(int) build_BRANCH(RE_CompileArgs* args) { - RE_Node* branch_node; - RE_Node* join_node; - Py_ssize_t min_width; - RE_CompileArgs subargs; - int status; - - /* codes: opcode, branch, next, branch, end. */ - if (args->code + 2 > args->end_code) - return RE_ERROR_ILLEGAL; - - /* Create nodes for the start and end of the branch sequence. 
*/ - branch_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); - join_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); - if (!branch_node || !join_node) - return RE_ERROR_MEMORY; - - /* Append the node. */ - add_node(args->end, branch_node); - args->end = join_node; - - min_width = PY_SSIZE_T_MAX; - - subargs = *args; - - /* A branch in the regular expression is compiled into a series of 2-way - * branches. - */ - do { - RE_Node* next_branch_node; - - /* Skip over the 'BRANCH' or 'NEXT' opcode. */ - ++subargs.code; - - /* Compile the sequence until the next 'BRANCH' or 'NEXT' opcode. */ - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - min_width = min_ssize_t(min_width, subargs.min_width); - - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - args->has_groups |= subargs.has_groups; - args->has_repeats |= subargs.has_repeats; - - /* Append the sequence. */ - add_node(branch_node, subargs.start); - add_node(subargs.end, join_node); - - /* Create a start node for the next sequence and append it. */ - next_branch_node = create_node(subargs.pattern, RE_OP_BRANCH, 0, 0, 0); - if (!next_branch_node) - return RE_ERROR_MEMORY; - - add_node(branch_node, next_branch_node); - branch_node = next_branch_node; - } while (subargs.code < subargs.end_code && subargs.code[0] == RE_OP_NEXT); - - /* We should have reached the end of the branch. */ - if (subargs.code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - args->code = subargs.code; - - ++args->code; - args->min_width += min_width; - - return RE_ERROR_SUCCESS; -} - -/* Builds a CALL_REF node. */ -Py_LOCAL_INLINE(int) build_CALL_REF(RE_CompileArgs* args) { - RE_CODE call_ref; - RE_Node* start_node; - RE_Node* end_node; - RE_CompileArgs subargs; - int status; - - /* codes: opcode, call_ref. 
*/ - if (args->code + 1 > args->end_code) - return RE_ERROR_ILLEGAL; - - call_ref = args->code[1]; - - args->code += 2; - - /* Create nodes for the start and end of the subpattern. */ - start_node = create_node(args->pattern, RE_OP_CALL_REF, 0, 0, 1); - end_node = create_node(args->pattern, RE_OP_GROUP_RETURN, 0, 0, 0); - if (!start_node || !end_node) - return RE_ERROR_MEMORY; - - start_node->values[0] = call_ref; - - /* Compile the sequence and check that we've reached the end of the - * subpattern. - */ - subargs = *args; - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - if (subargs.code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - args->code = subargs.code; - args->min_width += subargs.min_width; - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - args->has_groups |= subargs.has_groups; - args->has_repeats |= subargs.has_repeats; - - ++args->code; - - /* Record that we defined a call_ref. */ - if (!record_call_ref_defined(args->pattern, call_ref, start_node)) - return RE_ERROR_MEMORY; - - /* Append the node. */ - add_node(args->end, start_node); - add_node(start_node, subargs.start); - add_node(subargs.end, end_node); - args->end = end_node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a CHARACTER or PROPERTY node. */ -Py_LOCAL_INLINE(int) build_CHARACTER_or_PROPERTY(RE_CompileArgs* args) { - RE_UINT8 op; - RE_CODE flags; - Py_ssize_t step; - RE_Node* node; - - /* codes: opcode, flags, value. */ - if (args->code + 2 > args->end_code) - return RE_ERROR_ILLEGAL; - - op = (RE_UINT8)args->code[0]; - flags = args->code[1]; - - step = get_step(op); - - if (flags & RE_ZEROWIDTH_OP) - step = 0; - - /* Create the node. */ - node = create_node(args->pattern, op, flags, step, 1); - if (!node) - return RE_ERROR_MEMORY; - - node->values[0] = args->code[2]; - - args->code += 3; - - /* Append the node. 
*/ - add_node(args->end, node); - args->end = node; - - if (step != 0) - ++args->min_width; - - return RE_ERROR_SUCCESS; -} - -/* Builds a CONDITIONAL node. */ -Py_LOCAL_INLINE(int) build_CONDITIONAL(RE_CompileArgs* args) { - RE_CODE flags; - BOOL forward; - RE_Node* test_node; - RE_CompileArgs subargs; - int status; - RE_Node* end_test_node; - RE_Node* end_node; - Py_ssize_t min_width; - - /* codes: opcode, flags, forward, sequence, next, sequence, next, sequence, - * end. - */ - if (args->code + 4 > args->end_code) - return RE_ERROR_ILLEGAL; - - flags = args->code[1]; - forward = (BOOL)args->code[2]; - - /* Create a node for the lookaround. */ - test_node = create_node(args->pattern, RE_OP_CONDITIONAL, flags, 0, 0); - if (!test_node) - return RE_ERROR_MEMORY; - - args->code += 3; - - add_node(args->end, test_node); - - /* Compile the lookaround test and check that we've reached the end of the - * subpattern. - */ - subargs = *args; - subargs.forward = forward; - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - if (subargs.code[0] != RE_OP_NEXT) - return RE_ERROR_ILLEGAL; - - args->code = subargs.code; - ++args->code; - - /* Check the lookaround subpattern. */ - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - args->has_groups |= subargs.has_groups; - args->has_repeats |= subargs.has_repeats; - - if (subargs.has_groups) - test_node->status |= RE_STATUS_HAS_GROUPS; - - if (subargs.has_repeats) - test_node->status |= RE_STATUS_HAS_REPEATS; - - /* Create the node to terminate the test. */ - end_test_node = create_node(args->pattern, RE_OP_END_CONDITIONAL, 0, 0, 0); - if (!end_test_node) - return RE_ERROR_MEMORY; - - /* test node -> test -> end test node */ - add_node(test_node, subargs.start); - add_node(subargs.end, end_test_node); - - /* Compile the true branch. 
*/ - subargs = *args; - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - /* Check the true branch. */ - args->code = subargs.code; - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - args->has_groups |= subargs.has_groups; - args->has_repeats |= subargs.has_repeats; - - min_width = subargs.min_width; - - /* Create the terminating node. */ - end_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); - if (!end_node) - return RE_ERROR_MEMORY; - - /* end test node -> true branch -> end node */ - add_node(end_test_node, subargs.start); - add_node(subargs.end, end_node); - - if (args->code[0] == RE_OP_NEXT) { - /* There's a false branch. */ - ++args->code; - - /* Compile the false branch. */ - subargs.code = args->code; - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - /* Check the false branch. */ - args->code = subargs.code; - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - args->has_groups |= subargs.has_groups; - args->has_repeats |= subargs.has_repeats; - - min_width = min_ssize_t(min_width, subargs.min_width); - - /* test node -> false branch -> end node */ - add_node(test_node, subargs.start); - add_node(subargs.end, end_node); - } else - /* end test node -> end node */ - add_node(end_test_node, end_node); - - if (args->code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - args->min_width += min_width; - - ++args->code; - - args->end = end_node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a GROUP node. */ -Py_LOCAL_INLINE(int) build_GROUP(RE_CompileArgs* args) { - RE_CODE private_group; - RE_CODE public_group; - RE_Node* start_node; - RE_Node* end_node; - RE_CompileArgs subargs; - int status; - - /* codes: opcode, private_group, public_group. 
*/ - if (args->code + 2 > args->end_code) - return RE_ERROR_ILLEGAL; - - private_group = args->code[1]; - public_group = args->code[2]; - - args->code += 3; - - /* Create nodes for the start and end of the capture group. */ - start_node = create_node(args->pattern, args->forward ? RE_OP_START_GROUP : - RE_OP_END_GROUP, 0, 0, 3); - end_node = create_node(args->pattern, args->forward ? RE_OP_END_GROUP : - RE_OP_START_GROUP, 0, 0, 3); - if (!start_node || !end_node) - return RE_ERROR_MEMORY; - - start_node->values[0] = private_group; - end_node->values[0] = private_group; - start_node->values[1] = public_group; - end_node->values[1] = public_group; - - /* Signal that the capture should be saved when it's complete. */ - start_node->values[2] = 0; - end_node->values[2] = 1; - - /* Record that we have a new capture group. */ - if (!record_group(args->pattern, private_group, start_node)) - return RE_ERROR_MEMORY; - - /* Compile the sequence and check that we've reached the end of the capture - * group. - */ - subargs = *args; - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - if (subargs.code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - args->code = subargs.code; - args->min_width += subargs.min_width; - args->has_captures |= subargs.has_captures | subargs.visible_captures; - args->is_fuzzy |= subargs.is_fuzzy; - args->has_groups |= TRUE; - args->has_repeats |= subargs.has_repeats; - - ++args->code; - - /* Record that the capture group has closed. */ - record_group_end(args->pattern, private_group); - - /* Append the capture group. */ - add_node(args->end, start_node); - add_node(start_node, subargs.start); - add_node(subargs.end, end_node); - args->end = end_node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a GROUP_CALL node. */ -Py_LOCAL_INLINE(int) build_GROUP_CALL(RE_CompileArgs* args) { - RE_CODE call_ref; - RE_Node* node; - - /* codes: opcode, call_ref. 
*/ - if (args->code + 1 > args->end_code) - return RE_ERROR_ILLEGAL; - - call_ref = args->code[1]; - - /* Create the node. */ - node = create_node(args->pattern, RE_OP_GROUP_CALL, 0, 0, 1); - if (!node) - return RE_ERROR_MEMORY; - - node->values[0] = call_ref; - - node->status |= RE_STATUS_HAS_GROUPS; - node->status |= RE_STATUS_HAS_REPEATS; - - args->code += 2; - - /* Record that we used a call_ref. */ - if (!record_call_ref_used(args->pattern, call_ref)) - return RE_ERROR_MEMORY; - - /* Append the node. */ - add_node(args->end, node); - args->end = node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a GROUP_EXISTS node. */ -Py_LOCAL_INLINE(int) build_GROUP_EXISTS(RE_CompileArgs* args) { - RE_CODE group; - RE_Node* start_node; - RE_Node* end_node; - RE_CompileArgs subargs; - int status; - Py_ssize_t min_width; - - /* codes: opcode, sequence, next, sequence, end. */ - if (args->code + 2 > args->end_code) - return RE_ERROR_ILLEGAL; - - group = args->code[1]; - - args->code += 2; - - /* Record that we have a reference to a group. If group is 0, then we have - * a DEFINE and not a true group. - */ - if (group > 0 && !record_ref_group(args->pattern, group)) - return RE_ERROR_MEMORY; - - /* Create nodes for the start and end of the structure. */ - start_node = create_node(args->pattern, RE_OP_GROUP_EXISTS, 0, 0, 1); - end_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); - if (!start_node || !end_node) - return RE_ERROR_MEMORY; - - start_node->values[0] = group; - - subargs = *args; - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - args->code = subargs.code; - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - args->has_groups |= subargs.has_groups; - args->has_repeats |= subargs.has_repeats; - - min_width = subargs.min_width; - - /* Append the start node. 
*/ - add_node(args->end, start_node); - add_node(start_node, subargs.start); - - if (args->code[0] == RE_OP_NEXT) { - RE_Node* true_branch_end; - - ++args->code; - - true_branch_end = subargs.end; - - subargs.code = args->code; - - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - args->code = subargs.code; - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - - if (group == 0) { - /* Join the 2 branches end-to-end and bypass it. The sequence - * itself will never be matched as a whole, so it doesn't matter. - */ - min_width = 0; - - add_node(start_node, end_node); - add_node(true_branch_end, subargs.start); - } else { - args->has_groups |= subargs.has_groups; - args->has_repeats |= subargs.has_repeats; - - min_width = min_ssize_t(min_width, subargs.min_width); - - add_node(start_node, subargs.start); - add_node(true_branch_end, end_node); - } - - add_node(subargs.end, end_node); - } else { - add_node(start_node, end_node); - add_node(subargs.end, end_node); - - min_width = 0; - } - - args->min_width += min_width; - - if (args->code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - ++args->code; - - args->end = end_node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a LOOKAROUND node. */ -Py_LOCAL_INLINE(int) build_LOOKAROUND(RE_CompileArgs* args) { - RE_CODE flags; - BOOL forward; - RE_Node* lookaround_node; - RE_CompileArgs subargs; - int status; - RE_Node* end_node; - RE_Node* next_node; - - /* codes: opcode, flags, forward, sequence, end. */ - if (args->code + 3 > args->end_code) - return RE_ERROR_ILLEGAL; - - flags = args->code[1]; - forward = (BOOL)args->code[2]; - - /* Create a node for the lookaround. */ - lookaround_node = create_node(args->pattern, RE_OP_LOOKAROUND, flags, 0, - 0); - if (!lookaround_node) - return RE_ERROR_MEMORY; - - args->code += 3; - - /* Compile the sequence and check that we've reached the end of the - * subpattern. 
- */ - subargs = *args; - subargs.forward = forward; - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - if (subargs.code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - args->code = subargs.code; - ++args->code; - - /* Check the subpattern. */ - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - args->has_groups |= subargs.has_groups; - args->has_repeats |= subargs.has_repeats; - - if (subargs.has_groups) - lookaround_node->status |= RE_STATUS_HAS_GROUPS; - - if (subargs.has_repeats) - lookaround_node->status |= RE_STATUS_HAS_REPEATS; - - /* Create the node to terminate the subpattern. */ - end_node = create_node(args->pattern, RE_OP_END_LOOKAROUND, 0, 0, 0); - if (!end_node) - return RE_ERROR_MEMORY; - - /* Make a continuation node. */ - next_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); - if (!next_node) - return RE_ERROR_MEMORY; - - /* Append the new sequence. */ - add_node(args->end, lookaround_node); - add_node(lookaround_node, subargs.start); - add_node(lookaround_node, next_node); - add_node(subargs.end, end_node); - add_node(end_node, next_node); - - args->end = next_node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a RANGE node. */ -Py_LOCAL_INLINE(int) build_RANGE(RE_CompileArgs* args) { - RE_UINT8 op; - RE_CODE flags; - Py_ssize_t step; - RE_Node* node; - - /* codes: opcode, flags, lower, upper. */ - if (args->code + 3 > args->end_code) - return RE_ERROR_ILLEGAL; - - op = (RE_UINT8)args->code[0]; - flags = args->code[1]; - - step = get_step(op); - - if (flags & RE_ZEROWIDTH_OP) - step = 0; - - /* Create the node. */ - node = create_node(args->pattern, op, flags, step, 2); - if (!node) - return RE_ERROR_MEMORY; - - node->values[0] = args->code[2]; - node->values[1] = args->code[3]; - - args->code += 4; - - /* Append the node. 
*/ - add_node(args->end, node); - args->end = node; - - if (step != 0) - ++args->min_width; - - return RE_ERROR_SUCCESS; -} - -/* Builds a REF_GROUP node. */ -Py_LOCAL_INLINE(int) build_REF_GROUP(RE_CompileArgs* args) { - RE_CODE flags; - RE_CODE group; - RE_Node* node; - - /* codes: opcode, flags, group. */ - if (args->code + 2 > args->end_code) - return RE_ERROR_ILLEGAL; - - flags = args->code[1]; - group = args->code[2]; - node = create_node(args->pattern, (RE_UINT8)args->code[0], flags, 0, 1); - if (!node) - return RE_ERROR_MEMORY; - - node->values[0] = group; - - args->code += 3; - - /* Record that we have a reference to a group. */ - if (!record_ref_group(args->pattern, group)) - return RE_ERROR_MEMORY; - - /* Append the reference. */ - add_node(args->end, node); - args->end = node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a REPEAT node. */ -Py_LOCAL_INLINE(int) build_REPEAT(RE_CompileArgs* args) { - BOOL greedy; - RE_CODE min_count; - RE_CODE max_count; - int status; - - /* codes: opcode, min_count, max_count, sequence, end. */ - if (args->code + 3 > args->end_code) - return RE_ERROR_ILLEGAL; - - greedy = args->code[0] == RE_OP_GREEDY_REPEAT; - min_count = args->code[1]; - max_count = args->code[2]; - if (args->code[1] > args->code[2]) - return RE_ERROR_ILLEGAL; - - args->code += 3; - - if (min_count == 1 && max_count == 1) { - /* Singly-repeated sequence. */ - RE_CompileArgs subargs; - - subargs = *args; - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - if (subargs.code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - args->code = subargs.code; - args->min_width += subargs.min_width; - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - args->has_groups |= subargs.has_groups; - args->has_repeats |= subargs.has_repeats; - - ++args->code; - - /* Append the sequence. 
*/ - add_node(args->end, subargs.start); - args->end = subargs.end; - } else { - size_t index; - RE_Node* repeat_node; - RE_CompileArgs subargs; - - index = args->pattern->repeat_count; - - /* Create the nodes for the repeat. */ - repeat_node = create_node(args->pattern, greedy ? RE_OP_GREEDY_REPEAT : - RE_OP_LAZY_REPEAT, 0, args->forward ? 1 : -1, 4); - if (!repeat_node || !record_repeat(args->pattern, index, - args->repeat_depth)) - return RE_ERROR_MEMORY; - - repeat_node->values[0] = (RE_CODE)index; - repeat_node->values[1] = min_count; - repeat_node->values[2] = max_count; - repeat_node->values[3] = args->forward; - - if (args->within_fuzzy) - args->pattern->repeat_info[index].status |= RE_STATUS_BODY; - - /* Compile the 'body' and check that we've reached the end of it. */ - subargs = *args; - subargs.visible_captures = TRUE; - ++subargs.repeat_depth; - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - if (subargs.code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - args->code = subargs.code; - args->min_width += (Py_ssize_t)min_count * subargs.min_width; - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - args->has_groups |= subargs.has_groups; - args->has_repeats = TRUE; - - ++args->code; - - /* Is it a repeat of something which will match a single character? - * - * If it's in a fuzzy section then it won't be optimised as a - * single-character repeat. - */ - if (sequence_matches_one(subargs.start)) { - repeat_node->op = greedy ? RE_OP_GREEDY_REPEAT_ONE : - RE_OP_LAZY_REPEAT_ONE; - - /* Append the new sequence. */ - add_node(args->end, repeat_node); - repeat_node->nonstring.next_2.node = subargs.start; - args->end = repeat_node; - } else { - RE_Node* end_repeat_node; - RE_Node* end_node; - - end_repeat_node = create_node(args->pattern, greedy ? - RE_OP_END_GREEDY_REPEAT : RE_OP_END_LAZY_REPEAT, 0, args->forward - ? 
1 : -1, 4); - if (!end_repeat_node) - return RE_ERROR_MEMORY; - - end_repeat_node->values[0] = repeat_node->values[0]; - end_repeat_node->values[1] = repeat_node->values[1]; - end_repeat_node->values[2] = repeat_node->values[2]; - end_repeat_node->values[3] = args->forward; - - end_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); - if (!end_node) - return RE_ERROR_MEMORY; - - /* Append the new sequence. */ - add_node(args->end, repeat_node); - add_node(repeat_node, subargs.start); - add_node(repeat_node, end_node); - add_node(subargs.end, end_repeat_node); - add_node(end_repeat_node, subargs.start); - add_node(end_repeat_node, end_node); - args->end = end_node; - } - } - - return RE_ERROR_SUCCESS; -} - -/* Builds a STRING node. */ -Py_LOCAL_INLINE(int) build_STRING(RE_CompileArgs* args, BOOL is_charset) { - RE_CODE flags; - RE_CODE length; - RE_UINT8 op; - Py_ssize_t step; - RE_Node* node; - size_t i; - - /* codes: opcode, flags, length, characters. */ - flags = args->code[1]; - length = args->code[2]; - if (args->code + 3 + length > args->end_code) - return RE_ERROR_ILLEGAL; - - op = (RE_UINT8)args->code[0]; - - step = get_step(op); - - /* Create the node. */ - node = create_node(args->pattern, op, flags, step * (Py_ssize_t)length, - length); - if (!node) - return RE_ERROR_MEMORY; - if (!is_charset) - node->status |= RE_STATUS_STRING; - - for (i = 0; i < length; i++) - node->values[i] = args->code[3 + i]; - - args->code += 3 + length; - - /* Append the node. */ - add_node(args->end, node); - args->end = node; - - /* Because of full case-folding, one character in the text could match - * multiple characters in the pattern. - */ - if (op == RE_OP_STRING_FLD || op == RE_OP_STRING_FLD_REV) - args->min_width += possible_unfolded_length((Py_ssize_t)length); - else - args->min_width += (Py_ssize_t)length; - - return RE_ERROR_SUCCESS; -} - -/* Builds a SET node. 
*/ -Py_LOCAL_INLINE(int) build_SET(RE_CompileArgs* args) { - RE_UINT8 op; - RE_CODE flags; - Py_ssize_t step; - RE_Node* node; - Py_ssize_t min_width; - int status; - - /* codes: opcode, flags, members. */ - op = (RE_UINT8)args->code[0]; - flags = args->code[1]; - - step = get_step(op); - - if (flags & RE_ZEROWIDTH_OP) - step = 0; - - node = create_node(args->pattern, op, flags, step, 0); - if (!node) - return RE_ERROR_MEMORY; - - args->code += 2; - - /* Append the node. */ - add_node(args->end, node); - args->end = node; - - min_width = args->min_width; - - /* Compile the character set. */ - do { - switch (args->code[0]) { - case RE_OP_CHARACTER: - case RE_OP_PROPERTY: - status = build_CHARACTER_or_PROPERTY(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_RANGE: - status = build_RANGE(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_SET_DIFF: - case RE_OP_SET_INTER: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_UNION: - status = build_SET(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_STRING: - /* A set of characters. */ - if (!build_STRING(args, TRUE)) - return FALSE; - break; - default: - /* Illegal opcode for a character set. */ - return RE_ERROR_ILLEGAL; - } - } while (args->code < args->end_code && args->code[0] != RE_OP_END); - - /* Check that we've reached the end correctly. (The last opcode should be - * 'END'.) - */ - if (args->code >= args->end_code || args->code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - ++args->code; - - /* At this point the set's members are in the main sequence. They need to - * be moved out-of-line. - */ - node->nonstring.next_2.node = node->next_1.node; - node->next_1.node = NULL; - args->end = node; - - args->min_width = min_width; - - if (step != 0) - ++args->min_width; - - return RE_ERROR_SUCCESS; -} - -/* Builds a STRING_SET node. 
*/ -Py_LOCAL_INLINE(int) build_STRING_SET(RE_CompileArgs* args) { - RE_CODE index; - RE_CODE min_len; - RE_CODE max_len; - RE_Node* node; - - /* codes: opcode, index, min_len, max_len. */ - if (args->code + 3 > args->end_code) - return RE_ERROR_ILLEGAL; - - index = args->code[1]; - min_len = args->code[2]; - max_len = args->code[3]; - node = create_node(args->pattern, (RE_UINT8)args->code[0], 0, 0, 3); - if (!node) - return RE_ERROR_MEMORY; - - node->values[0] = index; - node->values[1] = min_len; - node->values[2] = max_len; - - args->code += 4; - - /* Append the reference. */ - add_node(args->end, node); - args->end = node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a SUCCESS node . */ -Py_LOCAL_INLINE(int) build_SUCCESS(RE_CompileArgs* args) { - RE_Node* node; - /* code: opcode. */ - - /* Create the node. */ - node = create_node(args->pattern, (RE_UINT8)args->code[0], 0, 0, 0); - if (!node) - return RE_ERROR_MEMORY; - - ++args->code; - - /* Append the node. */ - add_node(args->end, node); - args->end = node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a zero-width node. */ -Py_LOCAL_INLINE(int) build_zerowidth(RE_CompileArgs* args) { - RE_CODE flags; - RE_Node* node; - - /* codes: opcode, flags. */ - if (args->code + 1 > args->end_code) - return RE_ERROR_ILLEGAL; - - flags = args->code[1]; - - /* Create the node. */ - node = create_node(args->pattern, (RE_UINT8)args->code[0], flags, 0, 0); - if (!node) - return RE_ERROR_MEMORY; - - args->code += 2; - - /* Append the node. */ - add_node(args->end, node); - args->end = node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a sequence of nodes from regular expression code. */ -Py_LOCAL_INLINE(int) build_sequence(RE_CompileArgs* args) { - int status; - - /* Guarantee that there's something to attach to. 
*/ - args->start = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); - args->end = args->start; - - args->min_width = 0; - args->has_captures = FALSE; - args->is_fuzzy = FALSE; - args->has_groups = FALSE; - args->has_repeats = FALSE; - - /* The sequence should end with an opcode we don't understand. If it - * doesn't then the code is illegal. - */ - while (args->code < args->end_code) { - /* The following code groups opcodes by format, not function. */ - switch (args->code[0]) { - case RE_OP_ANY: - case RE_OP_ANY_ALL: - case RE_OP_ANY_ALL_REV: - case RE_OP_ANY_REV: - case RE_OP_ANY_U: - case RE_OP_ANY_U_REV: - /* A simple opcode with no trailing codewords and width of 1. */ - status = build_ANY(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_ATOMIC: - /* An atomic sequence. */ - status = build_ATOMIC(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_BOUNDARY: - case RE_OP_DEFAULT_BOUNDARY: - case RE_OP_DEFAULT_END_OF_WORD: - case RE_OP_DEFAULT_START_OF_WORD: - case RE_OP_END_OF_WORD: - case RE_OP_GRAPHEME_BOUNDARY: - case RE_OP_KEEP: - case RE_OP_SKIP: - case RE_OP_START_OF_WORD: - /* A word or grapheme boundary. */ - status = build_BOUNDARY(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_BRANCH: - /* A 2-way branch. */ - status = build_BRANCH(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_CALL_REF: - /* A group call ref. */ - status = build_CALL_REF(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_CHARACTER: - case RE_OP_CHARACTER_IGN: - case RE_OP_CHARACTER_IGN_REV: - case RE_OP_CHARACTER_REV: - case RE_OP_PROPERTY: - case RE_OP_PROPERTY_IGN: - case RE_OP_PROPERTY_IGN_REV: - case RE_OP_PROPERTY_REV: - /* A character literal or a property. */ - status = build_CHARACTER_or_PROPERTY(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_CONDITIONAL: - /* A lookaround conditional. 
*/ - status = build_CONDITIONAL(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_END_OF_LINE: - case RE_OP_END_OF_LINE_U: - case RE_OP_END_OF_STRING: - case RE_OP_END_OF_STRING_LINE: - case RE_OP_END_OF_STRING_LINE_U: - case RE_OP_SEARCH_ANCHOR: - case RE_OP_START_OF_LINE: - case RE_OP_START_OF_LINE_U: - case RE_OP_START_OF_STRING: - /* A simple opcode with no trailing codewords and width of 0. */ - status = build_zerowidth(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_FAILURE: - case RE_OP_PRUNE: - case RE_OP_SUCCESS: - status = build_SUCCESS(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_FUZZY: - /* A fuzzy sequence. */ - status = build_FUZZY(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_GREEDY_REPEAT: - case RE_OP_LAZY_REPEAT: - /* A repeated sequence. */ - status = build_REPEAT(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_GROUP: - /* A capture group. */ - status = build_GROUP(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_GROUP_CALL: - /* A group call. */ - status = build_GROUP_CALL(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_GROUP_EXISTS: - /* A conditional sequence. */ - status = build_GROUP_EXISTS(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_LOOKAROUND: - /* A lookaround. */ - status = build_LOOKAROUND(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_RANGE: - case RE_OP_RANGE_IGN: - case RE_OP_RANGE_IGN_REV: - case RE_OP_RANGE_REV: - /* A range. */ - status = build_RANGE(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_REF_GROUP: - case RE_OP_REF_GROUP_FLD: - case RE_OP_REF_GROUP_FLD_REV: - case RE_OP_REF_GROUP_IGN: - case RE_OP_REF_GROUP_IGN_REV: - case RE_OP_REF_GROUP_REV: - /* A reference to a group. 
*/ - status = build_REF_GROUP(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_SET_DIFF: - case RE_OP_SET_DIFF_IGN: - case RE_OP_SET_DIFF_IGN_REV: - case RE_OP_SET_DIFF_REV: - case RE_OP_SET_INTER: - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_INTER_IGN_REV: - case RE_OP_SET_INTER_REV: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_SYM_DIFF_IGN_REV: - case RE_OP_SET_SYM_DIFF_REV: - case RE_OP_SET_UNION: - case RE_OP_SET_UNION_IGN: - case RE_OP_SET_UNION_IGN_REV: - case RE_OP_SET_UNION_REV: - /* A set. */ - status = build_SET(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_STRING: - case RE_OP_STRING_FLD: - case RE_OP_STRING_FLD_REV: - case RE_OP_STRING_IGN: - case RE_OP_STRING_IGN_REV: - case RE_OP_STRING_REV: - /* A string literal. */ - if (!build_STRING(args, FALSE)) - return FALSE; - break; - case RE_OP_STRING_SET: - case RE_OP_STRING_SET_FLD: - case RE_OP_STRING_SET_FLD_REV: - case RE_OP_STRING_SET_IGN: - case RE_OP_STRING_SET_IGN_REV: - case RE_OP_STRING_SET_REV: - /* A reference to a list. */ - status = build_STRING_SET(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - default: - /* We've found an opcode which we don't recognise. We'll leave it - * for the caller. - */ - return RE_ERROR_SUCCESS; - } - } - - /* If we're here then we should be at the end of the code, otherwise we - * have an error. - */ - return args->code == args->end_code; -} - -/* Compiles the regular expression code to 'nodes'. - * - * Various details about the regular expression are discovered during - * compilation and stored in the PatternObject. - */ -Py_LOCAL_INLINE(BOOL) compile_to_nodes(RE_CODE* code, RE_CODE* end_code, - PatternObject* pattern) { - RE_CompileArgs args; - int status; - - /* Compile a regex sequence and then check that we've reached the end - * correctly. (The last opcode should be 'SUCCESS'.) 
- * - * If successful, 'start' and 'end' will point to the start and end nodes - * of the compiled sequence. - */ - args.code = code; - args.end_code = end_code; - args.pattern = pattern; - args.forward = (pattern->flags & RE_FLAG_REVERSE) == 0; - args.visible_captures = FALSE; - args.has_captures = FALSE; - args.repeat_depth = 0; - args.is_fuzzy = FALSE; - args.within_fuzzy = FALSE; - status = build_sequence(&args); - if (status == RE_ERROR_ILLEGAL) - set_error(RE_ERROR_ILLEGAL, NULL); - - if (status != RE_ERROR_SUCCESS) - return FALSE; - - pattern->min_width = args.min_width; - pattern->is_fuzzy = args.is_fuzzy; - pattern->do_search_start = TRUE; - pattern->start_node = args.start; - - /* Optimise the pattern. */ - if (!optimise_pattern(pattern)) - return FALSE; - - pattern->start_test = locate_test_start(pattern->start_node); - - /* Get the call_ref for the entire pattern, if any. */ - if (pattern->start_node->op == RE_OP_CALL_REF) - pattern->pattern_call_ref = (Py_ssize_t)pattern->start_node->values[0]; - else - pattern->pattern_call_ref = -1; - - return TRUE; -} - -/* Gets the required characters for a regex. - * - * In the event of an error, it just pretends that there are no required - * characters. - */ -Py_LOCAL_INLINE(void) get_required_chars(PyObject* required_chars, RE_CODE** - req_chars, size_t* req_length) { - Py_ssize_t len; - RE_CODE* chars; - Py_ssize_t i; - - *req_chars = NULL; - *req_length = 0; - - len = PyTuple_GET_SIZE(required_chars); - if (len < 1 || PyErr_Occurred()) { - PyErr_Clear(); - return; - } - - chars = (RE_CODE*)re_alloc((size_t)len * sizeof(RE_CODE)); - if (!chars) - goto error; - - for (i = 0; i < len; i++) { - PyObject* o; - size_t value; - - /* PyTuple_SET_ITEM borrows the reference. 
*/ - o = PyTuple_GET_ITEM(required_chars, i); - - value = PyLong_AsUnsignedLong(o); - if ((Py_ssize_t)value == -1 && PyErr_Occurred()) - goto error; - - chars[i] = (RE_CODE)value; - if (chars[i] != value) - goto error; - } - - *req_chars = chars; - *req_length = (size_t)len; - - return; - -error: - PyErr_Clear(); - re_dealloc(chars); -} - -/* Makes a STRING node. */ -Py_LOCAL_INLINE(RE_Node*) make_STRING_node(PatternObject* pattern, RE_UINT8 op, - size_t length, RE_CODE* chars) { - Py_ssize_t step; - RE_Node* node; - size_t i; - - step = get_step(op); - - /* Create the node. */ - node = create_node(pattern, op, 0, step * (Py_ssize_t)length, length); - if (!node) - return NULL; - - node->status |= RE_STATUS_STRING; - - for (i = 0; i < length; i++) - node->values[i] = chars[i]; - - return node; -} - -/* Scans all of the characters in the current locale for their properties. */ -Py_LOCAL_INLINE(void) scan_locale_chars(RE_LocaleInfo* locale_info) { - int c; - - for (c = 0; c < 0x100; c++) { - unsigned short props = 0; - - if (isalnum(c)) - props |= RE_LOCALE_ALNUM; - if (isalpha(c)) - props |= RE_LOCALE_ALPHA; - if (iscntrl(c)) - props |= RE_LOCALE_CNTRL; - if (isdigit(c)) - props |= RE_LOCALE_DIGIT; - if (isgraph(c)) - props |= RE_LOCALE_GRAPH; - if (islower(c)) - props |= RE_LOCALE_LOWER; - if (isprint(c)) - props |= RE_LOCALE_PRINT; - if (ispunct(c)) - props |= RE_LOCALE_PUNCT; - if (isspace(c)) - props |= RE_LOCALE_SPACE; - if (isupper(c)) - props |= RE_LOCALE_UPPER; - - locale_info->properties[c] = props; - locale_info->uppercase[c] = (unsigned char)toupper(c); - locale_info->lowercase[c] = (unsigned char)tolower(c); - } -} - -/* Compiles regular expression code to a PatternObject. - * - * The regular expression code is provided as a list and is then compiled to - * 'nodes'. Various details about the regular expression are discovered during - * compilation and stored in the PatternObject. 
- */ -static PyObject* re_compile(PyObject* self_, PyObject* args) { - PyObject* pattern; - Py_ssize_t flags = 0; - PyObject* code_list; - PyObject* groupindex; - PyObject* indexgroup; - PyObject* named_lists; - PyObject* named_list_indexes; - Py_ssize_t req_offset; - PyObject* required_chars; - Py_ssize_t req_flags; - size_t public_group_count; - BOOL unpacked; - Py_ssize_t code_len; - RE_CODE* code; - Py_ssize_t i; - RE_CODE* req_chars; - size_t req_length; - PyObject* packed_code_list; - PatternObject* self; - BOOL unicode; - BOOL locale; - BOOL ascii; - BOOL ok; - - if (!PyArg_ParseTuple(args, "OnOOOOOnOnn:re_compile", &pattern, &flags, - &code_list, &groupindex, &indexgroup, &named_lists, &named_list_indexes, - &req_offset, &required_chars, &req_flags, &public_group_count)) - return NULL; - - /* If it came from a pickled source, code_list will be a packed code list - * in a bytestring. - */ - if (PyString_Check(code_list)) { - packed_code_list = code_list; - code_list = unpack_code_list(packed_code_list); - if (!code_list) - return NULL; - - unpacked = TRUE; - } else - unpacked = FALSE; - - /* Read the regex code. */ - code_len = PyList_GET_SIZE(code_list); - code = (RE_CODE*)re_alloc((size_t)code_len * sizeof(RE_CODE)); - if (!code) { - if (unpacked) - /* code_list has been built from a packed code list. */ - Py_DECREF(code_list); - - return NULL; - } - - for (i = 0; i < code_len; i++) { - PyObject* o; - size_t value; - - /* PyList_GET_ITEM borrows a reference. */ - o = PyList_GET_ITEM(code_list, i); - - value = PyLong_AsUnsignedLong(o); - if ((Py_ssize_t)value == -1 && PyErr_Occurred()) - goto error; - - code[i] = (RE_CODE)value; - if (code[i] != value) - goto error; - } - - /* Get the required characters. */ - get_required_chars(required_chars, &req_chars, &req_length); - - if (!unpacked) { - /* Pack the code list in case it's needed for pickling. 
*/ - packed_code_list = pack_code_list(code, code_len); - if (!packed_code_list) { - set_error(RE_ERROR_MEMORY, NULL); - re_dealloc(req_chars); - re_dealloc(code); - return NULL; - } - } - - /* Create the PatternObject. */ - self = PyObject_NEW(PatternObject, &Pattern_Type); - if (!self) { - set_error(RE_ERROR_MEMORY, NULL); - if (unpacked) - Py_DECREF(code_list); - else - Py_DECREF(packed_code_list); - re_dealloc(req_chars); - re_dealloc(code); - return NULL; - } - - /* Initialise the PatternObject. */ - self->pattern = pattern; - self->flags = flags; - self->packed_code_list = packed_code_list; - self->weakreflist = NULL; - self->start_node = NULL; - self->repeat_count = 0; - self->true_group_count = 0; - self->public_group_count = public_group_count; - self->group_end_index = 0; - self->groupindex = groupindex; - self->indexgroup = indexgroup; - self->named_lists = named_lists; - self->named_lists_count = (size_t)PyDict_Size(named_lists); - self->partial_named_lists[0] = NULL; - self->partial_named_lists[1] = NULL; - self->named_list_indexes = named_list_indexes; - self->node_capacity = 0; - self->node_count = 0; - self->node_list = NULL; - self->group_info_capacity = 0; - self->group_info = NULL; - self->call_ref_info_capacity = 0; - self->call_ref_info_count = 0; - self->call_ref_info = NULL; - self->repeat_info_capacity = 0; - self->repeat_info = NULL; - self->groups_storage = NULL; - self->repeats_storage = NULL; - self->fuzzy_count = 0; - self->recursive = FALSE; - self->req_offset = req_offset; - self->required_chars = required_chars; - self->req_flags = req_flags; - self->req_string = NULL; - self->locale_info = NULL; - Py_INCREF(self->pattern); - if (unpacked) - Py_INCREF(self->packed_code_list); - Py_INCREF(self->groupindex); - Py_INCREF(self->indexgroup); - Py_INCREF(self->named_lists); - Py_INCREF(self->named_list_indexes); - Py_INCREF(self->required_chars); - - /* Initialise the character encoding. 
*/ - unicode = (flags & RE_FLAG_UNICODE) != 0; - locale = (flags & RE_FLAG_LOCALE) != 0; - ascii = (flags & RE_FLAG_ASCII) != 0; - if (!unicode && !locale && !ascii) { - if (PyString_Check(self->pattern)) - ascii = RE_FLAG_ASCII; - else - unicode = RE_FLAG_UNICODE; - } - if (unicode) - self->encoding = &unicode_encoding; - else if (locale) - self->encoding = &locale_encoding; - else if (ascii) - self->encoding = &ascii_encoding; - - /* Compile the regular expression code to nodes. */ - ok = compile_to_nodes(code, code + code_len, self); - - /* We no longer need the regular expression code. */ - re_dealloc(code); - - if (!ok) { - Py_DECREF(self); - re_dealloc(req_chars); - if (unpacked) - Py_DECREF(code_list); - return NULL; - } - - /* Make a node for the required string, if there's one. */ - if (req_chars) { - /* Remove the FULLCASE flag if it's not a Unicode pattern or not - * ignoring case. - */ - if (!(self->flags & RE_FLAG_UNICODE) || !(self->flags & - RE_FLAG_IGNORECASE)) - req_flags &= ~RE_FLAG_FULLCASE; - - if (self->flags & RE_FLAG_REVERSE) { - switch (req_flags) { - case 0: - self->req_string = make_STRING_node(self, RE_OP_STRING_REV, - req_length, req_chars); - break; - case RE_FLAG_IGNORECASE | RE_FLAG_FULLCASE: - self->req_string = make_STRING_node(self, RE_OP_STRING_FLD_REV, - req_length, req_chars); - break; - case RE_FLAG_IGNORECASE: - self->req_string = make_STRING_node(self, RE_OP_STRING_IGN_REV, - req_length, req_chars); - break; - } - } else { - switch (req_flags) { - case 0: - self->req_string = make_STRING_node(self, RE_OP_STRING, - req_length, req_chars); - break; - case RE_FLAG_IGNORECASE | RE_FLAG_FULLCASE: - self->req_string = make_STRING_node(self, RE_OP_STRING_FLD, - req_length, req_chars); - break; - case RE_FLAG_IGNORECASE: - self->req_string = make_STRING_node(self, RE_OP_STRING_IGN, - req_length, req_chars); - break; - } - } - - re_dealloc(req_chars); - } - - if (locale) { - /* Store info about the characters in the locale for 
locale-sensitive - * matching. - */ - self->locale_info = re_alloc(sizeof(RE_LocaleInfo)); - if (!self->locale_info) { - Py_DECREF(self); - if (unpacked) - Py_DECREF(code_list); - return NULL; - } - - scan_locale_chars(self->locale_info); - } - - if (unpacked) - Py_DECREF(code_list); - - return (PyObject*)self; - -error: - re_dealloc(code); - set_error(RE_ERROR_ILLEGAL, NULL); - if (unpacked) - Py_DECREF(code_list); - - return NULL; -} - -/* Gets the size of the codewords. */ -static PyObject* get_code_size(PyObject* self, PyObject* unused) { - return Py_BuildValue("n", sizeof(RE_CODE)); -} - -/* Gets the property dict. */ -static PyObject* get_properties(PyObject* self_, PyObject* args) { - Py_INCREF(property_dict); - - return property_dict; -} - -/* Folds the case of a string. */ -static PyObject* fold_case(PyObject* self_, PyObject* args) { - RE_StringInfo str_info; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - RE_EncodingTable* encoding; - RE_LocaleInfo locale_info; - Py_ssize_t folded_charsize; - void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); - Py_ssize_t buf_size; - void* folded; - Py_ssize_t folded_len; - PyObject* result; - - Py_ssize_t flags; - PyObject* string; - if (!PyArg_ParseTuple(args, "nO:fold_case", &flags, &string)) - return NULL; - - if (!(flags & RE_FLAG_IGNORECASE)) { - Py_INCREF(string); - return string; - } - - /* Get the string. */ - if (!get_string(string, &str_info)) - return NULL; - - /* Get the function for reading from the original string. */ - switch (str_info.charsize) { - case 1: - char_at = bytes1_char_at; - break; - case 2: - char_at = bytes2_char_at; - break; - case 4: - char_at = bytes4_char_at; - break; - default: -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); - -#endif - return NULL; - } - - /* What's the encoding? 
*/ - if (flags & RE_FLAG_UNICODE) - encoding = &unicode_encoding; - else if (flags & RE_FLAG_LOCALE) { - encoding = &locale_encoding; - scan_locale_chars(&locale_info); - } else if (flags & RE_FLAG_ASCII) - encoding = &ascii_encoding; - else - encoding = &unicode_encoding; - - /* The folded string will have the same width as the original string. */ - folded_charsize = str_info.charsize; - - /* Get the function for writing to the folded string. */ - switch (folded_charsize) { - case 1: - set_char_at = bytes1_set_char_at; - break; - case 2: - set_char_at = bytes2_set_char_at; - break; - case 4: - set_char_at = bytes4_set_char_at; - break; - default: -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); - -#endif - return NULL; - } - - /* Allocate a buffer for the folded string. */ - if (flags & RE_FLAG_FULLCASE) - /* When using full case-folding with Unicode, some single codepoints - * are mapped to multiple codepoints. - */ - buf_size = str_info.length * RE_MAX_FOLDED; - else - buf_size = str_info.length; - - folded = re_alloc((size_t)(buf_size * folded_charsize)); - if (!folded) { -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); - -#endif - return NULL; - } - - /* Fold the case of the string. */ - folded_len = 0; - - if (flags & RE_FLAG_FULLCASE) { - /* Full case-folding. */ - int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* - folded); - Py_ssize_t i; - Py_UCS4 codepoints[RE_MAX_FOLDED]; - - full_case_fold = encoding->full_case_fold; - - for (i = 0; i < str_info.length; i++) { - int count; - int j; - - count = full_case_fold(&locale_info, char_at(str_info.characters, - i), codepoints); - for (j = 0; j < count; j++) - set_char_at(folded, folded_len + j, codepoints[j]); - - folded_len += count; - } - } else { - /* Simple case-folding. 
*/ - Py_UCS4 (*simple_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch); - Py_ssize_t i; - - simple_case_fold = encoding->simple_case_fold; - - for (i = 0; i < str_info.length; i++) { - Py_UCS4 ch; - - ch = simple_case_fold(&locale_info, char_at(str_info.characters, - i)); - set_char_at(folded, i, ch); - } - - folded_len = str_info.length; - } - - /* Build the result string. */ - if (str_info.is_unicode) - result = build_unicode_value(folded, 0, folded_len, folded_charsize); - else - result = build_bytes_value(folded, 0, folded_len, folded_charsize); - - re_dealloc(folded); - -#if PY_VERSION_HEX >= 0x02060000 - /* Release the original string's buffer. */ - release_buffer(&str_info); - -#endif - return result; -} - -/* Returns a tuple of the Unicode characters that expand on full case-folding. - */ -static PyObject* get_expand_on_folding(PyObject* self, PyObject* unused) { - int count; - PyObject* result; - int i; - - /* How many characters are there? */ - count = sizeof(re_expand_on_folding) / sizeof(re_expand_on_folding[0]); - - /* Put all the characters in a tuple. */ - result = PyTuple_New(count); - if (!result) - return NULL; - - for (i = 0; i < count; i++) { - Py_UNICODE codepoint; - PyObject* item; - - codepoint = re_expand_on_folding[i]; - - item = build_unicode_value(&codepoint, 0, 1, sizeof(codepoint)); - if (!item) - goto error; - - /* PyTuple_SetItem borrows the reference. */ - PyTuple_SetItem(result, i, item); - } - - return result; - -error: - Py_DECREF(result); - return NULL; -} - -/* Returns whether a character has a given value for a Unicode property. */ -static PyObject* has_property_value(PyObject* self_, PyObject* args) { - BOOL v; - - Py_ssize_t property_value; - Py_ssize_t character; - if (!PyArg_ParseTuple(args, "nn:has_property_value", &property_value, - &character)) - return NULL; - - v = unicode_has_property((RE_CODE)property_value, (Py_UCS4)character) ? 
1 : - 0; - - return Py_BuildValue("n", v); -} - -/* Returns a list of all the simple cases of a character. - * - * If full case-folding is turned on and the character also expands on full - * case-folding, a None is appended to the list. - */ -static PyObject* get_all_cases(PyObject* self_, PyObject* args) { - RE_EncodingTable* encoding; - RE_LocaleInfo locale_info; - int count; - Py_UCS4 cases[RE_MAX_CASES]; - PyObject* result; - int i; - Py_UCS4 folded[RE_MAX_FOLDED]; - - Py_ssize_t flags; - Py_ssize_t character; - if (!PyArg_ParseTuple(args, "nn:get_all_cases", &flags, &character)) - return NULL; - - /* What's the encoding? */ - if (flags & RE_FLAG_UNICODE) - encoding = &unicode_encoding; - else if (flags & RE_FLAG_LOCALE) { - encoding = &locale_encoding; - scan_locale_chars(&locale_info); - } else if (flags & RE_FLAG_ASCII) - encoding = &ascii_encoding; - else - encoding = &ascii_encoding; - - /* Get all the simple cases. */ - count = encoding->all_cases(&locale_info, (Py_UCS4)character, cases); - - result = PyList_New(count); - if (!result) - return NULL; - - for (i = 0; i < count; i++) { - PyObject* item; - - item = Py_BuildValue("n", cases[i]); - if (!item) - goto error; - - /* PyList_SetItem borrows the reference. */ - PyList_SetItem(result, i, item); - } - - /* If the character also expands on full case-folding, append a None. */ - if ((flags & RE_FULL_CASE_FOLDING) == RE_FULL_CASE_FOLDING) { - count = encoding->full_case_fold(&locale_info, (Py_UCS4)character, - folded); - if (count > 1) - PyList_Append(result, Py_None); - } - - return result; - -error: - Py_DECREF(result); - return NULL; -} - -/* The table of the module's functions. 
*/ -static PyMethodDef _functions[] = { - {"compile", (PyCFunction)re_compile, METH_VARARGS}, - {"get_code_size", (PyCFunction)get_code_size, METH_NOARGS}, - {"get_properties", (PyCFunction)get_properties, METH_VARARGS}, - {"fold_case", (PyCFunction)fold_case, METH_VARARGS}, - {"get_expand_on_folding", (PyCFunction)get_expand_on_folding, METH_NOARGS}, - {"has_property_value", (PyCFunction)has_property_value, METH_VARARGS}, - {"get_all_cases", (PyCFunction)get_all_cases, METH_VARARGS}, - {NULL, NULL} -}; - -/* Initialises the property dictionary. */ -Py_LOCAL_INLINE(BOOL) init_property_dict(void) { - size_t value_set_count; - size_t i; - PyObject** value_dicts; - - property_dict = NULL; - - /* How many value sets are there? */ - value_set_count = 0; - - for (i = 0; i < sizeof(re_property_values) / sizeof(re_property_values[0]); - i++) { - RE_PropertyValue* value; - - value = &re_property_values[i]; - if (value->value_set >= value_set_count) - value_set_count = (size_t)value->value_set + 1; - } - - /* Quick references for the value sets. */ - value_dicts = (PyObject**)re_alloc(value_set_count * - sizeof(value_dicts[0])); - if (!value_dicts) - return FALSE; - - memset(value_dicts, 0, value_set_count * sizeof(value_dicts[0])); - - /* Build the property values dictionaries. */ - for (i = 0; i < sizeof(re_property_values) / sizeof(re_property_values[0]); - i++) { - RE_PropertyValue* value; - PyObject* v; - int status; - - value = &re_property_values[i]; - if (!value_dicts[value->value_set]) { - value_dicts[value->value_set] = PyDict_New(); - if (!value_dicts[value->value_set]) - goto error; - } - - v = Py_BuildValue("i", value->id); - if (!v) - goto error; - - status = PyDict_SetItemString(value_dicts[value->value_set], - re_strings[value->name], v); - Py_DECREF(v); - if (status < 0) - goto error; - } - - /* Build the property dictionary. 
*/ - property_dict = PyDict_New(); - if (!property_dict) - goto error; - - for (i = 0; i < sizeof(re_properties) / sizeof(re_properties[0]); i++) { - RE_Property* property; - PyObject* v; - int status; - - property = &re_properties[i]; - v = Py_BuildValue("iO", property->id, - value_dicts[property->value_set]); - if (!v) - goto error; - - status = PyDict_SetItemString(property_dict, - re_strings[property->name], v); - Py_DECREF(v); - if (status < 0) - goto error; - } - - /* DECREF the value sets. Any unused ones will be deallocated. */ - for (i = 0; i < value_set_count; i++) - Py_XDECREF(value_dicts[i]); - - re_dealloc(value_dicts); - - return TRUE; - -error: - Py_XDECREF(property_dict); - - /* DECREF the value sets. */ - for (i = 0; i < value_set_count; i++) - Py_XDECREF(value_dicts[i]); - - re_dealloc(value_dicts); - - return FALSE; -} - -/* Initialises the module. */ -PyMODINIT_FUNC init_regex(void) { - PyObject* m; - PyObject* d; - PyObject* x; - -#if defined(VERBOSE) - /* Unbuffered in case it crashes! */ - setvbuf(stdout, NULL, _IONBF, 0); - -#endif - /* Initialise Pattern_Type. */ - Pattern_Type.tp_dealloc = pattern_dealloc; - Pattern_Type.tp_repr = pattern_repr; - Pattern_Type.tp_flags = Py_TPFLAGS_HAVE_WEAKREFS; - Pattern_Type.tp_doc = pattern_doc; - Pattern_Type.tp_weaklistoffset = offsetof(PatternObject, weakreflist); - Pattern_Type.tp_methods = pattern_methods; - Pattern_Type.tp_members = pattern_members; - Pattern_Type.tp_getset = pattern_getset; - - /* Initialise Match_Type. */ - Match_Type.tp_dealloc = match_dealloc; - Match_Type.tp_repr = match_repr; - Match_Type.tp_as_mapping = &match_as_mapping; - Match_Type.tp_flags = Py_TPFLAGS_DEFAULT; - Match_Type.tp_doc = match_doc; - Match_Type.tp_methods = match_methods; - Match_Type.tp_members = match_members; - Match_Type.tp_getset = match_getset; - - /* Initialise Scanner_Type. 
*/ - Scanner_Type.tp_dealloc = scanner_dealloc; - Scanner_Type.tp_flags = Py_TPFLAGS_DEFAULT; - Scanner_Type.tp_doc = scanner_doc; - Scanner_Type.tp_iter = scanner_iter; - Scanner_Type.tp_iternext = scanner_iternext; - Scanner_Type.tp_methods = scanner_methods; - Scanner_Type.tp_members = scanner_members; - - /* Initialise Splitter_Type. */ - Splitter_Type.tp_dealloc = splitter_dealloc; - Splitter_Type.tp_flags = Py_TPFLAGS_DEFAULT; - Splitter_Type.tp_doc = splitter_doc; - Splitter_Type.tp_iter = splitter_iter; - Splitter_Type.tp_iternext = splitter_iternext; - Splitter_Type.tp_methods = splitter_methods; - Splitter_Type.tp_members = splitter_members; -#if PY_VERSION_HEX >= 0x02060000 - - /* Initialise Capture_Type. */ - Capture_Type.tp_dealloc = capture_dealloc; - Capture_Type.tp_str = capture_str; - Capture_Type.tp_as_mapping = &capture_as_mapping; - Capture_Type.tp_flags = Py_TPFLAGS_DEFAULT; - Capture_Type.tp_methods = capture_methods; -#endif - - /* Initialize object types */ - if (PyType_Ready(&Pattern_Type) < 0) - return; - if (PyType_Ready(&Match_Type) < 0) - return; - if (PyType_Ready(&Scanner_Type) < 0) - return; - if (PyType_Ready(&Splitter_Type) < 0) - return; -#if PY_VERSION_HEX >= 0x02060000 - if (PyType_Ready(&Capture_Type) < 0) - return; -#endif - - error_exception = NULL; - - m = Py_InitModule("_" RE_MODULE, _functions); - if (!m) - return; - - d = PyModule_GetDict(m); - - x = PyInt_FromLong(RE_MAGIC); - if (x) { - PyDict_SetItemString(d, "MAGIC", x); - Py_DECREF(x); - } - - x = PyInt_FromLong(sizeof(RE_CODE)); - if (x) { - PyDict_SetItemString(d, "CODE_SIZE", x); - Py_DECREF(x); - } - - x = PyString_FromString(copyright); - if (x) { - PyDict_SetItemString(d, "copyright", x); - Py_DECREF(x); - } - - /* Initialise the property dictionary. 
*/ - if (!init_property_dict()) - return; -} - -/* vim:ts=4:sw=4:et */ diff --git a/src/regex/_regex.h b/src/regex/_regex.h deleted file mode 100644 index 33ccfdb110..0000000000 --- a/src/regex/_regex.h +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Secret Labs' Regular Expression Engine - * - * regular expression matching engine - * - * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. - * - * NOTE: This file is generated by regex.py. If you need - * to change anything in here, edit regex.py and run it. - * - * 2010-01-16 mrab Re-written - */ - -/* Supports Unicode version 9.0.0. */ - -#define RE_MAGIC 20100116 - -#include "_regex_unicode.h" - -/* Operators. */ -#define RE_OP_FAILURE 0 -#define RE_OP_SUCCESS 1 -#define RE_OP_ANY 2 -#define RE_OP_ANY_ALL 3 -#define RE_OP_ANY_ALL_REV 4 -#define RE_OP_ANY_REV 5 -#define RE_OP_ANY_U 6 -#define RE_OP_ANY_U_REV 7 -#define RE_OP_ATOMIC 8 -#define RE_OP_BOUNDARY 9 -#define RE_OP_BRANCH 10 -#define RE_OP_CALL_REF 11 -#define RE_OP_CHARACTER 12 -#define RE_OP_CHARACTER_IGN 13 -#define RE_OP_CHARACTER_IGN_REV 14 -#define RE_OP_CHARACTER_REV 15 -#define RE_OP_CONDITIONAL 16 -#define RE_OP_DEFAULT_BOUNDARY 17 -#define RE_OP_DEFAULT_END_OF_WORD 18 -#define RE_OP_DEFAULT_START_OF_WORD 19 -#define RE_OP_END 20 -#define RE_OP_END_OF_LINE 21 -#define RE_OP_END_OF_LINE_U 22 -#define RE_OP_END_OF_STRING 23 -#define RE_OP_END_OF_STRING_LINE 24 -#define RE_OP_END_OF_STRING_LINE_U 25 -#define RE_OP_END_OF_WORD 26 -#define RE_OP_FUZZY 27 -#define RE_OP_GRAPHEME_BOUNDARY 28 -#define RE_OP_GREEDY_REPEAT 29 -#define RE_OP_GROUP 30 -#define RE_OP_GROUP_CALL 31 -#define RE_OP_GROUP_EXISTS 32 -#define RE_OP_KEEP 33 -#define RE_OP_LAZY_REPEAT 34 -#define RE_OP_LOOKAROUND 35 -#define RE_OP_NEXT 36 -#define RE_OP_PROPERTY 37 -#define RE_OP_PROPERTY_IGN 38 -#define RE_OP_PROPERTY_IGN_REV 39 -#define RE_OP_PROPERTY_REV 40 -#define RE_OP_PRUNE 41 -#define RE_OP_RANGE 42 -#define RE_OP_RANGE_IGN 43 -#define RE_OP_RANGE_IGN_REV 44 -#define 
RE_OP_RANGE_REV 45 -#define RE_OP_REF_GROUP 46 -#define RE_OP_REF_GROUP_FLD 47 -#define RE_OP_REF_GROUP_FLD_REV 48 -#define RE_OP_REF_GROUP_IGN 49 -#define RE_OP_REF_GROUP_IGN_REV 50 -#define RE_OP_REF_GROUP_REV 51 -#define RE_OP_SEARCH_ANCHOR 52 -#define RE_OP_SET_DIFF 53 -#define RE_OP_SET_DIFF_IGN 54 -#define RE_OP_SET_DIFF_IGN_REV 55 -#define RE_OP_SET_DIFF_REV 56 -#define RE_OP_SET_INTER 57 -#define RE_OP_SET_INTER_IGN 58 -#define RE_OP_SET_INTER_IGN_REV 59 -#define RE_OP_SET_INTER_REV 60 -#define RE_OP_SET_SYM_DIFF 61 -#define RE_OP_SET_SYM_DIFF_IGN 62 -#define RE_OP_SET_SYM_DIFF_IGN_REV 63 -#define RE_OP_SET_SYM_DIFF_REV 64 -#define RE_OP_SET_UNION 65 -#define RE_OP_SET_UNION_IGN 66 -#define RE_OP_SET_UNION_IGN_REV 67 -#define RE_OP_SET_UNION_REV 68 -#define RE_OP_SKIP 69 -#define RE_OP_START_OF_LINE 70 -#define RE_OP_START_OF_LINE_U 71 -#define RE_OP_START_OF_STRING 72 -#define RE_OP_START_OF_WORD 73 -#define RE_OP_STRING 74 -#define RE_OP_STRING_FLD 75 -#define RE_OP_STRING_FLD_REV 76 -#define RE_OP_STRING_IGN 77 -#define RE_OP_STRING_IGN_REV 78 -#define RE_OP_STRING_REV 79 -#define RE_OP_STRING_SET 80 -#define RE_OP_STRING_SET_FLD 81 -#define RE_OP_STRING_SET_FLD_REV 82 -#define RE_OP_STRING_SET_IGN 83 -#define RE_OP_STRING_SET_IGN_REV 84 -#define RE_OP_STRING_SET_REV 85 -#define RE_OP_BODY_END 86 -#define RE_OP_BODY_START 87 -#define RE_OP_END_ATOMIC 88 -#define RE_OP_END_CONDITIONAL 89 -#define RE_OP_END_FUZZY 90 -#define RE_OP_END_GREEDY_REPEAT 91 -#define RE_OP_END_GROUP 92 -#define RE_OP_END_LAZY_REPEAT 93 -#define RE_OP_END_LOOKAROUND 94 -#define RE_OP_GREEDY_REPEAT_ONE 95 -#define RE_OP_GROUP_RETURN 96 -#define RE_OP_LAZY_REPEAT_ONE 97 -#define RE_OP_MATCH_BODY 98 -#define RE_OP_MATCH_TAIL 99 -#define RE_OP_START_GROUP 100 - -char* re_op_text[] = { - "RE_OP_FAILURE", - "RE_OP_SUCCESS", - "RE_OP_ANY", - "RE_OP_ANY_ALL", - "RE_OP_ANY_ALL_REV", - "RE_OP_ANY_REV", - "RE_OP_ANY_U", - "RE_OP_ANY_U_REV", - "RE_OP_ATOMIC", - "RE_OP_BOUNDARY", - 
"RE_OP_BRANCH", - "RE_OP_CALL_REF", - "RE_OP_CHARACTER", - "RE_OP_CHARACTER_IGN", - "RE_OP_CHARACTER_IGN_REV", - "RE_OP_CHARACTER_REV", - "RE_OP_CONDITIONAL", - "RE_OP_DEFAULT_BOUNDARY", - "RE_OP_DEFAULT_END_OF_WORD", - "RE_OP_DEFAULT_START_OF_WORD", - "RE_OP_END", - "RE_OP_END_OF_LINE", - "RE_OP_END_OF_LINE_U", - "RE_OP_END_OF_STRING", - "RE_OP_END_OF_STRING_LINE", - "RE_OP_END_OF_STRING_LINE_U", - "RE_OP_END_OF_WORD", - "RE_OP_FUZZY", - "RE_OP_GRAPHEME_BOUNDARY", - "RE_OP_GREEDY_REPEAT", - "RE_OP_GROUP", - "RE_OP_GROUP_CALL", - "RE_OP_GROUP_EXISTS", - "RE_OP_KEEP", - "RE_OP_LAZY_REPEAT", - "RE_OP_LOOKAROUND", - "RE_OP_NEXT", - "RE_OP_PROPERTY", - "RE_OP_PROPERTY_IGN", - "RE_OP_PROPERTY_IGN_REV", - "RE_OP_PROPERTY_REV", - "RE_OP_PRUNE", - "RE_OP_RANGE", - "RE_OP_RANGE_IGN", - "RE_OP_RANGE_IGN_REV", - "RE_OP_RANGE_REV", - "RE_OP_REF_GROUP", - "RE_OP_REF_GROUP_FLD", - "RE_OP_REF_GROUP_FLD_REV", - "RE_OP_REF_GROUP_IGN", - "RE_OP_REF_GROUP_IGN_REV", - "RE_OP_REF_GROUP_REV", - "RE_OP_SEARCH_ANCHOR", - "RE_OP_SET_DIFF", - "RE_OP_SET_DIFF_IGN", - "RE_OP_SET_DIFF_IGN_REV", - "RE_OP_SET_DIFF_REV", - "RE_OP_SET_INTER", - "RE_OP_SET_INTER_IGN", - "RE_OP_SET_INTER_IGN_REV", - "RE_OP_SET_INTER_REV", - "RE_OP_SET_SYM_DIFF", - "RE_OP_SET_SYM_DIFF_IGN", - "RE_OP_SET_SYM_DIFF_IGN_REV", - "RE_OP_SET_SYM_DIFF_REV", - "RE_OP_SET_UNION", - "RE_OP_SET_UNION_IGN", - "RE_OP_SET_UNION_IGN_REV", - "RE_OP_SET_UNION_REV", - "RE_OP_SKIP", - "RE_OP_START_OF_LINE", - "RE_OP_START_OF_LINE_U", - "RE_OP_START_OF_STRING", - "RE_OP_START_OF_WORD", - "RE_OP_STRING", - "RE_OP_STRING_FLD", - "RE_OP_STRING_FLD_REV", - "RE_OP_STRING_IGN", - "RE_OP_STRING_IGN_REV", - "RE_OP_STRING_REV", - "RE_OP_STRING_SET", - "RE_OP_STRING_SET_FLD", - "RE_OP_STRING_SET_FLD_REV", - "RE_OP_STRING_SET_IGN", - "RE_OP_STRING_SET_IGN_REV", - "RE_OP_STRING_SET_REV", - "RE_OP_BODY_END", - "RE_OP_BODY_START", - "RE_OP_END_ATOMIC", - "RE_OP_END_CONDITIONAL", - "RE_OP_END_FUZZY", - "RE_OP_END_GREEDY_REPEAT", - "RE_OP_END_GROUP", - 
"RE_OP_END_LAZY_REPEAT", - "RE_OP_END_LOOKAROUND", - "RE_OP_GREEDY_REPEAT_ONE", - "RE_OP_GROUP_RETURN", - "RE_OP_LAZY_REPEAT_ONE", - "RE_OP_MATCH_BODY", - "RE_OP_MATCH_TAIL", - "RE_OP_START_GROUP", -}; - -#define RE_FLAG_ASCII 0x80 -#define RE_FLAG_BESTMATCH 0x1000 -#define RE_FLAG_DEBUG 0x200 -#define RE_FLAG_DOTALL 0x10 -#define RE_FLAG_ENHANCEMATCH 0x8000 -#define RE_FLAG_FULLCASE 0x4000 -#define RE_FLAG_IGNORECASE 0x2 -#define RE_FLAG_LOCALE 0x4 -#define RE_FLAG_MULTILINE 0x8 -#define RE_FLAG_POSIX 0x10000 -#define RE_FLAG_REVERSE 0x400 -#define RE_FLAG_TEMPLATE 0x1 -#define RE_FLAG_UNICODE 0x20 -#define RE_FLAG_VERBOSE 0x40 -#define RE_FLAG_VERSION0 0x2000 -#define RE_FLAG_VERSION1 0x100 -#define RE_FLAG_WORD 0x800 diff --git a/src/regex/_regex_core.py b/src/regex/_regex_core.py deleted file mode 100644 index 4b65078eda..0000000000 --- a/src/regex/_regex_core.py +++ /dev/null @@ -1,4413 +0,0 @@ -# -# Secret Labs' Regular Expression Engine core module -# -# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. -# -# This version of the SRE library can be redistributed under CNRI's -# Python 1.6 license. For any other use, please contact Secret Labs -# AB (info@pythonware.com). -# -# Portions of this engine have been developed in cooperation with -# CNRI. Hewlett-Packard provided funding for 1.6 integration and -# other compatibility work. 
-# -# 2010-01-16 mrab Python front-end re-written and extended - -import string -import unicodedata -from collections import defaultdict - -from calibre.constants import plugins -_regex = plugins['_regex'][0] -if _regex is None: - raise RuntimeError('Failed to load regex module with error: ' + plugins['_regex'][1]) - -__all__ = ["A", "ASCII", "B", "BESTMATCH", "D", "DEBUG", "E", "ENHANCEMATCH", - "F", "FULLCASE", "I", "IGNORECASE", "L", "LOCALE", "M", "MULTILINE", "P", - "POSIX", "R", "REVERSE", "S", "DOTALL", "T", "TEMPLATE", "U", "UNICODE", - "V0", "VERSION0", "V1", "VERSION1", "W", "WORD", "X", "VERBOSE", "error", - "Scanner"] - -# The regex exception. -class error(Exception): - def __init__(self, message, pattern=None, pos=None): - newline = u'\n' if isinstance(pattern, unicode) else '\n' - self.msg = message - self.pattern = pattern - self.pos = pos - if pattern is not None and pos is not None: - self.lineno = pattern.count(newline, 0, pos) + 1 - self.colno = pos - pattern.rfind(newline, 0, pos) - - message = "%s at position %d" % (message, pos) - - if newline in pattern: - message += " (line %d, column %d)" % (self.lineno, self.colno) - - Exception.__init__(self, message) - -# The exception for when a positional flag has been turned on in the old -# behaviour. -class _UnscopedFlagSet(Exception): - pass - -# The exception for when parsing fails and we want to try something else. -class ParseError(Exception): - pass - -# The exception for when there isn't a valid first set. -class _FirstSetError(Exception): - pass - -# Flags. -A = ASCII = 0x80 # Assume ASCII locale. -B = BESTMATCH = 0x1000 # Best fuzzy match. -D = DEBUG = 0x200 # Print parsed pattern. -E = ENHANCEMATCH = 0x8000 # Attempt to improve the fit after finding the first - # fuzzy match. -F = FULLCASE = 0x4000 # Unicode full case-folding. -I = IGNORECASE = 0x2 # Ignore case. -L = LOCALE = 0x4 # Assume current 8-bit locale. -M = MULTILINE = 0x8 # Make anchors look for newline. 
-P = POSIX = 0x10000 # POSIX-style matching (leftmost longest). -R = REVERSE = 0x400 # Search backwards. -S = DOTALL = 0x10 # Make dot match newline. -U = UNICODE = 0x20 # Assume Unicode locale. -V0 = VERSION0 = 0x2000 # Old legacy behaviour. -V1 = VERSION1 = 0x100 # New enhanced behaviour. -W = WORD = 0x800 # Default Unicode word breaks. -X = VERBOSE = 0x40 # Ignore whitespace and comments. -T = TEMPLATE = 0x1 # Template (present because re module has it). - -DEFAULT_VERSION = VERSION1 - -_ALL_VERSIONS = VERSION0 | VERSION1 -_ALL_ENCODINGS = ASCII | LOCALE | UNICODE - -# The default flags for the various versions. -DEFAULT_FLAGS = {VERSION0: 0, VERSION1: FULLCASE} - -# The mask for the flags. -GLOBAL_FLAGS = (_ALL_ENCODINGS | _ALL_VERSIONS | BESTMATCH | DEBUG | - ENHANCEMATCH | POSIX | REVERSE) -SCOPED_FLAGS = FULLCASE | IGNORECASE | MULTILINE | DOTALL | WORD | VERBOSE - -ALPHA = frozenset(string.ascii_letters) -DIGITS = frozenset(string.digits) -ALNUM = ALPHA | DIGITS -OCT_DIGITS = frozenset(string.octdigits) -HEX_DIGITS = frozenset(string.hexdigits) -SPECIAL_CHARS = frozenset("()|?*+{^$.[\\#") | frozenset([""]) -NAMED_CHAR_PART = ALNUM | frozenset(" -") -PROPERTY_NAME_PART = ALNUM | frozenset(" &_-.") -SET_OPS = ("||", "~~", "&&", "--") - -# The width of the code words inside the regex engine. -BYTES_PER_CODE = _regex.get_code_size() -BITS_PER_CODE = BYTES_PER_CODE * 8 - -# The repeat count which represents infinity. -UNLIMITED = (1 << BITS_PER_CODE) - 1 - -# The regular expression flags. -REGEX_FLAGS = {"a": ASCII, "b": BESTMATCH, "e": ENHANCEMATCH, "f": FULLCASE, - "i": IGNORECASE, "L": LOCALE, "m": MULTILINE, "p": POSIX, "r": REVERSE, - "s": DOTALL, "u": UNICODE, "V0": VERSION0, "V1": VERSION1, "w": WORD, "x": - VERBOSE} - -# The case flags. 
-CASE_FLAGS = FULLCASE | IGNORECASE -NOCASE = 0 -FULLIGNORECASE = FULLCASE | IGNORECASE - -FULL_CASE_FOLDING = UNICODE | FULLIGNORECASE - -CASE_FLAGS_COMBINATIONS = {0: 0, FULLCASE: 0, IGNORECASE: IGNORECASE, - FULLIGNORECASE: FULLIGNORECASE} - -# The number of digits in hexadecimal escapes. -HEX_ESCAPES = {"x": 2, "u": 4, "U": 8} - -# A singleton which indicates a comment within a pattern. -COMMENT = object() -FLAGS = object() - -# The names of the opcodes. -OPCODES = """ -FAILURE -SUCCESS -ANY -ANY_ALL -ANY_ALL_REV -ANY_REV -ANY_U -ANY_U_REV -ATOMIC -BOUNDARY -BRANCH -CALL_REF -CHARACTER -CHARACTER_IGN -CHARACTER_IGN_REV -CHARACTER_REV -CONDITIONAL -DEFAULT_BOUNDARY -DEFAULT_END_OF_WORD -DEFAULT_START_OF_WORD -END -END_OF_LINE -END_OF_LINE_U -END_OF_STRING -END_OF_STRING_LINE -END_OF_STRING_LINE_U -END_OF_WORD -FUZZY -GRAPHEME_BOUNDARY -GREEDY_REPEAT -GROUP -GROUP_CALL -GROUP_EXISTS -KEEP -LAZY_REPEAT -LOOKAROUND -NEXT -PROPERTY -PROPERTY_IGN -PROPERTY_IGN_REV -PROPERTY_REV -PRUNE -RANGE -RANGE_IGN -RANGE_IGN_REV -RANGE_REV -REF_GROUP -REF_GROUP_FLD -REF_GROUP_FLD_REV -REF_GROUP_IGN -REF_GROUP_IGN_REV -REF_GROUP_REV -SEARCH_ANCHOR -SET_DIFF -SET_DIFF_IGN -SET_DIFF_IGN_REV -SET_DIFF_REV -SET_INTER -SET_INTER_IGN -SET_INTER_IGN_REV -SET_INTER_REV -SET_SYM_DIFF -SET_SYM_DIFF_IGN -SET_SYM_DIFF_IGN_REV -SET_SYM_DIFF_REV -SET_UNION -SET_UNION_IGN -SET_UNION_IGN_REV -SET_UNION_REV -SKIP -START_OF_LINE -START_OF_LINE_U -START_OF_STRING -START_OF_WORD -STRING -STRING_FLD -STRING_FLD_REV -STRING_IGN -STRING_IGN_REV -STRING_REV -STRING_SET -STRING_SET_FLD -STRING_SET_FLD_REV -STRING_SET_IGN -STRING_SET_IGN_REV -STRING_SET_REV -""" - -# Define the opcodes in a namespace. -class Namespace(object): - pass - -OP = Namespace() -for i, op in enumerate(OPCODES.split()): - setattr(OP, op, i) - -def _shrink_cache(cache_dict, args_dict, locale_sensitive, max_length, divisor=5): - """Make room in the given cache. - - Args: - cache_dict: The cache dictionary to modify. 
- args_dict: The dictionary of named list args used by patterns. - max_length: Maximum # of entries in cache_dict before it is shrunk. - divisor: Cache will shrink to max_length - 1/divisor*max_length items. - """ - # Toss out a fraction of the entries at random to make room for new ones. - # A random algorithm was chosen as opposed to simply cache_dict.popitem() - # as popitem could penalize the same regular expression repeatedly based - # on its internal hash value. Being random should spread the cache miss - # love around. - cache_keys = tuple(cache_dict.keys()) - overage = len(cache_keys) - max_length - if overage < 0: - # Cache is already within limits. Normally this should not happen - # but it could due to multithreading. - return - - number_to_toss = max_length // divisor + overage - - # The import is done here to avoid a circular dependency. - import random - if not hasattr(random, 'sample'): - # Do nothing while resolving the circular dependency: - # re->random->warnings->tokenize->string->re - return - - for doomed_key in random.sample(cache_keys, number_to_toss): - try: - del cache_dict[doomed_key] - except KeyError: - # Ignore problems if the cache changed from another thread. - pass - - # Rebuild the arguments and locale-sensitivity dictionaries. - args_dict.clear() - sensitivity_dict = {} - for pattern, pattern_type, flags, args, default_version, locale in tuple(cache_dict): - args_dict[pattern, pattern_type, flags, default_version, locale] = args - try: - sensitivity_dict[pattern_type, pattern] = locale_sensitive[pattern_type, pattern] - except KeyError: - pass - - locale_sensitive.clear() - locale_sensitive.update(sensitivity_dict) - -def _fold_case(info, string): - "Folds the case of a string." - flags = info.flags - if (flags & _ALL_ENCODINGS) == 0: - flags |= info.guess_encoding - - return _regex.fold_case(flags, string) - -def is_cased(info, char): - "Checks whether a character is cased." 
- return len(_regex.get_all_cases(info.flags, char)) > 1 - -def _compile_firstset(info, fs): - "Compiles the firstset for the pattern." - reverse = bool(info.flags & REVERSE) - fs = _check_firstset(info, reverse, fs) - if not fs: - return [] - - # Compile the firstset. - return fs.compile(reverse) - -def _check_firstset(info, reverse, fs): - "Checks the firstset for the pattern." - if not fs or None in fs: - return None - - # If we ignore the case, for simplicity we won't build a firstset. - members = set() - case_flags = NOCASE - for i in fs: - if isinstance(i, Character) and not i.positive: - return None - -# if i.case_flags: -# if isinstance(i, Character): -# if is_cased(info, i.value): -# return [] -# elif isinstance(i, SetBase): -# return [] - case_flags |= i.case_flags - members.add(i.with_flags(case_flags=NOCASE)) - - if case_flags == (FULLCASE | IGNORECASE): - return None - - # Build the firstset. - fs = SetUnion(info, list(members), case_flags=case_flags & ~FULLCASE, - zerowidth=True) - fs = fs.optimise(info, reverse, in_set=True) - - return fs - -def _flatten_code(code): - "Flattens the code from a list of tuples." - flat_code = [] - for c in code: - flat_code.extend(c) - - return flat_code - -def make_case_flags(info): - "Makes the case flags." - flags = info.flags & CASE_FLAGS - - # Turn off FULLCASE if ASCII is turned on. - if info.flags & ASCII: - flags &= ~FULLCASE - - return flags - -def make_character(info, value, in_set=False): - "Makes a character literal." - if in_set: - # A character set is built case-sensitively. - return Character(value) - - return Character(value, case_flags=make_case_flags(info)) - -def make_ref_group(info, name, position): - "Makes a group reference." - return RefGroup(info, name, position, case_flags=make_case_flags(info)) - -def make_string_set(info, name): - "Makes a string set." - return StringSet(info, name, case_flags=make_case_flags(info)) - -def make_property(info, prop, in_set): - "Makes a property." 
- if in_set: - return prop - - return prop.with_flags(case_flags=make_case_flags(info)) - -def _parse_pattern(source, info): - "Parses a pattern, eg. 'a|b|c'." - branches = [parse_sequence(source, info)] - while source.match("|"): - branches.append(parse_sequence(source, info)) - - if len(branches) == 1: - return branches[0] - return Branch(branches) - -def parse_sequence(source, info): - "Parses a sequence, eg. 'abc'." - sequence = [] - applied = False - while True: - # Get literal characters followed by an element. - characters, case_flags, element = parse_literal_and_element(source, - info) - if not element: - # No element, just a literal. We've also reached the end of the - # sequence. - append_literal(characters, case_flags, sequence) - break - - if element is COMMENT or element is FLAGS: - append_literal(characters, case_flags, sequence) - elif type(element) is tuple: - # It looks like we've found a quantifier. - ch, saved_pos = element - - counts = parse_quantifier(source, info, ch) - if counts: - # It _is_ a quantifier. - apply_quantifier(source, info, counts, characters, case_flags, - ch, saved_pos, applied, sequence) - applied = True - else: - # It's not a quantifier. Maybe it's a fuzzy constraint. - constraints = parse_fuzzy(source, ch) - if constraints: - # It _is_ a fuzzy constraint. - apply_constraint(source, info, constraints, characters, - case_flags, saved_pos, applied, sequence) - applied = True - else: - # The element was just a literal. - characters.append(ord(ch)) - append_literal(characters, case_flags, sequence) - applied = False - else: - # We have a literal followed by something else. - append_literal(characters, case_flags, sequence) - sequence.append(element) - applied = False - - return make_sequence(sequence) - -def apply_quantifier(source, info, counts, characters, case_flags, ch, - saved_pos, applied, sequence): - if characters: - # The quantifier applies to the last character. 
- append_literal(characters[ : -1], case_flags, sequence) - element = Character(characters[-1], case_flags=case_flags) - else: - # The quantifier applies to the last item in the sequence. - if applied: - raise error("multiple repeat", source.string, saved_pos) - - if not sequence: - raise error("nothing to repeat", source.string, saved_pos) - - element = sequence.pop() - - min_count, max_count = counts - saved_pos = source.pos - ch = source.get() - if ch == "?": - # The "?" suffix that means it's a lazy repeat. - repeated = LazyRepeat - elif ch == "+": - # The "+" suffix that means it's a possessive repeat. - repeated = PossessiveRepeat - else: - # No suffix means that it's a greedy repeat. - source.pos = saved_pos - repeated = GreedyRepeat - - # Ignore the quantifier if it applies to a zero-width item or the number of - # repeats is fixed at 1. - if not element.is_empty() and (min_count != 1 or max_count != 1): - element = repeated(element, min_count, max_count) - - sequence.append(element) - -def apply_constraint(source, info, constraints, characters, case_flags, - saved_pos, applied, sequence): - if characters: - # The constraint applies to the last character. - append_literal(characters[ : -1], case_flags, sequence) - element = Character(characters[-1], case_flags=case_flags) - sequence.append(Fuzzy(element, constraints)) - else: - # The constraint applies to the last item in the sequence. - if applied or not sequence: - raise error("nothing for fuzzy constraint", source.string, - saved_pos) - - element = sequence.pop() - - # If a group is marked as fuzzy then put all of the fuzzy part in the - # group. 
- if isinstance(element, Group): - element.subpattern = Fuzzy(element.subpattern, constraints) - sequence.append(element) - else: - sequence.append(Fuzzy(element, constraints)) - -def append_literal(characters, case_flags, sequence): - if characters: - sequence.append(Literal(characters, case_flags=case_flags)) - -def PossessiveRepeat(element, min_count, max_count): - "Builds a possessive repeat." - return Atomic(GreedyRepeat(element, min_count, max_count)) - -_QUANTIFIERS = {"?": (0, 1), "*": (0, None), "+": (1, None)} - -def parse_quantifier(source, info, ch): - "Parses a quantifier." - q = _QUANTIFIERS.get(ch) - if q: - # It's a quantifier. - return q - - if ch == "{": - # Looks like a limited repeated element, eg. 'a{2,3}'. - counts = parse_limited_quantifier(source) - if counts: - return counts - - return None - -def is_above_limit(count): - "Checks whether a count is above the maximum." - return count is not None and count >= UNLIMITED - -def parse_limited_quantifier(source): - "Parses a limited quantifier." - saved_pos = source.pos - min_count = parse_count(source) - if source.match(","): - max_count = parse_count(source) - - # No minimum means 0 and no maximum means unlimited. - min_count = int(min_count or 0) - max_count = int(max_count) if max_count else None - else: - if not min_count: - source.pos = saved_pos - return None - - min_count = max_count = int(min_count) - - if not source.match ("}"): - source.pos = saved_pos - return None - - if is_above_limit(min_count) or is_above_limit(max_count): - raise error("repeat count too big", source.string, saved_pos) - - if max_count is not None and min_count > max_count: - raise error("min repeat greater than max repeat", source.string, - saved_pos) - - return min_count, max_count - -def parse_fuzzy(source, ch): - "Parses a fuzzy setting, if present." 
- saved_pos = source.pos - - if ch != "{": - return None - - constraints = {} - try: - parse_fuzzy_item(source, constraints) - while source.match(","): - parse_fuzzy_item(source, constraints) - except ParseError: - source.pos = saved_pos - return None - - if not source.match("}"): - raise error("expected }", source.string, source.pos) - - return constraints - -def parse_fuzzy_item(source, constraints): - "Parses a fuzzy setting item." - saved_pos = source.pos - try: - parse_cost_constraint(source, constraints) - except ParseError: - source.pos = saved_pos - - parse_cost_equation(source, constraints) - -def parse_cost_constraint(source, constraints): - "Parses a cost constraint." - saved_pos = source.pos - ch = source.get() - if ch in ALPHA: - # Syntax: constraint [("<=" | "<") cost] - constraint = parse_constraint(source, constraints, ch) - - max_inc = parse_fuzzy_compare(source) - - if max_inc is None: - # No maximum cost. - constraints[constraint] = 0, None - else: - # There's a maximum cost. - cost_pos = source.pos - max_cost = parse_cost_limit(source) - - # Inclusive or exclusive limit? - if not max_inc: - max_cost -= 1 - - if max_cost < 0: - raise error("bad fuzzy cost limit", source.string, cost_pos) - - constraints[constraint] = 0, max_cost - elif ch in DIGITS: - # Syntax: cost ("<=" | "<") constraint ("<=" | "<") cost - source.pos = saved_pos - - # Minimum cost. - cost_pos = source.pos - min_cost = parse_cost_limit(source) - - min_inc = parse_fuzzy_compare(source) - if min_inc is None: - raise ParseError() - - constraint = parse_constraint(source, constraints, source.get()) - - max_inc = parse_fuzzy_compare(source) - if max_inc is None: - raise ParseError() - - # Maximum cost. - cost_pos = source.pos - max_cost = parse_cost_limit(source) - - # Inclusive or exclusive limits? 
- if not min_inc: - min_cost += 1 - if not max_inc: - max_cost -= 1 - - if not 0 <= min_cost <= max_cost: - raise error("bad fuzzy cost limit", source.string, cost_pos) - - constraints[constraint] = min_cost, max_cost - else: - raise ParseError() - -def parse_cost_limit(source): - "Parses a cost limit." - cost_pos = source.pos - digits = parse_count(source) - - try: - return int(digits) - except ValueError: - pass - - raise error("bad fuzzy cost limit", source.string, cost_pos) - -def parse_constraint(source, constraints, ch): - "Parses a constraint." - if ch not in "deis": - raise ParseError() - - if ch in constraints: - raise ParseError() - - return ch - -def parse_fuzzy_compare(source): - "Parses a cost comparator." - if source.match("<="): - return True - elif source.match("<"): - return False - else: - return None - -def parse_cost_equation(source, constraints): - "Parses a cost equation." - if "cost" in constraints: - raise error("more than one cost equation", source.string, source.pos) - - cost = {} - - parse_cost_term(source, cost) - while source.match("+"): - parse_cost_term(source, cost) - - max_inc = parse_fuzzy_compare(source) - if max_inc is None: - raise ParseError() - - max_cost = int(parse_count(source)) - - if not max_inc: - max_cost -= 1 - - if max_cost < 0: - raise error("bad fuzzy cost limit", source.string, source.pos) - - cost["max"] = max_cost - - constraints["cost"] = cost - -def parse_cost_term(source, cost): - "Parses a cost equation term." - coeff = parse_count(source) - ch = source.get() - if ch not in "dis": - raise ParseError() - - if ch in cost: - raise error("repeated fuzzy cost", source.string, source.pos) - - cost[ch] = int(coeff or 1) - -def parse_count(source): - "Parses a quantifier's count, which can be empty." - return source.get_while(DIGITS) - -def parse_literal_and_element(source, info): - """Parses a literal followed by an element. 
The element is FLAGS if it's an - inline flag or None if it has reached the end of a sequence. - """ - characters = [] - case_flags = make_case_flags(info) - while True: - saved_pos = source.pos - ch = source.get() - if ch in SPECIAL_CHARS: - if ch in ")|": - # The end of a sequence. At the end of the pattern ch is "". - source.pos = saved_pos - return characters, case_flags, None - elif ch == "\\": - # An escape sequence outside a set. - element = parse_escape(source, info, False) - return characters, case_flags, element - elif ch == "(": - # A parenthesised subpattern or a flag. - element = parse_paren(source, info) - if element and element is not COMMENT: - return characters, case_flags, element - elif ch == ".": - # Any character. - if info.flags & DOTALL: - element = AnyAll() - elif info.flags & WORD: - element = AnyU() - else: - element = Any() - - return characters, case_flags, element - elif ch == "[": - # A character set. - element = parse_set(source, info) - return characters, case_flags, element - elif ch == "^": - # The start of a line or the string. - if info.flags & MULTILINE: - if info.flags & WORD: - element = StartOfLineU() - else: - element = StartOfLine() - else: - element = StartOfString() - - return characters, case_flags, element - elif ch == "$": - # The end of a line or the string. - if info.flags & MULTILINE: - if info.flags & WORD: - element = EndOfLineU() - else: - element = EndOfLine() - else: - if info.flags & WORD: - element = EndOfStringLineU() - else: - element = EndOfStringLine() - - return characters, case_flags, element - elif ch in "?*+{": - # Looks like a quantifier. - return characters, case_flags, (ch, saved_pos) - else: - # A literal. - characters.append(ord(ch)) - else: - # A literal. - characters.append(ord(ch)) - -def parse_paren(source, info): - """Parses a parenthesised subpattern or a flag. Returns FLAGS if it's an - inline flag. - """ - saved_pos = source.pos - ch = source.get() - if ch == "?": - # (?... 
- saved_pos_2 = source.pos - ch = source.get() - if ch == "<": - # (?<... - saved_pos_3 = source.pos - ch = source.get() - if ch in ("=", "!"): - # (?<=... or (?") - saved_flags = info.flags - try: - subpattern = _parse_pattern(source, info) - source.expect(")") - finally: - info.flags = saved_flags - source.ignore_space = bool(info.flags & VERBOSE) - - info.close_group() - return Group(info, group, subpattern) - if ch in ("=", "!"): - # (?=... or (?!...: lookahead. - return parse_lookaround(source, info, False, ch == "=") - if ch == "P": - # (?P...: a Python extension. - return parse_extension(source, info) - if ch == "#": - # (?#...: a comment. - return parse_comment(source) - if ch == "(": - # (?(...: a conditional subpattern. - return parse_conditional(source, info) - if ch == ">": - # (?>...: an atomic subpattern. - return parse_atomic(source, info) - if ch == "|": - # (?|...: a common/reset groups branch. - return parse_common(source, info) - if ch == "R" or "0" <= ch <= "9": - # (?R...: probably a call to a group. - return parse_call_group(source, info, ch, saved_pos_2) - if ch == "&": - # (?&...: a call to a named group. - return parse_call_named_group(source, info, saved_pos_2) - - # (?...: probably a flags subpattern. - source.pos = saved_pos_2 - return parse_flags_subpattern(source, info) - - if ch == "*": - # (*... - saved_pos_2 = source.pos - word = source.get_while(set(")>"), include=False) - if word[ : 1].isalpha(): - verb = VERBS.get(word) - if not verb: - raise error("unknown verb", source.string, saved_pos_2) - - source.expect(")") - - return verb - - # (...: an unnamed capture group. 
- source.pos = saved_pos - group = info.open_group() - saved_flags = info.flags - try: - subpattern = _parse_pattern(source, info) - source.expect(")") - finally: - info.flags = saved_flags - source.ignore_space = bool(info.flags & VERBOSE) - - info.close_group() - - return Group(info, group, subpattern) - -def parse_extension(source, info): - "Parses a Python extension." - saved_pos = source.pos - ch = source.get() - if ch == "<": - # (?P<...: a named capture group. - name = parse_name(source) - group = info.open_group(name) - source.expect(">") - saved_flags = info.flags - try: - subpattern = _parse_pattern(source, info) - source.expect(")") - finally: - info.flags = saved_flags - source.ignore_space = bool(info.flags & VERBOSE) - - info.close_group() - - return Group(info, group, subpattern) - if ch == "=": - # (?P=...: a named group reference. - name = parse_name(source, allow_numeric=True) - source.expect(")") - if info.is_open_group(name): - raise error("cannot refer to an open group", source.string, - saved_pos) - - return make_ref_group(info, name, saved_pos) - if ch == ">" or ch == "&": - # (?P>...: a call to a group. - return parse_call_named_group(source, info, saved_pos) - - source.pos = saved_pos - raise error("unknown extension", source.string, saved_pos) - -def parse_comment(source): - "Parses a comment." - source.skip_while(set(")"), include=False) - source.expect(")") - - return COMMENT - -def parse_lookaround(source, info, behind, positive): - "Parses a lookaround." - saved_flags = info.flags - try: - subpattern = _parse_pattern(source, info) - source.expect(")") - finally: - info.flags = saved_flags - source.ignore_space = bool(info.flags & VERBOSE) - - return LookAround(behind, positive, subpattern) - -def parse_conditional(source, info): - "Parses a conditional subpattern." - saved_flags = info.flags - saved_pos = source.pos - ch = source.get() - if ch == "?": - # (?(?... - ch = source.get() - if ch in ("=", "!"): - # (?(?=... 
or (?(?!...: lookahead conditional. - return parse_lookaround_conditional(source, info, False, ch == "=") - if ch == "<": - # (?(?<... - ch = source.get() - if ch in ("=", "!"): - # (?(?<=... or (?(?"), include=False) - - if not name: - raise error("missing group name", source.string, source.pos) - - if name.isdigit(): - min_group = 0 if allow_group_0 else 1 - if not allow_numeric or int(name) < min_group: - raise error("bad character in group name", source.string, - source.pos) - else: - if not is_identifier(name): - raise error("bad character in group name", source.string, - source.pos) - - return name - -def is_identifier(name): - if not name: - return False - - if name[0] not in ALPHA and name[0] != "_": - return False - - name = name.replace("_", "") - - return not name or all(c in ALNUM for c in name) - -def is_octal(string): - "Checks whether a string is octal." - return all(ch in OCT_DIGITS for ch in string) - -def is_decimal(string): - "Checks whether a string is decimal." - return all(ch in DIGITS for ch in string) - -def is_hexadecimal(string): - "Checks whether a string is hexadecimal." - return all(ch in HEX_DIGITS for ch in string) - -def parse_escape(source, info, in_set): - "Parses an escape sequence." - saved_ignore = source.ignore_space - source.ignore_space = False - ch = source.get() - source.ignore_space = saved_ignore - if not ch: - # A backslash at the end of the pattern. - raise error("bad escape (end of pattern)", source.string, source.pos) - if ch in HEX_ESCAPES: - # A hexadecimal escape sequence. - return parse_hex_escape(source, info, ch, HEX_ESCAPES[ch], in_set, ch) - elif ch == "g" and not in_set: - # A group reference. - saved_pos = source.pos - try: - return parse_group_ref(source, info) - except error: - # Invalid as a group reference, so assume it's a literal. - source.pos = saved_pos - - return make_character(info, ord(ch), in_set) - elif ch == "G" and not in_set: - # A search anchor. 
- return SearchAnchor() - elif ch == "L" and not in_set: - # A string set. - return parse_string_set(source, info) - elif ch == "N": - # A named codepoint. - return parse_named_char(source, info, in_set) - elif ch in "pP": - # A Unicode property, positive or negative. - return parse_property(source, info, ch == "p", in_set) - elif ch == "X" and not in_set: - # A grapheme cluster. - return Grapheme() - elif ch in ALPHA: - # An alphabetic escape sequence. - # Positional escapes aren't allowed inside a character set. - if not in_set: - if info.flags & WORD: - value = WORD_POSITION_ESCAPES.get(ch) - else: - value = POSITION_ESCAPES.get(ch) - - if value: - return value - - value = CHARSET_ESCAPES.get(ch) - if value: - return value - - value = CHARACTER_ESCAPES.get(ch) - if value: - return Character(ord(value)) - - return make_character(info, ord(ch), in_set) - elif ch in DIGITS: - # A numeric escape sequence. - return parse_numeric_escape(source, info, ch, in_set) - else: - # A literal. - return make_character(info, ord(ch), in_set) - -def parse_numeric_escape(source, info, ch, in_set): - "Parses a numeric escape sequence." - if in_set or ch == "0": - # Octal escape sequence, max 3 digits. - return parse_octal_escape(source, info, [ch], in_set) - - # At least 1 digit, so either octal escape or group. - digits = ch - saved_pos = source.pos - ch = source.get() - if ch in DIGITS: - # At least 2 digits, so either octal escape or group. - digits += ch - saved_pos = source.pos - ch = source.get() - if is_octal(digits) and ch in OCT_DIGITS: - # 3 octal digits, so octal escape sequence. - encoding = info.flags & _ALL_ENCODINGS - if encoding == ASCII or encoding == LOCALE: - octal_mask = 0xFF - else: - octal_mask = 0x1FF - - value = int(digits + ch, 8) & octal_mask - return make_character(info, value) - - # Group reference. 
- source.pos = saved_pos - if info.is_open_group(digits): - raise error("cannot refer to an open group", source.string, source.pos) - - return make_ref_group(info, digits, source.pos) - -def parse_octal_escape(source, info, digits, in_set): - "Parses an octal escape sequence." - saved_pos = source.pos - ch = source.get() - while len(digits) < 3 and ch in OCT_DIGITS: - digits.append(ch) - saved_pos = source.pos - ch = source.get() - - source.pos = saved_pos - try: - value = int("".join(digits), 8) - return make_character(info, value, in_set) - except ValueError: - if digits[0] in OCT_DIGITS: - raise error("incomplete escape \\%s" % ''.join(digits), - source.string, source.pos) - else: - raise error("bad escape \\%s" % digits[0], source.string, - source.pos) - -def parse_hex_escape(source, info, esc, expected_len, in_set, type): - "Parses a hex escape sequence." - saved_pos = source.pos - digits = [] - for i in range(expected_len): - ch = source.get() - if ch not in HEX_DIGITS: - raise error("incomplete escape \\%s%s" % (type, ''.join(digits)), - source.string, saved_pos) - digits.append(ch) - - try: - value = int("".join(digits), 16) - except ValueError: - pass - else: - if value < 0x110000: - return make_character(info, value, in_set) - - # Bad hex escape. - raise error("bad hex escape \\%s%s" % (esc, ''.join(digits)), - source.string, saved_pos) - -def parse_group_ref(source, info): - "Parses a group reference." - source.expect("<") - saved_pos = source.pos - name = parse_name(source, True) - source.expect(">") - if info.is_open_group(name): - raise error("cannot refer to an open group", source.string, source.pos) - - return make_ref_group(info, name, saved_pos) - -def parse_string_set(source, info): - "Parses a string set reference." 
- source.expect("<") - name = parse_name(source, True) - source.expect(">") - if name is None or name not in info.kwargs: - raise error("undefined named list", source.string, source.pos) - - return make_string_set(info, name) - -def parse_named_char(source, info, in_set): - "Parses a named character." - saved_pos = source.pos - if source.match("{"): - name = source.get_while(NAMED_CHAR_PART) - if source.match("}"): - try: - value = unicodedata.lookup(name) - return make_character(info, ord(value), in_set) - except KeyError: - raise error("undefined character name", source.string, - source.pos) - - source.pos = saved_pos - return make_character(info, ord("N"), in_set) - -def parse_property(source, info, positive, in_set): - "Parses a Unicode property." - saved_pos = source.pos - ch = source.get() - if ch == "{": - negate = source.match("^") - prop_name, name = parse_property_name(source) - if source.match("}"): - # It's correctly delimited. - prop = lookup_property(prop_name, name, positive != negate, source) - return make_property(info, prop, in_set) - elif ch and ch in "CLMNPSZ": - # An abbreviated property, eg \pL. - prop = lookup_property(None, ch, positive, source) - return make_property(info, prop, in_set) - - # Not a property, so treat as a literal "p" or "P". - source.pos = saved_pos - ch = "p" if positive else "P" - return make_character(info, ord(ch), in_set) - -def parse_property_name(source): - "Parses a property name, which may be qualified." - name = source.get_while(PROPERTY_NAME_PART) - saved_pos = source.pos - - ch = source.get() - if ch and ch in ":=": - prop_name = name - name = source.get_while(ALNUM | set(" &_-./")).strip() - - if name: - # Name after the ":" or "=", so it's a qualified name. - saved_pos = source.pos - else: - # No name after the ":" or "=", so assume it's an unqualified name. 
- prop_name, name = None, prop_name - else: - prop_name = None - - source.pos = saved_pos - return prop_name, name - -def parse_set(source, info): - "Parses a character set." - version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION - - saved_ignore = source.ignore_space - source.ignore_space = False - # Negative set? - negate = source.match("^") - try: - if version == VERSION0: - item = parse_set_imp_union(source, info) - else: - item = parse_set_union(source, info) - - if not source.match("]"): - raise error("missing ]", source.string, source.pos) - finally: - source.ignore_space = saved_ignore - - if negate: - item = item.with_flags(positive=not item.positive) - - item = item.with_flags(case_flags=make_case_flags(info)) - - return item - -def parse_set_union(source, info): - "Parses a set union ([x||y])." - items = [parse_set_symm_diff(source, info)] - while source.match("||"): - items.append(parse_set_symm_diff(source, info)) - - if len(items) == 1: - return items[0] - return SetUnion(info, items) - -def parse_set_symm_diff(source, info): - "Parses a set symmetric difference ([x~~y])." - items = [parse_set_inter(source, info)] - while source.match("~~"): - items.append(parse_set_inter(source, info)) - - if len(items) == 1: - return items[0] - return SetSymDiff(info, items) - -def parse_set_inter(source, info): - "Parses a set intersection ([x&&y])." - items = [parse_set_diff(source, info)] - while source.match("&&"): - items.append(parse_set_diff(source, info)) - - if len(items) == 1: - return items[0] - return SetInter(info, items) - -def parse_set_diff(source, info): - "Parses a set difference ([x--y])." - items = [parse_set_imp_union(source, info)] - while source.match("--"): - items.append(parse_set_imp_union(source, info)) - - if len(items) == 1: - return items[0] - return SetDiff(info, items) - -def parse_set_imp_union(source, info): - "Parses a set implicit union ([xy])." 
- version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION - - items = [parse_set_member(source, info)] - while True: - saved_pos = source.pos - if source.match("]"): - # End of the set. - source.pos = saved_pos - break - - if version == VERSION1 and any(source.match(op) for op in SET_OPS): - # The new behaviour has set operators. - source.pos = saved_pos - break - - items.append(parse_set_member(source, info)) - - if len(items) == 1: - return items[0] - return SetUnion(info, items) - -def parse_set_member(source, info): - "Parses a member in a character set." - # Parse a set item. - start = parse_set_item(source, info) - saved_pos1 = source.pos - if (not isinstance(start, Character) or not start.positive or not - source.match("-")): - # It's not the start of a range. - return start - - version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION - - # It looks like the start of a range of characters. - saved_pos2 = source.pos - if version == VERSION1 and source.match("-"): - # It's actually the set difference operator '--', so return the - # character. - source.pos = saved_pos1 - return start - - if source.match("]"): - # We've reached the end of the set, so return both the character and - # hyphen. - source.pos = saved_pos2 - return SetUnion(info, [start, Character(ord("-"))]) - - # Parse a set item. - end = parse_set_item(source, info) - if not isinstance(end, Character) or not end.positive: - # It's not a range, so return the character, hyphen and property. - return SetUnion(info, [start, Character(ord("-")), end]) - - # It _is_ a range. - if start.value > end.value: - raise error("bad character range", source.string, source.pos) - - if start.value == end.value: - return start - - return Range(start.value, end.value) - -def parse_set_item(source, info): - "Parses an item in a character set." - version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION - - if source.match("\\"): - # An escape sequence in a set. 
- return parse_escape(source, info, True) - - saved_pos = source.pos - if source.match("[:"): - # Looks like a POSIX character class. - try: - return parse_posix_class(source, info) - except ParseError: - # Not a POSIX character class. - source.pos = saved_pos - - if version == VERSION1 and source.match("["): - # It's the start of a nested set. - - # Negative set? - negate = source.match("^") - item = parse_set_union(source, info) - - if not source.match("]"): - raise error("missing ]", source.string, source.pos) - - if negate: - item = item.with_flags(positive=not item.positive) - - return item - - ch = source.get() - if not ch: - raise error("unterminated character set", source.string, source.pos) - - return Character(ord(ch)) - -def parse_posix_class(source, info): - "Parses a POSIX character class." - negate = source.match("^") - prop_name, name = parse_property_name(source) - if not source.match(":]"): - raise ParseError() - - return lookup_property(prop_name, name, not negate, source, posix=True) - -def float_to_rational(flt): - "Converts a float to a rational pair." - int_part = int(flt) - error = flt - int_part - if abs(error) < 0.0001: - return int_part, 1 - - den, num = float_to_rational(1.0 / error) - - return int_part * den + num, den - -def numeric_to_rational(numeric): - "Converts a numeric string to a rational string, if possible." - if numeric[ : 1] == "-": - sign, numeric = numeric[0], numeric[1 : ] - else: - sign = "" - - parts = numeric.split("/") - if len(parts) == 2: - num, den = float_to_rational(float(parts[0]) / float(parts[1])) - elif len(parts) == 1: - num, den = float_to_rational(float(parts[0])) - else: - raise ValueError() - - result = "%s%s/%s" % (sign, num, den) - if result.endswith("/1"): - return result[ : -2] - - return result - -def standardise_name(name): - "Standardises a property or value name." 
- try: - return numeric_to_rational("".join(name)) - except (ValueError, ZeroDivisionError): - return "".join(ch for ch in name if ch not in "_- ").upper() - -_posix_classes = set('ALNUM DIGIT PUNCT XDIGIT'.split()) - -def lookup_property(property, value, positive, source=None, posix=False): - "Looks up a property." - # Normalise the names (which may still be lists). - property = standardise_name(property) if property else None - value = standardise_name(value) - - if (property, value) == ("GENERALCATEGORY", "ASSIGNED"): - property, value, positive = "GENERALCATEGORY", "UNASSIGNED", not positive - - if posix and not property and value.upper() in _posix_classes: - value = 'POSIX' + value - - if property: - # Both the property and the value are provided. - prop = PROPERTIES.get(property) - if not prop: - if not source: - raise error("unknown property") - - raise error("unknown property", source.string, source.pos) - - prop_id, value_dict = prop - val_id = value_dict.get(value) - if val_id is None: - if not source: - raise error("unknown property value") - - raise error("unknown property value", source.string, source.pos) - - if "YES" in value_dict and val_id == 0: - positive, val_id = not positive, 1 - - return Property((prop_id << 16) | val_id, positive) - - # Only the value is provided. - # It might be the name of a GC, script or block value. - for property in ("GC", "SCRIPT", "BLOCK"): - prop_id, value_dict = PROPERTIES.get(property) - val_id = value_dict.get(value) - if val_id is not None: - return Property((prop_id << 16) | val_id, positive) - - # It might be the name of a binary property. - prop = PROPERTIES.get(value) - if prop: - prop_id, value_dict = prop - - if "YES" in value_dict: - return Property((prop_id << 16) | 1, positive) - - # It might be the name of a binary property starting with a prefix. 
- if value.startswith("IS"): - prop = PROPERTIES.get(value[2 : ]) - if prop: - prop_id, value_dict = prop - if "YES" in value_dict: - return Property((prop_id << 16) | 1, positive) - - # It might be the name of a script or block starting with a prefix. - for prefix, property in (("IS", "SCRIPT"), ("IN", "BLOCK")): - if value.startswith(prefix): - prop_id, value_dict = PROPERTIES.get(property) - val_id = value_dict.get(value[2 : ]) - if val_id is not None: - return Property((prop_id << 16) | val_id, positive) - - # Unknown property. - if not source: - raise error("unknown property") - - raise error("unknown property", source.string, source.pos) - -def _compile_replacement(source, pattern, is_unicode): - "Compiles a replacement template escape sequence." - ch = source.get() - if ch in ALPHA: - # An alphabetic escape sequence. - value = CHARACTER_ESCAPES.get(ch) - if value: - return False, [ord(value)] - - if ch in HEX_ESCAPES and (ch == "x" or is_unicode): - # A hexadecimal escape sequence. - return False, [parse_repl_hex_escape(source, HEX_ESCAPES[ch], ch)] - - if ch == "g": - # A group preference. - return True, [compile_repl_group(source, pattern)] - - if ch == "N" and is_unicode: - # A named character. - value = parse_repl_named_char(source) - if value is not None: - return False, [value] - - return False, [ord("\\"), ord(ch)] - - if isinstance(source.sep, str): - octal_mask = 0xFF - else: - octal_mask = 0x1FF - - if ch == "0": - # An octal escape sequence. - digits = ch - while len(digits) < 3: - saved_pos = source.pos - ch = source.get() - if ch not in OCT_DIGITS: - source.pos = saved_pos - break - digits += ch - - return False, [int(digits, 8) & octal_mask] - - if ch in DIGITS: - # Either an octal escape sequence (3 digits) or a group reference (max - # 2 digits). 
- digits = ch - saved_pos = source.pos - ch = source.get() - if ch in DIGITS: - digits += ch - saved_pos = source.pos - ch = source.get() - if ch and is_octal(digits + ch): - # An octal escape sequence. - return False, [int(digits + ch, 8) & octal_mask] - - # A group reference. - source.pos = saved_pos - return True, [int(digits)] - - if ch == "\\": - # An escaped backslash is a backslash. - return False, [ord("\\")] - - if not ch: - # A trailing backslash. - raise error("bad escape (end of pattern)", source.string, source.pos) - - # An escaped non-backslash is a backslash followed by the literal. - return False, [ord("\\"), ord(ch)] - -def parse_repl_hex_escape(source, expected_len, type): - "Parses a hex escape sequence in a replacement string." - digits = [] - for i in range(expected_len): - ch = source.get() - if ch not in HEX_DIGITS: - raise error("incomplete escape \\%s%s" % (type, ''.join(digits)), - source.string, source.pos) - digits.append(ch) - - return int("".join(digits), 16) - -def parse_repl_named_char(source): - "Parses a named character in a replacement string." - saved_pos = source.pos - if source.match("{"): - name = source.get_while(ALPHA | set(" ")) - - if source.match("}"): - try: - value = unicodedata.lookup(name) - return ord(value) - except KeyError: - raise error("undefined character name", source.string, - source.pos) - - source.pos = saved_pos - return None - -def compile_repl_group(source, pattern): - "Compiles a replacement template group reference." - source.expect("<") - name = parse_name(source, True, True) - - source.expect(">") - if name.isdigit(): - index = int(name) - if not 0 <= index <= pattern.groups: - raise error("invalid group reference", source.string, source.pos) - - return index - - try: - return pattern.groupindex[name] - except KeyError: - raise IndexError("unknown group") - -# The regular expression is parsed into a syntax tree. The different types of -# node are defined below. 
- -INDENT = " " -POSITIVE_OP = 0x1 -ZEROWIDTH_OP = 0x2 -FUZZY_OP = 0x4 -REVERSE_OP = 0x8 -REQUIRED_OP = 0x10 - -POS_TEXT = {False: "NON-MATCH", True: "MATCH"} -CASE_TEXT = {NOCASE: "", IGNORECASE: " SIMPLE_IGNORE_CASE", FULLCASE: "", - FULLIGNORECASE: " FULL_IGNORE_CASE"} - -def make_sequence(items): - if len(items) == 1: - return items[0] - return Sequence(items) - -# Common base class for all nodes. -class RegexBase(object): - def __init__(self): - self._key = self.__class__ - - def with_flags(self, positive=None, case_flags=None, zerowidth=None): - if positive is None: - positive = self.positive - else: - positive = bool(positive) - if case_flags is None: - case_flags = self.case_flags - else: - case_flags = CASE_FLAGS_COMBINATIONS[case_flags & CASE_FLAGS] - if zerowidth is None: - zerowidth = self.zerowidth - else: - zerowidth = bool(zerowidth) - - if (positive == self.positive and case_flags == self.case_flags and - zerowidth == self.zerowidth): - return self - - return self.rebuild(positive, case_flags, zerowidth) - - def fix_groups(self, pattern, reverse, fuzzy): - pass - - def optimise(self, info, reverse): - return self - - def pack_characters(self, info): - return self - - def remove_captures(self): - return self - - def is_atomic(self): - return True - - def can_be_affix(self): - return True - - def contains_group(self): - return False - - def get_firstset(self, reverse): - raise _FirstSetError() - - def has_simple_start(self): - return False - - def compile(self, reverse=False, fuzzy=False): - return self._compile(reverse, fuzzy) - - def dump(self, indent, reverse): - self._dump(indent, reverse) - - def is_empty(self): - return False - - def __hash__(self): - return hash(self._key) - - def __eq__(self, other): - return type(self) is type(other) and self._key == other._key - - def __ne__(self, other): - return not self.__eq__(other) - - def get_required_string(self, reverse): - return self.max_width(), None - -# Base class for zero-width nodes. 
-class ZeroWidthBase(RegexBase): - def __init__(self, positive=True): - RegexBase.__init__(self) - self.positive = bool(positive) - - self._key = self.__class__, self.positive - - def get_firstset(self, reverse): - return set([None]) - - def _compile(self, reverse, fuzzy): - flags = 0 - if self.positive: - flags |= POSITIVE_OP - if fuzzy: - flags |= FUZZY_OP - if reverse: - flags |= REVERSE_OP - return [(self._opcode, flags)] - - def _dump(self, indent, reverse): - print "%s%s %s" % (INDENT * indent, self._op_name, - POS_TEXT[self.positive]) - - def max_width(self): - return 0 - -class Any(RegexBase): - _opcode = {False: OP.ANY, True: OP.ANY_REV} - _op_name = "ANY" - - def has_simple_start(self): - return True - - def _compile(self, reverse, fuzzy): - flags = 0 - if fuzzy: - flags |= FUZZY_OP - return [(self._opcode[reverse], flags)] - - def _dump(self, indent, reverse): - print "%s%s" % (INDENT * indent, self._op_name) - - def max_width(self): - return 1 - -class AnyAll(Any): - _opcode = {False: OP.ANY_ALL, True: OP.ANY_ALL_REV} - _op_name = "ANY_ALL" - -class AnyU(Any): - _opcode = {False: OP.ANY_U, True: OP.ANY_U_REV} - _op_name = "ANY_U" - -class Atomic(RegexBase): - def __init__(self, subpattern): - RegexBase.__init__(self) - self.subpattern = subpattern - - def fix_groups(self, pattern, reverse, fuzzy): - self.subpattern.fix_groups(pattern, reverse, fuzzy) - - def optimise(self, info, reverse): - self.subpattern = self.subpattern.optimise(info, reverse) - - if self.subpattern.is_empty(): - return self.subpattern - return self - - def pack_characters(self, info): - self.subpattern = self.subpattern.pack_characters(info) - return self - - def remove_captures(self): - self.subpattern = self.subpattern.remove_captures() - return self - - def can_be_affix(self): - return self.subpattern.can_be_affix() - - def contains_group(self): - return self.subpattern.contains_group() - - def get_firstset(self, reverse): - return self.subpattern.get_firstset(reverse) - - def 
has_simple_start(self): - return self.subpattern.has_simple_start() - - def _compile(self, reverse, fuzzy): - return ([(OP.ATOMIC, )] + self.subpattern.compile(reverse, fuzzy) + - [(OP.END, )]) - - def _dump(self, indent, reverse): - print "%sATOMIC" % (INDENT * indent) - self.subpattern.dump(indent + 1, reverse) - - def is_empty(self): - return self.subpattern.is_empty() - - def __eq__(self, other): - return (type(self) is type(other) and self.subpattern == - other.subpattern) - - def max_width(self): - return self.subpattern.max_width() - - def get_required_string(self, reverse): - return self.subpattern.get_required_string(reverse) - -class Boundary(ZeroWidthBase): - _opcode = OP.BOUNDARY - _op_name = "BOUNDARY" - -class Branch(RegexBase): - def __init__(self, branches): - RegexBase.__init__(self) - self.branches = branches - - def fix_groups(self, pattern, reverse, fuzzy): - for b in self.branches: - b.fix_groups(pattern, reverse, fuzzy) - - def optimise(self, info, reverse): - # Flatten branches within branches. - branches = Branch._flatten_branches(info, reverse, self.branches) - - # Move any common prefix or suffix out of the branches. - if reverse: - suffix, branches = Branch._split_common_suffix(info, branches) - prefix = [] - else: - prefix, branches = Branch._split_common_prefix(info, branches) - suffix = [] - - # Try to reduce adjacent single-character branches to sets. - branches = Branch._reduce_to_set(info, reverse, branches) - - if len(branches) > 1: - sequence = [Branch(branches)] - - if not prefix or not suffix: - # We might be able to add a quick precheck before the branches. 
- firstset = self._add_precheck(info, reverse, branches) - - if firstset: - if reverse: - sequence.append(firstset) - else: - sequence.insert(0, firstset) - else: - sequence = branches - - return make_sequence(prefix + sequence + suffix) - - def _add_precheck(self, info, reverse, branches): - charset = set() - pos = -1 if reverse else 0 - - for branch in branches: - if type(branch) is Literal and branch.case_flags == NOCASE: - charset.add(branch.characters[pos]) - else: - return - - if not charset: - return None - - return _check_firstset(info, reverse, [Character(c) for c in charset]) - - def pack_characters(self, info): - self.branches = [b.pack_characters(info) for b in self.branches] - return self - - def remove_captures(self): - self.branches = [b.remove_captures() for b in self.branches] - return self - - def is_atomic(self): - return all(b.is_atomic() for b in self.branches) - - def can_be_affix(self): - return all(b.can_be_affix() for b in self.branches) - - def contains_group(self): - return any(b.contains_group() for b in self.branches) - - def get_firstset(self, reverse): - fs = set() - for b in self.branches: - fs |= b.get_firstset(reverse) - - return fs or set([None]) - - def _compile(self, reverse, fuzzy): - code = [(OP.BRANCH, )] - for b in self.branches: - code.extend(b.compile(reverse, fuzzy)) - code.append((OP.NEXT, )) - - code[-1] = (OP.END, ) - - return code - - def _dump(self, indent, reverse): - print "%sBRANCH" % (INDENT * indent) - self.branches[0].dump(indent + 1, reverse) - for b in self.branches[1 : ]: - print "%sOR" % (INDENT * indent) - b.dump(indent + 1, reverse) - - @staticmethod - def _flatten_branches(info, reverse, branches): - # Flatten the branches so that there aren't branches of branches. 
- new_branches = [] - for b in branches: - b = b.optimise(info, reverse) - if isinstance(b, Branch): - new_branches.extend(b.branches) - else: - new_branches.append(b) - - return new_branches - - @staticmethod - def _split_common_prefix(info, branches): - # Common leading items can be moved out of the branches. - # Get the items in the branches. - alternatives = [] - for b in branches: - if isinstance(b, Sequence): - alternatives.append(b.items) - else: - alternatives.append([b]) - - # What is the maximum possible length of the prefix? - max_count = min(len(a) for a in alternatives) - - # What is the longest common prefix? - prefix = alternatives[0] - pos = 0 - end_pos = max_count - while pos < end_pos and prefix[pos].can_be_affix() and all(a[pos] == - prefix[pos] for a in alternatives): - pos += 1 - count = pos - - if info.flags & UNICODE: - # We need to check that we're not splitting a sequence of - # characters which could form part of full case-folding. - count = pos - while count > 0 and not all(Branch._can_split(a, count) for a in - alternatives): - count -= 1 - - # No common prefix is possible. - if count == 0: - return [], branches - - # Rebuild the branches. - new_branches = [] - for a in alternatives: - new_branches.append(make_sequence(a[count : ])) - - return prefix[ : count], new_branches - - @staticmethod - def _split_common_suffix(info, branches): - # Common trailing items can be moved out of the branches. - # Get the items in the branches. - alternatives = [] - for b in branches: - if isinstance(b, Sequence): - alternatives.append(b.items) - else: - alternatives.append([b]) - - # What is the maximum possible length of the suffix? - max_count = min(len(a) for a in alternatives) - - # What is the longest common suffix? 
- suffix = alternatives[0] - pos = -1 - end_pos = -1 - max_count - while pos > end_pos and suffix[pos].can_be_affix() and all(a[pos] == - suffix[pos] for a in alternatives): - pos -= 1 - count = -1 - pos - - if info.flags & UNICODE: - # We need to check that we're not splitting a sequence of - # characters which could form part of full case-folding. - while count > 0 and not all(Branch._can_split_rev(a, count) for a - in alternatives): - count -= 1 - - # No common suffix is possible. - if count == 0: - return [], branches - - # Rebuild the branches. - new_branches = [] - for a in alternatives: - new_branches.append(make_sequence(a[ : -count])) - - return suffix[-count : ], new_branches - - @staticmethod - def _can_split(items, count): - # Check the characters either side of the proposed split. - if not Branch._is_full_case(items, count - 1): - return True - - if not Branch._is_full_case(items, count): - return True - - # Check whether a 1-1 split would be OK. - if Branch._is_folded(items[count - 1 : count + 1]): - return False - - # Check whether a 1-2 split would be OK. - if (Branch._is_full_case(items, count + 2) and - Branch._is_folded(items[count - 1 : count + 2])): - return False - - # Check whether a 2-1 split would be OK. - if (Branch._is_full_case(items, count - 2) and - Branch._is_folded(items[count - 2 : count + 1])): - return False - - return True - - @staticmethod - def _can_split_rev(items, count): - end = len(items) - - # Check the characters either side of the proposed split. - if not Branch._is_full_case(items, end - count): - return True - - if not Branch._is_full_case(items, end - count - 1): - return True - - # Check whether a 1-1 split would be OK. - if Branch._is_folded(items[end - count - 1 : end - count + 1]): - return False - - # Check whether a 1-2 split would be OK. 
- if (Branch._is_full_case(items, end - count + 2) and - Branch._is_folded(items[end - count - 1 : end - count + 2])): - return False - - # Check whether a 2-1 split would be OK. - if (Branch._is_full_case(items, end - count - 2) and - Branch._is_folded(items[end - count - 2 : end - count + 1])): - return False - - return True - - @staticmethod - def _merge_common_prefixes(info, reverse, branches): - # Branches with the same case-sensitive character prefix can be grouped - # together if they are separated only by other branches with a - # character prefix. - prefixed = defaultdict(list) - order = {} - new_branches = [] - for b in branches: - if Branch._is_simple_character(b): - # Branch starts with a simple character. - prefixed[b.value].append([b]) - order.setdefault(b.value, len(order)) - elif (isinstance(b, Sequence) and b.items and - Branch._is_simple_character(b.items[0])): - # Branch starts with a simple character. - prefixed[b.items[0].value].append(b.items) - order.setdefault(b.items[0].value, len(order)) - else: - Branch._flush_char_prefix(info, reverse, prefixed, order, - new_branches) - - new_branches.append(b) - - Branch._flush_char_prefix(info, prefixed, order, new_branches) - - return new_branches - - @staticmethod - def _is_simple_character(c): - return isinstance(c, Character) and c.positive and not c.case_flags - - @staticmethod - def _reduce_to_set(info, reverse, branches): - # Can the branches be reduced to a set? - new_branches = [] - items = set() - case_flags = NOCASE - for b in branches: - if isinstance(b, (Character, Property, SetBase)): - # Branch starts with a single character. - if b.case_flags != case_flags: - # Different case sensitivity, so flush. 
- Branch._flush_set_members(info, reverse, items, case_flags, - new_branches) - - case_flags = b.case_flags - - items.add(b.with_flags(case_flags=NOCASE)) - else: - Branch._flush_set_members(info, reverse, items, case_flags, - new_branches) - - new_branches.append(b) - - Branch._flush_set_members(info, reverse, items, case_flags, - new_branches) - - return new_branches - - @staticmethod - def _flush_char_prefix(info, reverse, prefixed, order, new_branches): - # Flush the prefixed branches. - if not prefixed: - return - - for value, branches in sorted(prefixed.items(), key=lambda pair: - order[pair[0]]): - if len(branches) == 1: - new_branches.append(make_sequence(branches[0])) - else: - subbranches = [] - optional = False - for b in branches: - if len(b) > 1: - subbranches.append(make_sequence(b[1 : ])) - elif not optional: - subbranches.append(Sequence()) - optional = True - - sequence = Sequence([Character(value), Branch(subbranches)]) - new_branches.append(sequence.optimise(info, reverse)) - - prefixed.clear() - order.clear() - - @staticmethod - def _flush_set_members(info, reverse, items, case_flags, new_branches): - # Flush the set members. - if not items: - return - - if len(items) == 1: - item = list(items)[0] - else: - item = SetUnion(info, list(items)).optimise(info, reverse) - - new_branches.append(item.with_flags(case_flags=case_flags)) - - items.clear() - - @staticmethod - def _is_full_case(items, i): - if not 0 <= i < len(items): - return False - - item = items[i] - return (isinstance(item, Character) and item.positive and - (item.case_flags & FULLIGNORECASE) == FULLIGNORECASE) - - @staticmethod - def _is_folded(items): - if len(items) < 2: - return False - - for i in items: - if (not isinstance(i, Character) or not i.positive or not - i.case_flags): - return False - - folded = u"".join(unichr(i.value) for i in items) - folded = _regex.fold_case(FULL_CASE_FOLDING, folded) - - # Get the characters which expand to multiple codepoints on folding. 
- expanding_chars = _regex.get_expand_on_folding() - - for c in expanding_chars: - if folded == _regex.fold_case(FULL_CASE_FOLDING, c): - return True - - return False - - def is_empty(self): - return all(b.is_empty() for b in self.branches) - - def __eq__(self, other): - return type(self) is type(other) and self.branches == other.branches - - def max_width(self): - return max(b.max_width() for b in self.branches) - -class CallGroup(RegexBase): - def __init__(self, info, group, position): - RegexBase.__init__(self) - self.info = info - self.group = group - self.position = position - - self._key = self.__class__, self.group - - def fix_groups(self, pattern, reverse, fuzzy): - try: - self.group = int(self.group) - except ValueError: - try: - self.group = self.info.group_index[self.group] - except KeyError: - raise error("invalid group reference", pattern, self.position) - - if not 0 <= self.group <= self.info.group_count: - raise error("unknown group", pattern, self.position) - - if self.group > 0 and self.info.open_group_count[self.group] > 1: - raise error("ambiguous group reference", pattern, self.position) - - self.info.group_calls.append((self, reverse, fuzzy)) - - self._key = self.__class__, self.group - - def remove_captures(self): - raise error("group reference not allowed", pattern, self.position) - - def _compile(self, reverse, fuzzy): - return [(OP.GROUP_CALL, self.call_ref)] - - def _dump(self, indent, reverse): - print "%sGROUP_CALL %s" % (INDENT * indent, self.group) - - def __eq__(self, other): - return type(self) is type(other) and self.group == other.group - - def max_width(self): - return UNLIMITED - -class CallRef(RegexBase): - def __init__(self, ref, parsed): - self.ref = ref - self.parsed = parsed - - def _compile(self, reverse, fuzzy): - return ([(OP.CALL_REF, self.ref)] + self.parsed._compile(reverse, - fuzzy) + [(OP.END, )]) - -class Character(RegexBase): - _opcode = {(NOCASE, False): OP.CHARACTER, (IGNORECASE, False): - OP.CHARACTER_IGN, 
(FULLCASE, False): OP.CHARACTER, (FULLIGNORECASE, - False): OP.CHARACTER_IGN, (NOCASE, True): OP.CHARACTER_REV, (IGNORECASE, - True): OP.CHARACTER_IGN_REV, (FULLCASE, True): OP.CHARACTER_REV, - (FULLIGNORECASE, True): OP.CHARACTER_IGN_REV} - - def __init__(self, value, positive=True, case_flags=NOCASE, - zerowidth=False): - RegexBase.__init__(self) - self.value = value - self.positive = bool(positive) - self.case_flags = CASE_FLAGS_COMBINATIONS[case_flags] - self.zerowidth = bool(zerowidth) - - if (self.positive and (self.case_flags & FULLIGNORECASE) == - FULLIGNORECASE): - self.folded = _regex.fold_case(FULL_CASE_FOLDING, unichr(self.value)) - else: - self.folded = unichr(self.value) - - self._key = (self.__class__, self.value, self.positive, - self.case_flags, self.zerowidth) - - def rebuild(self, positive, case_flags, zerowidth): - return Character(self.value, positive, case_flags, zerowidth) - - def optimise(self, info, reverse, in_set=False): - return self - - def get_firstset(self, reverse): - return set([self]) - - def has_simple_start(self): - return True - - def _compile(self, reverse, fuzzy): - flags = 0 - if self.positive: - flags |= POSITIVE_OP - if self.zerowidth: - flags |= ZEROWIDTH_OP - if fuzzy: - flags |= FUZZY_OP - - code = PrecompiledCode([self._opcode[self.case_flags, reverse], flags, - self.value]) - - if len(self.folded) > 1: - # The character expands on full case-folding. 
- code = Branch([code, String([ord(c) for c in self.folded], - case_flags=self.case_flags)]) - - return code.compile(reverse, fuzzy) - - def _dump(self, indent, reverse): - display = repr(unichr(self.value)).lstrip("bu") - print "%sCHARACTER %s %s%s" % (INDENT * indent, - POS_TEXT[self.positive], display, CASE_TEXT[self.case_flags]) - - def matches(self, ch): - return (ch == self.value) == self.positive - - def max_width(self): - return len(self.folded) - - def get_required_string(self, reverse): - if not self.positive: - return 1, None - - self.folded_characters = tuple(ord(c) for c in self.folded) - - return 0, self - -class Conditional(RegexBase): - def __init__(self, info, group, yes_item, no_item, position): - RegexBase.__init__(self) - self.info = info - self.group = group - self.yes_item = yes_item - self.no_item = no_item - self.position = position - - def fix_groups(self, pattern, reverse, fuzzy): - try: - self.group = int(self.group) - except ValueError: - try: - self.group = self.info.group_index[self.group] - except KeyError: - if self.group == 'DEFINE': - # 'DEFINE' is a special name unless there's a group with - # that name. 
- self.group = 0 - else: - raise error("unknown group", pattern, self.position) - - if not 0 <= self.group <= self.info.group_count: - raise error("invalid group reference", pattern, self.position) - - self.yes_item.fix_groups(pattern, reverse, fuzzy) - self.no_item.fix_groups(pattern, reverse, fuzzy) - - def optimise(self, info, reverse): - yes_item = self.yes_item.optimise(info, reverse) - no_item = self.no_item.optimise(info, reverse) - - return Conditional(info, self.group, yes_item, no_item, self.position) - - def pack_characters(self, info): - self.yes_item = self.yes_item.pack_characters(info) - self.no_item = self.no_item.pack_characters(info) - return self - - def remove_captures(self): - self.yes_item = self.yes_item.remove_captures() - self.no_item = self.no_item.remove_captures() - - def is_atomic(self): - return self.yes_item.is_atomic() and self.no_item.is_atomic() - - def can_be_affix(self): - return self.yes_item.can_be_affix() and self.no_item.can_be_affix() - - def contains_group(self): - return self.yes_item.contains_group() or self.no_item.contains_group() - - def get_firstset(self, reverse): - return (self.yes_item.get_firstset(reverse) | - self.no_item.get_firstset(reverse)) - - def _compile(self, reverse, fuzzy): - code = [(OP.GROUP_EXISTS, self.group)] - code.extend(self.yes_item.compile(reverse, fuzzy)) - add_code = self.no_item.compile(reverse, fuzzy) - if add_code: - code.append((OP.NEXT, )) - code.extend(add_code) - - code.append((OP.END, )) - - return code - - def _dump(self, indent, reverse): - print "%sGROUP_EXISTS %s" % (INDENT * indent, self.group) - self.yes_item.dump(indent + 1, reverse) - if not self.no_item.is_empty(): - print "%sOR" % (INDENT * indent) - self.no_item.dump(indent + 1, reverse) - - def is_empty(self): - return self.yes_item.is_empty() and self.no_item.is_empty() - - def __eq__(self, other): - return type(self) is type(other) and (self.group, self.yes_item, - self.no_item) == (other.group, other.yes_item, 
other.no_item) - - def max_width(self): - return max(self.yes_item.max_width(), self.no_item.max_width()) - -class DefaultBoundary(ZeroWidthBase): - _opcode = OP.DEFAULT_BOUNDARY - _op_name = "DEFAULT_BOUNDARY" - -class DefaultEndOfWord(ZeroWidthBase): - _opcode = OP.DEFAULT_END_OF_WORD - _op_name = "DEFAULT_END_OF_WORD" - -class DefaultStartOfWord(ZeroWidthBase): - _opcode = OP.DEFAULT_START_OF_WORD - _op_name = "DEFAULT_START_OF_WORD" - -class EndOfLine(ZeroWidthBase): - _opcode = OP.END_OF_LINE - _op_name = "END_OF_LINE" - -class EndOfLineU(EndOfLine): - _opcode = OP.END_OF_LINE_U - _op_name = "END_OF_LINE_U" - -class EndOfString(ZeroWidthBase): - _opcode = OP.END_OF_STRING - _op_name = "END_OF_STRING" - -class EndOfStringLine(ZeroWidthBase): - _opcode = OP.END_OF_STRING_LINE - _op_name = "END_OF_STRING_LINE" - -class EndOfStringLineU(EndOfStringLine): - _opcode = OP.END_OF_STRING_LINE_U - _op_name = "END_OF_STRING_LINE_U" - -class EndOfWord(ZeroWidthBase): - _opcode = OP.END_OF_WORD - _op_name = "END_OF_WORD" - -class Failure(ZeroWidthBase): - _op_name = "FAILURE" - - def _compile(self, reverse, fuzzy): - return [(OP.FAILURE, )] - -class Fuzzy(RegexBase): - def __init__(self, subpattern, constraints=None): - RegexBase.__init__(self) - if constraints is None: - constraints = {} - self.subpattern = subpattern - self.constraints = constraints - - # If an error type is mentioned in the cost equation, then its maximum - # defaults to unlimited. - if "cost" in constraints: - for e in "dis": - if e in constraints["cost"]: - constraints.setdefault(e, (0, None)) - - # If any error type is mentioned, then all the error maxima default to - # 0, otherwise they default to unlimited. - if set(constraints) & set("dis"): - for e in "dis": - constraints.setdefault(e, (0, 0)) - else: - for e in "dis": - constraints.setdefault(e, (0, None)) - - # The maximum of the generic error type defaults to unlimited. 
- constraints.setdefault("e", (0, None)) - - # The cost equation defaults to equal costs. Also, the cost of any - # error type not mentioned in the cost equation defaults to 0. - if "cost" in constraints: - for e in "dis": - constraints["cost"].setdefault(e, 0) - else: - constraints["cost"] = {"d": 1, "i": 1, "s": 1, "max": - constraints["e"][1]} - - def fix_groups(self, pattern, reverse, fuzzy): - self.subpattern.fix_groups(pattern, reverse, True) - - def pack_characters(self, info): - self.subpattern = self.subpattern.pack_characters(info) - return self - - def remove_captures(self): - self.subpattern = self.subpattern.remove_captures() - return self - - def is_atomic(self): - return self.subpattern.is_atomic() - - def contains_group(self): - return self.subpattern.contains_group() - - def _compile(self, reverse, fuzzy): - # The individual limits. - arguments = [] - for e in "dise": - v = self.constraints[e] - arguments.append(v[0]) - arguments.append(UNLIMITED if v[1] is None else v[1]) - - # The coeffs of the cost equation. - for e in "dis": - arguments.append(self.constraints["cost"][e]) - - # The maximum of the cost equation. 
- v = self.constraints["cost"]["max"] - arguments.append(UNLIMITED if v is None else v) - - flags = 0 - if reverse: - flags |= REVERSE_OP - - return ([(OP.FUZZY, flags) + tuple(arguments)] + - self.subpattern.compile(reverse, True) + [(OP.END,)]) - - def _dump(self, indent, reverse): - constraints = self._constraints_to_string() - if constraints: - constraints = " " + constraints - print "%sFUZZY%s" % (INDENT * indent, constraints) - self.subpattern.dump(indent + 1, reverse) - - def is_empty(self): - return self.subpattern.is_empty() - - def __eq__(self, other): - return (type(self) is type(other) and self.subpattern == - other.subpattern) - - def max_width(self): - return UNLIMITED - - def _constraints_to_string(self): - constraints = [] - - for name in "ids": - min, max = self.constraints[name] - if max == 0: - continue - - con = "" - - if min > 0: - con = "%s<=" % min - - con += name - - if max is not None: - con += "<=%s" % max - - constraints.append(con) - - cost = [] - for name in "ids": - coeff = self.constraints["cost"][name] - if coeff > 0: - cost.append("%s%s" % (coeff, name)) - - limit = self.constraints["cost"]["max"] - if limit is not None and limit > 0: - cost = "%s<=%s" % ("+".join(cost), limit) - constraints.append(cost) - - return ",".join(constraints) - -class Grapheme(RegexBase): - def _compile(self, reverse, fuzzy): - # Match at least 1 character until a grapheme boundary is reached. Note - # that this is the same whether matching forwards or backwards. 
- grapheme_matcher = Atomic(Sequence([LazyRepeat(AnyAll(), 1, None), - GraphemeBoundary()])) - - return grapheme_matcher.compile(reverse, fuzzy) - - def _dump(self, indent, reverse): - print "%sGRAPHEME" % (INDENT * indent) - - def max_width(self): - return UNLIMITED - -class GraphemeBoundary: - def compile(self, reverse, fuzzy): - return [(OP.GRAPHEME_BOUNDARY, 1)] - -class GreedyRepeat(RegexBase): - _opcode = OP.GREEDY_REPEAT - _op_name = "GREEDY_REPEAT" - - def __init__(self, subpattern, min_count, max_count): - RegexBase.__init__(self) - self.subpattern = subpattern - self.min_count = min_count - self.max_count = max_count - - def fix_groups(self, pattern, reverse, fuzzy): - self.subpattern.fix_groups(pattern, reverse, fuzzy) - - def optimise(self, info, reverse): - subpattern = self.subpattern.optimise(info, reverse) - - return type(self)(subpattern, self.min_count, self.max_count) - - def pack_characters(self, info): - self.subpattern = self.subpattern.pack_characters(info) - return self - - def remove_captures(self): - self.subpattern = self.subpattern.remove_captures() - return self - - def is_atomic(self): - return self.min_count == self.max_count and self.subpattern.is_atomic() - - def can_be_affix(self): - return False - - def contains_group(self): - return self.subpattern.contains_group() - - def get_firstset(self, reverse): - fs = self.subpattern.get_firstset(reverse) - if self.min_count == 0: - fs.add(None) - - return fs - - def _compile(self, reverse, fuzzy): - repeat = [self._opcode, self.min_count] - if self.max_count is None: - repeat.append(UNLIMITED) - else: - repeat.append(self.max_count) - - subpattern = self.subpattern.compile(reverse, fuzzy) - if not subpattern: - return [] - - return ([tuple(repeat)] + subpattern + [(OP.END, )]) - - def _dump(self, indent, reverse): - if self.max_count is None: - limit = "INF" - else: - limit = self.max_count - print "%s%s %s %s" % (INDENT * indent, self._op_name, self.min_count, - limit) - - 
self.subpattern.dump(indent + 1, reverse) - - def is_empty(self): - return self.subpattern.is_empty() - - def __eq__(self, other): - return type(self) is type(other) and (self.subpattern, self.min_count, - self.max_count) == (other.subpattern, other.min_count, - other.max_count) - - def max_width(self): - if self.max_count is None: - return UNLIMITED - - return self.subpattern.max_width() * self.max_count - - def get_required_string(self, reverse): - max_count = UNLIMITED if self.max_count is None else self.max_count - if self.min_count == 0: - w = self.subpattern.max_width() * max_count - return min(w, UNLIMITED), None - - ofs, req = self.subpattern.get_required_string(reverse) - if req: - return ofs, req - - w = self.subpattern.max_width() * max_count - return min(w, UNLIMITED), None - -class Group(RegexBase): - def __init__(self, info, group, subpattern): - RegexBase.__init__(self) - self.info = info - self.group = group - self.subpattern = subpattern - - self.call_ref = None - - def fix_groups(self, pattern, reverse, fuzzy): - self.info.defined_groups[self.group] = (self, reverse, fuzzy) - self.subpattern.fix_groups(pattern, reverse, fuzzy) - - def optimise(self, info, reverse): - subpattern = self.subpattern.optimise(info, reverse) - - return Group(self.info, self.group, subpattern) - - def pack_characters(self, info): - self.subpattern = self.subpattern.pack_characters(info) - return self - - def remove_captures(self): - return self.subpattern.remove_captures() - - def is_atomic(self): - return self.subpattern.is_atomic() - - def can_be_affix(self): - return False - - def contains_group(self): - return True - - def get_firstset(self, reverse): - return self.subpattern.get_firstset(reverse) - - def has_simple_start(self): - return self.subpattern.has_simple_start() - - def _compile(self, reverse, fuzzy): - code = [] - - key = self.group, reverse, fuzzy - ref = self.info.call_refs.get(key) - if ref is not None: - code += [(OP.CALL_REF, ref)] - - public_group = 
private_group = self.group - if private_group < 0: - public_group = self.info.private_groups[private_group] - private_group = self.info.group_count - private_group - - code += ([(OP.GROUP, private_group, public_group)] + - self.subpattern.compile(reverse, fuzzy) + [(OP.END, )]) - - if ref is not None: - code += [(OP.END, )] - - return code - - def _dump(self, indent, reverse): - group = self.group - if group < 0: - group = private_groups[group] - print "%sGROUP %s" % (INDENT * indent, group) - self.subpattern.dump(indent + 1, reverse) - - def __eq__(self, other): - return (type(self) is type(other) and (self.group, self.subpattern) == - (other.group, other.subpattern)) - - def max_width(self): - return self.subpattern.max_width() - - def get_required_string(self, reverse): - return self.subpattern.get_required_string(reverse) - -class Keep(ZeroWidthBase): - _opcode = OP.KEEP - _op_name = "KEEP" - -class LazyRepeat(GreedyRepeat): - _opcode = OP.LAZY_REPEAT - _op_name = "LAZY_REPEAT" - -class LookAround(RegexBase): - _dir_text = {False: "AHEAD", True: "BEHIND"} - - def __init__(self, behind, positive, subpattern): - RegexBase.__init__(self) - self.behind = bool(behind) - self.positive = bool(positive) - self.subpattern = subpattern - - def fix_groups(self, pattern, reverse, fuzzy): - self.subpattern.fix_groups(pattern, self.behind, fuzzy) - - def optimise(self, info, reverse): - subpattern = self.subpattern.optimise(info, self.behind) - if self.positive and subpattern.is_empty(): - return subpattern - - return LookAround(self.behind, self.positive, subpattern) - - def pack_characters(self, info): - self.subpattern = self.subpattern.pack_characters(info) - return self - - def remove_captures(self): - return self.subpattern.remove_captures() - - def is_atomic(self): - return self.subpattern.is_atomic() - - def can_be_affix(self): - return self.subpattern.can_be_affix() - - def contains_group(self): - return self.subpattern.contains_group() - - def _compile(self, 
reverse, fuzzy): - return ([(OP.LOOKAROUND, int(self.positive), int(not self.behind))] + - self.subpattern.compile(self.behind) + [(OP.END, )]) - - def _dump(self, indent, reverse): - print "%sLOOK%s %s" % (INDENT * indent, self._dir_text[self.behind], - POS_TEXT[self.positive]) - self.subpattern.dump(indent + 1, self.behind) - - def is_empty(self): - return self.positive and self.subpattern.is_empty() - - def __eq__(self, other): - return type(self) is type(other) and (self.behind, self.positive, - self.subpattern) == (other.behind, other.positive, other.subpattern) - - def max_width(self): - return 0 - -class LookAroundConditional(RegexBase): - _dir_text = {False: "AHEAD", True: "BEHIND"} - - def __init__(self, behind, positive, subpattern, yes_item, no_item): - RegexBase.__init__(self) - self.behind = bool(behind) - self.positive = bool(positive) - self.subpattern = subpattern - self.yes_item = yes_item - self.no_item = no_item - - def fix_groups(self, pattern, reverse, fuzzy): - self.subpattern.fix_groups(pattern, reverse, fuzzy) - self.yes_item.fix_groups(pattern, reverse, fuzzy) - self.no_item.fix_groups(pattern, reverse, fuzzy) - - def optimise(self, info, reverse): - subpattern = self.subpattern.optimise(info, self.behind) - yes_item = self.yes_item.optimise(info, self.behind) - no_item = self.no_item.optimise(info, self.behind) - - return LookAroundConditional(self.behind, self.positive, subpattern, - yes_item, no_item) - - def pack_characters(self, info): - self.subpattern = self.subpattern.pack_characters(info) - self.yes_item = self.yes_item.pack_characters(info) - self.no_item = self.no_item.pack_characters(info) - return self - - def remove_captures(self): - self.subpattern = self.subpattern.remove_captures() - self.yes_item = self.yes_item.remove_captures() - self.no_item = self.no_item.remove_captures() - - def is_atomic(self): - return (self.subpattern.is_atomic() and self.yes_item.is_atomic() and - self.no_item.is_atomic()) - - def 
can_be_affix(self): - return (self.subpattern.can_be_affix() and self.yes_item.can_be_affix() - and self.no_item.can_be_affix()) - - def contains_group(self): - return (self.subpattern.contains_group() or - self.yes_item.contains_group() or self.no_item.contains_group()) - - def get_firstset(self, reverse): - return (self.subpattern.get_firstset(reverse) | - self.no_item.get_firstset(reverse)) - - def _compile(self, reverse, fuzzy): - code = [(OP.CONDITIONAL, int(self.positive), int(not self.behind))] - code.extend(self.subpattern.compile(self.behind, fuzzy)) - code.append((OP.NEXT, )) - code.extend(self.yes_item.compile(reverse, fuzzy)) - add_code = self.no_item.compile(reverse, fuzzy) - if add_code: - code.append((OP.NEXT, )) - code.extend(add_code) - - code.append((OP.END, )) - - return code - - def _dump(self, indent, reverse): - print("%sCONDITIONAL %s %s" % (INDENT * indent, - self._dir_text[self.behind], POS_TEXT[self.positive])) - self.subpattern.dump(indent + 1, self.behind) - print("%sEITHER" % (INDENT * indent)) - self.yes_item.dump(indent + 1, reverse) - if not self.no_item.is_empty(): - print("%sOR" % (INDENT * indent)) - self.no_item.dump(indent + 1, reverse) - - def is_empty(self): - return (self.subpattern.is_empty() and self.yes_item.is_empty() or - self.no_item.is_empty()) - - def __eq__(self, other): - return type(self) is type(other) and (self.subpattern, self.yes_item, - self.no_item) == (other.subpattern, other.yes_item, other.no_item) - - def max_width(self): - return max(self.yes_item.max_width(), self.no_item.max_width()) - - def get_required_string(self, reverse): - return self.max_width(), None - -class PrecompiledCode(RegexBase): - def __init__(self, code): - self.code = code - - def _compile(self, reverse, fuzzy): - return [tuple(self.code)] - -class Property(RegexBase): - _opcode = {(NOCASE, False): OP.PROPERTY, (IGNORECASE, False): - OP.PROPERTY_IGN, (FULLCASE, False): OP.PROPERTY, (FULLIGNORECASE, False): - OP.PROPERTY_IGN, (NOCASE, 
True): OP.PROPERTY_REV, (IGNORECASE, True): - OP.PROPERTY_IGN_REV, (FULLCASE, True): OP.PROPERTY_REV, (FULLIGNORECASE, - True): OP.PROPERTY_IGN_REV} - - def __init__(self, value, positive=True, case_flags=NOCASE, - zerowidth=False): - RegexBase.__init__(self) - self.value = value - self.positive = bool(positive) - self.case_flags = CASE_FLAGS_COMBINATIONS[case_flags] - self.zerowidth = bool(zerowidth) - - self._key = (self.__class__, self.value, self.positive, - self.case_flags, self.zerowidth) - - def rebuild(self, positive, case_flags, zerowidth): - return Property(self.value, positive, case_flags, zerowidth) - - def optimise(self, info, reverse, in_set=False): - return self - - def get_firstset(self, reverse): - return set([self]) - - def has_simple_start(self): - return True - - def _compile(self, reverse, fuzzy): - flags = 0 - if self.positive: - flags |= POSITIVE_OP - if self.zerowidth: - flags |= ZEROWIDTH_OP - if fuzzy: - flags |= FUZZY_OP - return [(self._opcode[self.case_flags, reverse], flags, self.value)] - - def _dump(self, indent, reverse): - prop = PROPERTY_NAMES[self.value >> 16] - name, value = prop[0], prop[1][self.value & 0xFFFF] - print "%sPROPERTY %s %s:%s%s" % (INDENT * indent, - POS_TEXT[self.positive], name, value, CASE_TEXT[self.case_flags]) - - def matches(self, ch): - return _regex.has_property_value(self.value, ch) == self.positive - - def max_width(self): - return 1 - -class Prune(ZeroWidthBase): - _op_name = "PRUNE" - - def _compile(self, reverse, fuzzy): - return [(OP.PRUNE, )] - -class Range(RegexBase): - _opcode = {(NOCASE, False): OP.RANGE, (IGNORECASE, False): OP.RANGE_IGN, - (FULLCASE, False): OP.RANGE, (FULLIGNORECASE, False): OP.RANGE_IGN, - (NOCASE, True): OP.RANGE_REV, (IGNORECASE, True): OP.RANGE_IGN_REV, - (FULLCASE, True): OP.RANGE_REV, (FULLIGNORECASE, True): OP.RANGE_IGN_REV} - _op_name = "RANGE" - - def __init__(self, lower, upper, positive=True, case_flags=NOCASE, - zerowidth=False): - RegexBase.__init__(self) - 
self.lower = lower - self.upper = upper - self.positive = bool(positive) - self.case_flags = CASE_FLAGS_COMBINATIONS[case_flags] - self.zerowidth = bool(zerowidth) - - self._key = (self.__class__, self.lower, self.upper, self.positive, - self.case_flags, self.zerowidth) - - def rebuild(self, positive, case_flags, zerowidth): - return Range(self.lower, self.upper, positive, case_flags, zerowidth) - - def optimise(self, info, reverse, in_set=False): - # Is the range case-sensitive? - if not self.positive or not (self.case_flags & IGNORECASE) or in_set: - return self - - # Is full case-folding possible? - if (not (info.flags & UNICODE) or (self.case_flags & FULLIGNORECASE) != - FULLIGNORECASE): - return self - - # Get the characters which expand to multiple codepoints on folding. - expanding_chars = _regex.get_expand_on_folding() - - # Get the folded characters in the range. - items = [] - for ch in expanding_chars: - if self.lower <= ord(ch) <= self.upper: - folded = _regex.fold_case(FULL_CASE_FOLDING, ch) - items.append(String([ord(c) for c in folded], - case_flags=self.case_flags)) - - if not items: - # We can fall back to simple case-folding. - return self - - if len(items) < self.upper - self.lower + 1: - # Not all the characters are covered by the full case-folding. 
- items.insert(0, self) - - return Branch(items) - - def _compile(self, reverse, fuzzy): - flags = 0 - if self.positive: - flags |= POSITIVE_OP - if self.zerowidth: - flags |= ZEROWIDTH_OP - if fuzzy: - flags |= FUZZY_OP - return [(self._opcode[self.case_flags, reverse], flags, self.lower, - self.upper)] - - def _dump(self, indent, reverse): - display_lower = repr(unichr(self.lower)).lstrip("bu") - display_upper = repr(unichr(self.upper)).lstrip("bu") - print "%sRANGE %s %s %s%s" % (INDENT * indent, POS_TEXT[self.positive], - display_lower, display_upper, CASE_TEXT[self.case_flags]) - - def matches(self, ch): - return (self.lower <= ch <= self.upper) == self.positive - - def max_width(self): - return 1 - -class RefGroup(RegexBase): - _opcode = {(NOCASE, False): OP.REF_GROUP, (IGNORECASE, False): - OP.REF_GROUP_IGN, (FULLCASE, False): OP.REF_GROUP, (FULLIGNORECASE, - False): OP.REF_GROUP_FLD, (NOCASE, True): OP.REF_GROUP_REV, (IGNORECASE, - True): OP.REF_GROUP_IGN_REV, (FULLCASE, True): OP.REF_GROUP_REV, - (FULLIGNORECASE, True): OP.REF_GROUP_FLD_REV} - - def __init__(self, info, group, position, case_flags=NOCASE): - RegexBase.__init__(self) - self.info = info - self.group = group - self.position = position - self.case_flags = CASE_FLAGS_COMBINATIONS[case_flags] - - self._key = self.__class__, self.group, self.case_flags - - def fix_groups(self, pattern, reverse, fuzzy): - try: - self.group = int(self.group) - except ValueError: - try: - self.group = self.info.group_index[self.group] - except KeyError: - raise error("unknown group", pattern, self.position) - - if not 1 <= self.group <= self.info.group_count: - raise error("invalid group reference", pattern, self.position) - - self._key = self.__class__, self.group, self.case_flags - - def remove_captures(self): - raise error("group reference not allowed", pattern, self.position) - - def _compile(self, reverse, fuzzy): - flags = 0 - if fuzzy: - flags |= FUZZY_OP - return [(self._opcode[self.case_flags, reverse], 
flags, self.group)] - - def _dump(self, indent, reverse): - print "%sREF_GROUP %s%s" % (INDENT * indent, self.group, - CASE_TEXT[self.case_flags]) - - def max_width(self): - return UNLIMITED - -class SearchAnchor(ZeroWidthBase): - _opcode = OP.SEARCH_ANCHOR - _op_name = "SEARCH_ANCHOR" - -class Sequence(RegexBase): - def __init__(self, items=None): - RegexBase.__init__(self) - if items is None: - items = [] - - self.items = items - - def fix_groups(self, pattern, reverse, fuzzy): - for s in self.items: - s.fix_groups(pattern, reverse, fuzzy) - - def optimise(self, info, reverse): - # Flatten the sequences. - items = [] - for s in self.items: - s = s.optimise(info, reverse) - if isinstance(s, Sequence): - items.extend(s.items) - else: - items.append(s) - - return make_sequence(items) - - def pack_characters(self, info): - "Packs sequences of characters into strings." - items = [] - characters = [] - case_flags = NOCASE - for s in self.items: - if type(s) is Character and s.positive and not s.zerowidth: - if s.case_flags != case_flags: - # Different case sensitivity, so flush, unless neither the - # previous nor the new character are cased. - if s.case_flags or is_cased(info, s.value): - Sequence._flush_characters(info, characters, - case_flags, items) - - case_flags = s.case_flags - - characters.append(s.value) - elif type(s) is String or type(s) is Literal: - if s.case_flags != case_flags: - # Different case sensitivity, so flush, unless the neither - # the previous nor the new string are cased. 
- if s.case_flags or any(is_cased(info, c) for c in - characters): - Sequence._flush_characters(info, characters, - case_flags, items) - - case_flags = s.case_flags - - characters.extend(s.characters) - else: - Sequence._flush_characters(info, characters, case_flags, items) - - items.append(s.pack_characters(info)) - - Sequence._flush_characters(info, characters, case_flags, items) - - return make_sequence(items) - - def remove_captures(self): - self.items = [s.remove_captures() for s in self.items] - return self - - def is_atomic(self): - return all(s.is_atomic() for s in self.items) - - def can_be_affix(self): - return False - - def contains_group(self): - return any(s.contains_group() for s in self.items) - - def get_firstset(self, reverse): - fs = set() - items = self.items - if reverse: - items.reverse() - for s in items: - fs |= s.get_firstset(reverse) - if None not in fs: - return fs - fs.discard(None) - - return fs | set([None]) - - def has_simple_start(self): - return bool(self.items) and self.items[0].has_simple_start() - - def _compile(self, reverse, fuzzy): - seq = self.items - if reverse: - seq = seq[::-1] - - code = [] - for s in seq: - code.extend(s.compile(reverse, fuzzy)) - - return code - - def _dump(self, indent, reverse): - for s in self.items: - s.dump(indent, reverse) - - @staticmethod - def _flush_characters(info, characters, case_flags, items): - if not characters: - return - - # Disregard case_flags if all of the characters are case-less. 
- if case_flags & IGNORECASE: - if not any(is_cased(info, c) for c in characters): - case_flags = NOCASE - - if len(characters) == 1: - items.append(Character(characters[0], case_flags=case_flags)) - else: - items.append(String(characters, case_flags=case_flags)) - - characters[:] = [] - - def is_empty(self): - return all(i.is_empty() for i in self.items) - - def __eq__(self, other): - return type(self) is type(other) and self.items == other.items - - def max_width(self): - return sum(s.max_width() for s in self.items) - - def get_required_string(self, reverse): - seq = self.items - if reverse: - seq = seq[::-1] - - offset = 0 - - for s in seq: - ofs, req = s.get_required_string(reverse) - offset += ofs - if req: - return offset, req - - return offset, None - -class SetBase(RegexBase): - def __init__(self, info, items, positive=True, case_flags=NOCASE, - zerowidth=False): - RegexBase.__init__(self) - self.info = info - self.items = tuple(items) - self.positive = bool(positive) - self.case_flags = CASE_FLAGS_COMBINATIONS[case_flags] - self.zerowidth = bool(zerowidth) - - self.char_width = 1 - - self._key = (self.__class__, self.items, self.positive, - self.case_flags, self.zerowidth) - - def rebuild(self, positive, case_flags, zerowidth): - return type(self)(self.info, self.items, positive, case_flags, - zerowidth).optimise(self.info, False) - - def get_firstset(self, reverse): - return set([self]) - - def has_simple_start(self): - return True - - def _compile(self, reverse, fuzzy): - flags = 0 - if self.positive: - flags |= POSITIVE_OP - if self.zerowidth: - flags |= ZEROWIDTH_OP - if fuzzy: - flags |= FUZZY_OP - code = [(self._opcode[self.case_flags, reverse], flags)] - for m in self.items: - code.extend(m.compile()) - - code.append((OP.END, )) - - return code - - def _dump(self, indent, reverse): - print "%s%s %s%s" % (INDENT * indent, self._op_name, - POS_TEXT[self.positive], CASE_TEXT[self.case_flags]) - for i in self.items: - i.dump(indent + 1, reverse) - - 
def _handle_case_folding(self, info, in_set): - # Is the set case-sensitive? - if not self.positive or not (self.case_flags & IGNORECASE) or in_set: - return self - - # Is full case-folding possible? - if (not (self.info.flags & UNICODE) or (self.case_flags & - FULLIGNORECASE) != FULLIGNORECASE): - return self - - # Get the characters which expand to multiple codepoints on folding. - expanding_chars = _regex.get_expand_on_folding() - - # Get the folded characters in the set. - items = [] - seen = set() - for ch in expanding_chars: - if self.matches(ord(ch)): - folded = _regex.fold_case(FULL_CASE_FOLDING, ch) - if folded not in seen: - items.append(String([ord(c) for c in folded], - case_flags=self.case_flags)) - seen.add(folded) - - if not items: - # We can fall back to simple case-folding. - return self - - return Branch([self] + items) - - def max_width(self): - # Is the set case-sensitive? - if not self.positive or not (self.case_flags & IGNORECASE): - return 1 - - # Is full case-folding possible? - if (not (self.info.flags & UNICODE) or (self.case_flags & - FULLIGNORECASE) != FULLIGNORECASE): - return 1 - - # Get the characters which expand to multiple codepoints on folding. - expanding_chars = _regex.get_expand_on_folding() - - # Get the folded characters in the set. 
- seen = set() - for ch in expanding_chars: - if self.matches(ord(ch)): - folded = _regex.fold_case(FULL_CASE_FOLDING, ch) - seen.add(folded) - - if not seen: - return 1 - - return max(len(folded) for folded in seen) - -class SetDiff(SetBase): - _opcode = {(NOCASE, False): OP.SET_DIFF, (IGNORECASE, False): - OP.SET_DIFF_IGN, (FULLCASE, False): OP.SET_DIFF, (FULLIGNORECASE, False): - OP.SET_DIFF_IGN, (NOCASE, True): OP.SET_DIFF_REV, (IGNORECASE, True): - OP.SET_DIFF_IGN_REV, (FULLCASE, True): OP.SET_DIFF_REV, (FULLIGNORECASE, - True): OP.SET_DIFF_IGN_REV} - _op_name = "SET_DIFF" - - def optimise(self, info, reverse, in_set=False): - items = self.items - if len(items) > 2: - items = [items[0], SetUnion(info, items[1 : ])] - - if len(items) == 1: - return items[0].with_flags(case_flags=self.case_flags, - zerowidth=self.zerowidth).optimise(info, reverse, in_set) - - self.items = tuple(m.optimise(info, reverse, in_set=True) for m in - items) - - return self._handle_case_folding(info, in_set) - - def matches(self, ch): - m = self.items[0].matches(ch) and not self.items[1].matches(ch) - return m == self.positive - -class SetInter(SetBase): - _opcode = {(NOCASE, False): OP.SET_INTER, (IGNORECASE, False): - OP.SET_INTER_IGN, (FULLCASE, False): OP.SET_INTER, (FULLIGNORECASE, - False): OP.SET_INTER_IGN, (NOCASE, True): OP.SET_INTER_REV, (IGNORECASE, - True): OP.SET_INTER_IGN_REV, (FULLCASE, True): OP.SET_INTER_REV, - (FULLIGNORECASE, True): OP.SET_INTER_IGN_REV} - _op_name = "SET_INTER" - - def optimise(self, info, reverse, in_set=False): - items = [] - for m in self.items: - m = m.optimise(info, reverse, in_set=True) - if isinstance(m, SetInter) and m.positive: - # Intersection in intersection. 
- items.extend(m.items) - else: - items.append(m) - - if len(items) == 1: - return items[0].with_flags(case_flags=self.case_flags, - zerowidth=self.zerowidth).optimise(info, reverse, in_set) - - self.items = tuple(items) - - return self._handle_case_folding(info, in_set) - - def matches(self, ch): - m = all(i.matches(ch) for i in self.items) - return m == self.positive - -class SetSymDiff(SetBase): - _opcode = {(NOCASE, False): OP.SET_SYM_DIFF, (IGNORECASE, False): - OP.SET_SYM_DIFF_IGN, (FULLCASE, False): OP.SET_SYM_DIFF, (FULLIGNORECASE, - False): OP.SET_SYM_DIFF_IGN, (NOCASE, True): OP.SET_SYM_DIFF_REV, - (IGNORECASE, True): OP.SET_SYM_DIFF_IGN_REV, (FULLCASE, True): - OP.SET_SYM_DIFF_REV, (FULLIGNORECASE, True): OP.SET_SYM_DIFF_IGN_REV} - _op_name = "SET_SYM_DIFF" - - def optimise(self, info, reverse, in_set=False): - items = [] - for m in self.items: - m = m.optimise(info, reverse, in_set=True) - if isinstance(m, SetSymDiff) and m.positive: - # Symmetric difference in symmetric difference. - items.extend(m.items) - else: - items.append(m) - - if len(items) == 1: - return items[0].with_flags(case_flags=self.case_flags, - zerowidth=self.zerowidth).optimise(info, reverse, in_set) - - self.items = tuple(items) - - return self._handle_case_folding(info, in_set) - - def matches(self, ch): - m = False - for i in self.items: - m = m != i.matches(ch) - - return m == self.positive - -class SetUnion(SetBase): - _opcode = {(NOCASE, False): OP.SET_UNION, (IGNORECASE, False): - OP.SET_UNION_IGN, (FULLCASE, False): OP.SET_UNION, (FULLIGNORECASE, - False): OP.SET_UNION_IGN, (NOCASE, True): OP.SET_UNION_REV, (IGNORECASE, - True): OP.SET_UNION_IGN_REV, (FULLCASE, True): OP.SET_UNION_REV, - (FULLIGNORECASE, True): OP.SET_UNION_IGN_REV} - _op_name = "SET_UNION" - - def optimise(self, info, reverse, in_set=False): - items = [] - for m in self.items: - m = m.optimise(info, reverse, in_set=True) - if isinstance(m, SetUnion) and m.positive: - # Union in union. 
- items.extend(m.items) - else: - items.append(m) - - if len(items) == 1: - i = items[0] - return i.with_flags(positive=i.positive == self.positive, - case_flags=self.case_flags, - zerowidth=self.zerowidth).optimise(info, reverse, in_set) - - self.items = tuple(items) - - return self._handle_case_folding(info, in_set) - - def _compile(self, reverse, fuzzy): - flags = 0 - if self.positive: - flags |= POSITIVE_OP - if self.zerowidth: - flags |= ZEROWIDTH_OP - if fuzzy: - flags |= FUZZY_OP - - characters, others = defaultdict(list), [] - for m in self.items: - if isinstance(m, Character): - characters[m.positive].append(m.value) - else: - others.append(m) - - code = [(self._opcode[self.case_flags, reverse], flags)] - - for positive, values in characters.items(): - flags = 0 - if positive: - flags |= POSITIVE_OP - if len(values) == 1: - code.append((OP.CHARACTER, flags, values[0])) - else: - code.append((OP.STRING, flags, len(values)) + tuple(values)) - - for m in others: - code.extend(m.compile()) - - code.append((OP.END, )) - - return code - - def matches(self, ch): - m = any(i.matches(ch) for i in self.items) - return m == self.positive - -class Skip(ZeroWidthBase): - _op_name = "SKIP" - _opcode = OP.SKIP - -class StartOfLine(ZeroWidthBase): - _opcode = OP.START_OF_LINE - _op_name = "START_OF_LINE" - -class StartOfLineU(StartOfLine): - _opcode = OP.START_OF_LINE_U - _op_name = "START_OF_LINE_U" - -class StartOfString(ZeroWidthBase): - _opcode = OP.START_OF_STRING - _op_name = "START_OF_STRING" - -class StartOfWord(ZeroWidthBase): - _opcode = OP.START_OF_WORD - _op_name = "START_OF_WORD" - -class String(RegexBase): - _opcode = {(NOCASE, False): OP.STRING, (IGNORECASE, False): OP.STRING_IGN, - (FULLCASE, False): OP.STRING, (FULLIGNORECASE, False): OP.STRING_FLD, - (NOCASE, True): OP.STRING_REV, (IGNORECASE, True): OP.STRING_IGN_REV, - (FULLCASE, True): OP.STRING_REV, (FULLIGNORECASE, True): - OP.STRING_FLD_REV} - - def __init__(self, characters, case_flags=NOCASE): - 
self.characters = tuple(characters) - self.case_flags = CASE_FLAGS_COMBINATIONS[case_flags] - - if (self.case_flags & FULLIGNORECASE) == FULLIGNORECASE: - folded_characters = [] - for char in self.characters: - folded = _regex.fold_case(FULL_CASE_FOLDING, unichr(char)) - folded_characters.extend(ord(c) for c in folded) - else: - folded_characters = self.characters - - self.folded_characters = tuple(folded_characters) - self.required = False - - self._key = self.__class__, self.characters, self.case_flags - - def get_firstset(self, reverse): - if reverse: - pos = -1 - else: - pos = 0 - return set([Character(self.characters[pos], - case_flags=self.case_flags)]) - - def has_simple_start(self): - return True - - def _compile(self, reverse, fuzzy): - flags = 0 - if fuzzy: - flags |= FUZZY_OP - if self.required: - flags |= REQUIRED_OP - return [(self._opcode[self.case_flags, reverse], flags, - len(self.folded_characters)) + self.folded_characters] - - def _dump(self, indent, reverse): - display = repr("".join(unichr(c) for c in self.characters)).lstrip("bu") - print "%sSTRING %s%s" % (INDENT * indent, display, - CASE_TEXT[self.case_flags]) - - def max_width(self): - return len(self.folded_characters) - - def get_required_string(self, reverse): - return 0, self - -class Literal(String): - def _dump(self, indent, reverse): - for c in self.characters: - display = repr(unichr(c)).lstrip("bu") - print "%sCHARACTER MATCH %s%s" % (INDENT * indent, display, - CASE_TEXT[self.case_flags]) - -class StringSet(RegexBase): - _opcode = {(NOCASE, False): OP.STRING_SET, (IGNORECASE, False): - OP.STRING_SET_IGN, (FULLCASE, False): OP.STRING_SET, (FULLIGNORECASE, - False): OP.STRING_SET_FLD, (NOCASE, True): OP.STRING_SET_REV, - (IGNORECASE, True): OP.STRING_SET_IGN_REV, (FULLCASE, True): - OP.STRING_SET_REV, (FULLIGNORECASE, True): OP.STRING_SET_FLD_REV} - - def __init__(self, info, name, case_flags=NOCASE): - self.info = info - self.name = name - self.case_flags = 
CASE_FLAGS_COMBINATIONS[case_flags] - - self._key = self.__class__, self.name, self.case_flags - - self.set_key = (name, self.case_flags) - if self.set_key not in info.named_lists_used: - info.named_lists_used[self.set_key] = len(info.named_lists_used) - - def _compile(self, reverse, fuzzy): - index = self.info.named_lists_used[self.set_key] - items = self.info.kwargs[self.name] - - case_flags = self.case_flags - - if not items: - return [] - - encoding = self.info.flags & _ALL_ENCODINGS - fold_flags = encoding | case_flags - - if fuzzy: - choices = [self._folded(fold_flags, i) for i in items] - - # Sort from longest to shortest. - choices.sort(key=lambda s: (-len(s), s)) - - branches = [] - for string in choices: - branches.append(Sequence([Character(c, case_flags=case_flags) - for c in string])) - - if len(branches) > 1: - branch = Branch(branches) - else: - branch = branches[0] - branch = branch.optimise(self.info, - reverse).pack_characters(self.info) - - return branch.compile(reverse, fuzzy) - else: - min_len = min(len(i) for i in items) - max_len = max(len(self._folded(fold_flags, i)) for i in items) - return [(self._opcode[case_flags, reverse], index, min_len, - max_len)] - - def _dump(self, indent, reverse): - print "%sSTRING_SET %s%s" % (INDENT * indent, self.name, - CASE_TEXT[self.case_flags]) - - def _folded(self, fold_flags, item): - if isinstance(item, unicode): - return [ord(c) for c in _regex.fold_case(fold_flags, item)] - else: - return [ord(c) for c in item] - - def _flatten(self, s): - # Flattens the branches. 
- if isinstance(s, Branch): - for b in s.branches: - self._flatten(b) - elif isinstance(s, Sequence) and s.items: - seq = s.items - - while isinstance(seq[-1], Sequence): - seq[-1 : ] = seq[-1].items - - n = 0 - while n < len(seq) and isinstance(seq[n], Character): - n += 1 - - if n > 1: - seq[ : n] = [String([c.value for c in seq[ : n]], - case_flags=self.case_flags)] - - self._flatten(seq[-1]) - - def max_width(self): - if not self.info.kwargs[self.name]: - return 0 - - if self.case_flags & IGNORECASE: - fold_flags = (self.info.flags & _ALL_ENCODINGS) | self.case_flags - return max(len(_regex.fold_case(fold_flags, i)) for i in - self.info.kwargs[self.name]) - else: - return max(len(i) for i in self.info.kwargs[self.name]) - -class Source(object): - "Scanner for the regular expression source string." - def __init__(self, string): - if isinstance(string, unicode): - self.string = string - self.char_type = unichr - else: - self.string = string - self.char_type = chr - - self.pos = 0 - self.ignore_space = False - self.sep = string[ : 0] - - def get(self): - string = self.string - pos = self.pos - - try: - if self.ignore_space: - while True: - if string[pos].isspace(): - # Skip over the whitespace. - pos += 1 - elif string[pos] == "#": - # Skip over the comment to the end of the line. - pos = string.index("\n", pos) - else: - break - - ch = string[pos] - self.pos = pos + 1 - return ch - except IndexError: - # We've reached the end of the string. - self.pos = pos - return string[ : 0] - except ValueError: - # The comment extended to the end of the string. - self.pos = len(string) - return string[ : 0] - - def get_many(self, count=1): - string = self.string - pos = self.pos - - try: - if self.ignore_space: - substring = [] - - while len(substring) < count: - while True: - if string[pos].isspace(): - # Skip over the whitespace. - pos += 1 - elif string[pos] == "#": - # Skip over the comment to the end of the line. 
- pos = string.index("\n", pos) - else: - break - - substring.append(string[pos]) - pos += 1 - - substring = "".join(substring) - else: - substring = string[pos : pos + count] - pos += len(substring) - - self.pos = pos - return substring - except IndexError: - # We've reached the end of the string. - self.pos = len(string) - return "".join(substring) - except ValueError: - # The comment extended to the end of the string. - self.pos = len(string) - return "".join(substring) - - def get_while(self, test_set, include=True): - string = self.string - pos = self.pos - - if self.ignore_space: - try: - substring = [] - - while True: - if string[pos].isspace(): - # Skip over the whitespace. - pos += 1 - elif string[pos] == "#": - # Skip over the comment to the end of the line. - pos = string.index("\n", pos) - elif (string[pos] in test_set) == include: - substring.append(string[pos]) - pos += 1 - else: - break - - self.pos = pos - except IndexError: - # We've reached the end of the string. - self.pos = len(string) - except ValueError: - # The comment extended to the end of the string. - self.pos = len(string) - - return "".join(substring) - else: - try: - while (string[pos] in test_set) == include: - pos += 1 - - substring = string[self.pos : pos] - - self.pos = pos - - return substring - except IndexError: - # We've reached the end of the string. - substring = string[self.pos : pos] - - self.pos = pos - - return substring - - def skip_while(self, test_set, include=True): - string = self.string - pos = self.pos - - try: - if self.ignore_space: - while True: - if string[pos].isspace(): - # Skip over the whitespace. - pos += 1 - elif string[pos] == "#": - # Skip over the comment to the end of the line. - pos = string.index("\n", pos) - elif (string[pos] in test_set) == include: - pos += 1 - else: - break - else: - while (string[pos] in test_set) == include: - pos += 1 - - self.pos = pos - except IndexError: - # We've reached the end of the string. 
- self.pos = len(string) - except ValueError: - # The comment extended to the end of the string. - self.pos = len(string) - - def match(self, substring): - string = self.string - pos = self.pos - - if self.ignore_space: - try: - for c in substring: - while True: - if string[pos].isspace(): - # Skip over the whitespace. - pos += 1 - elif string[pos] == "#": - # Skip over the comment to the end of the line. - pos = string.index("\n", pos) - else: - break - - if string[pos] != c: - return False - - pos += 1 - - self.pos = pos - - return True - except IndexError: - # We've reached the end of the string. - return False - except ValueError: - # The comment extended to the end of the string. - return False - else: - if not string.startswith(substring, pos): - return False - - self.pos = pos + len(substring) - - return True - - def expect(self, substring): - if not self.match(substring): - raise error("missing %s" % substring, self.string, self.pos) - - def at_end(self): - string = self.string - pos = self.pos - - try: - if self.ignore_space: - while True: - if string[pos].isspace(): - pos += 1 - elif string[pos] == "#": - pos = string.index("\n", pos) - else: - break - - return pos >= len(string) - except IndexError: - # We've reached the end of the string. - return True - except ValueError: - # The comment extended to the end of the string. - return True - -class Info(object): - "Info about the regular expression." 
- - def __init__(self, flags=0, char_type=None, kwargs={}): - flags |= DEFAULT_FLAGS[(flags & _ALL_VERSIONS) or DEFAULT_VERSION] - self.flags = flags - self.global_flags = flags - self.inline_locale = False - - self.kwargs = kwargs - - self.group_count = 0 - self.group_index = {} - self.group_name = {} - self.char_type = char_type - self.named_lists_used = {} - self.open_groups = [] - self.open_group_count = {} - self.defined_groups = {} - self.group_calls = [] - self.private_groups = {} - - def open_group(self, name=None): - group = self.group_index.get(name) - if group is None: - while True: - self.group_count += 1 - if name is None or self.group_count not in self.group_name: - break - - group = self.group_count - if name: - self.group_index[name] = group - self.group_name[group] = name - - if group in self.open_groups: - # We have a nested named group. We'll assign it a private group - # number, initially negative until we can assign a proper - # (positive) number. - group_alias = -(len(self.private_groups) + 1) - self.private_groups[group_alias] = group - group = group_alias - - self.open_groups.append(group) - self.open_group_count[group] = self.open_group_count.get(group, 0) + 1 - - return group - - def close_group(self): - self.open_groups.pop() - - def is_open_group(self, name): - # In version 1, a group reference can refer to an open group. We'll - # just pretend the group isn't open. - version = (self.flags & _ALL_VERSIONS) or DEFAULT_VERSION - if version == VERSION1: - return False - - if name.isdigit(): - group = int(name) - else: - group = self.group_index.get(name) - - return group in self.open_groups - -def _check_group_features(info, parsed): - """Checks whether the reverse and fuzzy features of the group calls match - the groups which they call. - """ - call_refs = {} - additional_groups = [] - for call, reverse, fuzzy in info.group_calls: - # Look up the reference of this group call. 
- key = (call.group, reverse, fuzzy) - ref = call_refs.get(key) - if ref is None: - # This group doesn't have a reference yet, so look up its features. - if call.group == 0: - # Calling the pattern as a whole. - rev = bool(info.flags & REVERSE) - fuz = isinstance(parsed, Fuzzy) - if (rev, fuz) != (reverse, fuzzy): - # The pattern as a whole doesn't have the features we want, - # so we'll need to make a copy of it with the desired - # features. - additional_groups.append((CallRef(len(call_refs), parsed), - reverse, fuzzy)) - else: - # Calling a capture group. - def_info = info.defined_groups[call.group] - group = def_info[0] - if def_info[1 : ] != (reverse, fuzzy): - # The group doesn't have the features we want, so we'll - # need to make a copy of it with the desired features. - additional_groups.append((group, reverse, fuzzy)) - - ref = len(call_refs) - call_refs[key] = ref - - call.call_ref = ref - - info.call_refs = call_refs - info.additional_groups = additional_groups - -def _get_required_string(parsed, flags): - "Gets the required string and related info of a parsed pattern." - - req_offset, required = parsed.get_required_string(bool(flags & REVERSE)) - if required: - required.required = True - if req_offset >= UNLIMITED: - req_offset = -1 - - req_flags = required.case_flags - if not (flags & UNICODE): - req_flags &= ~UNICODE - - req_chars = required.folded_characters - else: - req_offset = 0 - req_chars = () - req_flags = 0 - - return req_offset, req_chars, req_flags - -class Scanner: - def __init__(self, lexicon, flags=0): - self.lexicon = lexicon - - # Combine phrases into a compound pattern. - patterns = [] - for phrase, action in lexicon: - # Parse the regular expression. 
- source = Source(phrase) - info = Info(flags, source.char_type) - source.ignore_space = bool(info.flags & VERBOSE) - parsed = _parse_pattern(source, info) - if not source.at_end(): - raise error("unbalanced parenthesis", source.string, - source.pos) - - # We want to forbid capture groups within each phrase. - patterns.append(parsed.remove_captures()) - - # Combine all the subpatterns into one pattern. - info = Info(flags) - patterns = [Group(info, g + 1, p) for g, p in enumerate(patterns)] - parsed = Branch(patterns) - - # Optimise the compound pattern. - reverse = bool(info.flags & REVERSE) - parsed = parsed.optimise(info, reverse) - parsed = parsed.pack_characters(info) - - # Get the required string. - req_offset, req_chars, req_flags = _get_required_string(parsed, - info.flags) - - # Check the features of the groups. - _check_group_features(info, parsed) - - # Complain if there are any group calls. They are not supported by the - # Scanner class. - if info.call_refs: - raise error("recursive regex not supported by Scanner", - source.string, source.pos) - - reverse = bool(info.flags & REVERSE) - - # Compile the compound pattern. The result is a list of tuples. - code = parsed.compile(reverse) + [(OP.SUCCESS, )] - - # Flatten the code into a list of ints. - code = _flatten_code(code) - - if not parsed.has_simple_start(): - # Get the first set, if possible. - try: - fs_code = _compile_firstset(info, parsed.get_firstset(reverse)) - fs_code = _flatten_code(fs_code) - code = fs_code + code - except _FirstSetError: - pass - - # Check the global flags for conflicts. - version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION - if version not in (0, VERSION0, VERSION1): - raise ValueError("VERSION0 and VERSION1 flags are mutually incompatible") - - # Create the PatternObject. - # - # Local flags like IGNORECASE affect the code generation, but aren't - # needed by the PatternObject itself. 
Conversely, global flags like - # LOCALE _don't_ affect the code generation but _are_ needed by the - # PatternObject. - self.scanner = _regex.compile(None, (flags & GLOBAL_FLAGS) | version, - code, {}, {}, {}, [], req_offset, req_chars, req_flags, - len(patterns)) - - def scan(self, string): - result = [] - append = result.append - match = self.scanner.scanner(string).match - i = 0 - while True: - m = match() - if not m: - break - j = m.end() - if i == j: - break - action = self.lexicon[m.lastindex - 1][1] - if hasattr(action, '__call__'): - self.match = m - action = action(self, m.group()) - if action is not None: - append(action) - i = j - - return result, string[i : ] - -# Get the known properties dict. -PROPERTIES = _regex.get_properties() - -# Build the inverse of the properties dict. -PROPERTY_NAMES = {} -for prop_name, (prop_id, values) in PROPERTIES.items(): - name, prop_values = PROPERTY_NAMES.get(prop_id, ("", {})) - name = max(name, prop_name, key=len) - PROPERTY_NAMES[prop_id] = name, prop_values - - for val_name, val_id in values.items(): - prop_values[val_id] = max(prop_values.get(val_id, ""), val_name, - key=len) - -# Character escape sequences. -CHARACTER_ESCAPES = { - "a": "\a", - "b": "\b", - "f": "\f", - "n": "\n", - "r": "\r", - "t": "\t", - "v": "\v", -} - -# Predefined character set escape sequences. -CHARSET_ESCAPES = { - "d": lookup_property(None, "Digit", True), - "D": lookup_property(None, "Digit", False), - "s": lookup_property(None, "Space", True), - "S": lookup_property(None, "Space", False), - "w": lookup_property(None, "Word", True), - "W": lookup_property(None, "Word", False), -} - -# Positional escape sequences. -POSITION_ESCAPES = { - "A": StartOfString(), - "b": Boundary(), - "B": Boundary(False), - "K": Keep(), - "m": StartOfWord(), - "M": EndOfWord(), - "Z": EndOfString(), -} - -# Positional escape sequences when WORD flag set. 
-WORD_POSITION_ESCAPES = dict(POSITION_ESCAPES) -WORD_POSITION_ESCAPES.update({ - "b": DefaultBoundary(), - "B": DefaultBoundary(False), - "m": DefaultStartOfWord(), - "M": DefaultEndOfWord(), -}) - -# Regex control verbs. -VERBS = { - "FAIL": Failure(), - "F": Failure(), - "PRUNE": Prune(), - "SKIP": Skip(), -} diff --git a/src/regex/_regex_unicode.c b/src/regex/_regex_unicode.c deleted file mode 100644 index f470005756..0000000000 --- a/src/regex/_regex_unicode.c +++ /dev/null @@ -1,14789 +0,0 @@ -/* For Unicode version 9.0.0 */ - -#include "_regex_unicode.h" - -#define RE_BLANK_MASK ((1 << RE_PROP_ZL) | (1 << RE_PROP_ZP)) -#define RE_GRAPH_MASK ((1 << RE_PROP_CC) | (1 << RE_PROP_CS) | (1 << RE_PROP_CN)) -#define RE_WORD_MASK (RE_PROP_M_MASK | (1 << RE_PROP_ND) | (1 << RE_PROP_PC)) - -typedef struct RE_AllCases { - RE_INT32 diffs[RE_MAX_CASES - 1]; -} RE_AllCases; - -typedef struct RE_FullCaseFolding { - RE_INT32 diff; - RE_UINT16 codepoints[RE_MAX_FOLDED - 1]; -} RE_FullCaseFolding; - -/* strings. 
*/ - -char* re_strings[] = { - "-1/2", - "0", - "1", - "1/10", - "1/12", - "1/16", - "1/160", - "1/2", - "1/20", - "1/3", - "1/4", - "1/40", - "1/5", - "1/6", - "1/7", - "1/8", - "1/9", - "10", - "100", - "1000", - "10000", - "100000", - "1000000", - "100000000", - "10000000000", - "1000000000000", - "103", - "107", - "11", - "11/12", - "11/2", - "118", - "12", - "122", - "129", - "13", - "13/2", - "130", - "132", - "133", - "14", - "15", - "15/2", - "16", - "17", - "17/2", - "18", - "19", - "2", - "2/3", - "2/5", - "20", - "200", - "2000", - "20000", - "200000", - "202", - "21", - "214", - "216", - "216000", - "218", - "22", - "220", - "222", - "224", - "226", - "228", - "23", - "230", - "232", - "233", - "234", - "24", - "240", - "25", - "26", - "27", - "28", - "29", - "3", - "3/16", - "3/2", - "3/20", - "3/4", - "3/5", - "3/8", - "3/80", - "30", - "300", - "3000", - "30000", - "300000", - "31", - "32", - "33", - "34", - "35", - "36", - "37", - "38", - "39", - "4", - "4/5", - "40", - "400", - "4000", - "40000", - "400000", - "41", - "42", - "43", - "432000", - "44", - "45", - "46", - "47", - "48", - "49", - "5", - "5/12", - "5/2", - "5/6", - "5/8", - "50", - "500", - "5000", - "50000", - "500000", - "6", - "60", - "600", - "6000", - "60000", - "600000", - "7", - "7/12", - "7/2", - "7/8", - "70", - "700", - "7000", - "70000", - "700000", - "8", - "80", - "800", - "8000", - "80000", - "800000", - "84", - "9", - "9/2", - "90", - "900", - "9000", - "90000", - "900000", - "91", - "A", - "ABOVE", - "ABOVELEFT", - "ABOVERIGHT", - "ADLAM", - "ADLM", - "AEGEANNUMBERS", - "AFRICANFEH", - "AFRICANNOON", - "AFRICANQAF", - "AGHB", - "AHEX", - "AHOM", - "AI", - "AIN", - "AL", - "ALAPH", - "ALCHEMICAL", - "ALCHEMICALSYMBOLS", - "ALEF", - "ALETTER", - "ALNUM", - "ALPHA", - "ALPHABETIC", - "ALPHABETICPF", - "ALPHABETICPRESENTATIONFORMS", - "ALPHANUMERIC", - "AMBIGUOUS", - "AN", - "ANATOLIANHIEROGLYPHS", - "ANCIENTGREEKMUSIC", - "ANCIENTGREEKMUSICALNOTATION", - 
"ANCIENTGREEKNUMBERS", - "ANCIENTSYMBOLS", - "ANY", - "AR", - "ARAB", - "ARABIC", - "ARABICEXTA", - "ARABICEXTENDEDA", - "ARABICLETTER", - "ARABICMATH", - "ARABICMATHEMATICALALPHABETICSYMBOLS", - "ARABICNUMBER", - "ARABICPFA", - "ARABICPFB", - "ARABICPRESENTATIONFORMSA", - "ARABICPRESENTATIONFORMSB", - "ARABICSUP", - "ARABICSUPPLEMENT", - "ARMENIAN", - "ARMI", - "ARMN", - "ARROWS", - "ASCII", - "ASCIIHEXDIGIT", - "ASSIGNED", - "AT", - "ATA", - "ATAR", - "ATB", - "ATBL", - "ATERM", - "ATTACHEDABOVE", - "ATTACHEDABOVERIGHT", - "ATTACHEDBELOW", - "ATTACHEDBELOWLEFT", - "AVAGRAHA", - "AVESTAN", - "AVST", - "B", - "B2", - "BA", - "BALI", - "BALINESE", - "BAMU", - "BAMUM", - "BAMUMSUP", - "BAMUMSUPPLEMENT", - "BASICLATIN", - "BASS", - "BASSAVAH", - "BATAK", - "BATK", - "BB", - "BC", - "BEH", - "BELOW", - "BELOWLEFT", - "BELOWRIGHT", - "BENG", - "BENGALI", - "BETH", - "BHAIKSUKI", - "BHKS", - "BIDIC", - "BIDICLASS", - "BIDICONTROL", - "BIDIM", - "BIDIMIRRORED", - "BINDU", - "BK", - "BL", - "BLANK", - "BLK", - "BLOCK", - "BLOCKELEMENTS", - "BN", - "BOPO", - "BOPOMOFO", - "BOPOMOFOEXT", - "BOPOMOFOEXTENDED", - "BOTTOM", - "BOTTOMANDRIGHT", - "BOUNDARYNEUTRAL", - "BOXDRAWING", - "BR", - "BRAH", - "BRAHMI", - "BRAHMIJOININGNUMBER", - "BRAI", - "BRAILLE", - "BRAILLEPATTERNS", - "BREAKAFTER", - "BREAKBEFORE", - "BREAKBOTH", - "BREAKSYMBOLS", - "BUGI", - "BUGINESE", - "BUHD", - "BUHID", - "BURUSHASKIYEHBARREE", - "BYZANTINEMUSIC", - "BYZANTINEMUSICALSYMBOLS", - "C", - "C&", - "CAKM", - "CAN", - "CANADIANABORIGINAL", - "CANADIANSYLLABICS", - "CANONICAL", - "CANONICALCOMBININGCLASS", - "CANS", - "CANTILLATIONMARK", - "CARI", - "CARIAN", - "CARRIAGERETURN", - "CASED", - "CASEDLETTER", - "CASEIGNORABLE", - "CAUCASIANALBANIAN", - "CB", - "CC", - "CCC", - "CCC10", - "CCC103", - "CCC107", - "CCC11", - "CCC118", - "CCC12", - "CCC122", - "CCC129", - "CCC13", - "CCC130", - "CCC132", - "CCC133", - "CCC14", - "CCC15", - "CCC16", - "CCC17", - "CCC18", - "CCC19", - "CCC20", - "CCC21", - 
"CCC22", - "CCC23", - "CCC24", - "CCC25", - "CCC26", - "CCC27", - "CCC28", - "CCC29", - "CCC30", - "CCC31", - "CCC32", - "CCC33", - "CCC34", - "CCC35", - "CCC36", - "CCC84", - "CCC91", - "CF", - "CHAKMA", - "CHAM", - "CHANGESWHENCASEFOLDED", - "CHANGESWHENCASEMAPPED", - "CHANGESWHENLOWERCASED", - "CHANGESWHENTITLECASED", - "CHANGESWHENUPPERCASED", - "CHER", - "CHEROKEE", - "CHEROKEESUP", - "CHEROKEESUPPLEMENT", - "CI", - "CIRCLE", - "CJ", - "CJK", - "CJKCOMPAT", - "CJKCOMPATFORMS", - "CJKCOMPATIBILITY", - "CJKCOMPATIBILITYFORMS", - "CJKCOMPATIBILITYIDEOGRAPHS", - "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT", - "CJKCOMPATIDEOGRAPHS", - "CJKCOMPATIDEOGRAPHSSUP", - "CJKEXTA", - "CJKEXTB", - "CJKEXTC", - "CJKEXTD", - "CJKEXTE", - "CJKRADICALSSUP", - "CJKRADICALSSUPPLEMENT", - "CJKSTROKES", - "CJKSYMBOLS", - "CJKSYMBOLSANDPUNCTUATION", - "CJKUNIFIEDIDEOGRAPHS", - "CJKUNIFIEDIDEOGRAPHSEXTENSIONA", - "CJKUNIFIEDIDEOGRAPHSEXTENSIONB", - "CJKUNIFIEDIDEOGRAPHSEXTENSIONC", - "CJKUNIFIEDIDEOGRAPHSEXTENSIOND", - "CJKUNIFIEDIDEOGRAPHSEXTENSIONE", - "CL", - "CLOSE", - "CLOSEPARENTHESIS", - "CLOSEPUNCTUATION", - "CM", - "CN", - "CNTRL", - "CO", - "COM", - "COMBININGDIACRITICALMARKS", - "COMBININGDIACRITICALMARKSEXTENDED", - "COMBININGDIACRITICALMARKSFORSYMBOLS", - "COMBININGDIACRITICALMARKSSUPPLEMENT", - "COMBININGHALFMARKS", - "COMBININGMARK", - "COMBININGMARKSFORSYMBOLS", - "COMMON", - "COMMONINDICNUMBERFORMS", - "COMMONSEPARATOR", - "COMPAT", - "COMPATJAMO", - "COMPLEXCONTEXT", - "CONDITIONALJAPANESESTARTER", - "CONNECTORPUNCTUATION", - "CONSONANT", - "CONSONANTDEAD", - "CONSONANTFINAL", - "CONSONANTHEADLETTER", - "CONSONANTKILLER", - "CONSONANTMEDIAL", - "CONSONANTPLACEHOLDER", - "CONSONANTPRECEDINGREPHA", - "CONSONANTPREFIXED", - "CONSONANTSUBJOINED", - "CONSONANTSUCCEEDINGREPHA", - "CONSONANTWITHSTACKER", - "CONTINGENTBREAK", - "CONTROL", - "CONTROLPICTURES", - "COPT", - "COPTIC", - "COPTICEPACTNUMBERS", - "COUNTINGROD", - "COUNTINGRODNUMERALS", - "CP", - "CPRT", - "CR", - "CS", - 
"CUNEIFORM", - "CUNEIFORMNUMBERS", - "CUNEIFORMNUMBERSANDPUNCTUATION", - "CURRENCYSYMBOL", - "CURRENCYSYMBOLS", - "CWCF", - "CWCM", - "CWL", - "CWT", - "CWU", - "CYPRIOT", - "CYPRIOTSYLLABARY", - "CYRILLIC", - "CYRILLICEXTA", - "CYRILLICEXTB", - "CYRILLICEXTC", - "CYRILLICEXTENDEDA", - "CYRILLICEXTENDEDB", - "CYRILLICEXTENDEDC", - "CYRILLICSUP", - "CYRILLICSUPPLEMENT", - "CYRILLICSUPPLEMENTARY", - "CYRL", - "D", - "DA", - "DAL", - "DALATHRISH", - "DASH", - "DASHPUNCTUATION", - "DB", - "DE", - "DECIMAL", - "DECIMALNUMBER", - "DECOMPOSITIONTYPE", - "DEFAULTIGNORABLECODEPOINT", - "DEP", - "DEPRECATED", - "DESERET", - "DEVA", - "DEVANAGARI", - "DEVANAGARIEXT", - "DEVANAGARIEXTENDED", - "DI", - "DIA", - "DIACRITIC", - "DIACRITICALS", - "DIACRITICALSEXT", - "DIACRITICALSFORSYMBOLS", - "DIACRITICALSSUP", - "DIGIT", - "DINGBATS", - "DOMINO", - "DOMINOTILES", - "DOUBLEABOVE", - "DOUBLEBELOW", - "DOUBLEQUOTE", - "DQ", - "DSRT", - "DT", - "DUALJOINING", - "DUPL", - "DUPLOYAN", - "E", - "EA", - "EARLYDYNASTICCUNEIFORM", - "EASTASIANWIDTH", - "EB", - "EBASE", - "EBASEGAZ", - "EBG", - "EGYP", - "EGYPTIANHIEROGLYPHS", - "ELBA", - "ELBASAN", - "EM", - "EMODIFIER", - "EMOTICONS", - "EN", - "ENC", - "ENCLOSEDALPHANUM", - "ENCLOSEDALPHANUMERICS", - "ENCLOSEDALPHANUMERICSUPPLEMENT", - "ENCLOSEDALPHANUMSUP", - "ENCLOSEDCJK", - "ENCLOSEDCJKLETTERSANDMONTHS", - "ENCLOSEDIDEOGRAPHICSUP", - "ENCLOSEDIDEOGRAPHICSUPPLEMENT", - "ENCLOSINGMARK", - "ES", - "ET", - "ETHI", - "ETHIOPIC", - "ETHIOPICEXT", - "ETHIOPICEXTA", - "ETHIOPICEXTENDED", - "ETHIOPICEXTENDEDA", - "ETHIOPICSUP", - "ETHIOPICSUPPLEMENT", - "EUROPEANNUMBER", - "EUROPEANSEPARATOR", - "EUROPEANTERMINATOR", - "EX", - "EXCLAMATION", - "EXT", - "EXTEND", - "EXTENDER", - "EXTENDNUMLET", - "F", - "FALSE", - "FARSIYEH", - "FE", - "FEH", - "FIN", - "FINAL", - "FINALPUNCTUATION", - "FINALSEMKATH", - "FIRSTSTRONGISOLATE", - "FO", - "FONT", - "FORMAT", - "FRA", - "FRACTION", - "FSI", - "FULLWIDTH", - "GAF", - "GAMAL", - "GAZ", - "GC", - 
"GCB", - "GEMINATIONMARK", - "GENERALCATEGORY", - "GENERALPUNCTUATION", - "GEOMETRICSHAPES", - "GEOMETRICSHAPESEXT", - "GEOMETRICSHAPESEXTENDED", - "GEOR", - "GEORGIAN", - "GEORGIANSUP", - "GEORGIANSUPPLEMENT", - "GL", - "GLAG", - "GLAGOLITIC", - "GLAGOLITICSUP", - "GLAGOLITICSUPPLEMENT", - "GLUE", - "GLUEAFTERZWJ", - "GOTH", - "GOTHIC", - "GRAN", - "GRANTHA", - "GRAPH", - "GRAPHEMEBASE", - "GRAPHEMECLUSTERBREAK", - "GRAPHEMEEXTEND", - "GRAPHEMELINK", - "GRBASE", - "GREEK", - "GREEKANDCOPTIC", - "GREEKEXT", - "GREEKEXTENDED", - "GREK", - "GREXT", - "GRLINK", - "GUJARATI", - "GUJR", - "GURMUKHI", - "GURU", - "H", - "H2", - "H3", - "HAH", - "HALFANDFULLFORMS", - "HALFMARKS", - "HALFWIDTH", - "HALFWIDTHANDFULLWIDTHFORMS", - "HAMZAONHEHGOAL", - "HAN", - "HANG", - "HANGUL", - "HANGULCOMPATIBILITYJAMO", - "HANGULJAMO", - "HANGULJAMOEXTENDEDA", - "HANGULJAMOEXTENDEDB", - "HANGULSYLLABLES", - "HANGULSYLLABLETYPE", - "HANI", - "HANO", - "HANUNOO", - "HATR", - "HATRAN", - "HE", - "HEBR", - "HEBREW", - "HEBREWLETTER", - "HEH", - "HEHGOAL", - "HETH", - "HEX", - "HEXDIGIT", - "HIGHPRIVATEUSESURROGATES", - "HIGHPUSURROGATES", - "HIGHSURROGATES", - "HIRA", - "HIRAGANA", - "HL", - "HLUW", - "HMNG", - "HRKT", - "HST", - "HUNG", - "HY", - "HYPHEN", - "ID", - "IDC", - "IDCONTINUE", - "IDEO", - "IDEOGRAPHIC", - "IDEOGRAPHICDESCRIPTIONCHARACTERS", - "IDEOGRAPHICSYMBOLS", - "IDEOGRAPHICSYMBOLSANDPUNCTUATION", - "IDS", - "IDSB", - "IDSBINARYOPERATOR", - "IDST", - "IDSTART", - "IDSTRINARYOPERATOR", - "IMPERIALARAMAIC", - "IN", - "INDICNUMBERFORMS", - "INDICPOSITIONALCATEGORY", - "INDICSYLLABICCATEGORY", - "INFIXNUMERIC", - "INHERITED", - "INIT", - "INITIAL", - "INITIALPUNCTUATION", - "INPC", - "INSC", - "INSCRIPTIONALPAHLAVI", - "INSCRIPTIONALPARTHIAN", - "INSEPARABLE", - "INSEPERABLE", - "INVISIBLESTACKER", - "IOTASUBSCRIPT", - "IPAEXT", - "IPAEXTENSIONS", - "IS", - "ISO", - "ISOLATED", - "ITAL", - "JAMO", - "JAMOEXTA", - "JAMOEXTB", - "JAVA", - "JAVANESE", - "JG", - "JL", - "JOINC", - 
"JOINCAUSING", - "JOINCONTROL", - "JOINER", - "JOININGGROUP", - "JOININGTYPE", - "JT", - "JV", - "KA", - "KAF", - "KAITHI", - "KALI", - "KANA", - "KANASUP", - "KANASUPPLEMENT", - "KANAVOICING", - "KANBUN", - "KANGXI", - "KANGXIRADICALS", - "KANNADA", - "KAPH", - "KATAKANA", - "KATAKANAEXT", - "KATAKANAORHIRAGANA", - "KATAKANAPHONETICEXTENSIONS", - "KAYAHLI", - "KHAPH", - "KHAR", - "KHAROSHTHI", - "KHMER", - "KHMERSYMBOLS", - "KHMR", - "KHOJ", - "KHOJKI", - "KHUDAWADI", - "KNDA", - "KNOTTEDHEH", - "KTHI", - "KV", - "L", - "L&", - "LAM", - "LAMADH", - "LANA", - "LAO", - "LAOO", - "LATIN", - "LATIN1", - "LATIN1SUP", - "LATIN1SUPPLEMENT", - "LATINEXTA", - "LATINEXTADDITIONAL", - "LATINEXTB", - "LATINEXTC", - "LATINEXTD", - "LATINEXTE", - "LATINEXTENDEDA", - "LATINEXTENDEDADDITIONAL", - "LATINEXTENDEDB", - "LATINEXTENDEDC", - "LATINEXTENDEDD", - "LATINEXTENDEDE", - "LATN", - "LB", - "LC", - "LE", - "LEADINGJAMO", - "LEFT", - "LEFTANDRIGHT", - "LEFTJOINING", - "LEFTTORIGHT", - "LEFTTORIGHTEMBEDDING", - "LEFTTORIGHTISOLATE", - "LEFTTORIGHTOVERRIDE", - "LEPC", - "LEPCHA", - "LETTER", - "LETTERLIKESYMBOLS", - "LETTERNUMBER", - "LF", - "LIMB", - "LIMBU", - "LINA", - "LINB", - "LINEARA", - "LINEARB", - "LINEARBIDEOGRAMS", - "LINEARBSYLLABARY", - "LINEBREAK", - "LINEFEED", - "LINESEPARATOR", - "LISU", - "LL", - "LM", - "LO", - "LOE", - "LOGICALORDEREXCEPTION", - "LOWER", - "LOWERCASE", - "LOWERCASELETTER", - "LOWSURROGATES", - "LRE", - "LRI", - "LRO", - "LT", - "LU", - "LV", - "LVSYLLABLE", - "LVT", - "LVTSYLLABLE", - "LYCI", - "LYCIAN", - "LYDI", - "LYDIAN", - "M", - "M&", - "MAHAJANI", - "MAHJ", - "MAHJONG", - "MAHJONGTILES", - "MALAYALAM", - "MAND", - "MANDAIC", - "MANDATORYBREAK", - "MANI", - "MANICHAEAN", - "MANICHAEANALEPH", - "MANICHAEANAYIN", - "MANICHAEANBETH", - "MANICHAEANDALETH", - "MANICHAEANDHAMEDH", - "MANICHAEANFIVE", - "MANICHAEANGIMEL", - "MANICHAEANHETH", - "MANICHAEANHUNDRED", - "MANICHAEANKAPH", - "MANICHAEANLAMEDH", - "MANICHAEANMEM", - "MANICHAEANNUN", - 
"MANICHAEANONE", - "MANICHAEANPE", - "MANICHAEANQOPH", - "MANICHAEANRESH", - "MANICHAEANSADHE", - "MANICHAEANSAMEKH", - "MANICHAEANTAW", - "MANICHAEANTEN", - "MANICHAEANTETH", - "MANICHAEANTHAMEDH", - "MANICHAEANTWENTY", - "MANICHAEANWAW", - "MANICHAEANYODH", - "MANICHAEANZAYIN", - "MARC", - "MARCHEN", - "MARK", - "MATH", - "MATHALPHANUM", - "MATHEMATICALALPHANUMERICSYMBOLS", - "MATHEMATICALOPERATORS", - "MATHOPERATORS", - "MATHSYMBOL", - "MB", - "MC", - "ME", - "MED", - "MEDIAL", - "MEEM", - "MEETEIMAYEK", - "MEETEIMAYEKEXT", - "MEETEIMAYEKEXTENSIONS", - "MEND", - "MENDEKIKAKUI", - "MERC", - "MERO", - "MEROITICCURSIVE", - "MEROITICHIEROGLYPHS", - "MIAO", - "MIDLETTER", - "MIDNUM", - "MIDNUMLET", - "MIM", - "MISCARROWS", - "MISCELLANEOUSMATHEMATICALSYMBOLSA", - "MISCELLANEOUSMATHEMATICALSYMBOLSB", - "MISCELLANEOUSSYMBOLS", - "MISCELLANEOUSSYMBOLSANDARROWS", - "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS", - "MISCELLANEOUSTECHNICAL", - "MISCMATHSYMBOLSA", - "MISCMATHSYMBOLSB", - "MISCPICTOGRAPHS", - "MISCSYMBOLS", - "MISCTECHNICAL", - "ML", - "MLYM", - "MN", - "MODI", - "MODIFIERLETTER", - "MODIFIERLETTERS", - "MODIFIERSYMBOL", - "MODIFIERTONELETTERS", - "MODIFYINGLETTER", - "MONG", - "MONGOLIAN", - "MONGOLIANSUP", - "MONGOLIANSUPPLEMENT", - "MRO", - "MROO", - "MTEI", - "MULT", - "MULTANI", - "MUSIC", - "MUSICALSYMBOLS", - "MYANMAR", - "MYANMAREXTA", - "MYANMAREXTB", - "MYANMAREXTENDEDA", - "MYANMAREXTENDEDB", - "MYMR", - "N", - "N&", - "NA", - "NABATAEAN", - "NAN", - "NAR", - "NARB", - "NARROW", - "NB", - "NBAT", - "NCHAR", - "ND", - "NEUTRAL", - "NEWA", - "NEWLINE", - "NEWTAILUE", - "NEXTLINE", - "NK", - "NKO", - "NKOO", - "NL", - "NO", - "NOBLOCK", - "NOBREAK", - "NOJOININGGROUP", - "NONCHARACTERCODEPOINT", - "NONE", - "NONJOINER", - "NONJOINING", - "NONSPACINGMARK", - "NONSTARTER", - "NOON", - "NOTAPPLICABLE", - "NOTREORDERED", - "NR", - "NS", - "NSM", - "NT", - "NU", - "NUKTA", - "NUMBER", - "NUMBERFORMS", - "NUMBERJOINER", - "NUMERIC", - "NUMERICTYPE", - 
"NUMERICVALUE", - "NUN", - "NV", - "NYA", - "OALPHA", - "OCR", - "ODI", - "OGAM", - "OGHAM", - "OGREXT", - "OIDC", - "OIDS", - "OLCHIKI", - "OLCK", - "OLDHUNGARIAN", - "OLDITALIC", - "OLDNORTHARABIAN", - "OLDPERMIC", - "OLDPERSIAN", - "OLDSOUTHARABIAN", - "OLDTURKIC", - "OLETTER", - "OLOWER", - "OMATH", - "ON", - "OP", - "OPENPUNCTUATION", - "OPTICALCHARACTERRECOGNITION", - "ORIYA", - "ORKH", - "ORNAMENTALDINGBATS", - "ORYA", - "OSAGE", - "OSGE", - "OSMA", - "OSMANYA", - "OTHER", - "OTHERALPHABETIC", - "OTHERDEFAULTIGNORABLECODEPOINT", - "OTHERGRAPHEMEEXTEND", - "OTHERIDCONTINUE", - "OTHERIDSTART", - "OTHERLETTER", - "OTHERLOWERCASE", - "OTHERMATH", - "OTHERNEUTRAL", - "OTHERNUMBER", - "OTHERPUNCTUATION", - "OTHERSYMBOL", - "OTHERUPPERCASE", - "OUPPER", - "OV", - "OVERLAY", - "OVERSTRUCK", - "P", - "P&", - "PAHAWHHMONG", - "PALM", - "PALMYRENE", - "PARAGRAPHSEPARATOR", - "PATSYN", - "PATTERNSYNTAX", - "PATTERNWHITESPACE", - "PATWS", - "PAUC", - "PAUCINHAU", - "PC", - "PCM", - "PD", - "PDF", - "PDI", - "PE", - "PERM", - "PF", - "PHAG", - "PHAGSPA", - "PHAISTOS", - "PHAISTOSDISC", - "PHLI", - "PHLP", - "PHNX", - "PHOENICIAN", - "PHONETICEXT", - "PHONETICEXTENSIONS", - "PHONETICEXTENSIONSSUPPLEMENT", - "PHONETICEXTSUP", - "PI", - "PLAYINGCARDS", - "PLRD", - "PO", - "POPDIRECTIONALFORMAT", - "POPDIRECTIONALISOLATE", - "POSIXALNUM", - "POSIXDIGIT", - "POSIXPUNCT", - "POSIXXDIGIT", - "POSTFIXNUMERIC", - "PP", - "PR", - "PREFIXNUMERIC", - "PREPEND", - "PREPENDEDCONCATENATIONMARK", - "PRINT", - "PRIVATEUSE", - "PRIVATEUSEAREA", - "PRTI", - "PS", - "PSALTERPAHLAVI", - "PUA", - "PUNCT", - "PUNCTUATION", - "PUREKILLER", - "QAAC", - "QAAI", - "QAF", - "QAPH", - "QMARK", - "QU", - "QUOTATION", - "QUOTATIONMARK", - "R", - "RADICAL", - "REGIONALINDICATOR", - "REGISTERSHIFTER", - "REH", - "REJANG", - "REVERSEDPE", - "RI", - "RIGHT", - "RIGHTJOINING", - "RIGHTTOLEFT", - "RIGHTTOLEFTEMBEDDING", - "RIGHTTOLEFTISOLATE", - "RIGHTTOLEFTOVERRIDE", - "RJNG", - "RLE", - "RLI", - "RLO", - 
"ROHINGYAYEH", - "RUMI", - "RUMINUMERALSYMBOLS", - "RUNIC", - "RUNR", - "S", - "S&", - "SA", - "SAD", - "SADHE", - "SAMARITAN", - "SAMR", - "SARB", - "SAUR", - "SAURASHTRA", - "SB", - "SC", - "SCONTINUE", - "SCRIPT", - "SD", - "SE", - "SEEN", - "SEGMENTSEPARATOR", - "SEMKATH", - "SENTENCEBREAK", - "SENTENCETERMINAL", - "SEP", - "SEPARATOR", - "SG", - "SGNW", - "SHARADA", - "SHAVIAN", - "SHAW", - "SHIN", - "SHORTHANDFORMATCONTROLS", - "SHRD", - "SIDD", - "SIDDHAM", - "SIGNWRITING", - "SIND", - "SINGLEQUOTE", - "SINH", - "SINHALA", - "SINHALAARCHAICNUMBERS", - "SK", - "SM", - "SMALL", - "SMALLFORMS", - "SMALLFORMVARIANTS", - "SML", - "SO", - "SOFTDOTTED", - "SORA", - "SORASOMPENG", - "SP", - "SPACE", - "SPACESEPARATOR", - "SPACINGMARK", - "SPACINGMODIFIERLETTERS", - "SPECIALS", - "SQ", - "SQR", - "SQUARE", - "ST", - "STERM", - "STRAIGHTWAW", - "SUB", - "SUND", - "SUNDANESE", - "SUNDANESESUP", - "SUNDANESESUPPLEMENT", - "SUP", - "SUPARROWSA", - "SUPARROWSB", - "SUPARROWSC", - "SUPER", - "SUPERANDSUB", - "SUPERSCRIPTSANDSUBSCRIPTS", - "SUPMATHOPERATORS", - "SUPPLEMENTALARROWSA", - "SUPPLEMENTALARROWSB", - "SUPPLEMENTALARROWSC", - "SUPPLEMENTALMATHEMATICALOPERATORS", - "SUPPLEMENTALPUNCTUATION", - "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS", - "SUPPLEMENTARYPRIVATEUSEAREAA", - "SUPPLEMENTARYPRIVATEUSEAREAB", - "SUPPUAA", - "SUPPUAB", - "SUPPUNCTUATION", - "SUPSYMBOLSANDPICTOGRAPHS", - "SURROGATE", - "SUTTONSIGNWRITING", - "SWASHKAF", - "SY", - "SYLLABLEMODIFIER", - "SYLO", - "SYLOTINAGRI", - "SYMBOL", - "SYRC", - "SYRIAC", - "SYRIACWAW", - "T", - "TAGALOG", - "TAGB", - "TAGBANWA", - "TAGS", - "TAH", - "TAILE", - "TAITHAM", - "TAIVIET", - "TAIXUANJING", - "TAIXUANJINGSYMBOLS", - "TAKR", - "TAKRI", - "TALE", - "TALU", - "TAMIL", - "TAML", - "TANG", - "TANGUT", - "TANGUTCOMPONENTS", - "TAVT", - "TAW", - "TEHMARBUTA", - "TEHMARBUTAGOAL", - "TELU", - "TELUGU", - "TERM", - "TERMINALPUNCTUATION", - "TETH", - "TFNG", - "TGLG", - "THAA", - "THAANA", - "THAI", - "TIBETAN", - "TIBT", - 
"TIFINAGH", - "TIRH", - "TIRHUTA", - "TITLECASELETTER", - "TONELETTER", - "TONEMARK", - "TOP", - "TOPANDBOTTOM", - "TOPANDBOTTOMANDRIGHT", - "TOPANDLEFT", - "TOPANDLEFTANDRIGHT", - "TOPANDRIGHT", - "TRAILINGJAMO", - "TRANSPARENT", - "TRANSPORTANDMAP", - "TRANSPORTANDMAPSYMBOLS", - "TRUE", - "U", - "UCAS", - "UCASEXT", - "UGAR", - "UGARITIC", - "UIDEO", - "UNASSIGNED", - "UNIFIEDCANADIANABORIGINALSYLLABICS", - "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED", - "UNIFIEDIDEOGRAPH", - "UNKNOWN", - "UP", - "UPPER", - "UPPERCASE", - "UPPERCASELETTER", - "V", - "VAI", - "VAII", - "VARIATIONSELECTOR", - "VARIATIONSELECTORS", - "VARIATIONSELECTORSSUPPLEMENT", - "VEDICEXT", - "VEDICEXTENSIONS", - "VERT", - "VERTICAL", - "VERTICALFORMS", - "VIRAMA", - "VISARGA", - "VISUALORDERLEFT", - "VOWEL", - "VOWELDEPENDENT", - "VOWELINDEPENDENT", - "VOWELJAMO", - "VR", - "VS", - "VSSUP", - "W", - "WARA", - "WARANGCITI", - "WAW", - "WB", - "WHITESPACE", - "WIDE", - "WJ", - "WORD", - "WORDBREAK", - "WORDJOINER", - "WS", - "WSPACE", - "XDIGIT", - "XIDC", - "XIDCONTINUE", - "XIDS", - "XIDSTART", - "XPEO", - "XSUX", - "XX", - "Y", - "YEH", - "YEHBARREE", - "YEHWITHTAIL", - "YES", - "YI", - "YIII", - "YIJING", - "YIJINGHEXAGRAMSYMBOLS", - "YIRADICALS", - "YISYLLABLES", - "YUDH", - "YUDHHE", - "Z", - "Z&", - "ZAIN", - "ZHAIN", - "ZINH", - "ZL", - "ZP", - "ZS", - "ZW", - "ZWJ", - "ZWSPACE", - "ZYYY", - "ZZZZ", -}; - -/* strings: 12639 bytes. */ - -/* properties. 
*/ - -RE_Property re_properties[] = { - { 568, 0, 0}, - { 565, 0, 0}, - { 264, 1, 1}, - { 263, 1, 1}, - {1116, 2, 2}, - {1114, 2, 2}, - {1298, 3, 3}, - {1293, 3, 3}, - { 590, 4, 4}, - { 566, 4, 4}, - {1122, 5, 5}, - {1113, 5, 5}, - { 851, 6, 6}, - { 182, 7, 6}, - { 181, 7, 6}, - { 793, 8, 6}, - { 792, 8, 6}, - {1266, 9, 6}, - {1265, 9, 6}, - { 306, 10, 6}, - { 308, 11, 6}, - { 362, 11, 6}, - { 355, 12, 6}, - { 445, 12, 6}, - { 357, 13, 6}, - { 447, 13, 6}, - { 356, 14, 6}, - { 446, 14, 6}, - { 353, 15, 6}, - { 443, 15, 6}, - { 354, 16, 6}, - { 444, 16, 6}, - { 662, 17, 6}, - { 658, 17, 6}, - { 652, 18, 6}, - { 651, 18, 6}, - {1306, 19, 6}, - {1305, 19, 6}, - {1304, 20, 6}, - {1303, 20, 6}, - { 472, 21, 6}, - { 480, 21, 6}, - { 591, 22, 6}, - { 599, 22, 6}, - { 589, 23, 6}, - { 593, 23, 6}, - { 592, 24, 6}, - { 600, 24, 6}, - {1294, 25, 6}, - {1301, 25, 6}, - {1153, 25, 6}, - { 256, 26, 6}, - { 254, 26, 6}, - { 697, 27, 6}, - { 695, 27, 6}, - { 465, 28, 6}, - { 649, 29, 6}, - {1079, 30, 6}, - {1076, 30, 6}, - {1227, 31, 6}, - {1226, 31, 6}, - {1004, 32, 6}, - { 983, 32, 6}, - { 636, 33, 6}, - { 635, 33, 6}, - { 214, 34, 6}, - { 170, 34, 6}, - { 997, 35, 6}, - { 964, 35, 6}, - { 654, 36, 6}, - { 653, 36, 6}, - { 482, 37, 6}, - { 481, 37, 6}, - { 543, 38, 6}, - { 541, 38, 6}, - {1003, 39, 6}, - { 982, 39, 6}, - {1009, 40, 6}, - {1010, 40, 6}, - { 940, 41, 6}, - { 925, 41, 6}, - { 999, 42, 6}, - { 969, 42, 6}, - { 660, 43, 6}, - { 659, 43, 6}, - { 663, 44, 6}, - { 661, 44, 6}, - {1081, 45, 6}, - {1262, 46, 6}, - {1258, 46, 6}, - { 998, 47, 6}, - { 966, 47, 6}, - { 474, 48, 6}, - { 473, 48, 6}, - {1149, 49, 6}, - {1117, 49, 6}, - { 791, 50, 6}, - { 790, 50, 6}, - {1001, 51, 6}, - { 971, 51, 6}, - {1000, 52, 6}, - { 970, 52, 6}, - {1123, 53, 6}, - {1162, 53, 6}, - {1271, 54, 6}, - {1287, 54, 6}, - {1022, 55, 6}, - {1023, 55, 6}, - {1021, 56, 6}, - {1020, 56, 6}, - {1061, 57, 6}, - {1027, 57, 6}, - { 622, 58, 7}, - { 646, 58, 7}, - { 255, 59, 8}, - { 244, 59, 8}, - { 300, 
60, 9}, - { 312, 60, 9}, - { 471, 61, 10}, - { 496, 61, 10}, - { 503, 62, 11}, - { 501, 62, 11}, - { 699, 63, 12}, - { 693, 63, 12}, - { 700, 64, 13}, - { 701, 64, 13}, - { 783, 65, 14}, - { 758, 65, 14}, - { 959, 66, 15}, - { 952, 66, 15}, - { 960, 67, 16}, - { 962, 67, 16}, - { 258, 68, 6}, - { 257, 68, 6}, - { 667, 69, 17}, - { 674, 69, 17}, - { 668, 70, 18}, - { 675, 70, 18}, - { 185, 71, 6}, - { 180, 71, 6}, - { 193, 72, 6}, - { 262, 73, 6}, - { 588, 74, 6}, - {1062, 75, 6}, - {1297, 76, 6}, - {1302, 77, 6}, - {1053, 78, 6}, - {1052, 79, 6}, - {1054, 80, 6}, - {1055, 81, 6}, -}; - -/* properties: 600 bytes. */ - -/* property values. */ - -RE_PropertyValue re_property_values[] = { - {1259, 0, 0}, - { 395, 0, 0}, - {1267, 0, 1}, - { 800, 0, 1}, - { 794, 0, 2}, - { 787, 0, 2}, - {1239, 0, 3}, - { 799, 0, 3}, - { 893, 0, 4}, - { 788, 0, 4}, - {1002, 0, 5}, - { 789, 0, 5}, - { 944, 0, 6}, - { 891, 0, 6}, - { 525, 0, 7}, - { 859, 0, 7}, - {1155, 0, 8}, - { 858, 0, 8}, - { 470, 0, 9}, - { 926, 0, 9}, - { 487, 0, 9}, - { 773, 0, 10}, - { 935, 0, 10}, - {1006, 0, 11}, - { 936, 0, 11}, - {1154, 0, 12}, - {1330, 0, 12}, - { 785, 0, 13}, - {1328, 0, 13}, - {1019, 0, 14}, - {1329, 0, 14}, - { 427, 0, 15}, - { 311, 0, 15}, - { 396, 0, 15}, - { 557, 0, 16}, - { 350, 0, 16}, - {1063, 0, 17}, - { 397, 0, 17}, - {1189, 0, 18}, - { 437, 0, 18}, - { 466, 0, 19}, - {1028, 0, 19}, - { 986, 0, 20}, - {1066, 0, 20}, - { 393, 0, 21}, - {1031, 0, 21}, - { 413, 0, 22}, - {1026, 0, 22}, - {1007, 0, 23}, - {1049, 0, 23}, - { 856, 0, 24}, - {1143, 0, 24}, - { 441, 0, 25}, - {1114, 0, 25}, - { 895, 0, 26}, - {1142, 0, 26}, - {1008, 0, 27}, - {1148, 0, 27}, - { 673, 0, 28}, - {1046, 0, 28}, - { 552, 0, 29}, - {1033, 0, 29}, - { 996, 0, 30}, - { 293, 0, 30}, - { 294, 0, 30}, - { 771, 0, 31}, - { 734, 0, 31}, - { 735, 0, 31}, - { 850, 0, 32}, - { 809, 0, 32}, - { 404, 0, 32}, - { 810, 0, 32}, - { 955, 0, 33}, - { 915, 0, 33}, - { 916, 0, 33}, - {1070, 0, 34}, - {1014, 0, 34}, - {1069, 0, 34}, 
- {1015, 0, 34}, - {1196, 0, 35}, - {1103, 0, 35}, - {1104, 0, 35}, - {1125, 0, 36}, - {1323, 0, 36}, - {1324, 0, 36}, - { 307, 0, 37}, - { 759, 0, 37}, - { 215, 0, 38}, - { 937, 1, 0}, - { 923, 1, 0}, - { 238, 1, 1}, - { 213, 1, 1}, - { 744, 1, 2}, - { 743, 1, 2}, - { 742, 1, 2}, - { 751, 1, 3}, - { 745, 1, 3}, - { 753, 1, 4}, - { 747, 1, 4}, - { 683, 1, 5}, - { 682, 1, 5}, - {1156, 1, 6}, - { 894, 1, 6}, - { 399, 1, 7}, - { 483, 1, 7}, - { 595, 1, 8}, - { 594, 1, 8}, - { 450, 1, 9}, - { 458, 1, 10}, - { 457, 1, 10}, - { 459, 1, 10}, - { 209, 1, 11}, - { 630, 1, 12}, - { 196, 1, 13}, - {1198, 1, 14}, - { 208, 1, 15}, - { 207, 1, 15}, - {1232, 1, 16}, - { 933, 1, 17}, - {1108, 1, 18}, - { 817, 1, 19}, - { 198, 1, 20}, - { 197, 1, 20}, - { 477, 1, 21}, - { 250, 1, 22}, - { 603, 1, 23}, - { 601, 1, 24}, - { 988, 1, 25}, - {1215, 1, 26}, - {1225, 1, 27}, - { 714, 1, 28}, - { 815, 1, 29}, - {1140, 1, 30}, - {1233, 1, 31}, - { 739, 1, 32}, - {1234, 1, 33}, - { 909, 1, 34}, - { 574, 1, 35}, - { 618, 1, 36}, - { 688, 1, 36}, - { 529, 1, 37}, - { 535, 1, 38}, - { 534, 1, 38}, - { 359, 1, 39}, - {1260, 1, 40}, - {1254, 1, 40}, - { 298, 1, 40}, - { 968, 1, 41}, - {1101, 1, 42}, - {1201, 1, 43}, - { 625, 1, 44}, - { 289, 1, 45}, - {1203, 1, 46}, - { 724, 1, 47}, - { 899, 1, 48}, - {1261, 1, 49}, - {1255, 1, 49}, - { 776, 1, 50}, - {1206, 1, 51}, - { 930, 1, 52}, - { 725, 1, 53}, - { 287, 1, 54}, - {1207, 1, 55}, - { 400, 1, 56}, - { 484, 1, 56}, - { 233, 1, 57}, - {1166, 1, 58}, - { 241, 1, 59}, - { 770, 1, 60}, - { 972, 1, 61}, - { 456, 1, 62}, - { 453, 1, 62}, - {1168, 1, 63}, - {1167, 1, 63}, - {1275, 1, 64}, - {1274, 1, 64}, - {1043, 1, 65}, - {1042, 1, 65}, - {1044, 1, 66}, - {1045, 1, 66}, - { 402, 1, 67}, - { 486, 1, 67}, - { 752, 1, 68}, - { 746, 1, 68}, - { 597, 1, 69}, - { 596, 1, 69}, - { 569, 1, 70}, - {1070, 1, 70}, - {1175, 1, 71}, - {1174, 1, 71}, - { 442, 1, 72}, - { 401, 1, 73}, - { 485, 1, 73}, - { 405, 1, 73}, - { 772, 1, 74}, - { 956, 1, 75}, - { 212, 1, 
76}, - { 854, 1, 77}, - { 855, 1, 77}, - { 883, 1, 78}, - { 888, 1, 78}, - { 428, 1, 79}, - { 987, 1, 80}, - { 965, 1, 80}, - { 518, 1, 81}, - { 517, 1, 81}, - { 274, 1, 82}, - { 265, 1, 83}, - { 570, 1, 84}, - { 880, 1, 85}, - { 887, 1, 85}, - { 488, 1, 86}, - { 878, 1, 87}, - { 884, 1, 87}, - {1177, 1, 88}, - {1170, 1, 88}, - { 281, 1, 89}, - { 280, 1, 89}, - {1178, 1, 90}, - {1171, 1, 90}, - { 879, 1, 91}, - { 885, 1, 91}, - {1180, 1, 92}, - {1176, 1, 92}, - { 881, 1, 93}, - { 877, 1, 93}, - { 579, 1, 94}, - { 754, 1, 95}, - { 748, 1, 95}, - { 430, 1, 96}, - { 576, 1, 97}, - { 575, 1, 97}, - {1236, 1, 98}, - { 532, 1, 99}, - { 530, 1, 99}, - { 454, 1, 100}, - { 451, 1, 100}, - {1181, 1, 101}, - {1187, 1, 101}, - { 380, 1, 102}, - { 379, 1, 102}, - { 713, 1, 103}, - { 712, 1, 103}, - { 655, 1, 104}, - { 651, 1, 104}, - { 383, 1, 105}, - { 382, 1, 105}, - { 641, 1, 106}, - { 716, 1, 107}, - { 268, 1, 108}, - { 617, 1, 109}, - { 410, 1, 109}, - { 711, 1, 110}, - { 270, 1, 111}, - { 269, 1, 111}, - { 381, 1, 112}, - { 719, 1, 113}, - { 717, 1, 113}, - { 522, 1, 114}, - { 521, 1, 114}, - { 368, 1, 115}, - { 366, 1, 115}, - { 385, 1, 116}, - { 374, 1, 116}, - {1318, 1, 117}, - {1317, 1, 117}, - { 384, 1, 118}, - { 365, 1, 118}, - {1320, 1, 119}, - {1319, 1, 120}, - { 786, 1, 121}, - {1269, 1, 122}, - { 455, 1, 123}, - { 452, 1, 123}, - { 235, 1, 124}, - { 896, 1, 125}, - { 755, 1, 126}, - { 749, 1, 126}, - {1195, 1, 127}, - { 407, 1, 128}, - { 666, 1, 128}, - {1035, 1, 129}, - {1112, 1, 130}, - { 479, 1, 131}, - { 478, 1, 131}, - { 720, 1, 132}, - {1085, 1, 133}, - { 619, 1, 134}, - { 689, 1, 134}, - { 692, 1, 135}, - { 913, 1, 136}, - { 911, 1, 136}, - { 352, 1, 137}, - { 912, 1, 138}, - { 910, 1, 138}, - {1208, 1, 139}, - { 865, 1, 140}, - { 864, 1, 140}, - { 533, 1, 141}, - { 531, 1, 141}, - { 756, 1, 142}, - { 750, 1, 142}, - { 361, 1, 143}, - { 360, 1, 143}, - { 863, 1, 144}, - { 621, 1, 145}, - { 616, 1, 145}, - { 620, 1, 146}, - { 690, 1, 146}, - { 639, 1, 
147}, - { 637, 1, 148}, - { 638, 1, 148}, - { 795, 1, 149}, - {1064, 1, 150}, - {1068, 1, 150}, - {1063, 1, 150}, - { 370, 1, 151}, - { 372, 1, 151}, - { 184, 1, 152}, - { 183, 1, 152}, - { 205, 1, 153}, - { 203, 1, 153}, - {1272, 1, 154}, - {1287, 1, 154}, - {1278, 1, 155}, - { 403, 1, 156}, - { 610, 1, 156}, - { 369, 1, 157}, - { 367, 1, 157}, - {1146, 1, 158}, - {1145, 1, 158}, - { 206, 1, 159}, - { 204, 1, 159}, - { 612, 1, 160}, - { 609, 1, 160}, - {1157, 1, 161}, - { 782, 1, 162}, - { 781, 1, 163}, - { 165, 1, 164}, - { 191, 1, 165}, - { 192, 1, 166}, - {1037, 1, 167}, - {1036, 1, 167}, - { 806, 1, 168}, - { 304, 1, 169}, - { 431, 1, 170}, - { 975, 1, 171}, - { 585, 1, 172}, - { 977, 1, 173}, - {1257, 1, 174}, - { 978, 1, 175}, - { 475, 1, 176}, - {1129, 1, 177}, - { 995, 1, 178}, - { 992, 1, 179}, - { 511, 1, 180}, - { 309, 1, 181}, - { 779, 1, 182}, - { 449, 1, 183}, - { 664, 1, 184}, - {1018, 1, 185}, - { 918, 1, 186}, - { 627, 1, 187}, - {1041, 1, 188}, - { 808, 1, 189}, - { 871, 1, 190}, - { 870, 1, 191}, - { 723, 1, 192}, - { 979, 1, 193}, - { 976, 1, 194}, - { 820, 1, 195}, - { 227, 1, 196}, - { 677, 1, 197}, - { 676, 1, 198}, - {1067, 1, 199}, - { 980, 1, 200}, - { 974, 1, 201}, - {1100, 1, 202}, - {1099, 1, 202}, - { 277, 1, 203}, - { 705, 1, 204}, - {1151, 1, 205}, - { 351, 1, 206}, - { 811, 1, 207}, - {1128, 1, 208}, - {1141, 1, 209}, - { 728, 1, 210}, - { 906, 1, 211}, - { 729, 1, 212}, - { 587, 1, 213}, - { 928, 1, 214}, - {1238, 1, 215}, - {1135, 1, 216}, - { 892, 1, 217}, - { 901, 1, 218}, - { 900, 1, 218}, - {1212, 1, 219}, - { 171, 1, 220}, - {1291, 1, 221}, - {1025, 1, 222}, - { 252, 1, 223}, - { 849, 1, 224}, - { 438, 1, 225}, - { 440, 1, 226}, - { 439, 1, 226}, - { 502, 1, 227}, - { 509, 1, 228}, - { 188, 1, 229}, - { 237, 1, 230}, - { 236, 1, 230}, - { 902, 1, 231}, - { 240, 1, 232}, - {1016, 1, 233}, - { 872, 1, 234}, - { 657, 1, 235}, - { 656, 1, 235}, - {1218, 1, 236}, - {1219, 1, 237}, - { 709, 1, 238}, - { 708, 1, 238}, - { 499, 1, 
239}, - {1132, 1, 240}, - { 292, 1, 241}, - { 291, 1, 241}, - { 908, 1, 242}, - { 907, 1, 242}, - { 190, 1, 243}, - { 189, 1, 243}, - {1210, 1, 244}, - {1209, 1, 244}, - { 433, 1, 245}, - { 432, 1, 245}, - { 853, 1, 246}, - { 852, 1, 246}, - {1190, 1, 247}, - { 581, 1, 248}, - { 580, 1, 248}, - { 867, 1, 249}, - { 163, 1, 250}, - { 201, 1, 251}, - { 200, 1, 251}, - { 814, 1, 252}, - { 813, 1, 252}, - { 490, 1, 253}, - { 489, 1, 253}, - {1047, 1, 254}, - { 519, 1, 255}, - { 520, 1, 255}, - { 524, 1, 256}, - { 523, 1, 256}, - { 882, 1, 257}, - { 886, 1, 257}, - { 514, 1, 258}, - { 990, 1, 259}, - {1251, 1, 260}, - {1250, 1, 260}, - { 177, 1, 261}, - { 176, 1, 261}, - { 572, 1, 262}, - { 571, 1, 262}, - {1179, 1, 263}, - {1172, 1, 263}, - {1182, 1, 264}, - {1188, 1, 264}, - { 386, 1, 265}, - { 375, 1, 265}, - { 387, 1, 266}, - { 376, 1, 266}, - { 388, 1, 267}, - { 377, 1, 267}, - { 389, 1, 268}, - { 378, 1, 268}, - { 371, 1, 269}, - { 373, 1, 269}, - {1204, 1, 270}, - {1273, 1, 271}, - {1288, 1, 271}, - {1183, 1, 272}, - {1185, 1, 272}, - {1184, 1, 273}, - {1186, 1, 273}, - {1263, 2, 0}, - {1335, 2, 0}, - { 406, 2, 1}, - {1334, 2, 1}, - { 741, 2, 2}, - { 757, 2, 2}, - { 594, 2, 3}, - { 598, 2, 3}, - { 450, 2, 4}, - { 460, 2, 4}, - { 209, 2, 5}, - { 211, 2, 5}, - { 630, 2, 6}, - { 629, 2, 6}, - { 196, 2, 7}, - { 195, 2, 7}, - {1198, 2, 8}, - {1197, 2, 8}, - {1232, 2, 9}, - {1231, 2, 9}, - { 477, 2, 10}, - { 476, 2, 10}, - { 250, 2, 11}, - { 249, 2, 11}, - { 603, 2, 12}, - { 604, 2, 12}, - { 601, 2, 13}, - { 602, 2, 13}, - { 988, 2, 14}, - { 991, 2, 14}, - {1215, 2, 15}, - {1216, 2, 15}, - {1225, 2, 16}, - {1224, 2, 16}, - { 714, 2, 17}, - { 730, 2, 17}, - { 815, 2, 18}, - { 890, 2, 18}, - {1140, 2, 19}, - {1139, 2, 19}, - {1233, 2, 20}, - { 739, 2, 21}, - { 740, 2, 21}, - {1234, 2, 22}, - {1235, 2, 22}, - { 909, 2, 23}, - { 914, 2, 23}, - { 574, 2, 24}, - { 573, 2, 24}, - { 616, 2, 25}, - { 615, 2, 25}, - { 529, 2, 26}, - { 528, 2, 26}, - { 359, 2, 27}, - { 358, 2, 
27}, - { 297, 2, 28}, - { 301, 2, 28}, - { 968, 2, 29}, - { 967, 2, 29}, - {1101, 2, 30}, - {1102, 2, 30}, - { 724, 2, 31}, - { 726, 2, 31}, - { 899, 2, 32}, - { 898, 2, 32}, - { 641, 2, 33}, - { 640, 2, 33}, - { 716, 2, 34}, - { 707, 2, 34}, - { 268, 2, 35}, - { 267, 2, 35}, - { 614, 2, 36}, - { 623, 2, 36}, - {1315, 2, 37}, - {1316, 2, 37}, - { 975, 2, 38}, - { 687, 2, 38}, - { 585, 2, 39}, - { 584, 2, 39}, - { 475, 2, 40}, - { 495, 2, 40}, - { 670, 2, 41}, - {1327, 2, 41}, - {1073, 2, 41}, - {1201, 2, 42}, - {1230, 2, 42}, - { 625, 2, 43}, - { 624, 2, 43}, - { 289, 2, 44}, - { 288, 2, 44}, - {1203, 2, 45}, - {1202, 2, 45}, - { 776, 2, 46}, - { 775, 2, 46}, - {1206, 2, 47}, - {1213, 2, 47}, - { 780, 2, 48}, - { 778, 2, 48}, - {1257, 2, 49}, - {1256, 2, 49}, - {1129, 2, 50}, - {1130, 2, 50}, - { 995, 2, 51}, - { 994, 2, 51}, - { 448, 2, 52}, - { 435, 2, 52}, - { 280, 2, 53}, - { 279, 2, 53}, - { 287, 2, 54}, - { 286, 2, 54}, - { 430, 2, 55}, - { 429, 2, 55}, - {1072, 2, 55}, - { 930, 2, 56}, - {1214, 2, 56}, - { 579, 2, 57}, - { 578, 2, 57}, - {1236, 2, 58}, - {1229, 2, 58}, - {1195, 2, 59}, - {1194, 2, 59}, - { 978, 2, 60}, - {1307, 2, 60}, - { 723, 2, 61}, - { 722, 2, 61}, - { 233, 2, 62}, - { 232, 2, 62}, - { 438, 2, 63}, - {1308, 2, 63}, - {1041, 2, 64}, - {1040, 2, 64}, - {1035, 2, 65}, - {1034, 2, 65}, - { 933, 2, 66}, - { 934, 2, 66}, - {1166, 2, 67}, - {1165, 2, 67}, - { 770, 2, 68}, - { 769, 2, 68}, - { 972, 2, 69}, - { 973, 2, 69}, - {1269, 2, 70}, - {1270, 2, 70}, - {1112, 2, 71}, - {1111, 2, 71}, - { 720, 2, 72}, - { 706, 2, 72}, - {1085, 2, 73}, - {1094, 2, 73}, - { 806, 2, 74}, - { 805, 2, 74}, - { 304, 2, 75}, - { 303, 2, 75}, - { 808, 2, 76}, - { 807, 2, 76}, - { 352, 2, 77}, - {1207, 2, 78}, - { 738, 2, 78}, - {1208, 2, 79}, - {1220, 2, 79}, - { 227, 2, 80}, - { 228, 2, 80}, - { 509, 2, 81}, - { 508, 2, 81}, - {1108, 2, 82}, - {1109, 2, 82}, - { 786, 2, 83}, - { 235, 2, 84}, - { 234, 2, 84}, - { 692, 2, 85}, - { 691, 2, 85}, - { 863, 2, 86}, - { 
904, 2, 86}, - { 664, 2, 87}, - { 210, 2, 87}, - { 979, 2, 88}, - {1110, 2, 88}, - { 677, 2, 89}, - {1065, 2, 89}, - { 676, 2, 90}, - {1038, 2, 90}, - { 980, 2, 91}, - { 989, 2, 91}, - { 705, 2, 92}, - { 732, 2, 92}, - { 241, 2, 93}, - { 242, 2, 93}, - { 277, 2, 94}, - { 276, 2, 94}, - { 817, 2, 95}, - { 816, 2, 95}, - { 351, 2, 96}, - { 295, 2, 96}, - { 870, 2, 97}, - { 868, 2, 97}, - { 871, 2, 98}, - { 869, 2, 98}, - { 872, 2, 99}, - {1048, 2, 99}, - {1128, 2, 100}, - {1133, 2, 100}, - {1151, 2, 101}, - {1150, 2, 101}, - {1212, 2, 102}, - {1211, 2, 102}, - { 309, 2, 103}, - { 169, 2, 103}, - { 240, 2, 104}, - { 239, 2, 104}, - { 499, 2, 105}, - { 498, 2, 105}, - { 511, 2, 106}, - { 510, 2, 106}, - { 587, 2, 107}, - { 586, 2, 107}, - {1016, 2, 108}, - { 644, 2, 108}, - { 728, 2, 109}, - { 727, 2, 109}, - { 779, 2, 110}, - { 777, 2, 110}, - { 811, 2, 111}, - { 812, 2, 111}, - { 820, 2, 112}, - { 819, 2, 112}, - { 867, 2, 113}, - { 866, 2, 113}, - { 892, 2, 114}, - { 902, 2, 115}, - { 903, 2, 115}, - { 976, 2, 116}, - { 921, 2, 116}, - { 918, 2, 117}, - { 924, 2, 117}, - {1018, 2, 118}, - {1017, 2, 118}, - {1025, 2, 119}, - {1024, 2, 119}, - { 977, 2, 120}, - {1032, 2, 120}, - {1067, 2, 121}, - {1039, 2, 121}, - {1135, 2, 122}, - {1134, 2, 122}, - { 729, 2, 123}, - {1137, 2, 123}, - {1238, 2, 124}, - {1237, 2, 124}, - {1291, 2, 125}, - {1290, 2, 125}, - { 171, 2, 126}, - { 188, 2, 127}, - { 643, 2, 127}, - { 627, 2, 128}, - { 626, 2, 128}, - { 906, 2, 129}, - { 905, 2, 129}, - { 974, 2, 130}, - { 647, 2, 130}, - {1136, 2, 131}, - {1127, 2, 131}, - { 163, 2, 132}, - { 164, 2, 132}, - { 252, 2, 133}, - { 253, 2, 133}, - { 849, 2, 134}, - { 848, 2, 134}, - { 928, 2, 135}, - { 992, 2, 136}, - { 993, 2, 136}, - {1218, 2, 137}, - {1217, 2, 137}, - { 718, 2, 138}, - { 645, 2, 138}, - { 996, 3, 0}, - {1309, 3, 0}, - { 493, 3, 1}, - { 494, 3, 1}, - {1138, 3, 2}, - {1158, 3, 2}, - { 631, 3, 3}, - { 642, 3, 3}, - { 436, 3, 4}, - { 774, 3, 5}, - { 929, 3, 6}, - { 935, 3, 6}, - 
{ 542, 3, 7}, - {1082, 3, 8}, - {1087, 3, 8}, - { 557, 3, 9}, - { 555, 3, 9}, - { 716, 3, 10}, - { 703, 3, 10}, - { 179, 3, 11}, - { 760, 3, 11}, - { 873, 3, 12}, - { 889, 3, 12}, - { 874, 3, 13}, - { 891, 3, 13}, - { 875, 3, 14}, - { 857, 3, 14}, - { 958, 3, 15}, - { 953, 3, 15}, - { 544, 3, 16}, - { 539, 3, 16}, - { 505, 3, 17}, - { 504, 3, 17}, - { 513, 3, 18}, - { 512, 3, 18}, - {1332, 3, 19}, - { 583, 3, 20}, - { 564, 3, 20}, - { 506, 3, 21}, - { 507, 3, 21}, - { 996, 4, 0}, - {1309, 4, 0}, - {1060, 4, 1}, - {1057, 4, 1}, - { 436, 4, 2}, - { 774, 4, 3}, - { 427, 4, 4}, - { 395, 4, 4}, - { 542, 4, 5}, - { 539, 4, 5}, - {1082, 4, 6}, - {1087, 4, 6}, - {1155, 4, 7}, - {1143, 4, 7}, - { 734, 4, 8}, - {1268, 4, 9}, - {1200, 4, 10}, - { 801, 4, 11}, - { 803, 4, 12}, - { 505, 4, 13}, - { 504, 4, 13}, - { 513, 4, 14}, - { 512, 4, 14}, - {1332, 4, 15}, - { 583, 4, 16}, - { 564, 4, 16}, - { 506, 4, 17}, - { 507, 4, 17}, - { 996, 5, 0}, - {1309, 5, 0}, - { 436, 5, 1}, - { 774, 5, 2}, - { 542, 5, 3}, - { 539, 5, 3}, - {1124, 5, 4}, - {1118, 5, 4}, - { 557, 5, 5}, - { 555, 5, 5}, - {1152, 5, 6}, - { 792, 5, 7}, - { 789, 5, 7}, - {1265, 5, 8}, - {1264, 5, 8}, - { 981, 5, 9}, - { 760, 5, 9}, - { 958, 5, 10}, - { 953, 5, 10}, - { 221, 5, 11}, - { 216, 5, 11}, - {1162, 5, 12}, - {1161, 5, 12}, - { 391, 5, 13}, - { 390, 5, 13}, - {1115, 5, 14}, - {1114, 5, 14}, - { 936, 6, 0}, - { 915, 6, 0}, - { 545, 6, 0}, - { 546, 6, 0}, - {1314, 6, 1}, - {1310, 6, 1}, - {1200, 6, 1}, - {1252, 6, 1}, - { 947, 7, 0}, - { 917, 7, 0}, - { 761, 7, 1}, - { 734, 7, 1}, - {1285, 7, 2}, - {1268, 7, 2}, - {1248, 7, 3}, - {1200, 7, 3}, - { 802, 7, 4}, - { 801, 7, 4}, - { 804, 7, 5}, - { 803, 7, 5}, - { 765, 8, 0}, - { 734, 8, 0}, - {1090, 8, 1}, - {1080, 8, 1}, - { 536, 8, 2}, - { 515, 8, 2}, - { 537, 8, 3}, - { 526, 8, 3}, - { 538, 8, 4}, - { 527, 8, 4}, - { 202, 8, 5}, - { 187, 8, 5}, - { 408, 8, 6}, - { 437, 8, 6}, - {1019, 8, 7}, - { 229, 8, 7}, - {1120, 8, 8}, - {1103, 8, 8}, - {1294, 8, 9}, - 
{1300, 8, 9}, - {1005, 8, 10}, - { 984, 8, 10}, - { 273, 8, 11}, - { 266, 8, 11}, - { 944, 8, 12}, - { 951, 8, 12}, - { 199, 8, 13}, - { 174, 8, 13}, - { 768, 8, 14}, - { 798, 8, 14}, - {1093, 8, 15}, - {1097, 8, 15}, - { 766, 8, 16}, - { 796, 8, 16}, - {1091, 8, 17}, - {1095, 8, 17}, - {1050, 8, 18}, - {1029, 8, 18}, - { 767, 8, 19}, - { 797, 8, 19}, - {1092, 8, 20}, - {1096, 8, 20}, - { 554, 8, 21}, - { 560, 8, 21}, - {1051, 8, 22}, - {1030, 8, 22}, - { 948, 9, 0}, - { 1, 9, 0}, - { 949, 9, 0}, - {1012, 9, 1}, - { 2, 9, 1}, - {1011, 9, 1}, - { 954, 9, 2}, - { 135, 9, 2}, - { 932, 9, 2}, - { 710, 9, 3}, - { 144, 9, 3}, - { 733, 9, 3}, - {1279, 9, 4}, - { 151, 9, 4}, - {1286, 9, 4}, - { 313, 9, 5}, - { 17, 9, 5}, - { 316, 9, 6}, - { 28, 9, 6}, - { 318, 9, 7}, - { 32, 9, 7}, - { 321, 9, 8}, - { 35, 9, 8}, - { 325, 9, 9}, - { 40, 9, 9}, - { 326, 9, 10}, - { 41, 9, 10}, - { 327, 9, 11}, - { 43, 9, 11}, - { 328, 9, 12}, - { 44, 9, 12}, - { 329, 9, 13}, - { 46, 9, 13}, - { 330, 9, 14}, - { 47, 9, 14}, - { 331, 9, 15}, - { 51, 9, 15}, - { 332, 9, 16}, - { 57, 9, 16}, - { 333, 9, 17}, - { 62, 9, 17}, - { 334, 9, 18}, - { 68, 9, 18}, - { 335, 9, 19}, - { 73, 9, 19}, - { 336, 9, 20}, - { 75, 9, 20}, - { 337, 9, 21}, - { 76, 9, 21}, - { 338, 9, 22}, - { 77, 9, 22}, - { 339, 9, 23}, - { 78, 9, 23}, - { 340, 9, 24}, - { 79, 9, 24}, - { 341, 9, 25}, - { 88, 9, 25}, - { 342, 9, 26}, - { 93, 9, 26}, - { 343, 9, 27}, - { 94, 9, 27}, - { 344, 9, 28}, - { 95, 9, 28}, - { 345, 9, 29}, - { 96, 9, 29}, - { 346, 9, 30}, - { 97, 9, 30}, - { 347, 9, 31}, - { 98, 9, 31}, - { 348, 9, 32}, - { 150, 9, 32}, - { 349, 9, 33}, - { 158, 9, 33}, - { 314, 9, 34}, - { 26, 9, 34}, - { 315, 9, 35}, - { 27, 9, 35}, - { 317, 9, 36}, - { 31, 9, 36}, - { 319, 9, 37}, - { 33, 9, 37}, - { 320, 9, 38}, - { 34, 9, 38}, - { 322, 9, 39}, - { 37, 9, 39}, - { 323, 9, 40}, - { 38, 9, 40}, - { 224, 9, 41}, - { 56, 9, 41}, - { 219, 9, 41}, - { 222, 9, 42}, - { 58, 9, 42}, - { 217, 9, 42}, - { 223, 9, 43}, - { 59, 9, 
43}, - { 218, 9, 43}, - { 247, 9, 44}, - { 61, 9, 44}, - { 261, 9, 44}, - { 246, 9, 45}, - { 63, 9, 45}, - { 229, 9, 45}, - { 248, 9, 46}, - { 64, 9, 46}, - { 275, 9, 46}, - { 762, 9, 47}, - { 65, 9, 47}, - { 734, 9, 47}, - {1088, 9, 48}, - { 66, 9, 48}, - {1080, 9, 48}, - { 161, 9, 49}, - { 67, 9, 49}, - { 174, 9, 49}, - { 160, 9, 50}, - { 69, 9, 50}, - { 159, 9, 50}, - { 162, 9, 51}, - { 70, 9, 51}, - { 194, 9, 51}, - { 492, 9, 52}, - { 71, 9, 52}, - { 467, 9, 52}, - { 491, 9, 53}, - { 72, 9, 53}, - { 462, 9, 53}, - { 681, 9, 54}, - { 74, 9, 54}, - { 684, 9, 54}, - { 324, 9, 55}, - { 39, 9, 55}, - { 225, 9, 56}, - { 52, 9, 56}, - { 220, 9, 56}, - { 941, 10, 0}, - { 299, 10, 1}, - { 296, 10, 1}, - { 409, 10, 2}, - { 398, 10, 2}, - { 556, 10, 3}, - { 938, 10, 4}, - { 923, 10, 4}, - { 672, 10, 5}, - { 671, 10, 5}, - { 861, 10, 6}, - { 860, 10, 6}, - { 551, 10, 7}, - { 550, 10, 7}, - { 686, 10, 8}, - { 685, 10, 8}, - { 363, 10, 9}, - { 516, 10, 9}, - {1173, 10, 10}, - {1169, 10, 10}, - {1164, 10, 11}, - {1277, 10, 12}, - {1276, 10, 12}, - {1295, 10, 13}, - { 922, 10, 14}, - { 920, 10, 14}, - {1144, 10, 15}, - {1147, 10, 15}, - {1160, 10, 16}, - {1159, 10, 16}, - { 559, 10, 17}, - { 558, 10, 17}, - { 927, 11, 0}, - { 915, 11, 0}, - { 186, 11, 1}, - { 159, 11, 1}, - { 611, 11, 2}, - { 605, 11, 2}, - {1295, 11, 3}, - {1289, 11, 3}, - { 561, 11, 4}, - { 545, 11, 4}, - { 922, 11, 5}, - { 917, 11, 5}, - { 939, 12, 0}, - { 173, 12, 1}, - { 175, 12, 2}, - { 178, 12, 3}, - { 245, 12, 4}, - { 251, 12, 5}, - { 463, 12, 6}, - { 464, 12, 7}, - { 500, 12, 8}, - { 549, 12, 9}, - { 553, 12, 10}, - { 562, 12, 11}, - { 563, 12, 12}, - { 608, 12, 13}, - { 613, 12, 14}, - {1223, 12, 14}, - { 628, 12, 15}, - { 632, 12, 16}, - { 633, 12, 17}, - { 634, 12, 18}, - { 704, 12, 19}, - { 715, 12, 20}, - { 731, 12, 21}, - { 736, 12, 22}, - { 737, 12, 23}, - { 862, 12, 24}, - { 876, 12, 25}, - { 946, 12, 26}, - { 961, 12, 27}, - {1031, 12, 28}, - {1074, 12, 29}, - {1075, 12, 30}, - {1084, 12, 
31}, - {1086, 12, 32}, - {1106, 12, 33}, - {1107, 12, 34}, - {1119, 12, 35}, - {1121, 12, 36}, - {1131, 12, 37}, - {1191, 12, 38}, - {1205, 12, 39}, - {1221, 12, 40}, - {1222, 12, 41}, - {1228, 12, 42}, - {1292, 12, 43}, - {1199, 12, 44}, - {1311, 12, 45}, - {1312, 12, 46}, - {1313, 12, 47}, - {1321, 12, 48}, - {1322, 12, 49}, - {1325, 12, 50}, - {1326, 12, 51}, - { 721, 12, 52}, - { 548, 12, 53}, - { 290, 12, 54}, - { 547, 12, 55}, - { 963, 12, 56}, - {1098, 12, 57}, - {1163, 12, 58}, - { 821, 12, 59}, - { 822, 12, 60}, - { 823, 12, 61}, - { 824, 12, 62}, - { 825, 12, 63}, - { 826, 12, 64}, - { 827, 12, 65}, - { 828, 12, 66}, - { 829, 12, 67}, - { 830, 12, 68}, - { 831, 12, 69}, - { 832, 12, 70}, - { 833, 12, 71}, - { 834, 12, 72}, - { 835, 12, 73}, - { 836, 12, 74}, - { 837, 12, 75}, - { 838, 12, 76}, - { 839, 12, 77}, - { 840, 12, 78}, - { 841, 12, 79}, - { 842, 12, 80}, - { 843, 12, 81}, - { 844, 12, 82}, - { 845, 12, 83}, - { 846, 12, 84}, - { 847, 12, 85}, - { 166, 12, 86}, - { 168, 12, 87}, - { 167, 12, 88}, - { 943, 13, 0}, - {1253, 13, 0}, - { 696, 13, 1}, - { 293, 13, 1}, - { 497, 13, 2}, - { 461, 13, 2}, - {1089, 13, 3}, - {1080, 13, 3}, - { 764, 13, 4}, - { 734, 13, 4}, - {1249, 13, 5}, - {1200, 13, 5}, - {1263, 14, 0}, - {1309, 14, 0}, - { 986, 14, 1}, - { 985, 14, 1}, - { 393, 14, 2}, - { 390, 14, 2}, - {1078, 14, 3}, - {1077, 14, 3}, - { 582, 14, 4}, - { 577, 14, 4}, - { 945, 14, 5}, - { 950, 14, 5}, - { 540, 14, 6}, - { 539, 14, 6}, - { 285, 14, 7}, - {1192, 14, 7}, - { 669, 14, 8}, - { 684, 14, 8}, - {1059, 14, 9}, - {1058, 14, 9}, - {1056, 14, 10}, - {1049, 14, 10}, - { 958, 14, 11}, - { 953, 14, 11}, - { 182, 14, 12}, - { 174, 14, 12}, - { 654, 14, 13}, - { 650, 14, 13}, - { 678, 14, 14}, - { 665, 14, 14}, - { 679, 14, 14}, - { 649, 14, 15}, - { 648, 14, 15}, - { 404, 14, 16}, - { 394, 14, 16}, - { 283, 14, 17}, - { 243, 14, 17}, - { 282, 14, 18}, - { 231, 14, 18}, - {1153, 14, 19}, - {1152, 14, 19}, - { 818, 14, 20}, - { 260, 14, 20}, - { 305, 
14, 21}, - { 436, 14, 21}, - { 784, 14, 22}, - { 774, 14, 22}, - { 426, 14, 23}, - { 310, 14, 23}, - { 411, 14, 24}, - {1105, 14, 24}, - { 186, 14, 25}, - { 172, 14, 25}, - { 284, 14, 26}, - { 230, 14, 26}, - {1189, 14, 27}, - {1126, 14, 27}, - {1333, 14, 28}, - {1331, 14, 28}, - { 931, 14, 29}, - { 935, 14, 29}, - {1299, 14, 30}, - {1296, 14, 30}, - { 694, 14, 31}, - { 702, 14, 32}, - { 701, 14, 33}, - { 606, 14, 34}, - { 607, 14, 35}, - { 392, 14, 36}, - { 434, 14, 36}, - { 631, 14, 37}, - { 642, 14, 37}, - { 412, 14, 38}, - { 364, 14, 38}, - {1082, 14, 39}, - {1087, 14, 39}, - { 505, 14, 40}, - { 504, 14, 40}, - { 513, 14, 41}, - { 512, 14, 41}, - {1332, 14, 42}, - { 941, 15, 0}, - { 958, 15, 1}, - { 953, 15, 1}, - { 487, 15, 2}, - { 480, 15, 2}, - { 469, 15, 3}, - { 468, 15, 3}, - { 919, 16, 0}, - { 0, 16, 1}, - { 1, 16, 2}, - { 6, 16, 3}, - { 11, 16, 4}, - { 87, 16, 5}, - { 8, 16, 6}, - { 5, 16, 7}, - { 4, 16, 8}, - { 3, 16, 9}, - { 16, 16, 10}, - { 15, 16, 11}, - { 14, 16, 12}, - { 83, 16, 13}, - { 13, 16, 14}, - { 81, 16, 15}, - { 12, 16, 16}, - { 10, 16, 17}, - { 9, 16, 18}, - { 86, 16, 19}, - { 50, 16, 20}, - { 120, 16, 21}, - { 7, 16, 22}, - { 136, 16, 23}, - { 85, 16, 24}, - { 123, 16, 25}, - { 49, 16, 26}, - { 84, 16, 27}, - { 103, 16, 28}, - { 122, 16, 29}, - { 138, 16, 30}, - { 29, 16, 31}, - { 2, 16, 32}, - { 82, 16, 33}, - { 48, 16, 34}, - { 121, 16, 35}, - { 80, 16, 36}, - { 137, 16, 37}, - { 102, 16, 38}, - { 152, 16, 39}, - { 119, 16, 40}, - { 30, 16, 41}, - { 129, 16, 42}, - { 36, 16, 43}, - { 135, 16, 44}, - { 42, 16, 45}, - { 144, 16, 46}, - { 45, 16, 47}, - { 151, 16, 48}, - { 17, 16, 49}, - { 28, 16, 50}, - { 32, 16, 51}, - { 35, 16, 52}, - { 40, 16, 53}, - { 41, 16, 54}, - { 43, 16, 55}, - { 44, 16, 56}, - { 46, 16, 57}, - { 47, 16, 58}, - { 51, 16, 59}, - { 57, 16, 60}, - { 62, 16, 61}, - { 68, 16, 62}, - { 73, 16, 63}, - { 75, 16, 64}, - { 76, 16, 65}, - { 77, 16, 66}, - { 78, 16, 67}, - { 79, 16, 68}, - { 88, 16, 69}, - { 93, 16, 70}, - 
{ 94, 16, 71}, - { 95, 16, 72}, - { 96, 16, 73}, - { 97, 16, 74}, - { 98, 16, 75}, - { 99, 16, 76}, - { 100, 16, 77}, - { 101, 16, 78}, - { 104, 16, 79}, - { 109, 16, 80}, - { 110, 16, 81}, - { 111, 16, 82}, - { 113, 16, 83}, - { 114, 16, 84}, - { 115, 16, 85}, - { 116, 16, 86}, - { 117, 16, 87}, - { 118, 16, 88}, - { 124, 16, 89}, - { 130, 16, 90}, - { 139, 16, 91}, - { 145, 16, 92}, - { 153, 16, 93}, - { 18, 16, 94}, - { 52, 16, 95}, - { 89, 16, 96}, - { 105, 16, 97}, - { 125, 16, 98}, - { 131, 16, 99}, - { 140, 16, 100}, - { 146, 16, 101}, - { 154, 16, 102}, - { 19, 16, 103}, - { 53, 16, 104}, - { 90, 16, 105}, - { 106, 16, 106}, - { 126, 16, 107}, - { 132, 16, 108}, - { 141, 16, 109}, - { 147, 16, 110}, - { 155, 16, 111}, - { 20, 16, 112}, - { 54, 16, 113}, - { 91, 16, 114}, - { 107, 16, 115}, - { 127, 16, 116}, - { 133, 16, 117}, - { 142, 16, 118}, - { 148, 16, 119}, - { 156, 16, 120}, - { 21, 16, 121}, - { 55, 16, 122}, - { 60, 16, 123}, - { 92, 16, 124}, - { 108, 16, 125}, - { 112, 16, 126}, - { 128, 16, 127}, - { 134, 16, 128}, - { 143, 16, 129}, - { 149, 16, 130}, - { 157, 16, 131}, - { 22, 16, 132}, - { 23, 16, 133}, - { 24, 16, 134}, - { 25, 16, 135}, - { 917, 17, 0}, - {1088, 17, 1}, - { 762, 17, 2}, - {1281, 17, 3}, - { 763, 17, 4}, - {1242, 17, 5}, - { 271, 17, 6}, - {1243, 17, 7}, - {1247, 17, 8}, - {1245, 17, 9}, - {1246, 17, 10}, - { 272, 17, 11}, - {1244, 17, 12}, - {1013, 17, 13}, - { 996, 18, 0}, - { 259, 18, 1}, - {1280, 18, 2}, - { 226, 18, 3}, - { 954, 18, 4}, - {1279, 18, 5}, - {1071, 18, 6}, - { 680, 18, 7}, - {1284, 18, 8}, - {1283, 18, 9}, - {1282, 18, 10}, - { 420, 18, 11}, - { 414, 18, 12}, - { 415, 18, 13}, - { 425, 18, 14}, - { 422, 18, 15}, - { 421, 18, 16}, - { 424, 18, 17}, - { 423, 18, 18}, - { 419, 18, 19}, - { 416, 18, 20}, - { 417, 18, 21}, - { 897, 18, 22}, - {1240, 18, 23}, - {1241, 18, 24}, - { 567, 18, 25}, - { 302, 18, 26}, - {1083, 18, 27}, - {1193, 18, 28}, - { 418, 18, 29}, - { 942, 18, 30}, - { 698, 18, 31}, - { 957, 
18, 32}, - { 955, 18, 33}, - { 278, 18, 34}, -}; - -/* property values: 5876 bytes. */ - -/* Codepoints which expand on full case-folding. */ - -RE_UINT16 re_expand_on_folding[] = { - 223, 304, 329, 496, 912, 944, 1415, 7830, - 7831, 7832, 7833, 7834, 7838, 8016, 8018, 8020, - 8022, 8064, 8065, 8066, 8067, 8068, 8069, 8070, - 8071, 8072, 8073, 8074, 8075, 8076, 8077, 8078, - 8079, 8080, 8081, 8082, 8083, 8084, 8085, 8086, - 8087, 8088, 8089, 8090, 8091, 8092, 8093, 8094, - 8095, 8096, 8097, 8098, 8099, 8100, 8101, 8102, - 8103, 8104, 8105, 8106, 8107, 8108, 8109, 8110, - 8111, 8114, 8115, 8116, 8118, 8119, 8124, 8130, - 8131, 8132, 8134, 8135, 8140, 8146, 8147, 8150, - 8151, 8162, 8163, 8164, 8166, 8167, 8178, 8179, - 8180, 8182, 8183, 8188, 64256, 64257, 64258, 64259, - 64260, 64261, 64262, 64275, 64276, 64277, 64278, 64279, -}; - -/* expand_on_folding: 208 bytes. */ - -/* General_Category. */ - -static RE_UINT8 re_general_category_stage_1[] = { - 0, 1, 2, 3, 4, 5, 5, 5, 5, 6, 7, 5, 5, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 5, 18, 16, 16, 19, 16, 20, 21, 22, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 23, 24, 25, 16, 16, 26, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 27, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 28, - 9, 9, 9, 9, 9, 9, 9, 
9, 9, 9, 9, 9, 9, 9, 9, 28, -}; - -static RE_UINT8 re_general_category_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 34, 35, 36, 37, 38, 39, 34, 34, 34, 40, 41, 42, 43, - 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 64, 65, 66, 67, 68, 69, 70, 71, 69, 72, 73, - 69, 69, 64, 74, 64, 64, 75, 76, 77, 78, 79, 80, 81, 82, 69, 83, - 84, 85, 86, 87, 88, 89, 69, 69, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 90, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 91, - 92, 34, 34, 34, 34, 34, 34, 34, 34, 93, 34, 34, 94, 95, 96, 97, - 98, 99, 100, 101, 102, 103, 104, 105, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 106, - 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, - 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, - 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, - 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, - 108, 108, 34, 34, 109, 110, 111, 112, 34, 34, 113, 114, 115, 116, 117, 118, - 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 123, 34, 34, 130, 123, - 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 123, 123, 141, 123, 123, 123, - 142, 143, 144, 145, 146, 147, 148, 123, 149, 150, 123, 151, 152, 153, 154, 123, - 123, 155, 123, 123, 123, 156, 123, 123, 157, 158, 123, 123, 123, 123, 123, 123, - 34, 34, 34, 34, 34, 34, 34, 159, 160, 34, 161, 123, 123, 123, 
123, 123, - 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - 34, 34, 34, 34, 34, 34, 34, 34, 162, 123, 123, 123, 123, 123, 123, 123, - 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - 123, 123, 123, 123, 123, 123, 123, 123, 34, 34, 34, 34, 163, 123, 123, 123, - 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - 34, 34, 34, 34, 164, 165, 166, 167, 123, 123, 123, 123, 123, 123, 168, 169, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 170, - 34, 34, 34, 34, 34, 171, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - 172, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - 123, 123, 123, 123, 123, 123, 123, 123, 173, 174, 123, 123, 123, 123, 123, 123, - 69, 175, 176, 177, 178, 123, 179, 123, 180, 181, 182, 183, 184, 185, 186, 187, - 69, 69, 69, 69, 188, 189, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - 190, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - 34, 191, 192, 123, 123, 123, 123, 123, 123, 123, 123, 123, 193, 194, 123, 123, - 195, 196, 197, 198, 199, 123, 69, 200, 69, 69, 69, 69, 69, 201, 202, 203, - 204, 205, 206, 207, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 208, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 209, 34, - 210, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 211, 123, 123, - 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 
123, - 34, 34, 34, 34, 212, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - 213, 123, 214, 215, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, - 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 216, -}; - -static RE_UINT16 re_general_category_stage_3[] = { - 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 7, 8, 9, 10, 11, 12, - 13, 13, 13, 14, 15, 13, 13, 16, 17, 18, 19, 20, 21, 22, 13, 23, - 13, 13, 13, 24, 25, 11, 11, 11, 11, 26, 11, 27, 28, 29, 30, 31, - 32, 32, 32, 32, 32, 32, 32, 33, 34, 35, 36, 11, 37, 38, 13, 39, - 9, 9, 9, 11, 11, 11, 13, 13, 40, 13, 13, 13, 41, 13, 13, 13, - 13, 13, 13, 42, 9, 43, 44, 11, 45, 46, 32, 47, 48, 49, 50, 51, - 52, 53, 49, 49, 54, 32, 55, 56, 49, 49, 49, 49, 49, 57, 58, 59, - 60, 61, 49, 32, 62, 49, 49, 49, 49, 49, 63, 64, 65, 49, 66, 67, - 49, 68, 69, 70, 49, 71, 72, 72, 72, 72, 49, 73, 72, 74, 75, 32, - 76, 49, 49, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 83, 84, 91, 92, 93, 94, 95, 96, 97, 84, 98, 99, 100, 88, 101, - 102, 83, 84, 103, 104, 105, 88, 106, 107, 108, 109, 110, 111, 112, 94, 113, - 114, 115, 84, 116, 117, 118, 88, 119, 120, 115, 84, 121, 122, 123, 88, 124, - 125, 115, 49, 126, 127, 128, 88, 129, 130, 131, 49, 132, 133, 134, 94, 135, - 136, 49, 49, 137, 138, 139, 72, 72, 140, 141, 142, 143, 144, 145, 72, 72, - 146, 147, 148, 149, 150, 49, 151, 152, 153, 154, 32, 155, 156, 157, 72, 72, - 49, 49, 158, 159, 160, 161, 162, 163, 164, 165, 9, 9, 166, 49, 49, 167, - 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 168, 169, 49, 49, - 168, 49, 49, 170, 171, 172, 49, 49, 49, 171, 49, 49, 49, 173, 174, 175, - 49, 176, 9, 9, 9, 9, 9, 177, 178, 49, 49, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 179, 49, 180, 181, 49, 49, 49, 49, 182, 183, - 184, 185, 49, 186, 49, 187, 184, 188, 49, 49, 49, 189, 190, 191, 192, 193, - 
194, 192, 49, 49, 195, 49, 49, 196, 197, 49, 198, 49, 49, 49, 49, 199, - 49, 200, 201, 202, 203, 49, 204, 205, 49, 49, 206, 49, 207, 208, 209, 209, - 49, 210, 49, 49, 49, 211, 212, 213, 192, 192, 214, 215, 72, 72, 72, 72, - 216, 49, 49, 217, 218, 160, 219, 220, 221, 49, 222, 65, 49, 49, 223, 224, - 49, 49, 225, 226, 227, 65, 49, 228, 229, 72, 72, 72, 230, 231, 232, 233, - 11, 11, 234, 27, 27, 27, 235, 236, 11, 237, 27, 27, 32, 32, 32, 238, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 239, 13, 13, 13, 13, 13, 13, - 240, 241, 240, 240, 241, 242, 240, 243, 244, 244, 244, 245, 246, 247, 248, 249, - 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 72, 262, 263, 264, - 265, 266, 267, 268, 269, 270, 271, 271, 272, 273, 274, 209, 275, 276, 209, 277, - 278, 278, 278, 278, 278, 278, 278, 278, 279, 209, 280, 209, 209, 209, 209, 281, - 209, 282, 278, 283, 209, 284, 285, 286, 209, 209, 287, 72, 288, 72, 270, 270, - 270, 289, 209, 209, 209, 209, 290, 270, 209, 209, 209, 209, 209, 209, 209, 209, - 209, 209, 209, 291, 292, 209, 209, 293, 209, 209, 209, 209, 209, 209, 294, 209, - 209, 209, 209, 209, 209, 209, 295, 296, 270, 297, 209, 209, 298, 278, 299, 278, - 300, 301, 278, 278, 278, 302, 278, 303, 209, 209, 209, 278, 304, 209, 209, 305, - 209, 306, 209, 307, 308, 309, 310, 72, 9, 9, 311, 11, 11, 312, 313, 314, - 13, 13, 13, 13, 13, 13, 315, 316, 11, 11, 317, 49, 49, 49, 318, 319, - 49, 320, 321, 321, 321, 321, 32, 32, 322, 323, 324, 325, 326, 72, 72, 72, - 209, 327, 209, 209, 209, 209, 209, 328, 209, 209, 209, 209, 209, 329, 72, 330, - 331, 332, 333, 334, 136, 49, 49, 49, 49, 335, 178, 49, 49, 49, 49, 336, - 337, 49, 204, 136, 49, 49, 49, 49, 200, 338, 49, 50, 209, 209, 328, 49, - 209, 286, 339, 209, 340, 341, 209, 209, 339, 209, 209, 341, 209, 209, 209, 286, - 49, 49, 49, 199, 209, 209, 209, 209, 49, 49, 49, 49, 49, 199, 72, 72, - 49, 342, 49, 49, 49, 49, 49, 49, 151, 209, 209, 209, 287, 49, 49, 228, - 343, 49, 344, 72, 13, 13, 345, 346, 13, 347, 49, 49, 49, 49, 348, 349, - 
31, 350, 351, 352, 13, 13, 13, 353, 354, 355, 356, 357, 72, 72, 72, 358, - 359, 49, 360, 361, 49, 49, 49, 362, 363, 49, 49, 364, 365, 192, 32, 366, - 65, 49, 367, 49, 368, 369, 49, 151, 76, 49, 49, 370, 371, 372, 373, 374, - 49, 49, 375, 376, 377, 378, 49, 379, 49, 49, 49, 380, 381, 382, 383, 384, - 385, 386, 321, 11, 11, 387, 388, 11, 11, 11, 11, 11, 49, 49, 389, 192, - 49, 49, 390, 49, 391, 49, 49, 206, 392, 392, 392, 392, 392, 392, 392, 392, - 393, 393, 393, 393, 393, 393, 393, 393, 49, 49, 49, 49, 49, 49, 204, 49, - 49, 49, 49, 49, 49, 207, 72, 72, 394, 395, 396, 397, 398, 49, 49, 49, - 49, 49, 49, 399, 400, 401, 49, 49, 49, 49, 49, 402, 72, 49, 49, 49, - 49, 403, 49, 49, 196, 72, 72, 404, 32, 405, 32, 406, 407, 408, 409, 410, - 49, 49, 49, 49, 49, 49, 49, 411, 412, 2, 3, 4, 5, 413, 414, 415, - 49, 416, 49, 200, 417, 418, 419, 420, 421, 49, 172, 422, 204, 204, 72, 72, - 49, 49, 49, 49, 49, 49, 49, 50, 423, 270, 270, 424, 271, 271, 271, 425, - 426, 330, 427, 72, 72, 209, 209, 428, 72, 72, 72, 72, 72, 72, 72, 72, - 49, 151, 49, 49, 49, 100, 429, 430, 49, 49, 431, 49, 432, 49, 49, 433, - 49, 434, 49, 49, 435, 436, 72, 72, 9, 9, 437, 11, 11, 49, 49, 49, - 49, 204, 192, 9, 9, 438, 11, 439, 49, 49, 196, 49, 49, 49, 440, 72, - 49, 49, 49, 320, 49, 199, 196, 72, 441, 49, 49, 442, 49, 443, 49, 444, - 49, 200, 445, 72, 72, 72, 49, 446, 49, 447, 49, 448, 72, 72, 72, 72, - 49, 49, 49, 449, 270, 450, 270, 270, 451, 452, 49, 453, 454, 455, 49, 456, - 49, 457, 72, 72, 458, 49, 459, 460, 49, 49, 49, 461, 49, 462, 49, 463, - 49, 464, 465, 72, 72, 72, 72, 72, 49, 49, 49, 49, 466, 72, 72, 72, - 9, 9, 9, 467, 11, 11, 11, 468, 72, 72, 72, 72, 72, 72, 270, 469, - 470, 49, 49, 471, 472, 450, 473, 474, 221, 49, 49, 475, 476, 49, 466, 192, - 477, 49, 478, 479, 480, 49, 49, 481, 221, 49, 49, 482, 483, 484, 485, 486, - 49, 97, 487, 488, 72, 72, 72, 72, 489, 490, 491, 49, 49, 492, 493, 192, - 494, 83, 84, 98, 495, 496, 497, 498, 49, 49, 49, 499, 500, 501, 72, 72, - 49, 49, 49, 502, 503, 
192, 72, 72, 49, 49, 504, 505, 506, 507, 72, 72, - 49, 49, 49, 508, 509, 192, 510, 72, 49, 49, 511, 512, 192, 72, 72, 72, - 49, 513, 514, 515, 72, 72, 72, 72, 72, 72, 9, 9, 11, 11, 148, 516, - 72, 72, 72, 72, 49, 49, 49, 466, 84, 49, 504, 517, 518, 148, 175, 519, - 49, 520, 521, 522, 72, 72, 72, 72, 49, 207, 72, 72, 72, 72, 72, 72, - 271, 271, 271, 271, 271, 271, 523, 524, 49, 49, 49, 49, 390, 72, 72, 72, - 49, 49, 200, 72, 72, 72, 72, 72, 49, 49, 49, 49, 320, 72, 72, 72, - 49, 49, 49, 466, 49, 200, 372, 72, 72, 72, 72, 72, 72, 49, 204, 525, - 49, 49, 49, 526, 527, 528, 529, 530, 49, 72, 72, 72, 72, 72, 72, 72, - 49, 49, 49, 49, 205, 531, 532, 533, 474, 534, 72, 72, 72, 72, 535, 72, - 49, 49, 49, 49, 49, 49, 151, 72, 49, 49, 49, 49, 49, 49, 49, 536, - 537, 72, 72, 72, 72, 72, 72, 72, 49, 49, 49, 49, 49, 49, 50, 151, - 466, 538, 539, 72, 72, 72, 72, 72, 209, 209, 209, 209, 209, 209, 209, 329, - 209, 209, 540, 209, 209, 209, 541, 542, 543, 209, 544, 209, 209, 209, 545, 72, - 209, 209, 209, 209, 546, 72, 72, 72, 209, 209, 209, 209, 209, 287, 270, 547, - 9, 548, 11, 549, 550, 551, 240, 9, 552, 553, 554, 555, 556, 9, 548, 11, - 557, 558, 11, 559, 560, 561, 562, 9, 563, 11, 9, 548, 11, 549, 550, 11, - 240, 9, 552, 562, 9, 563, 11, 9, 548, 11, 564, 9, 565, 566, 567, 568, - 11, 569, 9, 570, 571, 572, 573, 11, 574, 9, 575, 11, 576, 577, 577, 577, - 32, 32, 32, 578, 32, 32, 579, 580, 581, 582, 46, 72, 72, 72, 72, 72, - 583, 584, 585, 72, 72, 72, 72, 72, 49, 49, 49, 49, 586, 587, 72, 72, - 9, 9, 552, 11, 588, 372, 72, 72, 589, 49, 590, 591, 592, 593, 594, 595, - 596, 206, 597, 206, 72, 72, 72, 598, 209, 209, 330, 209, 209, 209, 209, 209, - 209, 328, 286, 599, 599, 599, 209, 329, 175, 209, 286, 209, 209, 209, 330, 209, - 209, 209, 600, 72, 72, 72, 601, 209, 602, 209, 209, 330, 545, 309, 72, 72, - 209, 209, 209, 209, 209, 209, 209, 603, 209, 209, 209, 209, 209, 602, 600, 287, - 209, 209, 209, 209, 209, 209, 209, 328, 209, 209, 209, 209, 209, 604, 72, 72, - 330, 209, 209, 209, 
605, 176, 209, 209, 605, 209, 606, 72, 72, 72, 72, 72, - 72, 286, 605, 607, 330, 286, 72, 72, 209, 309, 72, 72, 427, 72, 72, 72, - 49, 49, 49, 49, 49, 320, 72, 72, 49, 49, 49, 205, 49, 49, 49, 49, - 49, 204, 49, 49, 49, 49, 49, 49, 49, 49, 537, 72, 72, 72, 72, 72, - 49, 204, 72, 72, 72, 72, 72, 72, 608, 72, 609, 609, 609, 609, 609, 609, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 72, - 393, 393, 393, 393, 393, 393, 393, 610, -}; - -static RE_UINT8 re_general_category_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 2, 4, 5, 6, 2, - 7, 7, 7, 7, 7, 2, 8, 9, 10, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 17, 18, 19, 1, 20, 20, 21, 22, 23, 24, 25, - 26, 27, 15, 2, 28, 29, 27, 30, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 31, 11, 11, 11, 32, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 33, 16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 34, 34, 34, 34, 34, 34, 34, 34, 16, 32, 32, 32, - 32, 32, 32, 32, 11, 34, 34, 16, 34, 32, 32, 11, 34, 11, 16, 11, - 11, 34, 32, 11, 32, 16, 11, 34, 32, 32, 32, 11, 34, 16, 32, 11, - 34, 11, 34, 34, 32, 35, 32, 16, 36, 36, 37, 34, 38, 37, 34, 34, - 34, 34, 34, 34, 34, 34, 16, 32, 34, 38, 32, 11, 32, 32, 32, 32, - 32, 32, 16, 16, 16, 11, 34, 32, 34, 34, 11, 32, 32, 32, 32, 32, - 16, 16, 39, 16, 16, 16, 16, 16, 40, 40, 40, 40, 40, 40, 40, 40, - 40, 41, 41, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, - 40, 40, 42, 41, 41, 41, 42, 42, 41, 41, 41, 41, 41, 41, 41, 41, - 43, 43, 43, 43, 43, 43, 43, 43, 32, 32, 42, 32, 44, 45, 16, 10, - 44, 44, 41, 46, 11, 47, 47, 11, 34, 11, 11, 11, 11, 11, 11, 11, - 11, 48, 11, 11, 11, 11, 16, 16, 16, 16, 16, 16, 16, 16, 16, 34, - 16, 11, 32, 16, 32, 32, 32, 32, 16, 16, 32, 49, 34, 32, 34, 11, - 32, 50, 43, 43, 51, 32, 32, 32, 11, 34, 34, 34, 34, 34, 34, 16, - 48, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 47, 52, 2, 2, 2, - 53, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 54, 55, 56, 57, - 58, 
43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 59, - 60, 61, 43, 60, 44, 44, 44, 44, 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 62, 44, 44, 36, 63, 64, 44, 44, 44, 44, 44, - 65, 65, 65, 8, 9, 66, 2, 67, 43, 43, 43, 43, 43, 61, 68, 2, - 69, 36, 36, 36, 36, 70, 43, 43, 7, 7, 7, 7, 7, 2, 2, 36, - 71, 36, 36, 36, 36, 36, 36, 36, 36, 36, 72, 43, 43, 43, 73, 50, - 43, 43, 74, 75, 76, 43, 43, 36, 7, 7, 7, 7, 7, 36, 77, 78, - 2, 2, 2, 2, 2, 2, 2, 79, 70, 36, 36, 36, 36, 36, 36, 36, - 43, 43, 43, 43, 43, 80, 81, 36, 36, 36, 36, 43, 43, 43, 43, 43, - 71, 44, 44, 44, 44, 44, 44, 44, 7, 7, 7, 7, 7, 36, 36, 36, - 36, 36, 36, 36, 36, 70, 43, 43, 43, 43, 40, 21, 2, 82, 44, 44, - 36, 36, 36, 43, 43, 75, 43, 43, 43, 43, 75, 43, 75, 43, 43, 44, - 2, 2, 2, 2, 2, 2, 2, 64, 36, 36, 36, 36, 70, 43, 44, 64, - 44, 44, 44, 44, 44, 44, 44, 44, 36, 36, 62, 36, 36, 36, 36, 44, - 44, 44, 43, 43, 43, 43, 43, 43, 43, 83, 43, 43, 43, 43, 43, 43, - 43, 84, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 84, 71, 85, - 86, 43, 43, 43, 84, 85, 86, 85, 70, 43, 43, 43, 36, 36, 36, 36, - 36, 43, 2, 7, 7, 7, 7, 7, 87, 36, 36, 36, 36, 36, 36, 36, - 70, 85, 81, 36, 36, 36, 62, 81, 62, 81, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 62, 36, 36, 36, 62, 62, 44, 36, 36, 44, 71, 85, - 86, 43, 80, 88, 89, 88, 86, 62, 44, 44, 44, 88, 44, 44, 36, 81, - 36, 43, 44, 7, 7, 7, 7, 7, 36, 20, 27, 27, 27, 57, 44, 44, - 58, 84, 81, 36, 36, 62, 44, 81, 62, 36, 81, 62, 36, 44, 80, 85, - 86, 80, 44, 58, 80, 58, 43, 44, 58, 44, 44, 44, 81, 36, 62, 62, - 44, 44, 44, 7, 7, 7, 7, 7, 43, 36, 70, 44, 44, 44, 44, 44, - 58, 84, 81, 36, 36, 36, 36, 81, 36, 81, 36, 36, 36, 36, 36, 36, - 62, 36, 81, 36, 36, 44, 71, 85, 86, 43, 43, 58, 84, 88, 86, 44, - 62, 44, 44, 44, 44, 44, 44, 44, 66, 44, 44, 44, 81, 44, 44, 44, - 58, 85, 81, 36, 36, 36, 62, 81, 62, 36, 81, 36, 36, 44, 71, 86, - 86, 43, 80, 88, 89, 88, 86, 44, 44, 44, 44, 84, 44, 44, 36, 81, - 78, 27, 27, 27, 44, 44, 44, 44, 44, 71, 81, 36, 36, 62, 44, 36, - 62, 36, 
36, 44, 81, 62, 62, 36, 44, 81, 62, 44, 36, 62, 44, 36, - 36, 36, 36, 36, 36, 44, 44, 85, 84, 89, 44, 85, 89, 85, 86, 44, - 62, 44, 44, 88, 44, 44, 44, 44, 27, 90, 67, 67, 57, 91, 44, 44, - 84, 85, 81, 36, 36, 36, 62, 36, 62, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 44, 81, 43, 84, 85, 89, 43, 80, 43, 43, 44, - 44, 44, 58, 80, 36, 62, 44, 44, 44, 44, 44, 44, 27, 27, 27, 90, - 70, 85, 81, 36, 36, 36, 62, 36, 36, 36, 81, 36, 36, 44, 71, 86, - 85, 85, 89, 84, 89, 85, 43, 44, 44, 44, 88, 89, 44, 44, 44, 62, - 81, 62, 44, 44, 44, 44, 44, 44, 58, 85, 81, 36, 36, 36, 62, 36, - 36, 36, 36, 36, 36, 62, 81, 85, 86, 43, 80, 85, 89, 85, 86, 77, - 44, 44, 36, 92, 27, 27, 27, 93, 27, 27, 27, 27, 90, 36, 36, 36, - 44, 85, 81, 36, 36, 36, 36, 36, 36, 36, 36, 62, 44, 36, 36, 36, - 36, 81, 36, 36, 36, 36, 81, 44, 36, 36, 36, 62, 44, 80, 44, 88, - 85, 43, 80, 80, 85, 85, 85, 85, 44, 85, 64, 44, 44, 44, 44, 44, - 81, 36, 36, 36, 36, 36, 36, 36, 70, 36, 43, 43, 43, 80, 44, 94, - 36, 36, 36, 75, 43, 43, 43, 61, 7, 7, 7, 7, 7, 2, 44, 44, - 81, 62, 62, 81, 62, 62, 81, 44, 44, 44, 36, 36, 81, 36, 36, 36, - 81, 36, 81, 81, 44, 36, 81, 36, 70, 36, 43, 43, 43, 58, 71, 44, - 36, 36, 62, 82, 43, 43, 43, 44, 7, 7, 7, 7, 7, 44, 36, 36, - 77, 67, 2, 2, 2, 2, 2, 2, 2, 95, 95, 67, 43, 67, 67, 67, - 7, 7, 7, 7, 7, 27, 27, 27, 27, 27, 50, 50, 50, 4, 4, 85, - 36, 36, 36, 36, 81, 36, 36, 36, 36, 36, 36, 36, 36, 36, 62, 44, - 58, 43, 43, 43, 43, 43, 43, 84, 43, 43, 61, 43, 36, 36, 70, 43, - 43, 43, 43, 43, 58, 43, 43, 43, 43, 43, 43, 43, 43, 43, 80, 67, - 67, 67, 67, 76, 67, 67, 91, 67, 2, 2, 95, 67, 21, 64, 44, 44, - 36, 36, 36, 36, 36, 92, 86, 43, 84, 43, 43, 43, 86, 84, 86, 71, - 7, 7, 7, 7, 7, 2, 2, 2, 36, 36, 36, 85, 43, 36, 36, 43, - 71, 85, 96, 92, 85, 85, 85, 36, 70, 43, 71, 36, 36, 36, 36, 36, - 36, 84, 86, 84, 85, 85, 86, 92, 7, 7, 7, 7, 7, 85, 86, 67, - 11, 11, 11, 48, 44, 44, 48, 44, 36, 36, 36, 36, 36, 63, 69, 36, - 36, 36, 36, 36, 62, 36, 36, 44, 36, 36, 36, 62, 62, 36, 36, 44, - 
62, 36, 36, 44, 36, 36, 36, 62, 62, 36, 36, 44, 36, 36, 36, 36, - 36, 36, 36, 62, 36, 36, 36, 36, 36, 36, 36, 36, 36, 62, 58, 43, - 2, 2, 2, 2, 97, 27, 27, 27, 27, 27, 27, 27, 27, 27, 98, 44, - 67, 67, 67, 67, 67, 44, 44, 44, 11, 11, 11, 44, 16, 16, 16, 44, - 99, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 63, 72, - 100, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 101, 102, 44, - 36, 36, 36, 36, 36, 63, 2, 103, 104, 36, 36, 36, 62, 44, 44, 44, - 36, 36, 36, 36, 36, 36, 62, 36, 36, 43, 80, 44, 44, 44, 44, 44, - 36, 43, 61, 64, 44, 44, 44, 44, 36, 43, 44, 44, 44, 44, 44, 44, - 62, 43, 44, 44, 44, 44, 44, 44, 36, 36, 43, 86, 43, 43, 43, 85, - 85, 85, 85, 84, 86, 43, 43, 43, 43, 43, 2, 87, 2, 66, 70, 44, - 7, 7, 7, 7, 7, 44, 44, 44, 27, 27, 27, 27, 27, 44, 44, 44, - 2, 2, 2, 105, 2, 60, 43, 68, 36, 106, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 44, 44, 44, 44, 36, 36, 70, 71, 36, 36, 36, 36, - 36, 36, 36, 36, 70, 62, 44, 44, 36, 36, 36, 44, 44, 44, 44, 44, - 36, 36, 36, 36, 36, 36, 36, 62, 43, 84, 85, 86, 84, 85, 44, 44, - 85, 84, 85, 85, 86, 43, 44, 44, 91, 44, 2, 7, 7, 7, 7, 7, - 36, 36, 36, 36, 36, 36, 36, 44, 36, 36, 62, 44, 44, 44, 44, 44, - 36, 36, 36, 36, 36, 36, 44, 44, 36, 36, 36, 36, 36, 44, 44, 44, - 7, 7, 7, 7, 7, 98, 44, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 36, 36, 36, 70, 84, 86, 44, 2, 36, 36, 92, 84, 43, 43, 43, 80, - 84, 84, 86, 43, 43, 43, 84, 85, 85, 86, 43, 43, 43, 43, 80, 58, - 2, 2, 2, 87, 2, 2, 2, 44, 43, 43, 43, 43, 43, 43, 43, 107, - 43, 43, 96, 36, 36, 36, 36, 36, 36, 36, 84, 43, 43, 84, 84, 85, - 85, 84, 96, 36, 36, 36, 44, 44, 95, 67, 67, 67, 67, 50, 43, 43, - 43, 43, 67, 67, 67, 67, 91, 44, 43, 96, 36, 36, 36, 36, 36, 36, - 92, 43, 43, 85, 43, 86, 43, 36, 36, 36, 36, 84, 43, 85, 86, 86, - 43, 85, 44, 44, 44, 44, 2, 2, 36, 36, 85, 85, 85, 85, 43, 43, - 43, 43, 85, 43, 44, 54, 2, 2, 7, 7, 7, 7, 7, 44, 81, 36, - 36, 36, 36, 36, 40, 40, 40, 2, 16, 16, 16, 16, 108, 44, 44, 44, - 2, 2, 2, 2, 44, 44, 44, 44, 43, 61, 43, 43, 43, 43, 
43, 43, - 84, 43, 43, 43, 71, 36, 70, 36, 36, 85, 71, 62, 43, 44, 44, 44, - 16, 16, 16, 16, 16, 16, 40, 40, 40, 40, 40, 40, 40, 45, 16, 16, - 16, 16, 16, 16, 45, 16, 16, 16, 16, 16, 16, 16, 16, 109, 40, 40, - 43, 43, 43, 44, 44, 58, 43, 43, 32, 32, 32, 16, 16, 16, 16, 32, - 16, 16, 16, 16, 11, 11, 11, 11, 16, 16, 16, 44, 11, 11, 11, 44, - 16, 16, 16, 16, 48, 48, 48, 48, 16, 16, 16, 16, 16, 16, 16, 44, - 16, 16, 16, 16, 110, 110, 110, 110, 16, 16, 108, 16, 11, 11, 111, 112, - 41, 16, 108, 16, 11, 11, 111, 41, 16, 16, 44, 16, 11, 11, 113, 41, - 16, 16, 16, 16, 11, 11, 114, 41, 44, 16, 108, 16, 11, 11, 111, 115, - 116, 116, 116, 116, 116, 117, 65, 65, 118, 118, 118, 2, 119, 120, 119, 120, - 2, 2, 2, 2, 121, 65, 65, 122, 2, 2, 2, 2, 123, 124, 2, 125, - 126, 2, 127, 128, 2, 2, 2, 2, 2, 9, 126, 2, 2, 2, 2, 129, - 65, 65, 68, 65, 65, 65, 65, 65, 130, 44, 27, 27, 27, 8, 127, 131, - 27, 27, 27, 27, 27, 8, 127, 102, 40, 40, 40, 40, 40, 40, 82, 44, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 132, - 43, 43, 43, 43, 43, 43, 133, 51, 134, 51, 134, 43, 43, 43, 43, 43, - 80, 44, 44, 44, 44, 44, 44, 44, 67, 135, 67, 136, 67, 34, 11, 16, - 11, 32, 136, 67, 49, 11, 11, 67, 67, 67, 135, 135, 135, 11, 11, 137, - 11, 11, 35, 36, 39, 67, 16, 11, 8, 8, 49, 16, 16, 26, 67, 138, - 27, 27, 27, 27, 27, 27, 27, 27, 103, 103, 103, 103, 103, 103, 103, 103, - 103, 139, 140, 103, 141, 67, 44, 44, 8, 8, 142, 67, 67, 8, 67, 67, - 142, 26, 67, 142, 67, 67, 67, 142, 67, 67, 67, 67, 67, 67, 67, 8, - 67, 142, 142, 67, 67, 67, 67, 67, 67, 67, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 67, 67, 67, 67, 4, 4, 67, 67, - 8, 67, 67, 67, 143, 144, 67, 67, 67, 67, 67, 67, 67, 67, 142, 67, - 67, 67, 67, 67, 67, 26, 8, 8, 8, 8, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 8, 8, 8, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 91, 67, 67, 67, 91, 44, 44, 44, 44, - 67, 67, 67, 67, 67, 91, 44, 44, 27, 27, 27, 27, 27, 27, 67, 67, - 67, 67, 67, 67, 67, 27, 27, 27, 67, 67, 67, 26, 67, 
67, 67, 67, - 26, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 8, 8, 8, 8, - 67, 67, 67, 67, 67, 67, 67, 26, 67, 67, 67, 67, 4, 4, 4, 4, - 4, 4, 4, 27, 27, 27, 27, 27, 27, 27, 67, 67, 67, 67, 67, 67, - 8, 8, 127, 145, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, - 8, 127, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 145, 8, 8, 8, - 8, 8, 8, 8, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, - 8, 8, 142, 26, 8, 8, 142, 67, 67, 67, 44, 67, 67, 67, 67, 67, - 67, 67, 67, 44, 67, 67, 67, 67, 67, 67, 67, 67, 67, 44, 56, 67, - 67, 67, 67, 67, 91, 67, 67, 67, 67, 44, 44, 44, 44, 44, 44, 44, - 44, 44, 44, 44, 44, 44, 67, 67, 11, 11, 11, 11, 11, 11, 11, 47, - 16, 16, 16, 16, 16, 16, 16, 108, 32, 11, 32, 34, 34, 34, 34, 11, - 32, 32, 34, 16, 16, 16, 40, 11, 32, 32, 138, 67, 67, 136, 34, 147, - 43, 32, 44, 44, 54, 2, 97, 2, 16, 16, 16, 53, 44, 44, 53, 44, - 36, 36, 36, 36, 44, 44, 44, 52, 64, 44, 44, 44, 44, 44, 44, 58, - 36, 36, 36, 62, 44, 44, 44, 44, 36, 36, 36, 62, 36, 36, 36, 62, - 2, 119, 119, 2, 123, 124, 119, 2, 2, 2, 2, 6, 2, 105, 119, 2, - 119, 4, 4, 4, 4, 2, 2, 87, 2, 2, 2, 2, 2, 118, 2, 2, - 105, 148, 64, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, 56, 67, 67, - 67, 67, 44, 44, 44, 44, 44, 44, 67, 67, 67, 44, 44, 44, 44, 44, - 67, 67, 67, 67, 67, 67, 44, 44, 1, 2, 149, 150, 4, 4, 4, 4, - 4, 67, 4, 4, 4, 4, 151, 152, 153, 103, 103, 103, 103, 43, 43, 85, - 154, 40, 40, 67, 103, 155, 63, 67, 36, 36, 36, 62, 58, 156, 157, 69, - 36, 36, 36, 36, 36, 63, 40, 69, 44, 44, 81, 36, 36, 36, 36, 36, - 67, 27, 27, 67, 67, 67, 67, 67, 27, 27, 27, 27, 27, 67, 67, 67, - 67, 67, 67, 67, 27, 27, 27, 27, 158, 27, 27, 27, 27, 27, 27, 27, - 36, 36, 106, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 159, 2, - 7, 7, 7, 7, 7, 36, 44, 44, 32, 32, 32, 32, 32, 32, 32, 70, - 51, 160, 43, 43, 43, 43, 43, 87, 32, 32, 32, 32, 32, 32, 40, 43, - 36, 36, 36, 103, 103, 103, 103, 103, 43, 2, 2, 2, 44, 44, 44, 44, - 41, 41, 41, 157, 40, 40, 40, 40, 41, 32, 32, 32, 32, 32, 32, 32, - 16, 32, 32, 32, 32, 32, 32, 32, 45, 16, 16, 
16, 34, 34, 34, 32, - 32, 32, 32, 32, 42, 161, 34, 35, 32, 32, 16, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 11, 11, 47, 11, 11, 32, 32, 44, 44, 44, 44, - 44, 44, 44, 81, 40, 35, 36, 36, 36, 71, 36, 71, 36, 70, 36, 36, - 36, 92, 86, 84, 67, 67, 44, 44, 27, 27, 27, 67, 162, 44, 44, 44, - 36, 36, 2, 2, 44, 44, 44, 44, 85, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 85, 85, 85, 85, 85, 85, 85, 85, 43, 44, 44, 44, 44, 2, - 43, 36, 36, 36, 2, 72, 72, 44, 36, 36, 36, 43, 43, 43, 43, 2, - 36, 36, 36, 70, 43, 43, 43, 43, 43, 85, 44, 44, 44, 44, 44, 54, - 36, 70, 85, 43, 43, 85, 84, 85, 163, 2, 2, 2, 2, 2, 2, 52, - 7, 7, 7, 7, 7, 44, 44, 2, 36, 36, 70, 69, 36, 36, 36, 36, - 7, 7, 7, 7, 7, 36, 36, 62, 36, 36, 36, 36, 70, 43, 43, 84, - 86, 84, 86, 80, 44, 44, 44, 44, 36, 70, 36, 36, 36, 36, 84, 44, - 7, 7, 7, 7, 7, 44, 2, 2, 69, 36, 36, 77, 67, 92, 84, 36, - 71, 43, 71, 70, 71, 36, 36, 43, 70, 62, 44, 44, 44, 44, 44, 44, - 44, 44, 44, 44, 44, 81, 106, 2, 36, 36, 36, 36, 36, 92, 43, 85, - 2, 106, 164, 80, 44, 44, 44, 44, 81, 36, 36, 62, 81, 36, 36, 62, - 81, 36, 36, 62, 44, 44, 44, 44, 16, 16, 16, 16, 16, 112, 40, 40, - 16, 16, 16, 44, 44, 44, 44, 44, 36, 92, 86, 85, 84, 163, 86, 44, - 36, 36, 44, 44, 44, 44, 44, 44, 36, 36, 36, 62, 44, 81, 36, 36, - 165, 165, 165, 165, 165, 165, 165, 165, 166, 166, 166, 166, 166, 166, 166, 166, - 16, 16, 16, 108, 44, 44, 44, 44, 44, 53, 16, 16, 44, 44, 81, 71, - 36, 36, 36, 36, 167, 36, 36, 36, 36, 36, 36, 62, 36, 36, 62, 62, - 36, 81, 62, 36, 36, 36, 36, 36, 36, 41, 41, 41, 41, 41, 41, 41, - 41, 44, 44, 44, 44, 44, 44, 44, 44, 81, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 146, 44, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 162, 44, 2, 2, 2, 168, 128, 44, 44, 44, - 6, 169, 170, 146, 146, 146, 146, 146, 146, 146, 128, 168, 128, 2, 125, 171, - 2, 64, 2, 2, 151, 146, 146, 128, 2, 172, 8, 173, 66, 2, 44, 44, - 36, 36, 62, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 62, 79, - 54, 2, 3, 2, 4, 5, 6, 2, 16, 16, 16, 16, 16, 17, 18, 
127, - 128, 4, 2, 36, 36, 36, 36, 36, 69, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 40, 44, 36, 36, 36, 44, 36, 36, 36, - 44, 36, 36, 36, 44, 36, 62, 44, 20, 174, 57, 132, 26, 8, 142, 91, - 44, 44, 44, 44, 79, 65, 67, 44, 36, 36, 36, 36, 36, 36, 81, 36, - 36, 36, 36, 36, 36, 62, 36, 81, 2, 64, 44, 175, 27, 27, 27, 27, - 27, 27, 44, 56, 67, 67, 67, 67, 103, 103, 141, 27, 90, 67, 67, 67, - 67, 67, 67, 67, 67, 27, 67, 91, 91, 44, 44, 44, 44, 44, 44, 44, - 67, 67, 67, 67, 67, 67, 50, 44, 176, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 44, 44, 27, 27, 44, 44, 44, 44, 44, 44, - 150, 36, 36, 36, 36, 177, 44, 44, 36, 36, 36, 43, 43, 80, 44, 44, - 36, 36, 36, 36, 36, 36, 36, 54, 36, 36, 44, 44, 36, 36, 36, 36, - 178, 103, 103, 44, 44, 44, 44, 44, 11, 11, 11, 11, 16, 16, 16, 16, - 11, 11, 44, 44, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 44, 44, - 36, 36, 44, 44, 44, 44, 44, 54, 36, 36, 36, 44, 62, 36, 36, 36, - 36, 36, 36, 81, 62, 44, 62, 81, 36, 36, 36, 54, 27, 27, 27, 27, - 36, 36, 36, 77, 158, 27, 27, 27, 44, 44, 44, 175, 27, 27, 27, 27, - 36, 62, 36, 44, 44, 175, 27, 27, 36, 36, 36, 27, 27, 27, 44, 54, - 36, 36, 36, 36, 36, 44, 44, 54, 36, 36, 36, 36, 44, 44, 27, 36, - 44, 27, 27, 27, 27, 27, 27, 27, 70, 43, 58, 80, 44, 44, 43, 43, - 36, 36, 81, 36, 81, 36, 36, 36, 36, 36, 44, 44, 43, 80, 44, 58, - 27, 27, 27, 27, 44, 44, 44, 44, 2, 2, 2, 2, 64, 44, 44, 44, - 36, 36, 36, 36, 36, 36, 179, 30, 36, 36, 36, 36, 36, 36, 179, 27, - 36, 36, 36, 36, 78, 36, 36, 36, 36, 36, 70, 80, 44, 175, 27, 27, - 2, 2, 2, 64, 44, 44, 44, 44, 36, 36, 36, 44, 54, 2, 2, 2, - 36, 36, 36, 44, 27, 27, 27, 27, 36, 62, 44, 44, 27, 27, 27, 27, - 36, 44, 44, 44, 54, 2, 64, 44, 44, 44, 44, 44, 175, 27, 27, 27, - 36, 36, 36, 36, 62, 44, 44, 44, 11, 47, 44, 44, 44, 44, 44, 44, - 16, 108, 44, 44, 44, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 98, - 86, 96, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 43, 43, 43, 43, - 43, 43, 43, 61, 2, 2, 2, 44, 27, 27, 27, 7, 7, 7, 7, 7, - 44, 44, 44, 
44, 44, 44, 44, 58, 85, 86, 43, 84, 86, 61, 180, 2, - 2, 44, 44, 44, 44, 44, 44, 44, 43, 71, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 70, 43, 43, 86, 43, 43, 43, 80, 7, 7, 7, 7, 7, - 2, 2, 44, 44, 44, 44, 44, 44, 36, 70, 2, 62, 44, 44, 44, 44, - 36, 92, 85, 43, 43, 43, 43, 84, 96, 36, 63, 2, 2, 43, 61, 44, - 7, 7, 7, 7, 7, 63, 63, 2, 175, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 98, 44, 44, 44, 44, 44, 36, 36, 36, 36, 36, 36, 85, 86, - 43, 85, 84, 43, 2, 2, 2, 80, 36, 36, 36, 62, 62, 36, 36, 81, - 36, 36, 36, 36, 36, 36, 36, 81, 36, 36, 36, 36, 63, 44, 44, 44, - 36, 36, 36, 36, 36, 36, 36, 70, 85, 86, 43, 43, 43, 80, 44, 44, - 43, 85, 81, 36, 36, 36, 62, 81, 84, 85, 89, 88, 89, 88, 85, 44, - 62, 44, 44, 88, 44, 44, 81, 36, 36, 85, 44, 43, 43, 43, 80, 44, - 43, 43, 80, 44, 44, 44, 44, 44, 36, 36, 92, 85, 43, 43, 43, 43, - 85, 43, 84, 71, 36, 63, 2, 2, 7, 7, 7, 7, 7, 54, 54, 44, - 85, 86, 43, 43, 84, 84, 85, 86, 84, 43, 36, 72, 44, 44, 44, 44, - 36, 36, 36, 36, 36, 36, 36, 92, 85, 43, 43, 44, 85, 85, 43, 86, - 61, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 36, 36, 43, 44, - 85, 86, 43, 43, 43, 84, 86, 86, 61, 2, 62, 44, 44, 44, 44, 44, - 2, 2, 2, 2, 2, 2, 64, 44, 36, 36, 36, 36, 36, 70, 86, 85, - 43, 43, 43, 86, 44, 44, 44, 44, 36, 36, 36, 36, 36, 44, 58, 43, - 85, 43, 43, 86, 43, 43, 44, 44, 7, 7, 7, 7, 7, 27, 2, 95, - 27, 98, 44, 44, 44, 44, 44, 81, 43, 43, 43, 80, 43, 43, 43, 86, - 63, 2, 2, 44, 44, 44, 44, 44, 2, 36, 36, 36, 36, 36, 36, 36, - 44, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 88, 43, 43, 43, - 84, 43, 86, 80, 44, 44, 44, 44, 103, 103, 103, 103, 103, 103, 103, 177, - 2, 2, 64, 44, 44, 44, 44, 44, 43, 43, 61, 44, 44, 44, 44, 44, - 43, 43, 43, 61, 2, 2, 67, 67, 40, 40, 95, 44, 44, 44, 44, 44, - 7, 7, 7, 7, 7, 175, 27, 27, 27, 81, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 44, 44, 81, 36, 92, 85, 85, 85, 85, 85, 85, 85, - 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 89, - 43, 74, 40, 40, 40, 40, 40, 40, 82, 44, 44, 44, 44, 44, 44, 44, - 36, 62, 44, 44, 
44, 44, 44, 44, 36, 44, 44, 44, 44, 44, 44, 44, - 36, 36, 36, 36, 36, 44, 50, 61, 65, 65, 44, 44, 44, 44, 44, 44, - 67, 67, 67, 91, 56, 67, 67, 67, 67, 67, 181, 86, 43, 67, 181, 85, - 85, 182, 65, 65, 65, 83, 43, 43, 43, 76, 50, 43, 43, 43, 67, 67, - 67, 67, 67, 67, 67, 43, 43, 67, 67, 67, 67, 67, 91, 44, 44, 44, - 67, 43, 76, 44, 44, 44, 44, 44, 27, 44, 44, 44, 44, 44, 44, 44, - 11, 11, 11, 11, 11, 16, 16, 16, 16, 16, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 16, 16, 16, 108, 16, 16, 16, 16, 16, - 11, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 47, 11, - 44, 47, 48, 47, 48, 11, 47, 11, 11, 11, 11, 16, 16, 53, 53, 16, - 16, 16, 53, 16, 16, 16, 16, 16, 16, 16, 11, 48, 11, 47, 48, 11, - 11, 11, 47, 11, 11, 11, 47, 16, 16, 16, 16, 16, 11, 48, 11, 47, - 11, 11, 47, 47, 44, 11, 11, 11, 47, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 11, 11, 11, 11, 11, 16, 16, 16, 16, 16, - 16, 16, 16, 44, 11, 11, 11, 11, 31, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 33, 16, 16, 16, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 31, 16, 16, 16, 16, 33, 16, 16, 16, 11, 11, - 11, 11, 31, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 33, - 16, 16, 16, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 31, - 16, 16, 16, 16, 33, 16, 16, 16, 11, 11, 11, 11, 31, 16, 16, 16, - 16, 33, 16, 16, 16, 32, 44, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 43, 43, 43, 76, 67, 50, 43, 43, 43, 43, 43, 43, 43, 43, 76, 67, - 67, 67, 50, 67, 67, 67, 67, 67, 67, 67, 76, 21, 2, 2, 44, 44, - 44, 44, 44, 44, 44, 58, 43, 43, 43, 43, 43, 80, 43, 43, 43, 43, - 43, 43, 43, 43, 80, 58, 43, 43, 43, 58, 80, 43, 43, 80, 44, 44, - 36, 36, 62, 175, 27, 27, 27, 27, 43, 43, 43, 80, 44, 44, 44, 44, - 16, 16, 43, 43, 43, 80, 44, 44, 36, 36, 81, 36, 36, 36, 36, 36, - 81, 62, 62, 81, 81, 36, 36, 36, 36, 62, 36, 36, 81, 81, 44, 44, - 44, 62, 44, 81, 81, 81, 81, 36, 81, 62, 62, 81, 81, 81, 81, 81, - 81, 62, 62, 81, 36, 62, 36, 36, 36, 62, 36, 36, 81, 36, 62, 62, - 36, 36, 36, 36, 36, 81, 36, 36, 81, 36, 
81, 36, 36, 81, 36, 36, - 8, 44, 44, 44, 44, 44, 44, 44, 56, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 91, 44, 44, 44, 44, 67, 67, 67, 67, 67, - 67, 91, 44, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, 25, 41, 41, - 67, 67, 91, 44, 44, 44, 44, 44, 67, 67, 67, 67, 44, 44, 44, 44, - 67, 67, 67, 67, 67, 67, 67, 44, 91, 56, 67, 67, 67, 67, 67, 91, - 79, 44, 44, 44, 44, 44, 44, 44, 65, 65, 65, 65, 65, 65, 65, 65, - 166, 166, 166, 166, 166, 166, 166, 44, -}; - -static RE_UINT8 re_general_category_stage_5[] = { - 15, 15, 12, 23, 23, 23, 25, 23, 20, 21, 23, 24, 23, 19, 9, 9, - 24, 24, 24, 23, 23, 1, 1, 1, 1, 20, 23, 21, 26, 22, 26, 2, - 2, 2, 2, 20, 24, 21, 24, 15, 25, 25, 27, 23, 26, 27, 5, 28, - 24, 16, 27, 26, 27, 24, 11, 11, 26, 11, 5, 29, 11, 23, 1, 24, - 1, 2, 2, 24, 2, 1, 2, 5, 5, 5, 1, 3, 3, 2, 5, 2, - 4, 4, 26, 26, 4, 26, 6, 6, 0, 0, 4, 2, 1, 23, 1, 0, - 0, 1, 24, 1, 27, 6, 7, 7, 0, 4, 0, 2, 0, 23, 19, 0, - 0, 27, 27, 25, 0, 6, 19, 6, 23, 6, 6, 23, 5, 0, 5, 23, - 23, 0, 16, 16, 23, 25, 27, 27, 16, 0, 4, 5, 5, 6, 6, 5, - 23, 5, 6, 16, 6, 4, 4, 6, 6, 27, 5, 27, 27, 5, 0, 16, - 6, 0, 0, 5, 4, 0, 16, 6, 6, 8, 8, 8, 8, 6, 23, 4, - 0, 8, 8, 0, 11, 27, 27, 0, 5, 8, 11, 5, 0, 25, 23, 27, - 8, 5, 23, 11, 11, 0, 19, 5, 12, 5, 5, 20, 21, 0, 10, 10, - 10, 5, 19, 23, 5, 4, 7, 0, 2, 0, 2, 4, 3, 3, 3, 26, - 2, 26, 0, 26, 1, 26, 26, 0, 12, 12, 12, 16, 19, 19, 28, 29, - 20, 28, 13, 14, 16, 12, 23, 28, 29, 23, 23, 22, 22, 23, 24, 20, - 21, 23, 23, 12, 11, 4, 21, 4, 25, 0, 6, 7, 7, 6, 1, 27, - 27, 1, 27, 2, 2, 27, 10, 1, 2, 10, 10, 11, 24, 27, 27, 20, - 21, 27, 21, 24, 21, 20, 2, 6, 20, 23, 27, 4, 5, 10, 19, 20, - 21, 21, 27, 10, 19, 4, 10, 4, 6, 26, 26, 4, 27, 11, 4, 23, - 7, 23, 26, 1, 25, 27, 8, 23, 4, 8, 18, 18, 17, 17, 5, 24, - 23, 20, 19, 22, 22, 20, 22, 22, 24, 19, 24, 0, 24, 26, 0, 11, - 6, 11, 10, 0, 23, 10, 5, 11, 23, 16, 27, 8, 8, 16, -}; - -/* General_Category: 9926 bytes. 
*/ - -RE_UINT32 re_get_general_category(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 12; - code = ch ^ (f << 12); - pos = (RE_UINT32)re_general_category_stage_1[f] << 5; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_general_category_stage_2[pos + f] << 3; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_general_category_stage_3[pos + f] << 3; - f = code >> 1; - code ^= f << 1; - pos = (RE_UINT32)re_general_category_stage_4[pos + f] << 1; - value = re_general_category_stage_5[pos + code]; - - return value; -} - -/* Block. */ - -static RE_UINT8 re_block_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 10, 11, 12, 12, 12, 12, 13, 14, 15, 15, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 22, 24, 22, 22, 22, 22, 25, 26, 26, - 26, 27, 22, 22, 22, 22, 28, 29, 22, 22, 30, 31, 32, 33, 34, 35, - 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 37, 38, 39, 40, 41, 42, 22, 22, 22, 22, 22, 43, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 
22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 44, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, - 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, - 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, -}; - -static RE_UINT8 re_block_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 10, 11, 11, 12, 13, - 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, - 29, 30, 31, 31, 32, 32, 32, 33, 34, 34, 34, 34, 34, 35, 36, 37, - 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 50, 51, 51, - 52, 53, 54, 55, 56, 56, 57, 57, 58, 59, 60, 61, 62, 62, 63, 64, - 65, 65, 66, 67, 68, 68, 69, 69, 70, 71, 72, 73, 74, 75, 76, 77, - 78, 79, 80, 81, 82, 82, 83, 83, 84, 84, 84, 84, 84, 84, 84, 84, - 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, - 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 85, 86, 86, 86, 86, - 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, - 87, 87, 87, 87, 87, 87, 87, 87, 87, 88, 89, 89, 90, 91, 92, 93, - 94, 95, 96, 97, 98, 99, 100, 101, 102, 102, 102, 102, 102, 102, 102, 102, - 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, - 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 103, - 104, 104, 104, 104, 104, 104, 104, 105, 106, 106, 106, 106, 106, 106, 106, 106, - 107, 107, 107, 107, 107, 107, 107, 107, 107, 
107, 107, 107, 107, 107, 107, 107, - 107, 107, 108, 108, 108, 108, 109, 110, 110, 110, 110, 110, 111, 112, 113, 114, - 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 119, 126, 126, 126, 119, - 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 119, 119, 137, 119, 119, 119, - 138, 139, 140, 141, 142, 143, 144, 119, 145, 146, 119, 147, 148, 149, 150, 119, - 119, 151, 119, 119, 119, 152, 119, 119, 153, 154, 119, 119, 119, 119, 119, 119, - 155, 155, 155, 155, 155, 155, 155, 155, 156, 157, 158, 119, 119, 119, 119, 119, - 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 159, 159, 159, 159, 159, 159, 159, 159, 160, 119, 119, 119, 119, 119, 119, 119, - 119, 119, 119, 119, 119, 119, 119, 119, 161, 161, 161, 161, 161, 119, 119, 119, - 162, 162, 162, 162, 163, 164, 165, 166, 119, 119, 119, 119, 119, 119, 167, 168, - 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, - 170, 170, 170, 170, 170, 170, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 171, 171, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 119, 119, 119, 119, 119, 119, 119, 119, 172, 173, 119, 119, 119, 119, 119, 119, - 174, 174, 175, 175, 176, 119, 177, 119, 178, 178, 178, 178, 178, 178, 178, 178, - 179, 179, 179, 179, 179, 180, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 181, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 182, 183, 184, 119, 119, 119, 119, 119, 119, 119, 119, 119, 185, 185, 119, 119, - 186, 187, 188, 188, 189, 189, 190, 190, 190, 190, 190, 190, 191, 192, 193, 194, - 195, 195, 196, 196, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, - 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 198, 199, 199, - 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, - 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 200, 
201, - 202, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, - 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, - 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 204, 119, 119, - 205, 205, 205, 205, 206, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 207, 119, 208, 209, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 210, 210, 210, 210, 210, 210, 210, 210, 210, 210, 210, 210, 210, 210, 210, 210, - 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, -}; - -static RE_UINT16 re_block_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 10, 10, 10, 10, 10, - 10, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, - 13, 13, 13, 13, 13, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, - 17, 17, 17, 17, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, - 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, - 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, - 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, - 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, - 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, - 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 36, 36, - 37, 37, 37, 37, 37, 37, 37, 37, 38, 38, 39, 39, 39, 39, 39, 39, - 40, 40, 40, 40, 40, 40, 40, 40, 41, 41, 42, 42, 42, 42, 42, 42, - 43, 43, 44, 44, 45, 45, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 49, 49, 49, 49, 49, - 50, 50, 50, 50, 50, 51, 51, 51, 52, 52, 52, 52, 52, 52, 53, 53, - 54, 54, 55, 55, 55, 55, 55, 55, 55, 55, 55, 56, 56, 56, 56, 
56, - 57, 57, 57, 57, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, - 60, 60, 60, 60, 60, 61, 61, 61, 62, 19, 19, 19, 63, 64, 64, 64, - 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, - 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, - 70, 70, 70, 70, 70, 70, 70, 71, 71, 71, 72, 72, 72, 73, 73, 73, - 74, 74, 74, 74, 74, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, - 77, 77, 77, 77, 77, 77, 77, 77, 78, 78, 78, 78, 78, 78, 78, 78, - 79, 79, 79, 79, 80, 80, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, - 82, 82, 82, 82, 82, 82, 82, 82, 83, 83, 84, 84, 84, 84, 84, 84, - 85, 85, 85, 85, 85, 85, 85, 85, 86, 86, 86, 86, 86, 86, 86, 86, - 86, 86, 86, 86, 87, 87, 87, 88, 89, 89, 89, 89, 89, 89, 89, 89, - 90, 90, 90, 90, 90, 90, 90, 90, 91, 91, 91, 91, 91, 91, 91, 91, - 92, 92, 92, 92, 92, 92, 92, 92, 93, 93, 93, 93, 93, 93, 93, 93, - 94, 94, 94, 94, 94, 94, 95, 95, 96, 96, 96, 96, 96, 96, 96, 96, - 97, 97, 97, 98, 98, 98, 98, 98, 99, 99, 99, 99, 99, 99, 100, 100, - 101, 101, 101, 101, 101, 101, 101, 101, 102, 102, 102, 102, 102, 102, 102, 102, - 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 19, 104, - 105, 105, 105, 105, 106, 106, 106, 106, 106, 106, 107, 107, 107, 107, 107, 107, - 108, 108, 108, 109, 109, 109, 109, 109, 109, 110, 111, 111, 112, 112, 112, 113, - 114, 114, 114, 114, 114, 114, 114, 114, 115, 115, 115, 115, 115, 115, 115, 115, - 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 117, 117, 117, 117, - 118, 118, 118, 118, 118, 118, 118, 118, 119, 119, 119, 119, 119, 119, 119, 119, - 119, 120, 120, 120, 120, 121, 121, 121, 122, 122, 122, 122, 122, 122, 122, 122, - 122, 122, 122, 122, 123, 123, 123, 123, 123, 123, 124, 124, 124, 124, 124, 124, - 125, 125, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, - 127, 127, 127, 128, 129, 129, 129, 129, 130, 130, 130, 130, 130, 130, 131, 131, - 132, 132, 132, 133, 133, 133, 134, 134, 135, 135, 135, 135, 135, 135, 136, 136, - 137, 137, 137, 
137, 137, 137, 138, 138, 139, 139, 139, 139, 139, 139, 140, 140, - 141, 141, 141, 142, 142, 142, 142, 143, 143, 143, 143, 143, 144, 144, 144, 144, - 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 146, 146, 146, 146, 146, - 147, 147, 147, 147, 147, 147, 147, 147, 148, 148, 148, 148, 148, 148, 148, 148, - 149, 149, 149, 149, 149, 149, 149, 149, 150, 150, 150, 150, 150, 150, 150, 150, - 151, 151, 151, 151, 151, 151, 151, 151, 152, 152, 152, 152, 152, 153, 153, 153, - 153, 153, 153, 153, 153, 153, 153, 153, 154, 155, 156, 157, 157, 158, 158, 159, - 159, 159, 159, 159, 159, 159, 159, 159, 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 161, 162, 162, 162, 162, 162, 162, 162, 162, - 163, 163, 163, 163, 163, 163, 163, 163, 164, 164, 164, 164, 165, 165, 165, 165, - 165, 166, 166, 166, 166, 167, 167, 167, 19, 19, 19, 19, 19, 19, 19, 19, - 168, 168, 169, 169, 169, 169, 170, 170, 171, 171, 171, 172, 172, 173, 173, 173, - 174, 174, 175, 175, 175, 175, 19, 19, 176, 176, 176, 176, 176, 177, 177, 177, - 178, 178, 178, 179, 179, 179, 179, 179, 180, 180, 180, 181, 181, 181, 181, 19, - 182, 182, 182, 182, 182, 182, 182, 182, 183, 183, 183, 183, 184, 184, 185, 185, - 186, 186, 186, 19, 19, 19, 187, 187, 188, 188, 189, 189, 19, 19, 19, 19, - 190, 190, 191, 191, 191, 191, 191, 191, 192, 192, 192, 192, 192, 192, 193, 193, - 194, 194, 19, 19, 195, 195, 195, 195, 196, 196, 196, 196, 197, 197, 198, 198, - 199, 199, 199, 19, 19, 19, 19, 19, 200, 200, 200, 200, 200, 19, 19, 19, - 201, 201, 201, 201, 201, 201, 201, 201, 19, 19, 19, 19, 19, 19, 202, 202, - 203, 203, 203, 203, 203, 203, 203, 203, 204, 204, 204, 204, 204, 205, 205, 205, - 206, 206, 206, 206, 206, 207, 207, 207, 208, 208, 208, 208, 208, 208, 209, 209, - 210, 210, 210, 210, 210, 19, 19, 19, 211, 211, 211, 212, 212, 212, 212, 212, - 213, 213, 213, 213, 213, 213, 213, 213, 214, 214, 214, 214, 214, 214, 214, 214, - 215, 215, 215, 215, 215, 215, 19, 19, 216, 216, 216, 216, 216, 216, 216, 216, - 
217, 217, 217, 217, 217, 217, 218, 218, 219, 219, 219, 219, 219, 19, 19, 19, - 220, 220, 220, 220, 19, 19, 19, 19, 19, 19, 221, 221, 221, 221, 221, 221, - 19, 19, 19, 19, 222, 222, 222, 222, 223, 223, 223, 223, 223, 223, 223, 224, - 224, 224, 224, 224, 19, 19, 19, 19, 225, 225, 225, 225, 225, 225, 225, 225, - 226, 226, 226, 226, 226, 226, 226, 226, 227, 227, 227, 227, 227, 227, 227, 227, - 227, 227, 227, 227, 227, 19, 19, 19, 228, 228, 228, 228, 228, 228, 228, 228, - 228, 228, 228, 19, 19, 19, 19, 19, 229, 229, 229, 229, 229, 229, 229, 229, - 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 231, 231, 231, 19, - 19, 19, 19, 19, 19, 232, 232, 232, 233, 233, 233, 233, 233, 233, 233, 233, - 233, 19, 19, 19, 19, 19, 19, 19, 234, 234, 234, 234, 234, 234, 234, 234, - 234, 234, 19, 19, 19, 19, 235, 235, 236, 236, 236, 236, 236, 236, 236, 236, - 237, 237, 237, 237, 237, 237, 237, 237, 238, 238, 238, 238, 238, 238, 238, 238, - 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 240, 19, 19, 19, 19, 19, - 241, 241, 241, 241, 241, 241, 241, 241, 242, 242, 242, 242, 242, 242, 242, 242, - 243, 243, 243, 243, 243, 19, 19, 19, 244, 244, 244, 244, 244, 244, 245, 245, - 246, 246, 246, 246, 246, 246, 246, 246, 247, 247, 247, 247, 247, 247, 247, 247, - 247, 247, 247, 19, 19, 19, 19, 19, 248, 248, 248, 19, 19, 19, 19, 19, - 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 19, 19, - 250, 250, 250, 250, 250, 250, 19, 19, 251, 251, 251, 251, 251, 251, 251, 251, - 252, 252, 252, 253, 253, 253, 253, 253, 253, 253, 254, 254, 254, 254, 254, 254, - 255, 255, 255, 255, 255, 255, 255, 255, 256, 256, 256, 256, 256, 256, 256, 256, - 257, 257, 257, 257, 257, 257, 257, 257, 258, 258, 258, 258, 258, 259, 259, 259, - 260, 260, 260, 260, 260, 260, 260, 260, 261, 261, 261, 261, 261, 261, 261, 261, - 262, 262, 262, 262, 262, 262, 262, 262, 263, 263, 263, 263, 263, 263, 263, 263, - 264, 264, 264, 264, 264, 264, 264, 264, 265, 265, 265, 265, 265, 265, 265, 265, - 265, 265, 
265, 265, 265, 265, 19, 19, 266, 266, 266, 266, 266, 266, 266, 266, - 266, 266, 266, 266, 267, 267, 267, 267, 267, 267, 267, 267, 267, 267, 267, 267, - 267, 267, 268, 268, 268, 268, 268, 268, 268, 268, 268, 268, 268, 268, 268, 268, - 268, 268, 268, 19, 19, 19, 19, 19, 269, 269, 269, 269, 269, 269, 269, 269, - 269, 269, 19, 19, 19, 19, 19, 19, 270, 270, 270, 270, 270, 270, 270, 270, - 271, 271, 271, 271, 271, 271, 271, 271, 271, 271, 271, 271, 271, 271, 271, 19, - 272, 272, 272, 272, 272, 272, 272, 272, 273, 273, 273, 273, 273, 273, 273, 273, -}; - -static RE_UINT16 re_block_stage_4[] = { - 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, - 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, - 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, - 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, - 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, - 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, - 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, - 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, - 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, - 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, - 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, - 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, - 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, - 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, - 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, - 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, - 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, - 68, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 71, 71, 71, 71, - 72, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75, - 76, 76, 76, 76, 77, 77, 77, 77, 78, 78, 78, 78, 79, 79, 79, 79, - 80, 80, 80, 80, 81, 81, 81, 81, 82, 82, 82, 82, 83, 83, 83, 83, - 84, 84, 84, 84, 85, 85, 85, 85, 86, 86, 86, 86, 87, 87, 
87, 87, - 88, 88, 88, 88, 89, 89, 89, 89, 90, 90, 90, 90, 91, 91, 91, 91, - 92, 92, 92, 92, 93, 93, 93, 93, 94, 94, 94, 94, 95, 95, 95, 95, - 96, 96, 96, 96, 97, 97, 97, 97, 98, 98, 98, 98, 99, 99, 99, 99, - 100, 100, 100, 100, 101, 101, 101, 101, 102, 102, 102, 102, 103, 103, 103, 103, - 104, 104, 104, 104, 105, 105, 105, 105, 106, 106, 106, 106, 107, 107, 107, 107, - 108, 108, 108, 108, 109, 109, 109, 109, 110, 110, 110, 110, 111, 111, 111, 111, - 112, 112, 112, 112, 113, 113, 113, 113, 114, 114, 114, 114, 115, 115, 115, 115, - 116, 116, 116, 116, 117, 117, 117, 117, 118, 118, 118, 118, 119, 119, 119, 119, - 120, 120, 120, 120, 121, 121, 121, 121, 122, 122, 122, 122, 123, 123, 123, 123, - 124, 124, 124, 124, 125, 125, 125, 125, 126, 126, 126, 126, 127, 127, 127, 127, - 128, 128, 128, 128, 129, 129, 129, 129, 130, 130, 130, 130, 131, 131, 131, 131, - 132, 132, 132, 132, 133, 133, 133, 133, 134, 134, 134, 134, 135, 135, 135, 135, - 136, 136, 136, 136, 137, 137, 137, 137, 138, 138, 138, 138, 139, 139, 139, 139, - 140, 140, 140, 140, 141, 141, 141, 141, 142, 142, 142, 142, 143, 143, 143, 143, - 144, 144, 144, 144, 145, 145, 145, 145, 146, 146, 146, 146, 147, 147, 147, 147, - 148, 148, 148, 148, 149, 149, 149, 149, 150, 150, 150, 150, 151, 151, 151, 151, - 152, 152, 152, 152, 153, 153, 153, 153, 154, 154, 154, 154, 155, 155, 155, 155, - 156, 156, 156, 156, 157, 157, 157, 157, 158, 158, 158, 158, 159, 159, 159, 159, - 160, 160, 160, 160, 161, 161, 161, 161, 162, 162, 162, 162, 163, 163, 163, 163, - 164, 164, 164, 164, 165, 165, 165, 165, 166, 166, 166, 166, 167, 167, 167, 167, - 168, 168, 168, 168, 169, 169, 169, 169, 170, 170, 170, 170, 171, 171, 171, 171, - 172, 172, 172, 172, 173, 173, 173, 173, 174, 174, 174, 174, 175, 175, 175, 175, - 176, 176, 176, 176, 177, 177, 177, 177, 178, 178, 178, 178, 179, 179, 179, 179, - 180, 180, 180, 180, 181, 181, 181, 181, 182, 182, 182, 182, 183, 183, 183, 183, - 184, 184, 184, 184, 185, 185, 185, 185, 186, 186, 186, 186, 187, 187, 
187, 187, - 188, 188, 188, 188, 189, 189, 189, 189, 190, 190, 190, 190, 191, 191, 191, 191, - 192, 192, 192, 192, 193, 193, 193, 193, 194, 194, 194, 194, 195, 195, 195, 195, - 196, 196, 196, 196, 197, 197, 197, 197, 198, 198, 198, 198, 199, 199, 199, 199, - 200, 200, 200, 200, 201, 201, 201, 201, 202, 202, 202, 202, 203, 203, 203, 203, - 204, 204, 204, 204, 205, 205, 205, 205, 206, 206, 206, 206, 207, 207, 207, 207, - 208, 208, 208, 208, 209, 209, 209, 209, 210, 210, 210, 210, 211, 211, 211, 211, - 212, 212, 212, 212, 213, 213, 213, 213, 214, 214, 214, 214, 215, 215, 215, 215, - 216, 216, 216, 216, 217, 217, 217, 217, 218, 218, 218, 218, 219, 219, 219, 219, - 220, 220, 220, 220, 221, 221, 221, 221, 222, 222, 222, 222, 223, 223, 223, 223, - 224, 224, 224, 224, 225, 225, 225, 225, 226, 226, 226, 226, 227, 227, 227, 227, - 228, 228, 228, 228, 229, 229, 229, 229, 230, 230, 230, 230, 231, 231, 231, 231, - 232, 232, 232, 232, 233, 233, 233, 233, 234, 234, 234, 234, 235, 235, 235, 235, - 236, 236, 236, 236, 237, 237, 237, 237, 238, 238, 238, 238, 239, 239, 239, 239, - 240, 240, 240, 240, 241, 241, 241, 241, 242, 242, 242, 242, 243, 243, 243, 243, - 244, 244, 244, 244, 245, 245, 245, 245, 246, 246, 246, 246, 247, 247, 247, 247, - 248, 248, 248, 248, 249, 249, 249, 249, 250, 250, 250, 250, 251, 251, 251, 251, - 252, 252, 252, 252, 253, 253, 253, 253, 254, 254, 254, 254, 255, 255, 255, 255, - 256, 256, 256, 256, 257, 257, 257, 257, 258, 258, 258, 258, 259, 259, 259, 259, - 260, 260, 260, 260, 261, 261, 261, 261, 262, 262, 262, 262, 263, 263, 263, 263, - 264, 264, 264, 264, 265, 265, 265, 265, 266, 266, 266, 266, 267, 267, 267, 267, - 268, 268, 268, 268, 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, - 272, 272, 272, 272, 273, 273, 273, 273, -}; - -static RE_UINT16 re_block_stage_5[] = { - 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, - 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, - 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, - 13, 13, 13, 
13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, - 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 0, 0, 0, 0, - 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, - 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, - 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, - 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, - 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, - 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, - 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, - 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, - 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, - 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, - 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, - 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, - 68, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 71, 71, 71, 71, - 72, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75, - 76, 76, 76, 76, 77, 77, 77, 77, 78, 78, 78, 78, 79, 79, 79, 79, - 80, 80, 80, 80, 81, 81, 81, 81, 82, 82, 82, 82, 83, 83, 83, 83, - 84, 84, 84, 84, 85, 85, 85, 85, 86, 86, 86, 86, 87, 87, 87, 87, - 88, 88, 88, 88, 89, 89, 89, 89, 90, 90, 90, 90, 91, 91, 91, 91, - 92, 92, 92, 92, 93, 93, 93, 93, 94, 94, 94, 94, 95, 95, 95, 95, - 96, 96, 96, 96, 97, 97, 97, 97, 98, 98, 98, 98, 99, 99, 99, 99, - 100, 100, 100, 100, 101, 101, 101, 101, 102, 102, 102, 102, 103, 103, 103, 103, - 104, 104, 104, 104, 105, 105, 105, 105, 106, 106, 106, 106, 107, 107, 107, 107, - 108, 108, 108, 108, 109, 109, 109, 109, 110, 110, 110, 110, 111, 111, 111, 111, - 112, 112, 112, 112, 113, 113, 113, 113, 114, 114, 114, 114, 115, 115, 115, 115, - 116, 116, 116, 116, 117, 117, 117, 117, 118, 118, 118, 118, 119, 119, 119, 119, - 120, 120, 120, 120, 121, 121, 121, 121, 122, 122, 122, 122, 123, 123, 123, 123, - 124, 124, 124, 124, 125, 125, 125, 125, 126, 126, 126, 126, 127, 127, 
127, 127, - 128, 128, 128, 128, 129, 129, 129, 129, 130, 130, 130, 130, 131, 131, 131, 131, - 132, 132, 132, 132, 133, 133, 133, 133, 134, 134, 134, 134, 135, 135, 135, 135, - 136, 136, 136, 136, 137, 137, 137, 137, 138, 138, 138, 138, 139, 139, 139, 139, - 140, 140, 140, 140, 141, 141, 141, 141, 142, 142, 142, 142, 143, 143, 143, 143, - 144, 144, 144, 144, 145, 145, 145, 145, 146, 146, 146, 146, 147, 147, 147, 147, - 148, 148, 148, 148, 149, 149, 149, 149, 150, 150, 150, 150, 151, 151, 151, 151, - 152, 152, 152, 152, 153, 153, 153, 153, 154, 154, 154, 154, 155, 155, 155, 155, - 156, 156, 156, 156, 157, 157, 157, 157, 158, 158, 158, 158, 159, 159, 159, 159, - 160, 160, 160, 160, 161, 161, 161, 161, 162, 162, 162, 162, 163, 163, 163, 163, - 164, 164, 164, 164, 165, 165, 165, 165, 166, 166, 166, 166, 167, 167, 167, 167, - 168, 168, 168, 168, 169, 169, 169, 169, 170, 170, 170, 170, 171, 171, 171, 171, - 172, 172, 172, 172, 173, 173, 173, 173, 174, 174, 174, 174, 175, 175, 175, 175, - 176, 176, 176, 176, 177, 177, 177, 177, 178, 178, 178, 178, 179, 179, 179, 179, - 180, 180, 180, 180, 181, 181, 181, 181, 182, 182, 182, 182, 183, 183, 183, 183, - 184, 184, 184, 184, 185, 185, 185, 185, 186, 186, 186, 186, 187, 187, 187, 187, - 188, 188, 188, 188, 189, 189, 189, 189, 190, 190, 190, 190, 191, 191, 191, 191, - 192, 192, 192, 192, 193, 193, 193, 193, 194, 194, 194, 194, 195, 195, 195, 195, - 196, 196, 196, 196, 197, 197, 197, 197, 198, 198, 198, 198, 199, 199, 199, 199, - 200, 200, 200, 200, 201, 201, 201, 201, 202, 202, 202, 202, 203, 203, 203, 203, - 204, 204, 204, 204, 205, 205, 205, 205, 206, 206, 206, 206, 207, 207, 207, 207, - 208, 208, 208, 208, 209, 209, 209, 209, 210, 210, 210, 210, 211, 211, 211, 211, - 212, 212, 212, 212, 213, 213, 213, 213, 214, 214, 214, 214, 215, 215, 215, 215, - 216, 216, 216, 216, 217, 217, 217, 217, 218, 218, 218, 218, 219, 219, 219, 219, - 220, 220, 220, 220, 221, 221, 221, 221, 222, 222, 222, 222, 223, 223, 223, 223, - 224, 224, 224, 224, 
225, 225, 225, 225, 226, 226, 226, 226, 227, 227, 227, 227, - 228, 228, 228, 228, 229, 229, 229, 229, 230, 230, 230, 230, 231, 231, 231, 231, - 232, 232, 232, 232, 233, 233, 233, 233, 234, 234, 234, 234, 235, 235, 235, 235, - 236, 236, 236, 236, 237, 237, 237, 237, 238, 238, 238, 238, 239, 239, 239, 239, - 240, 240, 240, 240, 241, 241, 241, 241, 242, 242, 242, 242, 243, 243, 243, 243, - 244, 244, 244, 244, 245, 245, 245, 245, 246, 246, 246, 246, 247, 247, 247, 247, - 248, 248, 248, 248, 249, 249, 249, 249, 250, 250, 250, 250, 251, 251, 251, 251, - 252, 252, 252, 252, 253, 253, 253, 253, 254, 254, 254, 254, 255, 255, 255, 255, - 256, 256, 256, 256, 257, 257, 257, 257, 258, 258, 258, 258, 259, 259, 259, 259, - 260, 260, 260, 260, 261, 261, 261, 261, 262, 262, 262, 262, 263, 263, 263, 263, - 264, 264, 264, 264, 265, 265, 265, 265, 266, 266, 266, 266, 267, 267, 267, 267, - 268, 268, 268, 268, 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, - 272, 272, 272, 272, 273, 273, 273, 273, -}; - -/* Block: 9072 bytes. */ - -RE_UINT32 re_get_block(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 11; - code = ch ^ (f << 11); - pos = (RE_UINT32)re_block_stage_1[f] << 4; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_block_stage_2[pos + f] << 3; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_block_stage_3[pos + f] << 2; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_block_stage_4[pos + f] << 2; - value = re_block_stage_5[pos + code]; - - return value; -} - -/* Script. 
*/ - -static RE_UINT8 re_script_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 12, 12, 12, 12, 13, 14, 14, 14, 14, 15, - 16, 17, 18, 19, 20, 14, 21, 14, 22, 14, 14, 14, 14, 23, 24, 24, - 25, 26, 14, 14, 14, 14, 27, 28, 14, 14, 29, 30, 31, 32, 33, 34, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 35, 7, 36, 37, 7, 38, 14, 14, 14, 14, 14, 39, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 40, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, -}; - -static RE_UINT8 re_script_stage_2[] = { - 0, 1, 2, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 32, 33, 34, 35, 36, 37, 37, 37, 37, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 2, 2, 53, 54, - 55, 56, 57, 58, 59, 59, 59, 60, 61, 59, 59, 59, 59, 59, 59, 59, - 62, 62, 59, 59, 59, 59, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, - 73, 74, 75, 76, 77, 78, 79, 59, 71, 71, 71, 71, 71, 71, 71, 71, - 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, - 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 80, 71, 71, 71, 71, - 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 81, - 82, 82, 82, 82, 82, 82, 82, 82, 82, 83, 84, 84, 85, 86, 87, 88, - 89, 90, 91, 92, 93, 94, 95, 96, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 97, - 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, - 98, 98, 71, 71, 99, 100, 101, 102, 103, 103, 104, 105, 106, 107, 108, 109, - 110, 111, 112, 113, 98, 114, 115, 116, 117, 118, 119, 98, 120, 120, 121, 98, - 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 98, 98, 132, 98, 98, 98, - 133, 134, 135, 136, 137, 138, 139, 98, 140, 141, 98, 142, 143, 144, 145, 98, - 98, 146, 98, 98, 98, 147, 98, 98, 148, 149, 98, 98, 98, 98, 98, 98, - 150, 150, 150, 150, 150, 150, 150, 151, 152, 150, 153, 98, 98, 98, 98, 98, - 154, 154, 154, 154, 154, 154, 154, 154, 155, 98, 98, 98, 98, 98, 98, 98, - 98, 98, 98, 98, 98, 98, 98, 98, 156, 156, 156, 156, 157, 98, 98, 98, - 158, 158, 158, 158, 159, 160, 161, 162, 98, 98, 98, 98, 98, 98, 163, 164, - 165, 165, 165, 165, 165, 165, 165, 165, 
165, 165, 165, 165, 165, 165, 165, 165, - 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 166, - 165, 165, 165, 165, 165, 167, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, - 168, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, - 98, 98, 98, 98, 98, 98, 98, 98, 169, 170, 98, 98, 98, 98, 98, 98, - 59, 171, 172, 173, 174, 98, 175, 98, 176, 177, 178, 59, 59, 179, 59, 180, - 181, 181, 181, 181, 181, 182, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, - 183, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, - 184, 185, 186, 98, 98, 98, 98, 98, 98, 98, 98, 98, 187, 188, 98, 98, - 189, 190, 191, 192, 193, 98, 59, 59, 59, 59, 59, 59, 59, 194, 195, 196, - 197, 198, 199, 200, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, - 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 201, 71, 71, - 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 202, 71, - 203, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, - 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 204, 98, 98, - 71, 71, 71, 71, 205, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, - 206, 98, 207, 208, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, -}; - -static RE_UINT16 re_script_stage_3[] = { - 0, 0, 0, 0, 1, 2, 1, 2, 0, 0, 3, 3, 4, 5, 4, 5, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 0, 0, 7, 0, - 8, 8, 8, 8, 8, 8, 8, 9, 10, 11, 12, 11, 11, 11, 13, 11, - 14, 14, 14, 14, 14, 14, 14, 14, 15, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 16, 17, 18, 16, 17, 19, 20, 21, 21, 22, 21, 23, 24, - 25, 26, 27, 27, 28, 29, 27, 30, 27, 27, 27, 27, 27, 31, 27, 27, - 32, 33, 33, 33, 34, 27, 27, 27, 35, 35, 35, 36, 37, 37, 37, 38, - 39, 39, 40, 41, 42, 43, 44, 44, 44, 44, 27, 45, 44, 46, 47, 27, - 48, 48, 48, 48, 48, 49, 50, 48, 51, 52, 53, 54, 55, 56, 57, 58, - 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, - 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, - 107, 108, 109, 110, 111, 
112, 113, 109, 114, 115, 116, 117, 118, 119, 120, 121, - 122, 123, 123, 124, 123, 125, 44, 44, 126, 127, 128, 129, 130, 131, 44, 44, - 132, 132, 132, 132, 133, 132, 134, 135, 132, 133, 132, 136, 136, 137, 44, 44, - 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 139, 139, 140, 139, 139, 141, - 142, 142, 142, 142, 142, 142, 142, 142, 143, 143, 143, 143, 144, 145, 143, 143, - 144, 143, 143, 146, 147, 148, 143, 143, 143, 147, 143, 143, 143, 149, 143, 150, - 143, 151, 152, 152, 152, 152, 152, 153, 154, 154, 154, 154, 154, 154, 154, 154, - 155, 156, 157, 157, 157, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, - 168, 168, 168, 168, 168, 169, 170, 170, 171, 172, 173, 173, 173, 173, 173, 174, - 173, 173, 175, 154, 154, 154, 154, 176, 177, 178, 179, 179, 180, 181, 182, 183, - 184, 184, 185, 184, 186, 187, 168, 168, 188, 189, 190, 190, 190, 191, 190, 192, - 193, 193, 194, 195, 44, 44, 44, 44, 196, 196, 196, 196, 197, 196, 196, 198, - 199, 199, 199, 199, 200, 200, 200, 201, 202, 202, 202, 203, 204, 205, 205, 205, - 206, 44, 44, 44, 207, 208, 209, 210, 4, 4, 211, 4, 4, 212, 213, 214, - 4, 4, 4, 215, 8, 8, 8, 216, 11, 217, 11, 11, 217, 218, 11, 219, - 11, 11, 11, 220, 220, 221, 11, 222, 223, 0, 0, 0, 0, 0, 224, 225, - 226, 227, 0, 226, 44, 8, 8, 228, 0, 0, 229, 230, 231, 0, 4, 4, - 232, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 226, 0, 0, 233, 44, 234, 44, 0, 0, - 235, 235, 235, 235, 235, 235, 235, 235, 0, 0, 0, 0, 0, 0, 0, 236, - 0, 237, 0, 238, 239, 240, 241, 44, 242, 242, 243, 242, 242, 243, 4, 4, - 244, 244, 244, 244, 244, 244, 244, 245, 139, 139, 140, 246, 246, 246, 247, 248, - 143, 249, 250, 250, 250, 250, 14, 14, 0, 0, 0, 0, 251, 44, 44, 44, - 252, 253, 252, 252, 252, 252, 252, 254, 252, 252, 252, 252, 252, 252, 252, 252, - 252, 252, 252, 252, 252, 255, 44, 256, 257, 0, 258, 259, 260, 261, 261, 261, - 261, 262, 263, 264, 264, 264, 264, 265, 266, 267, 268, 269, 142, 142, 142, 142, - 270, 0, 267, 271, 0, 0, 272, 264, 142, 270, 0, 0, 0, 
0, 142, 273, - 0, 0, 0, 0, 0, 264, 264, 274, 264, 264, 264, 264, 264, 275, 0, 0, - 252, 252, 252, 255, 0, 0, 0, 0, 252, 252, 252, 252, 252, 255, 44, 44, - 276, 276, 276, 276, 276, 276, 276, 276, 277, 276, 276, 276, 278, 279, 279, 279, - 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 281, 44, 14, 14, 14, 14, - 14, 14, 282, 282, 282, 282, 282, 283, 0, 0, 284, 4, 4, 4, 4, 4, - 285, 4, 286, 287, 44, 44, 44, 288, 289, 289, 290, 291, 292, 292, 292, 293, - 294, 294, 294, 294, 295, 296, 48, 297, 298, 298, 299, 300, 300, 301, 142, 302, - 303, 303, 303, 303, 304, 305, 138, 306, 307, 307, 307, 308, 309, 310, 138, 138, - 311, 311, 311, 311, 312, 313, 314, 315, 316, 317, 250, 4, 4, 318, 319, 152, - 152, 152, 152, 152, 314, 314, 320, 321, 142, 142, 322, 142, 323, 142, 142, 324, - 44, 44, 44, 44, 44, 44, 44, 44, 252, 252, 252, 252, 252, 252, 325, 252, - 252, 252, 252, 252, 252, 326, 44, 44, 327, 328, 21, 329, 330, 27, 27, 27, - 27, 27, 27, 27, 331, 332, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 333, 44, 27, 27, 27, 27, 334, 27, 27, 335, 44, 44, 336, - 8, 291, 337, 0, 0, 338, 339, 340, 27, 27, 27, 27, 27, 27, 27, 341, - 342, 0, 1, 2, 1, 2, 343, 263, 264, 344, 142, 270, 345, 346, 347, 348, - 349, 350, 351, 352, 353, 353, 44, 44, 350, 350, 350, 350, 350, 350, 350, 354, - 355, 0, 0, 356, 11, 11, 11, 11, 357, 256, 358, 44, 44, 0, 0, 359, - 360, 361, 362, 362, 362, 363, 364, 256, 365, 365, 366, 367, 368, 369, 369, 370, - 371, 372, 373, 373, 374, 375, 44, 44, 376, 376, 376, 376, 376, 377, 377, 377, - 378, 379, 380, 381, 381, 382, 381, 383, 384, 384, 385, 386, 386, 386, 387, 44, - 388, 388, 388, 388, 388, 388, 388, 388, 388, 388, 388, 389, 388, 390, 391, 44, - 392, 393, 393, 394, 395, 396, 397, 397, 398, 399, 400, 44, 44, 44, 401, 402, - 403, 404, 405, 406, 44, 44, 44, 44, 407, 407, 408, 409, 408, 410, 408, 408, - 411, 412, 413, 414, 415, 416, 417, 417, 418, 418, 44, 44, 419, 419, 420, 421, - 422, 422, 422, 423, 424, 425, 426, 427, 428, 429, 430, 44, 44, 44, 44, 44, - 431, 
431, 431, 431, 432, 44, 44, 44, 433, 433, 433, 434, 433, 433, 433, 435, - 44, 44, 44, 44, 44, 44, 27, 436, 437, 437, 437, 437, 438, 439, 437, 440, - 441, 441, 441, 441, 442, 443, 444, 445, 446, 446, 446, 447, 448, 449, 449, 450, - 451, 451, 451, 451, 452, 451, 453, 454, 455, 456, 455, 457, 44, 44, 44, 44, - 458, 459, 460, 461, 461, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, - 472, 472, 472, 472, 472, 473, 44, 44, 474, 474, 474, 474, 475, 476, 44, 44, - 477, 477, 477, 478, 477, 479, 44, 44, 480, 480, 480, 480, 481, 482, 483, 44, - 484, 484, 484, 485, 486, 44, 44, 44, 487, 488, 489, 487, 44, 44, 44, 44, - 44, 44, 490, 490, 490, 490, 490, 491, 44, 44, 44, 44, 492, 492, 492, 493, - 494, 495, 495, 496, 497, 495, 498, 499, 499, 500, 501, 502, 44, 44, 44, 44, - 503, 503, 503, 503, 503, 503, 503, 503, 503, 504, 44, 44, 44, 44, 44, 44, - 503, 503, 503, 503, 503, 503, 505, 506, 503, 503, 503, 503, 507, 44, 44, 44, - 508, 508, 508, 508, 508, 508, 508, 508, 508, 508, 509, 44, 44, 44, 44, 44, - 510, 510, 510, 510, 510, 510, 510, 510, 510, 510, 510, 510, 511, 44, 44, 44, - 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 512, 513, 514, 515, 44, - 44, 44, 44, 44, 44, 516, 517, 518, 519, 519, 519, 519, 520, 521, 522, 523, - 519, 44, 44, 44, 44, 44, 44, 44, 524, 524, 524, 524, 525, 524, 524, 526, - 527, 524, 44, 44, 44, 44, 528, 44, 529, 529, 529, 529, 529, 529, 529, 529, - 529, 529, 529, 529, 529, 529, 530, 44, 529, 529, 529, 529, 529, 529, 529, 531, - 532, 44, 44, 44, 44, 44, 44, 44, 533, 533, 533, 533, 533, 533, 534, 535, - 536, 537, 272, 44, 44, 44, 44, 44, 0, 0, 0, 0, 0, 0, 0, 538, - 0, 0, 539, 0, 0, 0, 540, 541, 542, 0, 543, 0, 0, 0, 544, 44, - 11, 11, 11, 11, 545, 44, 44, 44, 0, 0, 0, 0, 0, 233, 0, 240, - 0, 0, 0, 0, 0, 224, 0, 0, 0, 546, 547, 548, 549, 0, 0, 0, - 550, 551, 0, 552, 553, 554, 0, 0, 0, 0, 237, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 555, 0, 0, 0, 556, 556, 556, 556, 556, 556, 556, 556, - 557, 558, 559, 44, 44, 44, 44, 44, 560, 561, 562, 44, 44, 44, 44, 
44, - 563, 563, 563, 563, 563, 563, 563, 563, 563, 563, 563, 563, 564, 565, 44, 44, - 566, 566, 566, 566, 567, 568, 44, 44, 569, 27, 570, 571, 572, 573, 574, 575, - 576, 577, 578, 577, 44, 44, 44, 331, 0, 0, 256, 0, 0, 0, 0, 0, - 0, 272, 226, 342, 342, 342, 0, 538, 579, 0, 226, 0, 0, 0, 256, 0, - 0, 0, 579, 44, 44, 44, 580, 0, 581, 0, 0, 256, 544, 240, 44, 44, - 0, 0, 0, 0, 0, 582, 579, 233, 0, 0, 0, 0, 0, 0, 0, 272, - 0, 0, 0, 0, 0, 251, 44, 44, 256, 0, 0, 0, 583, 291, 0, 0, - 583, 0, 584, 44, 44, 44, 44, 44, 44, 226, 583, 585, 256, 226, 44, 44, - 0, 240, 44, 44, 586, 44, 44, 44, 252, 252, 252, 252, 252, 587, 44, 44, - 252, 252, 252, 588, 252, 252, 252, 252, 252, 325, 252, 252, 252, 252, 252, 252, - 252, 252, 589, 44, 44, 44, 44, 44, 252, 325, 44, 44, 44, 44, 44, 44, - 590, 44, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 44, -}; - -static RE_UINT16 re_script_stage_4[] = { - 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 3, 0, 0, 0, 4, 0, - 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 5, 0, 2, 5, 6, 0, - 7, 7, 7, 7, 8, 9, 10, 11, 12, 13, 14, 15, 8, 8, 8, 8, - 16, 8, 8, 8, 17, 18, 18, 18, 19, 19, 19, 19, 19, 20, 19, 19, - 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 21, 22, 22, 22, 24, 21, - 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 12, 12, 26, 26, 27, 12, - 26, 28, 12, 12, 29, 30, 29, 31, 29, 29, 32, 33, 29, 29, 29, 29, - 31, 29, 34, 7, 7, 35, 29, 29, 36, 29, 29, 29, 29, 29, 29, 30, - 37, 37, 37, 38, 37, 37, 37, 37, 37, 37, 39, 40, 41, 41, 41, 41, - 42, 12, 12, 12, 43, 43, 43, 43, 43, 43, 44, 12, 45, 45, 45, 45, - 45, 45, 45, 46, 45, 45, 45, 47, 48, 48, 48, 48, 48, 48, 48, 49, - 12, 12, 12, 12, 29, 50, 29, 51, 12, 29, 29, 29, 52, 29, 29, 29, - 53, 53, 53, 53, 54, 53, 53, 53, 53, 55, 53, 53, 56, 57, 56, 58, - 58, 56, 56, 56, 56, 56, 59, 56, 60, 61, 62, 56, 56, 58, 58, 63, - 12, 64, 12, 65, 56, 61, 56, 56, 56, 56, 56, 12, 66, 66, 67, 68, - 69, 70, 70, 70, 70, 70, 71, 70, 71, 72, 73, 71, 67, 68, 69, 73, - 74, 12, 66, 75, 12, 76, 70, 70, 70, 73, 12, 12, 77, 77, 78, 79, - 79, 78, 78, 78, 
78, 78, 80, 78, 80, 77, 81, 78, 78, 79, 79, 81, - 82, 12, 12, 12, 78, 83, 78, 78, 81, 12, 84, 12, 85, 85, 86, 87, - 87, 86, 86, 86, 86, 86, 88, 86, 88, 85, 89, 86, 86, 87, 87, 89, - 12, 90, 12, 91, 86, 90, 86, 86, 86, 86, 12, 12, 92, 93, 94, 92, - 95, 96, 97, 95, 98, 99, 94, 92, 100, 100, 96, 92, 94, 92, 95, 96, - 99, 98, 12, 12, 12, 92, 100, 100, 100, 100, 94, 12, 101, 102, 101, 103, - 103, 101, 101, 101, 101, 101, 103, 101, 101, 101, 104, 102, 101, 103, 103, 104, - 12, 105, 106, 12, 101, 107, 101, 101, 12, 12, 101, 101, 108, 109, 108, 110, - 110, 108, 108, 108, 108, 108, 110, 108, 108, 109, 111, 108, 108, 110, 110, 111, - 12, 112, 12, 113, 108, 114, 108, 108, 112, 12, 12, 12, 115, 115, 116, 117, - 117, 116, 116, 116, 116, 116, 116, 116, 116, 116, 118, 115, 116, 117, 117, 116, - 12, 116, 116, 116, 116, 119, 116, 116, 120, 121, 122, 122, 122, 123, 120, 122, - 122, 122, 122, 122, 124, 122, 122, 125, 122, 123, 126, 127, 122, 128, 122, 122, - 12, 120, 122, 122, 120, 129, 12, 12, 130, 131, 131, 131, 131, 131, 131, 131, - 131, 131, 132, 133, 131, 131, 131, 12, 134, 135, 136, 137, 12, 138, 139, 138, - 139, 140, 141, 139, 138, 138, 142, 143, 138, 136, 138, 143, 138, 138, 143, 138, - 144, 144, 144, 144, 144, 144, 145, 144, 144, 144, 144, 146, 145, 144, 144, 144, - 144, 144, 144, 147, 144, 148, 149, 12, 150, 150, 150, 150, 151, 151, 151, 151, - 151, 152, 12, 153, 151, 151, 154, 151, 155, 155, 155, 155, 156, 156, 156, 156, - 156, 156, 157, 158, 156, 159, 157, 158, 157, 158, 156, 159, 157, 158, 156, 156, - 156, 159, 156, 156, 156, 156, 159, 160, 156, 156, 156, 161, 156, 156, 158, 12, - 162, 162, 162, 162, 162, 163, 162, 163, 164, 164, 164, 164, 165, 165, 165, 165, - 165, 165, 165, 166, 167, 167, 167, 167, 167, 167, 168, 169, 167, 167, 170, 12, - 171, 171, 171, 172, 171, 173, 12, 12, 174, 174, 174, 174, 174, 175, 12, 12, - 176, 176, 176, 176, 176, 12, 12, 12, 177, 177, 177, 178, 178, 12, 12, 12, - 179, 179, 179, 179, 179, 179, 179, 180, 179, 179, 180, 12, 181, 182, 183, 184, - 
183, 183, 185, 12, 183, 183, 183, 183, 183, 183, 12, 12, 183, 183, 184, 12, - 164, 186, 12, 12, 187, 187, 187, 187, 187, 187, 187, 188, 187, 187, 187, 12, - 189, 187, 187, 187, 190, 190, 190, 190, 190, 190, 190, 191, 190, 192, 12, 12, - 193, 193, 193, 193, 193, 193, 193, 12, 193, 193, 194, 12, 193, 193, 195, 196, - 197, 197, 197, 197, 197, 197, 197, 198, 199, 199, 199, 199, 199, 199, 199, 200, - 199, 199, 199, 201, 199, 199, 202, 12, 199, 199, 199, 202, 7, 7, 7, 203, - 204, 204, 204, 204, 204, 204, 204, 12, 204, 204, 204, 205, 206, 206, 206, 206, - 207, 207, 207, 207, 207, 12, 12, 207, 208, 208, 208, 208, 208, 208, 209, 208, - 208, 208, 210, 211, 212, 212, 212, 212, 19, 19, 213, 12, 206, 206, 12, 12, - 214, 7, 7, 7, 215, 7, 216, 217, 0, 218, 219, 12, 2, 220, 221, 2, - 2, 2, 2, 222, 223, 220, 224, 2, 2, 2, 225, 2, 2, 2, 2, 226, - 7, 219, 227, 7, 8, 228, 8, 228, 8, 8, 229, 229, 8, 8, 8, 228, - 8, 15, 8, 8, 8, 10, 8, 230, 10, 15, 8, 14, 0, 0, 0, 231, - 0, 232, 0, 0, 233, 0, 0, 234, 0, 0, 0, 235, 2, 2, 2, 236, - 237, 12, 12, 12, 0, 238, 239, 0, 4, 0, 0, 0, 0, 0, 0, 4, - 2, 2, 5, 12, 0, 235, 12, 12, 0, 0, 235, 12, 240, 240, 240, 240, - 0, 241, 0, 0, 0, 242, 0, 0, 0, 0, 242, 243, 0, 0, 232, 0, - 242, 12, 12, 12, 12, 12, 12, 0, 244, 244, 244, 244, 244, 244, 244, 245, - 18, 18, 18, 18, 18, 12, 246, 18, 247, 247, 247, 247, 247, 247, 12, 248, - 249, 12, 12, 248, 156, 159, 12, 12, 156, 159, 156, 159, 0, 250, 12, 12, - 251, 251, 251, 251, 251, 251, 252, 251, 251, 12, 12, 12, 251, 253, 12, 12, - 0, 0, 0, 12, 0, 254, 0, 0, 255, 251, 256, 257, 0, 0, 251, 0, - 258, 259, 259, 259, 259, 259, 259, 259, 259, 260, 261, 262, 263, 264, 264, 264, - 264, 264, 264, 264, 264, 264, 265, 263, 12, 266, 267, 267, 267, 267, 267, 267, - 267, 267, 267, 268, 269, 155, 155, 155, 155, 155, 155, 270, 267, 267, 271, 12, - 0, 12, 12, 12, 155, 155, 155, 272, 264, 264, 264, 273, 264, 264, 0, 0, - 274, 274, 274, 274, 274, 274, 274, 275, 274, 276, 12, 12, 277, 277, 277, 277, - 278, 278, 278, 278, 278, 278, 
278, 12, 279, 279, 279, 279, 279, 279, 12, 12, - 239, 2, 2, 2, 2, 2, 234, 2, 2, 2, 2, 280, 2, 2, 12, 12, - 12, 281, 2, 2, 282, 282, 282, 282, 282, 282, 282, 12, 0, 0, 242, 12, - 283, 283, 283, 283, 283, 283, 12, 12, 284, 284, 284, 284, 284, 285, 12, 286, - 284, 284, 285, 12, 53, 53, 53, 287, 288, 288, 288, 288, 288, 288, 288, 289, - 290, 290, 290, 290, 290, 12, 12, 291, 155, 155, 155, 292, 293, 293, 293, 293, - 293, 293, 293, 294, 293, 293, 295, 296, 150, 150, 150, 297, 298, 298, 298, 298, - 298, 299, 12, 12, 298, 298, 298, 300, 298, 298, 300, 298, 301, 301, 301, 301, - 302, 12, 12, 12, 12, 12, 303, 301, 304, 304, 304, 304, 304, 305, 12, 12, - 160, 159, 160, 159, 160, 159, 12, 12, 2, 2, 3, 2, 2, 306, 12, 12, - 304, 304, 304, 307, 304, 304, 307, 12, 155, 12, 12, 12, 155, 270, 308, 155, - 155, 155, 155, 12, 251, 251, 251, 253, 251, 251, 253, 12, 2, 280, 12, 12, - 309, 22, 12, 25, 26, 27, 26, 310, 311, 312, 26, 26, 51, 12, 12, 12, - 313, 29, 29, 29, 29, 29, 29, 314, 315, 29, 29, 29, 29, 29, 12, 12, - 29, 29, 29, 51, 7, 7, 7, 316, 235, 0, 0, 0, 0, 235, 0, 12, - 29, 50, 29, 29, 29, 29, 29, 317, 243, 0, 0, 0, 0, 318, 264, 264, - 264, 264, 264, 319, 320, 155, 320, 155, 320, 155, 320, 292, 0, 235, 0, 235, - 12, 12, 243, 242, 321, 321, 321, 322, 321, 321, 321, 321, 321, 323, 321, 321, - 321, 321, 323, 324, 321, 321, 321, 325, 321, 321, 323, 12, 235, 133, 0, 0, - 0, 133, 0, 0, 8, 8, 8, 14, 326, 12, 12, 12, 0, 0, 0, 327, - 328, 328, 328, 328, 328, 328, 328, 329, 330, 330, 330, 330, 331, 12, 12, 12, - 216, 0, 0, 0, 332, 332, 332, 332, 332, 12, 12, 12, 333, 333, 333, 333, - 333, 333, 334, 12, 335, 335, 335, 335, 335, 335, 336, 12, 337, 337, 337, 337, - 337, 337, 337, 338, 339, 339, 339, 339, 339, 12, 339, 339, 339, 340, 12, 12, - 341, 341, 341, 341, 342, 342, 342, 342, 343, 343, 343, 343, 343, 343, 343, 344, - 343, 343, 344, 12, 345, 345, 345, 345, 345, 12, 345, 345, 345, 345, 345, 12, - 346, 346, 346, 346, 346, 346, 12, 12, 347, 347, 347, 347, 347, 12, 12, 348, - 349, 349, 
349, 349, 349, 350, 12, 12, 349, 351, 12, 12, 349, 349, 12, 12, - 352, 353, 354, 352, 352, 352, 352, 352, 352, 355, 356, 357, 358, 358, 358, 358, - 358, 359, 358, 358, 360, 360, 360, 360, 361, 361, 361, 361, 361, 361, 361, 362, - 12, 363, 361, 361, 364, 364, 364, 364, 365, 366, 367, 364, 368, 368, 368, 368, - 368, 368, 368, 369, 370, 370, 370, 370, 370, 370, 371, 372, 373, 373, 373, 373, - 374, 374, 374, 374, 374, 374, 12, 374, 375, 374, 374, 374, 376, 377, 12, 376, - 376, 378, 378, 376, 376, 376, 376, 376, 376, 12, 379, 380, 376, 376, 12, 12, - 376, 376, 381, 12, 382, 382, 382, 382, 383, 383, 383, 383, 384, 384, 384, 384, - 384, 385, 386, 384, 384, 385, 12, 12, 387, 387, 387, 387, 387, 388, 389, 387, - 390, 390, 390, 390, 390, 391, 390, 390, 392, 392, 392, 392, 393, 12, 392, 392, - 394, 394, 394, 394, 395, 12, 396, 397, 12, 12, 396, 394, 398, 398, 398, 398, - 398, 398, 399, 12, 400, 400, 400, 400, 401, 12, 12, 12, 401, 12, 402, 400, - 29, 29, 29, 403, 404, 404, 404, 404, 404, 404, 404, 405, 406, 404, 404, 404, - 12, 12, 12, 407, 408, 408, 408, 408, 409, 12, 12, 12, 410, 410, 410, 410, - 410, 410, 411, 12, 410, 410, 412, 12, 413, 413, 413, 413, 413, 414, 413, 413, - 413, 12, 12, 12, 415, 415, 415, 415, 415, 416, 12, 12, 417, 417, 417, 417, - 417, 417, 417, 418, 121, 122, 122, 122, 122, 129, 12, 12, 419, 419, 419, 419, - 420, 419, 419, 419, 419, 419, 419, 421, 422, 423, 424, 425, 422, 422, 422, 425, - 422, 422, 426, 12, 427, 427, 427, 427, 427, 427, 428, 12, 427, 427, 429, 12, - 430, 431, 430, 432, 432, 430, 430, 430, 430, 430, 433, 430, 433, 431, 434, 430, - 430, 432, 432, 434, 435, 436, 12, 431, 430, 437, 430, 435, 430, 435, 12, 12, - 438, 438, 438, 438, 438, 438, 439, 440, 441, 441, 441, 441, 441, 441, 12, 12, - 441, 441, 442, 12, 443, 443, 443, 443, 443, 444, 443, 443, 443, 443, 443, 444, - 445, 445, 445, 445, 445, 446, 12, 12, 445, 445, 447, 12, 183, 183, 183, 448, - 449, 449, 449, 449, 449, 449, 12, 12, 449, 449, 450, 12, 451, 451, 451, 451, - 451, 451, 452, 
453, 451, 451, 451, 12, 454, 454, 454, 454, 455, 12, 12, 456, - 457, 457, 457, 457, 457, 457, 458, 12, 459, 459, 460, 459, 459, 459, 459, 459, - 459, 461, 459, 459, 459, 462, 12, 12, 459, 459, 459, 463, 464, 464, 464, 464, - 465, 464, 464, 464, 464, 464, 466, 464, 464, 467, 12, 12, 468, 468, 468, 468, - 468, 468, 469, 12, 468, 468, 468, 470, 468, 471, 12, 12, 468, 12, 12, 12, - 472, 472, 472, 472, 472, 472, 472, 473, 474, 474, 474, 474, 474, 475, 12, 12, - 279, 279, 476, 12, 477, 477, 477, 477, 477, 477, 477, 478, 477, 477, 479, 480, - 481, 481, 481, 481, 481, 481, 481, 482, 481, 482, 12, 12, 483, 483, 483, 483, - 483, 484, 12, 12, 483, 483, 485, 483, 485, 483, 483, 483, 483, 483, 12, 486, - 487, 487, 487, 487, 487, 488, 12, 12, 487, 487, 487, 489, 12, 12, 12, 490, - 491, 12, 12, 12, 492, 492, 492, 492, 492, 492, 492, 491, 493, 12, 12, 12, - 494, 12, 12, 12, 495, 495, 495, 495, 495, 495, 496, 12, 495, 495, 495, 497, - 495, 495, 497, 12, 495, 495, 498, 495, 0, 242, 12, 12, 0, 235, 243, 0, - 0, 499, 231, 0, 0, 0, 499, 7, 214, 500, 7, 0, 0, 0, 501, 231, - 0, 0, 250, 12, 8, 228, 12, 12, 0, 0, 0, 232, 502, 503, 243, 232, - 0, 0, 504, 243, 0, 243, 0, 0, 0, 504, 235, 243, 0, 232, 0, 232, - 0, 0, 504, 235, 0, 505, 241, 0, 232, 0, 0, 0, 0, 0, 0, 241, - 506, 506, 506, 506, 506, 506, 506, 12, 12, 12, 507, 506, 508, 506, 506, 506, - 244, 245, 244, 244, 244, 244, 509, 244, 510, 511, 245, 12, 512, 512, 512, 512, - 512, 513, 512, 512, 512, 514, 12, 12, 515, 515, 515, 515, 515, 515, 516, 12, - 515, 515, 517, 518, 29, 519, 29, 29, 520, 521, 519, 29, 403, 29, 522, 12, - 523, 313, 522, 519, 520, 521, 522, 522, 520, 521, 403, 29, 403, 29, 519, 524, - 29, 29, 525, 29, 29, 29, 29, 12, 519, 519, 525, 29, 0, 0, 0, 250, - 12, 241, 0, 0, 526, 12, 12, 12, 235, 12, 12, 12, 0, 0, 12, 12, - 0, 0, 0, 242, 527, 0, 0, 235, 250, 12, 12, 12, 251, 528, 12, 12, - 251, 529, 12, 12, 253, 12, 12, 12, 530, 12, 12, 12, -}; - -static RE_UINT8 re_script_stage_5[] = { - 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 1, - 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 35, 35, 41, 41, 41, 41, - 3, 3, 3, 3, 1, 3, 3, 3, 0, 0, 3, 3, 3, 3, 1, 3, - 0, 0, 0, 0, 3, 1, 3, 1, 3, 3, 3, 0, 3, 0, 3, 3, - 3, 3, 0, 3, 3, 3, 55, 55, 55, 55, 55, 55, 4, 4, 4, 4, - 4, 41, 41, 4, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, - 0, 1, 5, 0, 0, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, - 6, 0, 0, 0, 7, 7, 7, 7, 7, 1, 7, 7, 1, 7, 7, 7, - 7, 7, 7, 1, 1, 0, 7, 1, 7, 7, 7, 41, 41, 41, 7, 7, - 41, 7, 7, 7, 8, 8, 8, 8, 8, 8, 0, 8, 8, 8, 8, 0, - 0, 8, 8, 8, 9, 9, 9, 9, 9, 9, 0, 0, 66, 66, 66, 66, - 66, 66, 66, 0, 82, 82, 82, 82, 82, 82, 0, 0, 82, 82, 82, 0, - 95, 95, 95, 95, 0, 0, 95, 0, 7, 0, 7, 7, 7, 7, 0, 0, - 7, 7, 1, 7, 10, 10, 10, 10, 10, 41, 41, 10, 1, 1, 10, 10, - 11, 11, 11, 11, 0, 11, 11, 11, 11, 0, 0, 11, 11, 0, 11, 11, - 11, 0, 11, 0, 0, 0, 11, 11, 11, 11, 0, 0, 11, 11, 11, 0, - 0, 0, 0, 11, 11, 11, 0, 11, 0, 12, 12, 12, 12, 12, 12, 0, - 0, 0, 0, 12, 12, 0, 0, 12, 12, 12, 12, 12, 12, 0, 12, 12, - 0, 12, 12, 0, 12, 12, 0, 0, 0, 12, 0, 0, 12, 0, 12, 0, - 0, 0, 12, 12, 0, 13, 13, 13, 13, 13, 13, 13, 13, 13, 0, 13, - 13, 0, 13, 13, 13, 13, 0, 0, 13, 0, 0, 0, 0, 0, 13, 13, - 0, 13, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 14, - 14, 0, 14, 14, 14, 14, 0, 0, 0, 0, 14, 14, 14, 14, 0, 14, - 0, 0, 15, 15, 0, 15, 15, 15, 15, 15, 15, 0, 15, 0, 15, 15, - 15, 15, 0, 0, 0, 15, 15, 0, 0, 0, 0, 15, 15, 0, 0, 0, - 15, 15, 15, 15, 16, 16, 16, 16, 0, 16, 16, 16, 16, 0, 16, 16, - 16, 16, 0, 0, 0, 16, 16, 0, 16, 16, 16, 0, 0, 0, 16, 16, - 17, 17, 17, 17, 0, 17, 17, 17, 17, 0, 17, 17, 17, 17, 0, 0, - 0, 17, 17, 0, 0, 0, 17, 0, 0, 0, 17, 17, 0, 18, 18, 18, - 18, 18, 18, 18, 18, 0, 18, 18, 18, 18, 18, 0, 0, 0, 18, 18, - 0, 0, 19, 19, 0, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, - 19, 19, 0, 19, 0, 19, 0, 0, 0, 0, 19, 0, 0, 0, 0, 19, - 19, 0, 19, 0, 19, 0, 0, 0, 0, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 0, 0, 0, 0, 1, 0, 21, 21, 0, 21, 0, 0, 21, - 21, 0, 21, 0, 0, 21, 0, 0, 21, 21, 21, 21, 0, 21, 21, 21, - 0, 21, 0, 21, 0, 0, 21, 21, 21, 
21, 0, 21, 21, 21, 0, 0, - 22, 22, 22, 22, 0, 22, 22, 22, 22, 0, 0, 0, 22, 0, 22, 22, - 22, 1, 1, 1, 1, 22, 22, 0, 23, 23, 23, 23, 24, 24, 24, 24, - 24, 24, 0, 24, 0, 24, 0, 0, 24, 24, 24, 1, 25, 25, 25, 25, - 26, 26, 26, 26, 26, 0, 26, 26, 26, 26, 0, 0, 26, 26, 26, 0, - 0, 26, 26, 26, 26, 0, 0, 0, 27, 27, 27, 27, 27, 27, 0, 0, - 28, 28, 28, 28, 29, 29, 29, 29, 29, 0, 0, 0, 30, 30, 30, 30, - 30, 30, 30, 1, 1, 1, 30, 30, 30, 0, 0, 0, 42, 42, 42, 42, - 42, 0, 42, 42, 42, 0, 0, 0, 43, 43, 43, 43, 43, 1, 1, 0, - 44, 44, 44, 44, 45, 45, 45, 45, 45, 0, 45, 45, 31, 31, 31, 31, - 31, 31, 0, 0, 32, 32, 1, 1, 32, 1, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 0, 32, 32, 0, 0, 28, 28, 0, 0, 46, 46, 46, 46, - 46, 46, 46, 0, 46, 0, 0, 0, 47, 47, 47, 47, 47, 47, 0, 0, - 47, 0, 0, 0, 56, 56, 56, 56, 56, 56, 0, 0, 56, 56, 56, 0, - 0, 0, 56, 56, 54, 54, 54, 54, 0, 0, 54, 54, 78, 78, 78, 78, - 78, 78, 78, 0, 78, 0, 0, 78, 78, 78, 0, 0, 41, 41, 41, 0, - 62, 62, 62, 62, 62, 0, 0, 0, 67, 67, 67, 67, 93, 93, 93, 93, - 68, 68, 68, 68, 0, 0, 0, 68, 68, 68, 0, 0, 0, 68, 68, 68, - 69, 69, 69, 69, 4, 0, 0, 0, 41, 41, 41, 1, 41, 1, 41, 41, - 41, 1, 1, 1, 1, 41, 1, 1, 41, 1, 1, 0, 41, 41, 0, 0, - 2, 2, 3, 3, 3, 3, 3, 4, 2, 3, 3, 3, 3, 3, 2, 2, - 3, 3, 3, 2, 4, 2, 2, 2, 2, 2, 2, 3, 0, 0, 0, 41, - 3, 3, 0, 0, 0, 3, 0, 3, 0, 3, 3, 3, 41, 41, 1, 1, - 1, 0, 1, 1, 1, 2, 0, 0, 1, 1, 1, 2, 1, 1, 1, 0, - 2, 0, 0, 0, 41, 0, 0, 0, 1, 1, 3, 1, 1, 1, 2, 2, - 53, 53, 53, 53, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, - 57, 57, 57, 57, 57, 57, 57, 0, 0, 55, 55, 55, 58, 58, 58, 58, - 0, 0, 0, 58, 58, 0, 0, 0, 1, 0, 0, 0, 36, 36, 36, 36, - 36, 36, 0, 36, 36, 36, 0, 0, 1, 36, 1, 36, 1, 36, 36, 36, - 36, 36, 41, 41, 41, 41, 25, 25, 0, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 0, 0, 41, 41, 1, 1, 33, 33, 33, 1, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 1, 0, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 0, 0, 0, 25, 25, 25, 25, 25, 25, 0, 35, 35, 35, 0, - 25, 25, 25, 1, 34, 34, 34, 0, 37, 37, 37, 37, 37, 0, 0, 0, - 37, 37, 37, 0, 83, 
83, 83, 83, 70, 70, 70, 70, 84, 84, 84, 84, - 2, 2, 2, 0, 0, 0, 0, 2, 59, 59, 59, 59, 65, 65, 65, 65, - 71, 71, 71, 71, 71, 71, 0, 0, 0, 0, 71, 71, 10, 10, 0, 0, - 72, 72, 72, 72, 72, 72, 1, 72, 73, 73, 73, 73, 0, 0, 0, 73, - 25, 0, 0, 0, 85, 85, 85, 85, 85, 85, 0, 1, 85, 85, 0, 0, - 0, 0, 85, 85, 23, 23, 23, 0, 77, 77, 77, 77, 77, 77, 77, 0, - 77, 77, 0, 0, 79, 79, 79, 79, 79, 79, 79, 0, 0, 0, 0, 79, - 86, 86, 86, 86, 86, 86, 86, 0, 2, 3, 0, 0, 86, 86, 0, 0, - 0, 0, 0, 25, 0, 0, 0, 5, 6, 0, 6, 0, 6, 6, 0, 6, - 6, 0, 6, 6, 0, 0, 0, 7, 7, 7, 1, 1, 0, 0, 7, 7, - 41, 41, 4, 4, 7, 0, 0, 1, 1, 1, 34, 34, 34, 34, 1, 1, - 0, 0, 25, 25, 48, 48, 48, 48, 0, 48, 48, 48, 48, 48, 48, 0, - 48, 48, 0, 48, 48, 48, 0, 0, 3, 0, 0, 0, 1, 41, 0, 0, - 74, 74, 74, 74, 74, 0, 0, 0, 75, 75, 75, 75, 75, 0, 0, 0, - 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 0, 120, 120, 120, 120, - 120, 120, 120, 0, 49, 49, 49, 49, 49, 49, 0, 49, 60, 60, 60, 60, - 60, 60, 0, 0, 40, 40, 40, 40, 50, 50, 50, 50, 51, 51, 51, 51, - 51, 51, 0, 0, 136, 136, 136, 136, 106, 106, 106, 106, 103, 103, 103, 103, - 0, 0, 0, 103, 110, 110, 110, 110, 110, 110, 110, 0, 110, 110, 0, 0, - 52, 52, 52, 52, 52, 52, 0, 0, 52, 0, 52, 52, 52, 52, 0, 52, - 52, 0, 0, 0, 52, 0, 0, 52, 87, 87, 87, 87, 87, 87, 0, 87, - 118, 118, 118, 118, 117, 117, 117, 117, 117, 117, 117, 0, 0, 0, 0, 117, - 128, 128, 128, 128, 128, 128, 128, 0, 128, 128, 0, 0, 0, 0, 0, 128, - 64, 64, 64, 64, 0, 0, 0, 64, 76, 76, 76, 76, 76, 76, 0, 0, - 0, 0, 0, 76, 98, 98, 98, 98, 97, 97, 97, 97, 0, 0, 97, 97, - 61, 61, 61, 61, 0, 61, 61, 0, 0, 61, 61, 61, 61, 61, 61, 0, - 0, 0, 0, 61, 61, 0, 0, 0, 88, 88, 88, 88, 116, 116, 116, 116, - 112, 112, 112, 112, 112, 112, 112, 0, 0, 0, 0, 112, 80, 80, 80, 80, - 80, 80, 0, 0, 0, 80, 80, 80, 89, 89, 89, 89, 89, 89, 0, 0, - 90, 90, 90, 90, 90, 90, 90, 0, 121, 121, 121, 121, 121, 121, 0, 0, - 0, 121, 121, 121, 121, 0, 0, 0, 91, 91, 91, 91, 91, 0, 0, 0, - 130, 130, 130, 130, 130, 130, 130, 0, 0, 0, 130, 130, 7, 7, 7, 0, - 94, 94, 
94, 94, 94, 94, 0, 0, 0, 0, 94, 94, 0, 0, 0, 94, - 92, 92, 92, 92, 92, 92, 0, 0, 101, 101, 101, 101, 101, 0, 0, 0, - 101, 101, 0, 0, 96, 96, 96, 96, 96, 0, 96, 96, 111, 111, 111, 111, - 111, 111, 111, 0, 100, 100, 100, 100, 100, 100, 0, 0, 109, 109, 109, 109, - 109, 109, 0, 109, 109, 109, 109, 0, 129, 129, 129, 129, 129, 129, 129, 0, - 129, 0, 129, 129, 129, 129, 0, 129, 129, 129, 0, 0, 123, 123, 123, 123, - 123, 123, 123, 0, 123, 123, 0, 0, 107, 107, 107, 107, 0, 107, 107, 107, - 107, 0, 0, 107, 107, 0, 107, 107, 107, 107, 0, 0, 107, 0, 0, 0, - 0, 0, 0, 107, 0, 0, 107, 107, 135, 135, 135, 135, 135, 135, 0, 135, - 0, 135, 0, 0, 124, 124, 124, 124, 124, 124, 0, 0, 122, 122, 122, 122, - 122, 122, 0, 0, 114, 114, 114, 114, 114, 0, 0, 0, 114, 114, 0, 0, - 32, 0, 0, 0, 102, 102, 102, 102, 102, 102, 0, 0, 126, 126, 126, 126, - 126, 126, 0, 0, 0, 126, 126, 126, 125, 125, 125, 125, 125, 125, 125, 0, - 0, 0, 0, 125, 119, 119, 119, 119, 119, 0, 0, 0, 133, 133, 133, 133, - 133, 0, 133, 133, 133, 133, 133, 0, 133, 133, 0, 0, 133, 0, 0, 0, - 134, 134, 134, 134, 0, 0, 134, 134, 0, 134, 134, 134, 134, 134, 134, 0, - 63, 63, 63, 63, 63, 63, 0, 0, 63, 63, 63, 0, 63, 0, 0, 0, - 81, 81, 81, 81, 81, 81, 81, 0, 127, 127, 127, 127, 127, 127, 127, 0, - 84, 0, 0, 0, 115, 115, 115, 115, 115, 115, 115, 0, 115, 115, 0, 0, - 0, 0, 115, 115, 104, 104, 104, 104, 104, 104, 0, 0, 108, 108, 108, 108, - 108, 108, 0, 0, 108, 108, 0, 108, 0, 108, 108, 108, 99, 99, 99, 99, - 99, 0, 0, 0, 99, 99, 99, 0, 0, 0, 0, 99, 137, 0, 0, 0, - 137, 137, 137, 137, 137, 137, 137, 0, 34, 33, 0, 0, 105, 105, 105, 105, - 105, 105, 105, 0, 105, 0, 0, 0, 105, 105, 0, 0, 1, 1, 1, 41, - 1, 41, 41, 41, 1, 1, 41, 41, 0, 0, 1, 0, 0, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 1, 0, 131, 131, 131, 131, 0, 0, 0, 131, - 0, 131, 131, 131, 57, 0, 0, 57, 57, 57, 0, 57, 57, 0, 57, 57, - 113, 113, 113, 113, 113, 0, 0, 113, 113, 113, 113, 0, 132, 132, 132, 132, - 132, 132, 132, 0, 132, 132, 0, 0, 0, 0, 132, 132, 0, 7, 7, 7, - 0, 7, 7, 0, 7, 0, 0, 7, 
0, 7, 0, 7, 0, 0, 7, 0, - 7, 0, 7, 0, 7, 7, 0, 7, 33, 1, 1, 0, 1, 0, 0, 1, - 36, 36, 36, 0, 36, 0, 0, 0, 0, 1, 0, 0, -}; - -/* Script: 11396 bytes. */ - -RE_UINT32 re_get_script(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 11; - code = ch ^ (f << 11); - pos = (RE_UINT32)re_script_stage_1[f] << 4; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_script_stage_2[pos + f] << 3; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_script_stage_3[pos + f] << 2; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_script_stage_4[pos + f] << 2; - value = re_script_stage_5[pos + code]; - - return value; -} - -/* Word_Break. */ - -static RE_UINT8 re_word_break_stage_1[] = { - 0, 1, 2, 2, 2, 3, 4, 5, 6, 7, 8, 9, 2, 10, 11, 12, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 13, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_word_break_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, - 29, 30, 31, 31, 32, 31, 33, 34, 31, 31, 31, 31, 35, 36, 37, 31, - 38, 39, 40, 41, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 1, 1, 1, 1, 42, 1, 43, 44, 45, 46, 47, 48, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 49, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 50, 1, 51, 52, 53, - 54, 55, 56, 57, 58, 59, 1, 60, 61, 62, 63, 
64, 65, 31, 31, 31, - 66, 67, 68, 69, 70, 71, 72, 73, 74, 31, 75, 31, 76, 31, 31, 31, - 1, 1, 1, 77, 78, 79, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 1, 1, 1, 1, 80, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 1, 1, 81, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 1, 1, 82, 83, 31, 31, 31, 84, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 85, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 86, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 87, 88, 31, 89, 90, 91, 92, 31, 31, 93, 31, 31, 31, 31, 31, - 94, 31, 31, 31, 31, 31, 31, 31, 95, 96, 31, 31, 31, 31, 97, 31, - 31, 98, 31, 99, 100, 101, 102, 31, 31, 103, 31, 31, 31, 31, 31, 31, - 104, 105, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, -}; - -static RE_UINT16 re_word_break_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 8, 9, 10, 10, 10, 11, 12, 13, 7, 14, - 7, 7, 7, 7, 15, 7, 7, 7, 7, 16, 17, 18, 19, 20, 21, 22, - 23, 7, 24, 25, 7, 7, 26, 27, 28, 29, 30, 7, 7, 31, 32, 33, - 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, - 49, 50, 51, 52, 53, 54, 55, 56, 57, 54, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 37, 80, 81, 37, 37, 82, 83, 37, 84, 85, 86, 87, 88, 89, 90, 37, - 37, 91, 92, 93, 94, 7, 95, 96, 7, 7, 97, 7, 98, 99, 100, 7, - 101, 7, 102, 37, 103, 7, 7, 104, 18, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 105, 3, 7, 7, 106, 107, 108, 109, 110, 37, 39, 111, 112, - 113, 7, 7, 114, 115, 116, 7, 117, 118, 119, 63, 37, 37, 37, 120, 37, - 121, 37, 122, 123, 124, 125, 37, 37, 126, 127, 128, 129, 130, 131, 7, 132, - 7, 133, 134, 135, 136, 37, 137, 138, 7, 7, 7, 7, 7, 7, 10, 139, - 104, 7, 140, 135, 7, 141, 142, 143, 144, 145, 
146, 147, 148, 37, 149, 150, - 151, 152, 153, 7, 136, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 154, 7, 155, 156, 37, 37, 37, 37, 37, 37, 157, - 158, 37, 37, 159, 37, 37, 37, 37, 7, 160, 118, 7, 7, 7, 7, 161, - 7, 95, 7, 162, 163, 164, 164, 10, 37, 165, 37, 37, 37, 37, 37, 37, - 166, 167, 37, 37, 168, 169, 169, 170, 171, 172, 7, 7, 173, 174, 37, 175, - 37, 37, 37, 37, 37, 37, 175, 176, 169, 169, 177, 37, 37, 37, 37, 37, - 7, 7, 7, 7, 178, 37, 179, 135, 180, 181, 7, 182, 183, 7, 7, 184, - 185, 186, 7, 7, 187, 188, 37, 185, 189, 190, 7, 191, 192, 127, 193, 194, - 32, 195, 196, 197, 41, 198, 199, 200, 7, 201, 202, 203, 37, 204, 205, 206, - 207, 208, 96, 209, 7, 7, 7, 210, 7, 7, 7, 7, 7, 211, 212, 213, - 214, 215, 216, 7, 7, 217, 218, 7, 7, 135, 179, 7, 219, 7, 220, 221, - 222, 223, 224, 225, 7, 7, 7, 226, 227, 2, 3, 228, 229, 118, 230, 231, - 232, 233, 234, 37, 7, 7, 7, 174, 37, 37, 7, 235, 37, 37, 37, 236, - 37, 37, 37, 37, 197, 7, 237, 238, 7, 179, 239, 240, 135, 7, 241, 37, - 7, 7, 7, 7, 135, 242, 243, 213, 7, 244, 7, 245, 37, 37, 37, 37, - 7, 163, 117, 220, 37, 37, 37, 37, 246, 247, 117, 163, 118, 37, 37, 248, - 117, 249, 37, 37, 7, 250, 37, 37, 251, 252, 37, 197, 197, 37, 86, 253, - 7, 117, 117, 254, 217, 37, 37, 37, 7, 7, 136, 37, 7, 254, 7, 254, - 130, 255, 256, 257, 130, 258, 179, 259, 130, 260, 179, 261, 130, 198, 262, 37, - 263, 264, 37, 37, 265, 266, 267, 268, 269, 54, 270, 271, 37, 37, 37, 37, - 7, 272, 273, 37, 7, 29, 274, 37, 37, 37, 37, 37, 7, 275, 276, 37, - 7, 29, 277, 37, 7, 278, 112, 37, 279, 280, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 7, 7, 281, 37, 37, 37, 37, 37, 37, 7, 282, - 283, 284, 285, 286, 287, 288, 37, 37, 7, 7, 7, 7, 249, 37, 37, 37, - 7, 7, 7, 173, 7, 7, 7, 7, 7, 7, 245, 37, 37, 37, 37, 37, - 7, 173, 37, 37, 37, 37, 37, 37, 7, 7, 289, 37, 37, 37, 37, 37, - 7, 282, 118, 112, 37, 37, 179, 290, 7, 291, 292, 293, 103, 37, 37, 37, - 7, 7, 294, 295, 296, 37, 37, 297, 298, 37, 37, 37, 37, 37, 37, 37, - 7, 7, 7, 
299, 300, 301, 37, 37, 37, 37, 37, 302, 303, 304, 37, 37, - 37, 37, 305, 37, 37, 37, 37, 37, 7, 7, 306, 7, 307, 308, 309, 7, - 310, 311, 312, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 313, 314, 96, - 306, 306, 160, 160, 283, 283, 315, 316, 10, 317, 10, 318, 319, 320, 37, 37, - 321, 322, 37, 37, 37, 37, 37, 37, 7, 7, 7, 7, 7, 7, 323, 37, - 7, 7, 324, 37, 37, 37, 37, 37, 309, 325, 326, 327, 328, 329, 37, 37, - 37, 179, 330, 330, 155, 37, 37, 331, 37, 37, 37, 37, 332, 37, 333, 334, - 37, 37, 335, 336, 337, 338, 37, 37, 37, 37, 37, 339, 340, 37, 37, 341, - 37, 37, 342, 37, 37, 343, 344, 37, 345, 346, 37, 37, 37, 37, 37, 37, - 347, 10, 10, 10, 37, 37, 37, 37, 10, 10, 10, 10, 10, 10, 10, 348, -}; - -static RE_UINT8 re_word_break_stage_4[] = { - 0, 0, 1, 2, 0, 0, 0, 0, 3, 4, 0, 5, 6, 6, 7, 0, - 8, 9, 9, 9, 9, 9, 10, 11, 8, 9, 9, 9, 9, 9, 10, 0, - 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 13, 14, 0, 15, 13, 0, - 9, 9, 9, 9, 9, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 16, 17, 9, 9, 16, 18, 0, 0, 9, 19, 0, 20, 0, 0, 0, 0, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 9, 22, 17, 23, - 0, 24, 10, 22, 9, 9, 9, 9, 25, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 25, 9, 9, 26, 21, 27, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 28, 0, - 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 29, 0, 30, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 31, 32, 31, 0, 0, 33, 33, 33, 33, - 33, 33, 34, 0, 35, 36, 0, 0, 37, 38, 0, 39, 21, 21, 40, 41, - 9, 9, 42, 21, 21, 21, 21, 21, 6, 6, 43, 44, 45, 9, 9, 9, - 9, 9, 9, 9, 9, 46, 21, 47, 21, 48, 49, 27, 6, 6, 50, 51, - 0, 0, 0, 52, 53, 9, 9, 9, 9, 9, 9, 9, 21, 21, 21, 21, - 21, 21, 40, 8, 9, 9, 9, 9, 9, 54, 21, 21, 55, 0, 0, 0, - 6, 6, 50, 9, 9, 9, 9, 9, 9, 9, 42, 21, 21, 16, 56, 0, - 9, 9, 9, 9, 9, 54, 57, 21, 21, 58, 58, 59, 0, 0, 0, 0, - 9, 9, 9, 9, 9, 9, 58, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 9, 9, 9, 9, 9, 22, 9, 16, 0, 0, 0, 0, 0, 21, 21, 21, - 60, 21, 21, 21, 21, 21, 21, 21, 21, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 54, 61, 21, 21, 21, 21, 58, 21, 9, 9, - 54, 62, 6, 6, 8, 9, 9, 
9, 58, 8, 9, 51, 51, 9, 9, 9, - 9, 9, 22, 9, 20, 17, 16, 61, 21, 63, 63, 64, 0, 65, 0, 25, - 54, 62, 6, 6, 16, 0, 0, 0, 30, 8, 10, 66, 51, 9, 9, 9, - 9, 9, 22, 9, 22, 67, 16, 49, 40, 65, 63, 59, 68, 0, 8, 20, - 0, 62, 6, 6, 27, 69, 0, 0, 30, 8, 9, 25, 25, 9, 9, 9, - 9, 9, 22, 9, 22, 8, 16, 61, 21, 31, 31, 59, 19, 0, 0, 0, - 54, 62, 6, 6, 0, 0, 28, 0, 30, 8, 9, 51, 51, 9, 9, 9, - 21, 63, 63, 59, 0, 70, 0, 25, 54, 62, 6, 6, 28, 0, 0, 0, - 71, 8, 10, 17, 22, 16, 67, 22, 66, 19, 10, 17, 9, 9, 16, 70, - 40, 70, 49, 59, 19, 65, 0, 0, 0, 62, 6, 6, 0, 0, 0, 0, - 21, 8, 9, 22, 22, 9, 9, 9, 9, 9, 22, 9, 9, 9, 16, 46, - 21, 49, 49, 59, 0, 32, 10, 0, 54, 62, 6, 6, 0, 0, 0, 0, - 58, 8, 9, 22, 22, 9, 9, 9, 9, 9, 22, 9, 9, 8, 16, 61, - 21, 49, 49, 59, 0, 32, 0, 13, 54, 62, 6, 6, 67, 0, 0, 0, - 30, 8, 9, 22, 22, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 46, - 21, 49, 49, 64, 0, 42, 0, 66, 54, 62, 6, 6, 0, 0, 17, 9, - 70, 8, 9, 9, 9, 10, 17, 9, 9, 9, 9, 9, 25, 9, 9, 28, - 9, 10, 72, 65, 21, 73, 21, 21, 0, 62, 6, 6, 70, 0, 0, 0, - 0, 0, 0, 0, 68, 21, 40, 0, 0, 65, 21, 40, 6, 6, 74, 0, - 0, 0, 0, 0, 68, 21, 31, 75, 0, 0, 21, 59, 6, 6, 74, 0, - 19, 0, 0, 0, 0, 0, 59, 0, 6, 6, 74, 0, 0, 76, 68, 70, - 9, 9, 8, 9, 9, 9, 9, 9, 9, 9, 9, 19, 30, 21, 21, 21, - 21, 49, 9, 58, 21, 21, 30, 21, 21, 21, 21, 21, 21, 21, 21, 75, - 0, 72, 0, 0, 0, 0, 0, 0, 0, 0, 65, 21, 21, 21, 21, 40, - 6, 6, 74, 0, 0, 70, 59, 70, 49, 63, 21, 59, 30, 75, 0, 0, - 70, 21, 21, 31, 6, 6, 77, 59, 9, 25, 0, 28, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 10, 9, 9, 9, 22, 16, 9, 10, 22, 16, - 9, 9, 22, 16, 9, 9, 9, 9, 9, 9, 9, 9, 22, 16, 9, 10, - 22, 16, 9, 9, 9, 10, 9, 9, 9, 9, 9, 9, 22, 16, 9, 9, - 9, 9, 9, 9, 9, 9, 10, 30, 9, 9, 9, 9, 0, 0, 0, 0, - 9, 9, 9, 9, 9, 16, 9, 16, 9, 9, 9, 51, 9, 9, 9, 9, - 9, 9, 10, 17, 9, 9, 19, 0, 9, 9, 9, 22, 54, 75, 0, 0, - 9, 9, 9, 9, 54, 75, 0, 0, 9, 9, 9, 9, 54, 0, 0, 0, - 9, 9, 9, 22, 78, 0, 0, 0, 21, 21, 21, 21, 21, 0, 0, 68, - 6, 6, 74, 0, 0, 0, 0, 0, 0, 0, 65, 79, 6, 6, 74, 0, - 9, 9, 9, 9, 9, 9, 0, 0, 
9, 80, 9, 9, 9, 9, 9, 9, - 9, 9, 81, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 16, 0, 0, - 9, 9, 9, 9, 9, 9, 9, 10, 21, 21, 21, 0, 21, 21, 21, 0, - 0, 0, 0, 0, 6, 6, 74, 0, 9, 9, 9, 9, 9, 42, 21, 0, - 0, 0, 0, 0, 0, 30, 21, 40, 21, 21, 21, 21, 21, 21, 21, 63, - 6, 6, 74, 0, 6, 6, 74, 0, 0, 0, 0, 0, 21, 21, 21, 40, - 21, 45, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 21, 21, 21, - 21, 45, 9, 0, 6, 6, 74, 0, 0, 0, 65, 21, 21, 0, 0, 0, - 82, 9, 9, 9, 9, 9, 9, 9, 58, 21, 21, 27, 6, 6, 50, 9, - 9, 54, 21, 21, 21, 0, 0, 0, 9, 21, 21, 21, 21, 21, 0, 0, - 6, 6, 74, 8, 6, 6, 50, 9, 9, 9, 9, 9, 9, 9, 9, 16, - 9, 9, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 21, 21, 21, - 21, 21, 45, 53, 54, 83, 59, 0, 21, 21, 21, 21, 21, 59, 65, 21, - 9, 16, 9, 16, 9, 9, 84, 84, 9, 9, 9, 9, 9, 22, 9, 20, - 17, 22, 9, 19, 9, 17, 9, 0, 9, 9, 9, 19, 17, 22, 9, 19, - 0, 0, 0, 85, 0, 0, 86, 0, 0, 87, 88, 89, 0, 0, 0, 11, - 90, 91, 0, 0, 0, 90, 0, 0, 37, 92, 37, 37, 28, 0, 0, 66, - 0, 0, 0, 0, 9, 9, 9, 19, 0, 0, 0, 0, 21, 21, 21, 21, - 21, 21, 21, 21, 75, 0, 0, 0, 13, 66, 17, 9, 9, 28, 8, 16, - 0, 20, 22, 25, 9, 9, 16, 9, 0, 8, 16, 13, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 17, 9, 9, 9, 9, 16, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 93, 0, 0, 0, 0, 0, 0, 93, 0, - 0, 0, 94, 95, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, - 9, 9, 9, 10, 9, 9, 9, 9, 9, 19, 66, 42, 27, 0, 0, 0, - 9, 9, 0, 66, 0, 0, 0, 65, 9, 9, 9, 9, 9, 10, 0, 0, - 9, 10, 9, 10, 9, 10, 9, 10, 0, 0, 0, 66, 0, 0, 0, 0, - 0, 28, 0, 0, 0, 0, 0, 0, 0, 0, 70, 21, 97, 98, 66, 19, - 0, 0, 0, 0, 0, 0, 99, 100, 101, 101, 101, 101, 101, 101, 101, 101, - 101, 101, 101, 101, 101, 101, 102, 101, 0, 8, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 16, 8, 9, 9, 9, 9, 9, 9, 10, 0, 0, 0, 0, - 9, 9, 9, 9, 9, 9, 10, 0, 0, 0, 0, 0, 101, 101, 101, 101, - 101, 101, 101, 101, 101, 101, 101, 102, 101, 101, 101, 101, 101, 101, 0, 0, - 9, 9, 9, 19, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, - 9, 9, 9, 19, 9, 9, 9, 9, 6, 6, 50, 0, 0, 0, 0, 0, - 9, 9, 9, 42, 40, 21, 21, 103, 9, 9, 9, 9, 9, 9, 9, 54, - 9, 9, 9, 9, 59, 0, 0, 0, 0, 0, 0, 
0, 0, 66, 9, 9, - 17, 9, 9, 9, 9, 9, 9, 9, 9, 9, 51, 9, 9, 9, 9, 9, - 9, 9, 9, 10, 9, 9, 0, 0, 104, 104, 42, 9, 9, 9, 9, 9, - 42, 21, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 0, 0, 0, - 27, 9, 9, 9, 9, 9, 9, 9, 21, 59, 0, 0, 6, 6, 74, 0, - 21, 21, 21, 21, 27, 9, 66, 28, 9, 54, 21, 59, 9, 9, 9, 9, - 9, 42, 21, 21, 21, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 19, - 9, 9, 9, 9, 42, 21, 21, 21, 75, 0, 0, 66, 6, 6, 74, 0, - 0, 68, 0, 0, 6, 6, 74, 0, 9, 9, 58, 21, 21, 40, 0, 0, - 42, 9, 9, 59, 6, 6, 74, 0, 0, 0, 0, 0, 0, 0, 65, 59, - 0, 0, 0, 0, 49, 63, 75, 70, 68, 0, 0, 0, 0, 0, 0, 0, - 9, 9, 42, 21, 17, 105, 0, 0, 8, 10, 8, 10, 8, 10, 0, 0, - 9, 10, 9, 10, 9, 9, 9, 9, 9, 16, 0, 0, 9, 9, 9, 9, - 42, 21, 40, 59, 6, 6, 74, 0, 9, 0, 0, 0, 9, 9, 9, 9, - 9, 10, 66, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, - 9, 10, 0, 0, 66, 9, 0, 106, 33, 33, 107, 33, 33, 34, 33, 108, - 109, 107, 33, 33, 9, 9, 9, 9, 9, 9, 9, 9, 16, 0, 0, 0, - 0, 0, 0, 0, 66, 9, 9, 9, 9, 9, 9, 9, 17, 9, 9, 9, - 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 0, - 21, 21, 21, 21, 110, 91, 0, 0, 21, 21, 21, 21, 11, 90, 0, 0, - 0, 0, 0, 111, 5, 112, 0, 0, 0, 0, 0, 0, 9, 22, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 113, 0, 114, 0, 5, 0, 0, 115, 0, - 0, 116, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 117, - 17, 9, 17, 9, 17, 9, 17, 19, 0, 0, 0, 0, 0, 0, 118, 0, - 9, 9, 9, 8, 9, 9, 9, 9, 9, 10, 9, 9, 9, 9, 10, 25, - 9, 9, 9, 16, 9, 9, 9, 16, 9, 9, 9, 9, 9, 19, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 68, 9, 9, 9, 9, 19, 0, 0, 0, - 75, 0, 0, 0, 0, 0, 0, 0, 9, 9, 10, 0, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 54, 40, 0, 9, 0, 9, 9, 8, 16, 0, 0, - 6, 6, 74, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 9, 9, - 9, 9, 0, 0, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, - 9, 16, 22, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 25, 19, 51, - 9, 9, 9, 9, 10, 16, 0, 0, 9, 9, 9, 9, 9, 9, 16, 0, - 9, 9, 9, 9, 9, 9, 0, 17, 58, 32, 0, 21, 9, 8, 8, 9, - 9, 9, 9, 9, 9, 0, 40, 65, 9, 105, 0, 0, 0, 0, 0, 0, - 9, 9, 9, 9, 10, 0, 0, 0, 9, 9, 9, 9, 9, 9, 21, 21, - 21, 40, 0, 0, 0, 0, 0, 0, 0, 62, 6, 6, 0, 0, 
0, 65, - 9, 9, 9, 9, 21, 21, 40, 14, 9, 9, 19, 0, 6, 6, 74, 0, - 9, 42, 21, 21, 21, 119, 6, 6, 9, 9, 9, 9, 42, 13, 0, 0, - 45, 19, 70, 75, 6, 6, 120, 19, 9, 9, 9, 9, 25, 9, 9, 9, - 9, 9, 9, 21, 21, 21, 0, 72, 9, 10, 22, 25, 9, 9, 9, 25, - 9, 9, 19, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 42, - 21, 21, 40, 0, 6, 6, 74, 0, 21, 8, 9, 51, 51, 9, 9, 9, - 21, 63, 63, 59, 19, 65, 0, 8, 54, 70, 21, 75, 21, 75, 0, 0, - 9, 9, 9, 9, 9, 58, 21, 21, 21, 82, 10, 0, 6, 6, 74, 0, - 21, 25, 0, 0, 6, 6, 74, 0, 9, 9, 9, 42, 21, 59, 21, 21, - 75, 0, 0, 0, 0, 0, 9, 59, 75, 19, 0, 0, 6, 6, 74, 0, - 9, 9, 42, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, - 21, 21, 21, 0, 6, 6, 74, 0, 6, 6, 74, 0, 0, 0, 0, 66, - 9, 9, 9, 9, 9, 9, 19, 0, 9, 9, 22, 9, 9, 9, 9, 9, - 9, 9, 9, 42, 21, 40, 21, 21, 19, 0, 0, 0, 6, 6, 74, 0, - 0, 0, 0, 0, 17, 9, 9, 9, 9, 9, 9, 9, 70, 21, 21, 21, - 21, 21, 30, 21, 21, 40, 0, 0, 9, 10, 0, 0, 0, 0, 0, 0, - 9, 9, 9, 16, 21, 75, 0, 0, 9, 9, 9, 9, 21, 40, 0, 0, - 9, 0, 0, 0, 6, 6, 74, 0, 66, 9, 9, 9, 9, 9, 0, 8, - 9, 19, 0, 0, 58, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 40, - 0, 0, 0, 65, 82, 9, 9, 9, 19, 0, 0, 0, 0, 0, 0, 0, - 100, 0, 0, 0, 0, 0, 0, 0, 9, 9, 10, 0, 9, 9, 9, 19, - 9, 9, 19, 0, 9, 9, 16, 32, 37, 0, 0, 0, 0, 0, 0, 0, - 0, 30, 59, 30, 121, 37, 122, 21, 40, 30, 21, 0, 0, 0, 0, 0, - 0, 0, 70, 59, 0, 0, 0, 0, 70, 75, 0, 0, 0, 0, 0, 0, - 9, 9, 9, 9, 9, 22, 9, 9, 9, 9, 9, 9, 9, 9, 9, 22, - 13, 67, 8, 22, 9, 9, 25, 8, 9, 8, 9, 9, 9, 9, 9, 9, - 9, 25, 10, 8, 9, 22, 9, 22, 9, 9, 9, 9, 9, 9, 25, 10, - 9, 20, 17, 9, 22, 9, 9, 9, 9, 16, 9, 9, 9, 9, 9, 9, - 22, 9, 9, 9, 9, 9, 10, 9, 10, 9, 9, 62, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 21, 21, 21, 21, 21, 40, 65, 21, - 21, 21, 21, 75, 0, 68, 0, 0, 0, 75, 0, 0, 0, 0, 65, 21, - 30, 21, 21, 21, 0, 0, 0, 0, 21, 40, 21, 21, 21, 21, 63, 21, - 31, 49, 40, 0, 0, 0, 0, 0, 9, 19, 0, 0, 21, 40, 0, 0, - 9, 21, 40, 0, 6, 6, 74, 0, 67, 51, 8, 9, 10, 9, 84, 0, - 13, 66, 84, 8, 67, 51, 84, 84, 67, 51, 10, 9, 10, 9, 8, 20, - 9, 9, 25, 9, 9, 
9, 9, 0, 8, 8, 25, 9, 9, 9, 9, 0, - 9, 9, 16, 0, 9, 9, 9, 9, 0, 123, 124, 124, 124, 124, 124, 124, - 0, 93, 0, 0, 0, 0, 0, 0, 125, 126, 94, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 127, 128, 94, 94, 129, 129, 126, 0, 0, 0, - 0, 130, 131, 132, 129, 129, 126, 126, 133, 133, 134, 0, 0, 0, 0, 0, - 0, 0, 132, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 93, 132, 0, - 0, 0, 0, 0, 126, 135, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, - 0, 133, 125, 129, 0, 0, 0, 0, 125, 0, 0, 0, 0, 136, 0, 0, - 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 129, 136, - 0, 132, 0, 0, 137, 129, 95, 136, 14, 0, 0, 0, 0, 0, 0, 0, - 21, 21, 21, 21, 0, 0, 0, 0, -}; - -static RE_UINT8 re_word_break_stage_5[] = { - 0, 0, 0, 0, 0, 0, 5, 6, 6, 4, 0, 0, 0, 0, 1, 0, - 0, 0, 0, 2, 13, 0, 14, 0, 15, 15, 15, 15, 15, 15, 12, 13, - 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, 0, 0, 0, 16, - 0, 6, 0, 0, 0, 0, 11, 0, 0, 9, 0, 0, 0, 11, 0, 12, - 11, 11, 0, 0, 0, 0, 11, 11, 0, 0, 0, 12, 11, 0, 0, 0, - 11, 0, 11, 0, 7, 7, 7, 7, 11, 0, 11, 11, 11, 11, 13, 11, - 0, 0, 11, 12, 11, 11, 0, 11, 11, 11, 0, 7, 7, 7, 11, 11, - 0, 11, 0, 0, 0, 13, 0, 0, 0, 7, 7, 7, 7, 7, 0, 7, - 0, 7, 7, 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 11, - 12, 0, 0, 0, 9, 9, 9, 9, 9, 9, 0, 0, 13, 13, 0, 0, - 7, 7, 7, 0, 9, 0, 0, 0, 11, 11, 11, 7, 15, 15, 0, 15, - 13, 0, 11, 11, 7, 11, 11, 11, 0, 11, 7, 7, 7, 9, 0, 7, - 7, 11, 11, 7, 7, 0, 7, 7, 15, 15, 11, 11, 11, 0, 0, 11, - 0, 0, 0, 9, 11, 7, 11, 11, 11, 11, 7, 7, 7, 11, 0, 0, - 13, 0, 11, 0, 7, 7, 11, 7, 11, 7, 7, 7, 7, 7, 0, 0, - 7, 7, 9, 7, 7, 11, 7, 7, 0, 0, 15, 15, 7, 0, 0, 7, - 7, 7, 11, 0, 0, 0, 0, 7, 0, 0, 0, 11, 0, 11, 11, 0, - 0, 7, 0, 0, 11, 7, 0, 0, 0, 0, 7, 7, 0, 0, 7, 11, - 0, 0, 7, 0, 7, 0, 7, 0, 15, 15, 0, 0, 7, 0, 0, 0, - 0, 7, 0, 7, 15, 15, 7, 7, 11, 0, 7, 7, 7, 7, 9, 0, - 11, 7, 7, 11, 11, 7, 11, 0, 7, 7, 7, 11, 7, 11, 11, 0, - 0, 11, 0, 11, 7, 19, 9, 9, 14, 14, 0, 0, 14, 0, 0, 12, - 6, 6, 9, 9, 9, 9, 9, 16, 16, 0, 0, 0, 13, 0, 0, 0, - 9, 0, 9, 9, 0, 17, 0, 0, 0, 0, 17, 17, 17, 17, 0, 0, - 20, 0, 0, 0, 0, 10, 
10, 10, 10, 10, 0, 0, 0, 7, 7, 10, - 10, 0, 0, 0, 10, 10, 10, 10, 10, 10, 10, 0, 7, 7, 0, 11, - 11, 11, 7, 11, 11, 7, 7, 0, 0, 3, 7, 3, 3, 0, 3, 3, - 3, 0, 3, 0, 3, 3, 0, 3, 13, 0, 0, 12, 0, 16, 16, 16, - 13, 12, 0, 0, 11, 0, 0, 9, 0, 0, 0, 14, 0, 0, 12, 13, - 0, 0, 10, 10, 10, 10, 7, 7, 0, 9, 9, 9, 7, 0, 15, 15, - 15, 15, 11, 0, 7, 7, 7, 9, 9, 9, 9, 7, 0, 0, 8, 8, - 8, 8, 8, 8, 0, 0, 0, 17, 17, 0, 0, 0, 0, 0, 0, 18, - 18, 18, 18, 18, 17, 17, 17, 17, 0, 0, 21, 21, 21, 21, 0, 0, - 0, 0, 17, 0, 0, 17, 17, 17, 0, 0, 0, 20, 0, 17, 17, 0, - 17, 17, 17, 0, 17, 0, 0, 17, -}; - -/* Word_Break: 5624 bytes. */ - -RE_UINT32 re_get_word_break(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_word_break_stage_1[f] << 5; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_word_break_stage_2[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_word_break_stage_3[pos + f] << 3; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_word_break_stage_4[pos + f] << 2; - value = re_word_break_stage_5[pos + code]; - - return value; -} - -/* Grapheme_Cluster_Break. 
*/ - -static RE_UINT8 re_grapheme_cluster_break_stage_1[] = { - 0, 1, 2, 2, 2, 3, 4, 5, 6, 2, 2, 7, 2, 8, 9, 10, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 11, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_grapheme_cluster_break_stage_2[] = { - 0, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 1, 17, 1, 1, 1, 18, 19, 20, 21, 22, 23, 24, 1, 1, - 25, 1, 1, 1, 1, 1, 26, 27, 1, 1, 1, 1, 28, 29, 1, 1, - 30, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 31, 1, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 36, 37, 38, 39, 40, 41, 42, 36, 37, 38, 39, 40, 41, - 42, 36, 37, 38, 39, 40, 41, 42, 36, 37, 38, 39, 40, 41, 42, 36, - 37, 38, 39, 40, 41, 42, 36, 43, 44, 44, 44, 44, 44, 44, 44, 44, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 45, 1, 1, 46, 47, - 1, 48, 49, 50, 1, 1, 1, 1, 1, 1, 51, 1, 1, 1, 1, 1, - 52, 53, 54, 55, 56, 57, 58, 59, 1, 1, 1, 1, 60, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 61, 62, 1, 1, 1, 63, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 64, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 65, 66, 1, 1, 1, 1, 1, 1, 1, 67, 1, 1, 1, 1, 1, - 68, 1, 1, 1, 1, 1, 1, 1, 69, 70, 1, 1, 1, 1, 1, 1, - 1, 71, 1, 72, 73, 74, 75, 1, 1, 76, 1, 1, 1, 1, 1, 1, - 77, 78, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_grapheme_cluster_break_stage_3[] = { - 0, 1, 2, 2, 2, 2, 2, 3, 1, 1, 4, 2, 2, 2, 2, 2, - 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 6, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, 5, 8, 9, 2, 2, 2, - 10, 11, 2, 2, 12, 5, 2, 13, 2, 2, 2, 2, 2, 14, 15, 2, - 16, 17, 2, 5, 18, 2, 2, 2, 2, 2, 19, 13, 2, 2, 12, 20, - 2, 21, 22, 2, 2, 23, 2, 2, 2, 2, 2, 2, 2, 24, 25, 5, - 26, 2, 2, 27, 28, 29, 30, 2, 31, 2, 2, 32, 33, 34, 30, 2, - 35, 2, 2, 36, 37, 17, 2, 38, 35, 2, 2, 36, 39, 2, 30, 2, - 31, 2, 2, 40, 33, 41, 30, 2, 42, 2, 2, 43, 44, 34, 2, 2, - 45, 2, 2, 46, 47, 48, 30, 2, 31, 2, 2, 49, 50, 48, 30, 2, - 31, 2, 2, 43, 51, 34, 30, 2, 52, 2, 2, 2, 53, 54, 2, 52, - 2, 2, 2, 55, 56, 2, 2, 2, 2, 2, 2, 57, 58, 2, 2, 2, - 2, 59, 2, 60, 2, 2, 2, 61, 62, 63, 5, 64, 65, 2, 2, 2, - 2, 2, 66, 67, 2, 68, 13, 69, 70, 71, 2, 2, 2, 2, 2, 2, - 72, 72, 72, 72, 72, 72, 73, 73, 73, 73, 74, 75, 75, 75, 75, 75, - 2, 2, 2, 2, 2, 66, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 76, 2, 76, 2, 30, 2, 30, 2, 2, 2, 77, 78, 79, 2, 2, - 80, 2, 2, 2, 2, 2, 2, 2, 48, 2, 81, 2, 2, 2, 2, 2, - 2, 2, 82, 83, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 84, 2, 2, 2, 85, 86, 87, 2, 2, 2, 88, 2, 2, 2, 2, - 89, 2, 2, 90, 91, 2, 12, 20, 92, 2, 93, 2, 2, 2, 94, 95, - 2, 2, 96, 97, 2, 2, 2, 2, 2, 2, 2, 2, 2, 98, 99, 100, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 101, - 102, 2, 103, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 5, 5, 13, - 2, 104, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 105, - 106, 2, 2, 2, 2, 2, 107, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 108, 109, - 2, 2, 2, 2, 2, 2, 2, 108, 2, 2, 2, 2, 2, 2, 5, 5, - 2, 2, 110, 2, 2, 2, 2, 2, 2, 111, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 108, 112, 2, 46, 2, 2, 2, 2, 2, 109, - 113, 2, 114, 2, 2, 2, 2, 2, 115, 2, 2, 116, 117, 2, 5, 109, - 2, 2, 118, 2, 119, 95, 72, 120, 26, 2, 2, 121, 122, 2, 123, 2, - 2, 2, 124, 125, 126, 2, 2, 127, 2, 2, 2, 128, 17, 2, 129, 130, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 131, 2, - 132, 133, 134, 135, 134, 136, 134, 132, 133, 
134, 135, 134, 136, 134, 132, 133, - 134, 135, 134, 136, 134, 132, 133, 134, 135, 134, 136, 134, 132, 133, 134, 135, - 134, 136, 134, 132, 133, 134, 135, 134, 136, 134, 132, 133, 134, 135, 134, 136, - 134, 132, 133, 134, 135, 134, 136, 134, 132, 133, 134, 135, 134, 136, 134, 132, - 133, 134, 135, 134, 136, 134, 132, 133, 134, 135, 134, 136, 134, 132, 133, 134, - 135, 134, 136, 134, 132, 133, 134, 135, 134, 136, 134, 132, 133, 134, 135, 134, - 136, 134, 132, 133, 134, 135, 134, 136, 134, 132, 133, 134, 135, 134, 136, 134, - 134, 135, 134, 136, 134, 132, 133, 134, 135, 134, 137, 73, 138, 75, 75, 139, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 140, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 5, 2, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 46, 2, 2, 2, 2, 2, 141, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 71, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 13, 2, - 2, 2, 2, 2, 2, 2, 2, 142, 2, 2, 2, 2, 2, 2, 2, 2, - 143, 2, 2, 144, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 48, 2, - 145, 2, 2, 146, 147, 2, 2, 108, 92, 2, 2, 148, 2, 2, 2, 2, - 149, 2, 150, 151, 2, 2, 2, 152, 92, 2, 2, 153, 154, 2, 2, 2, - 2, 2, 155, 156, 2, 2, 2, 2, 2, 2, 2, 2, 2, 108, 157, 2, - 95, 2, 2, 32, 158, 34, 159, 151, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 160, 161, 2, 2, 2, 2, 2, 2, 162, 163, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 108, 164, 13, 165, 2, 2, - 2, 2, 2, 166, 13, 2, 2, 2, 2, 2, 167, 168, 2, 2, 2, 2, - 2, 66, 169, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 170, 171, 2, 2, 2, 2, 2, 172, 173, 174, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 151, - 2, 2, 2, 147, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 175, 176, 177, 108, 149, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 178, 179, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 180, 181, 182, 2, 183, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 76, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 5, 5, 5, 184, 5, 5, 64, 123, 185, 12, 7, 2, 2, 2, 2, 2, - 186, 187, 188, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 147, 2, 2, - 2, 2, 2, 2, 189, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 190, 191, - 2, 2, 2, 2, 2, 2, 2, 2, 192, 2, 2, 2, 193, 2, 2, 194, - 2, 2, 2, 2, 195, 196, 197, 198, 199, 2, 200, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 201, 2, 202, 2, 2, 2, 2, 203, 2, - 2, 2, 2, 2, 204, 2, 2, 2, 2, 2, 205, 206, 196, 2, 2, 2, - 2, 207, 208, 209, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, -}; - -static RE_UINT8 re_grapheme_cluster_break_stage_4[] = { - 0, 0, 1, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 4, - 3, 3, 3, 5, 6, 6, 6, 6, 7, 6, 8, 3, 9, 6, 6, 6, - 6, 6, 6, 10, 11, 10, 3, 3, 12, 13, 3, 3, 6, 6, 14, 15, - 3, 3, 7, 6, 16, 3, 3, 3, 3, 17, 6, 18, 6, 19, 20, 8, - 3, 3, 3, 21, 22, 3, 3, 3, 6, 6, 14, 3, 3, 17, 6, 6, - 6, 3, 3, 3, 3, 17, 10, 6, 6, 9, 9, 8, 3, 3, 9, 3, - 3, 6, 6, 6, 23, 6, 6, 6, 24, 3, 3, 3, 3, 3, 25, 26, - 27, 6, 28, 29, 9, 6, 3, 3, 17, 3, 3, 3, 30, 3, 3, 3, - 3, 3, 3, 31, 27, 32, 33, 34, 3, 7, 3, 3, 35, 3, 3, 3, - 3, 3, 3, 26, 36, 7, 19, 8, 8, 22, 3, 3, 27, 10, 37, 34, - 3, 3, 3, 20, 3, 17, 3, 3, 38, 3, 3, 3, 3, 3, 3, 25, - 39, 40, 41, 34, 28, 3, 3, 3, 3, 3, 3, 17, 28, 42, 20, 8, - 3, 11, 3, 3, 3, 3, 3, 43, 44, 45, 41, 8, 27, 26, 41, 46, - 40, 3, 3, 3, 3, 3, 38, 7, 47, 48, 49, 50, 51, 6, 14, 3, - 3, 7, 6, 14, 51, 6, 10, 16, 3, 3, 6, 8, 3, 3, 8, 3, - 3, 52, 22, 40, 9, 6, 6, 24, 6, 20, 3, 9, 6, 6, 9, 6, - 6, 6, 6, 16, 3, 38, 3, 3, 3, 3, 3, 9, 53, 6, 35, 36, - 3, 40, 8, 17, 9, 16, 3, 3, 38, 36, 3, 22, 3, 3, 3, 22, - 54, 54, 54, 54, 55, 55, 55, 55, 55, 55, 56, 56, 56, 56, 56, 56, - 17, 16, 3, 3, 3, 57, 6, 58, 49, 44, 27, 6, 6, 3, 3, 22, - 3, 3, 7, 59, 3, 3, 22, 3, 24, 50, 28, 3, 44, 49, 27, 3, - 3, 7, 60, 3, 3, 61, 6, 14, 48, 9, 6, 28, 50, 6, 6, 19, - 6, 6, 6, 14, 6, 62, 3, 3, 3, 53, 24, 28, 44, 62, 3, 3, - 63, 3, 3, 3, 64, 58, 57, 8, 3, 25, 58, 65, 58, 3, 3, 3, - 3, 49, 49, 6, 6, 47, 3, 3, 14, 6, 6, 6, 53, 6, 16, 
22, - 40, 16, 8, 3, 6, 8, 7, 6, 3, 3, 4, 66, 3, 3, 0, 67, - 3, 3, 3, 68, 3, 3, 68, 3, 3, 3, 69, 70, 3, 71, 3, 3, - 3, 3, 3, 7, 8, 3, 3, 3, 3, 3, 17, 6, 3, 3, 11, 3, - 14, 6, 6, 8, 38, 38, 7, 3, 72, 73, 3, 3, 74, 3, 3, 3, - 3, 49, 49, 49, 49, 8, 3, 3, 3, 17, 6, 8, 3, 7, 6, 6, - 54, 54, 54, 75, 7, 47, 58, 28, 62, 3, 3, 3, 3, 22, 3, 3, - 3, 3, 9, 24, 73, 36, 3, 3, 7, 3, 3, 76, 3, 3, 3, 16, - 20, 19, 16, 17, 3, 3, 72, 58, 3, 77, 3, 3, 72, 29, 39, 34, - 78, 79, 79, 79, 79, 79, 79, 78, 79, 79, 79, 79, 79, 79, 78, 79, - 79, 78, 79, 79, 79, 3, 3, 3, 55, 80, 81, 56, 56, 56, 56, 3, - 3, 3, 3, 38, 0, 0, 0, 3, 3, 17, 14, 3, 9, 11, 3, 6, - 3, 3, 14, 7, 82, 3, 3, 3, 3, 3, 6, 6, 6, 14, 3, 3, - 50, 24, 36, 83, 14, 3, 3, 3, 3, 7, 6, 27, 6, 16, 3, 3, - 7, 3, 3, 3, 72, 47, 6, 24, 84, 3, 17, 16, 3, 3, 3, 50, - 58, 53, 3, 38, 50, 6, 14, 3, 28, 33, 33, 74, 40, 17, 6, 16, - 3, 85, 6, 6, 47, 86, 3, 3, 60, 6, 87, 65, 53, 3, 3, 3, - 47, 8, 49, 57, 3, 3, 3, 8, 50, 6, 24, 65, 3, 3, 7, 29, - 6, 57, 3, 3, 47, 57, 6, 3, 3, 3, 3, 72, 6, 14, 6, 57, - 17, 6, 6, 6, 6, 6, 64, 6, 53, 36, 3, 3, 85, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 49, 88, 3, 3, 3, 11, 0, 3, 3, 3, - 3, 89, 8, 64, 90, 0, 91, 6, 14, 9, 6, 3, 3, 3, 17, 8, - 6, 14, 7, 6, 3, 16, 3, 3, 6, 14, 6, 6, 6, 6, 19, 6, - 10, 20, 14, 3, 3, 6, 14, 3, 3, 92, 93, 93, 93, 93, 93, 93, - 3, 68, 3, 3, 94, 95, 69, 3, 3, 3, 96, 97, 69, 69, 98, 98, - 95, 3, 3, 3, 3, 99, 100, 101, 98, 98, 95, 95, 102, 102, 103, 3, - 3, 3, 101, 3, 3, 68, 101, 3, 95, 104, 3, 3, 3, 3, 71, 3, - 3, 102, 94, 98, 94, 3, 3, 3, 3, 105, 3, 3, 3, 3, 98, 105, - 3, 101, 3, 3, 106, 98, 70, 105, -}; - -static RE_UINT8 re_grapheme_cluster_break_stage_5[] = { - 4, 4, 4, 4, 4, 4, 3, 4, 4, 2, 4, 4, 0, 0, 0, 0, - 0, 0, 0, 4, 0, 4, 0, 0, 5, 5, 5, 5, 0, 0, 0, 5, - 5, 5, 0, 0, 0, 5, 5, 5, 5, 5, 0, 5, 0, 5, 5, 0, - 1, 1, 1, 1, 1, 1, 0, 0, 5, 5, 5, 0, 4, 0, 0, 0, - 5, 0, 0, 0, 0, 0, 5, 5, 5, 1, 0, 5, 5, 0, 0, 5, - 5, 0, 5, 5, 0, 0, 0, 1, 0, 5, 0, 0, 5, 5, 1, 5, - 5, 5, 5, 7, 0, 0, 5, 7, 5, 0, 7, 7, 
7, 5, 5, 5, - 5, 7, 7, 7, 7, 5, 7, 7, 0, 5, 7, 7, 5, 0, 5, 7, - 5, 0, 0, 7, 7, 0, 0, 7, 7, 5, 0, 0, 0, 5, 5, 7, - 7, 5, 5, 0, 5, 7, 0, 7, 0, 0, 5, 0, 5, 7, 7, 0, - 0, 0, 7, 7, 7, 0, 7, 7, 7, 0, 5, 5, 5, 0, 7, 5, - 7, 7, 5, 7, 7, 0, 5, 7, 7, 5, 1, 0, 7, 7, 5, 5, - 5, 0, 5, 0, 7, 7, 7, 7, 7, 7, 7, 5, 0, 5, 0, 7, - 0, 5, 0, 5, 5, 7, 5, 5, 8, 8, 8, 8, 9, 9, 9, 9, - 10, 10, 10, 10, 5, 5, 7, 5, 5, 5, 7, 7, 5, 5, 4, 0, - 5, 7, 7, 5, 0, 7, 5, 7, 7, 0, 0, 0, 5, 5, 7, 0, - 0, 7, 5, 5, 7, 5, 7, 5, 5, 15, 4, 4, 4, 4, 4, 0, - 0, 13, 0, 0, 0, 0, 13, 13, 13, 13, 0, 0, 16, 0, 0, 0, - 0, 0, 0, 7, 7, 5, 5, 7, 7, 7, 0, 0, 8, 0, 0, 0, - 5, 7, 0, 0, 0, 7, 5, 0, 11, 12, 12, 12, 12, 12, 12, 12, - 9, 9, 9, 0, 0, 0, 0, 10, 7, 5, 7, 0, 0, 1, 0, 0, - 7, 0, 1, 1, 0, 7, 7, 7, 5, 7, 5, 0, 5, 7, 5, 7, - 7, 7, 7, 0, 0, 5, 7, 5, 5, 5, 5, 4, 4, 4, 4, 5, - 0, 0, 6, 6, 6, 6, 6, 6, 0, 0, 0, 13, 13, 0, 0, 0, - 0, 0, 0, 14, 14, 14, 14, 14, 13, 13, 13, 13, 0, 0, 17, 17, - 17, 17, 0, 0, 0, 0, 13, 0, 0, 13, 13, 13, 0, 0, 0, 16, - 0, 13, 13, 0, 13, 13, 13, 0, 13, 0, 0, 13, -}; - -/* Grapheme_Cluster_Break: 3052 bytes. */ - -RE_UINT32 re_get_grapheme_cluster_break(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_grapheme_cluster_break_stage_1[f] << 5; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_grapheme_cluster_break_stage_2[pos + f] << 4; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_grapheme_cluster_break_stage_3[pos + f] << 2; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_grapheme_cluster_break_stage_4[pos + f] << 2; - value = re_grapheme_cluster_break_stage_5[pos + code]; - - return value; -} - -/* Sentence_Break. 
*/ - -static RE_UINT8 re_sentence_break_stage_1[] = { - 0, 1, 2, 3, 4, 5, 5, 5, 5, 6, 7, 5, 5, 8, 9, 10, - 11, 12, 13, 14, 15, 9, 16, 5, 17, 9, 9, 18, 9, 19, 20, 21, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 22, 23, 24, 9, 9, 25, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 26, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, -}; - -static RE_UINT8 re_sentence_break_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 17, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, - 31, 32, 33, 34, 35, 33, 33, 36, 33, 37, 33, 33, 38, 39, 40, 33, - 41, 42, 33, 33, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 43, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 44, - 17, 17, 17, 17, 45, 17, 46, 47, 48, 49, 50, 51, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 52, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 17, 53, 54, 17, 55, 56, 57, - 58, 59, 60, 61, 62, 63, 17, 64, 65, 66, 67, 68, 69, 33, 33, 33, - 70, 71, 72, 73, 74, 75, 76, 77, 78, 33, 79, 33, 80, 33, 33, 33, - 17, 17, 17, 81, 82, 83, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 17, 17, 17, 17, 84, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 17, 17, 85, 33, 33, 33, 
33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 17, 17, 86, 87, 33, 33, 33, 88, - 17, 17, 17, 17, 17, 17, 17, 89, 17, 17, 90, 33, 33, 33, 33, 33, - 91, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 92, 33, 33, 33, - 33, 93, 94, 33, 95, 96, 97, 98, 33, 33, 99, 33, 33, 33, 33, 33, - 100, 33, 33, 33, 33, 33, 33, 33, 101, 102, 33, 33, 33, 33, 103, 33, - 33, 104, 33, 33, 33, 33, 105, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 17, 17, 17, 17, 17, 17, 106, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 107, 108, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 109, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 17, 17, 110, 33, 33, 33, 33, 33, - 111, 112, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, -}; - -static RE_UINT16 re_sentence_break_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 8, 16, 17, 18, 19, 20, 21, 22, 23, 23, 23, 24, 25, 26, 27, 28, - 29, 30, 18, 8, 31, 8, 32, 8, 8, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 41, 41, 44, 45, 46, 47, 48, 41, 41, 49, 50, 51, - 52, 53, 54, 55, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, - 67, 68, 69, 70, 71, 72, 73, 74, 75, 72, 76, 77, 78, 79, 80, 81, - 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, - 98, 99, 100, 55, 101, 102, 103, 55, 104, 105, 106, 107, 108, 109, 110, 55, - 41, 111, 112, 113, 114, 29, 115, 116, 41, 41, 41, 41, 41, 41, 41, 41, - 41, 41, 117, 41, 118, 119, 120, 41, 121, 41, 122, 123, 124, 29, 29, 125, - 98, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 126, 127, 41, 41, 128, - 129, 130, 131, 132, 41, 133, 134, 135, 136, 41, 41, 137, 138, 139, 41, 140, - 141, 142, 143, 144, 41, 145, 146, 55, 147, 41, 148, 149, 150, 151, 55, 55, - 152, 133, 153, 154, 155, 156, 41, 157, 41, 158, 159, 160, 161, 55, 162, 163, - 18, 18, 18, 18, 18, 18, 23, 164, 8, 8, 8, 8, 165, 8, 8, 8, - 166, 167, 168, 169, 167, 170, 171, 172, 173, 174, 175, 176, 177, 55, 178, 179, - 180, 181, 182, 30, 183, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
55, 55, - 184, 185, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 186, 30, 187, - 55, 55, 188, 189, 55, 55, 190, 191, 55, 55, 55, 55, 192, 55, 193, 194, - 29, 195, 196, 197, 8, 8, 8, 198, 18, 199, 41, 200, 201, 202, 202, 23, - 203, 204, 205, 55, 55, 55, 55, 55, 206, 207, 98, 41, 208, 98, 41, 116, - 209, 210, 41, 41, 211, 212, 55, 213, 41, 41, 41, 41, 41, 140, 55, 55, - 41, 41, 41, 41, 41, 41, 140, 55, 41, 41, 41, 41, 214, 55, 213, 215, - 216, 217, 8, 218, 219, 41, 41, 220, 221, 222, 8, 223, 224, 225, 55, 226, - 227, 228, 41, 229, 230, 133, 231, 232, 50, 233, 234, 235, 59, 236, 237, 238, - 41, 239, 240, 241, 41, 242, 243, 244, 245, 246, 247, 248, 18, 18, 41, 249, - 41, 41, 41, 41, 41, 250, 251, 252, 41, 41, 41, 253, 41, 41, 254, 55, - 255, 256, 257, 41, 41, 258, 259, 41, 41, 260, 213, 41, 261, 41, 262, 263, - 264, 265, 266, 267, 41, 41, 41, 268, 269, 2, 270, 271, 272, 141, 273, 274, - 275, 276, 277, 55, 41, 41, 41, 212, 55, 55, 41, 278, 55, 55, 55, 279, - 55, 55, 55, 55, 235, 41, 280, 281, 41, 213, 282, 283, 284, 41, 285, 55, - 29, 286, 287, 41, 284, 288, 289, 290, 41, 291, 41, 292, 55, 55, 55, 55, - 41, 201, 140, 262, 55, 55, 55, 55, 293, 294, 140, 201, 141, 55, 55, 295, - 140, 254, 55, 55, 41, 296, 55, 55, 297, 298, 299, 235, 235, 55, 106, 300, - 41, 140, 140, 301, 258, 55, 55, 55, 41, 41, 302, 55, 29, 303, 18, 304, - 155, 305, 306, 307, 155, 308, 309, 310, 155, 311, 312, 313, 155, 236, 314, 55, - 315, 316, 55, 55, 317, 318, 319, 320, 321, 72, 322, 323, 55, 55, 55, 55, - 41, 324, 325, 55, 41, 47, 326, 55, 55, 55, 55, 55, 41, 327, 328, 55, - 41, 47, 329, 55, 41, 330, 135, 55, 331, 332, 55, 55, 55, 55, 55, 55, - 55, 55, 55, 55, 55, 29, 18, 333, 55, 55, 55, 55, 55, 55, 41, 334, - 335, 336, 337, 338, 339, 340, 55, 55, 41, 41, 41, 41, 254, 55, 55, 55, - 41, 41, 41, 211, 41, 41, 41, 41, 41, 41, 292, 55, 55, 55, 55, 55, - 41, 211, 55, 55, 55, 55, 55, 55, 41, 41, 341, 55, 55, 55, 55, 55, - 41, 334, 141, 342, 55, 55, 213, 343, 41, 344, 345, 346, 124, 55, 55, 55, - 41, 41, 
347, 348, 349, 55, 55, 350, 41, 41, 41, 41, 41, 41, 41, 214, - 41, 41, 41, 41, 41, 41, 41, 301, 351, 55, 55, 55, 55, 55, 55, 55, - 41, 41, 41, 352, 353, 354, 55, 55, 55, 55, 55, 355, 356, 357, 55, 55, - 55, 55, 358, 55, 55, 55, 55, 55, 359, 360, 361, 362, 363, 364, 365, 366, - 367, 368, 369, 370, 371, 359, 360, 372, 362, 373, 374, 375, 366, 376, 377, 378, - 379, 380, 381, 195, 382, 383, 384, 385, 23, 386, 23, 387, 388, 389, 55, 55, - 390, 391, 55, 55, 55, 55, 55, 55, 41, 41, 41, 41, 41, 41, 392, 55, - 29, 393, 394, 55, 55, 55, 55, 55, 395, 396, 397, 398, 399, 400, 55, 55, - 55, 401, 402, 402, 403, 55, 55, 55, 55, 55, 55, 404, 55, 55, 55, 55, - 41, 41, 41, 41, 41, 41, 201, 55, 41, 278, 41, 41, 41, 41, 41, 41, - 284, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 351, 55, 55, - 284, 55, 55, 55, 55, 55, 55, 55, 405, 23, 23, 23, 55, 55, 55, 55, - 23, 23, 23, 23, 23, 23, 23, 406, -}; - -static RE_UINT8 re_sentence_break_stage_4[] = { - 0, 0, 1, 2, 0, 0, 0, 0, 3, 4, 5, 6, 7, 7, 8, 9, - 10, 11, 11, 11, 11, 11, 12, 13, 14, 15, 15, 15, 15, 15, 16, 13, - 0, 17, 0, 0, 0, 0, 0, 0, 18, 0, 19, 20, 0, 21, 19, 0, - 11, 11, 11, 11, 11, 22, 11, 23, 15, 15, 15, 15, 15, 24, 15, 15, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, - 26, 26, 27, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 28, 29, - 30, 31, 32, 33, 28, 31, 34, 28, 25, 31, 29, 31, 32, 26, 35, 34, - 36, 28, 31, 26, 26, 26, 26, 27, 25, 25, 25, 25, 30, 31, 25, 25, - 25, 25, 25, 25, 25, 15, 33, 30, 26, 23, 25, 25, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 37, 15, 15, - 15, 15, 15, 15, 15, 15, 38, 36, 39, 40, 36, 36, 41, 0, 0, 0, - 15, 42, 0, 43, 0, 0, 0, 0, 44, 44, 44, 44, 44, 44, 44, 44, - 44, 44, 44, 44, 25, 45, 46, 47, 0, 48, 22, 49, 32, 11, 11, 11, - 50, 11, 11, 15, 15, 15, 15, 15, 15, 15, 15, 51, 33, 34, 25, 25, - 25, 25, 25, 25, 15, 52, 30, 32, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 15, 15, 15, 15, 53, 44, 54, 25, 25, 25, 25, 25, - 28, 26, 26, 29, 25, 25, 25, 25, 
25, 25, 25, 25, 10, 11, 11, 11, - 11, 11, 11, 11, 11, 22, 55, 56, 14, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 57, 0, 58, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 59, - 60, 59, 0, 0, 36, 36, 36, 36, 36, 36, 61, 0, 36, 0, 0, 0, - 62, 63, 0, 64, 44, 44, 65, 66, 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 67, 44, 44, 44, 44, 44, 7, 7, 68, 69, 70, 36, 36, 36, - 36, 36, 36, 36, 36, 71, 44, 72, 44, 73, 74, 75, 7, 7, 76, 77, - 78, 0, 0, 79, 80, 36, 36, 36, 36, 36, 36, 36, 44, 44, 44, 44, - 44, 44, 65, 81, 36, 36, 36, 36, 36, 82, 44, 44, 83, 0, 0, 0, - 7, 7, 76, 36, 36, 36, 36, 36, 36, 36, 67, 44, 44, 41, 84, 0, - 36, 36, 36, 36, 36, 82, 85, 44, 44, 86, 86, 87, 0, 0, 0, 0, - 36, 36, 36, 36, 36, 36, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 36, 36, 36, 36, 36, 88, 36, 41, 0, 0, 0, 0, 0, 44, 44, 44, - 89, 44, 44, 44, 44, 44, 44, 44, 44, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 82, 90, 44, 44, 44, 44, 86, 44, 36, 36, - 82, 91, 7, 7, 81, 36, 36, 36, 86, 81, 36, 77, 77, 36, 36, 36, - 36, 36, 88, 36, 43, 40, 41, 90, 44, 92, 92, 93, 0, 94, 0, 95, - 82, 96, 7, 7, 41, 0, 0, 0, 58, 81, 61, 97, 77, 36, 36, 36, - 36, 36, 88, 36, 88, 98, 41, 74, 65, 94, 92, 87, 99, 0, 81, 43, - 0, 96, 7, 7, 75, 100, 0, 0, 58, 81, 36, 95, 95, 36, 36, 36, - 36, 36, 88, 36, 88, 81, 41, 90, 44, 59, 59, 87, 101, 0, 0, 0, - 82, 96, 7, 7, 0, 0, 55, 0, 58, 81, 36, 77, 77, 36, 36, 36, - 44, 92, 92, 87, 0, 102, 0, 95, 82, 96, 7, 7, 55, 0, 0, 0, - 103, 81, 61, 40, 88, 41, 98, 88, 97, 101, 61, 40, 36, 36, 41, 102, - 65, 102, 74, 87, 101, 94, 0, 0, 0, 96, 7, 7, 0, 0, 0, 0, - 44, 81, 36, 88, 88, 36, 36, 36, 36, 36, 88, 36, 36, 36, 41, 104, - 44, 74, 74, 87, 0, 60, 61, 0, 82, 96, 7, 7, 0, 0, 0, 0, - 86, 81, 36, 88, 88, 36, 36, 36, 36, 36, 88, 36, 36, 81, 41, 90, - 44, 74, 74, 87, 0, 60, 0, 105, 82, 96, 7, 7, 98, 0, 0, 0, - 58, 81, 36, 88, 88, 36, 36, 36, 36, 36, 36, 36, 36, 36, 61, 104, - 44, 74, 74, 93, 0, 67, 0, 97, 82, 96, 7, 7, 0, 0, 40, 36, - 102, 81, 36, 36, 36, 61, 40, 36, 36, 36, 36, 36, 95, 36, 36, 55, - 36, 
61, 106, 94, 44, 107, 44, 44, 0, 96, 7, 7, 102, 0, 0, 0, - 81, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 80, 44, 65, 0, - 36, 67, 44, 65, 7, 7, 108, 0, 98, 77, 43, 55, 0, 36, 81, 36, - 81, 109, 40, 81, 80, 44, 59, 83, 36, 43, 44, 87, 7, 7, 108, 36, - 101, 0, 0, 0, 0, 0, 87, 0, 7, 7, 108, 0, 0, 110, 111, 112, - 36, 36, 81, 36, 36, 36, 36, 36, 36, 36, 36, 101, 58, 44, 44, 44, - 44, 74, 36, 86, 44, 44, 58, 44, 44, 44, 44, 44, 44, 44, 44, 113, - 0, 106, 0, 0, 0, 0, 0, 0, 36, 36, 67, 44, 44, 44, 44, 114, - 7, 7, 115, 0, 36, 82, 75, 82, 90, 73, 44, 75, 86, 70, 36, 36, - 82, 44, 44, 85, 7, 7, 116, 87, 11, 50, 0, 117, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 61, 36, 36, 36, 88, 41, 36, 61, 88, 41, - 36, 36, 88, 41, 36, 36, 36, 36, 36, 36, 36, 36, 88, 41, 36, 61, - 88, 41, 36, 36, 36, 61, 36, 36, 36, 36, 36, 36, 88, 41, 36, 36, - 36, 36, 36, 36, 36, 36, 61, 58, 118, 9, 119, 0, 0, 0, 0, 0, - 36, 36, 36, 36, 0, 0, 0, 0, 11, 11, 11, 11, 11, 120, 15, 39, - 36, 36, 36, 121, 36, 36, 36, 36, 122, 36, 36, 36, 36, 36, 123, 124, - 36, 36, 61, 40, 36, 36, 101, 0, 36, 36, 36, 88, 82, 113, 0, 0, - 36, 36, 36, 36, 82, 125, 0, 0, 36, 36, 36, 36, 82, 0, 0, 0, - 36, 36, 36, 88, 126, 0, 0, 0, 36, 36, 36, 36, 36, 44, 44, 44, - 44, 44, 44, 44, 44, 97, 0, 100, 7, 7, 108, 0, 0, 0, 0, 0, - 127, 0, 128, 129, 7, 7, 108, 0, 36, 36, 36, 36, 36, 36, 0, 0, - 36, 130, 36, 36, 36, 36, 36, 36, 36, 36, 131, 0, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 41, 0, 0, 36, 36, 36, 36, 36, 36, 36, 61, - 44, 44, 44, 0, 44, 44, 44, 0, 0, 91, 7, 7, 36, 36, 36, 36, - 36, 36, 36, 41, 36, 101, 0, 0, 36, 36, 36, 0, 36, 36, 36, 36, - 36, 36, 41, 0, 7, 7, 108, 0, 36, 36, 36, 36, 36, 67, 44, 0, - 36, 36, 36, 36, 36, 86, 44, 65, 44, 44, 44, 44, 44, 44, 44, 92, - 7, 7, 108, 0, 7, 7, 108, 0, 0, 97, 132, 0, 44, 44, 44, 65, - 44, 70, 36, 36, 36, 36, 36, 36, 44, 70, 36, 0, 7, 7, 115, 133, - 0, 0, 94, 44, 44, 0, 0, 0, 114, 36, 36, 36, 36, 36, 36, 36, - 86, 44, 44, 75, 7, 7, 76, 36, 36, 82, 44, 44, 44, 0, 0, 0, - 36, 44, 44, 44, 44, 
44, 9, 119, 7, 7, 108, 81, 7, 7, 76, 36, - 36, 36, 36, 36, 36, 36, 36, 134, 15, 15, 42, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 65, 44, 44, 44, 44, 44, 70, 80, 82, 135, 87, 0, - 44, 44, 44, 44, 44, 87, 94, 44, 25, 25, 25, 25, 25, 34, 15, 27, - 15, 15, 11, 11, 15, 39, 11, 120, 15, 15, 11, 11, 15, 15, 11, 11, - 15, 39, 11, 120, 15, 15, 136, 136, 15, 15, 11, 11, 15, 15, 15, 39, - 15, 15, 11, 11, 15, 137, 11, 138, 46, 137, 11, 139, 15, 46, 11, 0, - 15, 15, 11, 139, 46, 137, 11, 139, 140, 140, 141, 142, 143, 144, 145, 145, - 0, 146, 147, 148, 0, 0, 149, 150, 0, 151, 150, 0, 0, 0, 0, 152, - 62, 153, 62, 62, 21, 0, 0, 154, 0, 0, 0, 149, 15, 15, 15, 42, - 0, 0, 0, 0, 44, 44, 44, 44, 44, 44, 44, 44, 113, 0, 0, 0, - 48, 155, 156, 157, 23, 117, 10, 120, 0, 158, 49, 159, 11, 38, 160, 33, - 0, 161, 39, 162, 0, 0, 0, 0, 163, 38, 101, 0, 0, 0, 0, 0, - 0, 0, 145, 0, 0, 0, 0, 0, 0, 0, 149, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 164, 11, 11, 15, 15, 39, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 4, 145, 124, 0, 145, 145, 145, 5, 0, 0, - 0, 149, 0, 0, 0, 0, 0, 0, 0, 165, 145, 145, 0, 0, 0, 0, - 4, 145, 145, 145, 145, 145, 124, 0, 0, 0, 0, 0, 0, 0, 145, 0, - 0, 0, 0, 0, 0, 0, 0, 5, 11, 11, 11, 22, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 24, 31, 166, 26, 32, 25, 29, 15, 33, - 25, 42, 155, 167, 54, 0, 0, 0, 15, 168, 0, 21, 36, 36, 36, 36, - 36, 36, 0, 97, 0, 0, 0, 94, 36, 36, 36, 36, 36, 61, 0, 0, - 36, 61, 36, 61, 36, 61, 36, 61, 145, 145, 145, 5, 0, 0, 0, 5, - 145, 145, 5, 169, 0, 0, 0, 119, 170, 0, 0, 0, 0, 0, 0, 0, - 171, 81, 145, 145, 5, 145, 145, 172, 81, 36, 82, 44, 81, 41, 36, 101, - 36, 36, 36, 36, 36, 61, 60, 81, 0, 81, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 41, 81, 36, 36, 36, 36, 36, 36, 61, 0, 0, 0, 0, - 36, 36, 36, 36, 36, 36, 61, 0, 0, 0, 0, 0, 36, 36, 36, 36, - 36, 36, 36, 101, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 36, 173, - 36, 36, 36, 174, 36, 36, 36, 36, 7, 7, 76, 0, 0, 0, 0, 0, - 25, 25, 25, 175, 65, 44, 44, 176, 25, 25, 25, 25, 25, 25, 25, 177, - 36, 36, 36, 36, 178, 9, 0, 0, 0, 0, 0, 0, 
0, 97, 36, 36, - 179, 25, 25, 25, 27, 25, 25, 25, 25, 25, 25, 25, 15, 15, 26, 30, - 25, 25, 180, 181, 25, 27, 25, 25, 25, 25, 31, 22, 11, 25, 0, 0, - 0, 0, 0, 0, 0, 97, 182, 36, 183, 183, 67, 36, 36, 36, 36, 36, - 67, 44, 0, 0, 0, 0, 0, 0, 36, 36, 36, 36, 36, 133, 0, 0, - 75, 36, 36, 36, 36, 36, 36, 36, 44, 87, 0, 133, 7, 7, 108, 0, - 44, 44, 44, 44, 75, 36, 97, 55, 36, 82, 44, 178, 36, 36, 36, 36, - 36, 67, 44, 44, 44, 0, 0, 0, 36, 36, 36, 36, 36, 36, 36, 101, - 36, 36, 36, 36, 67, 44, 44, 44, 113, 0, 150, 97, 7, 7, 108, 0, - 36, 80, 36, 36, 7, 7, 76, 61, 36, 36, 86, 44, 44, 65, 0, 0, - 67, 36, 36, 87, 7, 7, 108, 184, 36, 36, 36, 36, 36, 61, 185, 75, - 36, 36, 36, 36, 90, 73, 70, 82, 131, 0, 0, 0, 0, 0, 97, 41, - 36, 36, 67, 44, 186, 187, 0, 0, 81, 61, 81, 61, 81, 61, 0, 0, - 36, 61, 36, 61, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 24, 15, - 15, 39, 0, 0, 15, 15, 15, 15, 67, 44, 188, 87, 7, 7, 108, 0, - 36, 0, 0, 0, 36, 36, 36, 36, 36, 61, 97, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 0, 36, 36, 36, 41, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 41, 0, 15, 24, 0, 0, 189, 15, 0, 190, - 36, 36, 88, 36, 36, 61, 36, 43, 95, 88, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 41, 0, 0, 0, 0, 0, 0, 0, 97, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 191, 36, 36, 36, 36, 40, 36, 36, 36, - 36, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 36, 36, 0, - 44, 44, 44, 44, 192, 4, 124, 0, 44, 44, 44, 44, 193, 172, 145, 145, - 145, 194, 124, 0, 6, 195, 196, 197, 143, 0, 0, 0, 36, 88, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 198, 57, 0, 5, 6, 0, 0, 199, 9, - 14, 15, 15, 15, 15, 15, 16, 200, 201, 202, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 82, 40, 36, 40, 36, 40, 36, 40, 101, - 0, 0, 0, 0, 0, 0, 203, 0, 36, 36, 36, 81, 36, 36, 36, 36, - 36, 61, 36, 36, 36, 36, 61, 95, 36, 36, 36, 41, 36, 36, 36, 41, - 36, 36, 36, 36, 36, 101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99, - 36, 36, 36, 36, 101, 0, 0, 0, 113, 0, 0, 0, 0, 0, 0, 0, - 36, 36, 61, 0, 36, 36, 36, 36, 36, 36, 36, 36, 36, 82, 65, 0, - 
36, 36, 36, 36, 36, 36, 36, 41, 36, 0, 36, 36, 81, 41, 0, 0, - 11, 11, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 36, 36, 36, 36, - 7, 7, 108, 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 0, 36, 36, 0, 0, 36, 36, 36, 36, - 36, 0, 0, 0, 0, 0, 0, 0, 36, 41, 88, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 95, 101, 77, 36, 36, 36, 36, 61, 41, 0, 0, - 36, 36, 36, 36, 36, 36, 0, 40, 86, 60, 0, 44, 36, 81, 81, 36, - 36, 36, 36, 36, 36, 0, 65, 94, 0, 0, 0, 0, 0, 133, 0, 0, - 36, 187, 0, 0, 0, 0, 0, 0, 36, 36, 36, 36, 61, 0, 0, 0, - 36, 36, 101, 0, 0, 0, 0, 0, 11, 11, 11, 11, 22, 0, 0, 0, - 15, 15, 15, 15, 24, 0, 0, 0, 36, 36, 36, 36, 36, 36, 44, 44, - 44, 188, 119, 0, 0, 0, 0, 0, 0, 96, 7, 7, 0, 0, 0, 94, - 36, 36, 36, 36, 44, 44, 65, 204, 150, 0, 0, 0, 36, 36, 36, 36, - 36, 36, 101, 0, 7, 7, 108, 0, 36, 67, 44, 44, 44, 205, 7, 7, - 184, 0, 0, 0, 36, 36, 36, 36, 36, 36, 36, 36, 67, 105, 0, 0, - 70, 206, 102, 207, 7, 7, 208, 174, 36, 36, 36, 36, 95, 36, 36, 36, - 36, 36, 36, 44, 44, 44, 209, 210, 36, 61, 88, 95, 36, 36, 36, 95, - 36, 36, 211, 0, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 67, - 44, 44, 65, 0, 7, 7, 108, 0, 44, 81, 36, 77, 77, 36, 36, 36, - 44, 92, 92, 87, 101, 94, 0, 81, 82, 102, 44, 113, 44, 113, 0, 0, - 36, 36, 36, 36, 36, 86, 44, 44, 44, 114, 212, 119, 7, 7, 108, 0, - 44, 95, 0, 0, 7, 7, 108, 0, 36, 36, 36, 67, 44, 87, 44, 44, - 213, 0, 184, 132, 132, 132, 36, 87, 125, 101, 0, 0, 7, 7, 108, 0, - 36, 36, 67, 44, 44, 44, 0, 0, 36, 36, 36, 36, 36, 36, 41, 58, - 44, 44, 44, 0, 7, 7, 108, 78, 7, 7, 108, 0, 0, 0, 0, 97, - 36, 36, 36, 36, 36, 36, 101, 0, 36, 36, 88, 36, 36, 36, 36, 36, - 36, 36, 36, 67, 44, 65, 44, 44, 206, 0, 0, 0, 7, 7, 108, 0, - 0, 0, 0, 0, 40, 36, 36, 36, 36, 36, 36, 36, 102, 44, 44, 44, - 44, 44, 58, 44, 44, 65, 0, 0, 36, 61, 0, 0, 0, 0, 0, 0, - 7, 7, 108, 133, 0, 0, 0, 0, 36, 36, 36, 41, 44, 207, 0, 0, - 36, 36, 36, 36, 44, 188, 119, 0, 36, 119, 0, 0, 7, 7, 108, 0, - 97, 36, 36, 36, 36, 36, 0, 81, 36, 101, 0, 
0, 86, 44, 44, 44, - 44, 44, 44, 44, 44, 44, 44, 65, 0, 0, 0, 94, 114, 36, 36, 36, - 101, 0, 0, 0, 0, 0, 0, 0, 41, 0, 0, 0, 0, 0, 0, 0, - 36, 36, 61, 0, 36, 36, 36, 101, 36, 36, 101, 0, 36, 36, 41, 214, - 62, 0, 0, 0, 0, 0, 0, 0, 0, 58, 87, 58, 215, 62, 216, 44, - 65, 58, 44, 0, 0, 0, 0, 0, 0, 0, 102, 87, 0, 0, 0, 0, - 102, 113, 0, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 11, 157, 15, - 15, 15, 15, 15, 15, 11, 11, 11, 11, 11, 11, 157, 15, 137, 15, 15, - 15, 15, 11, 11, 11, 11, 11, 11, 157, 15, 15, 15, 15, 15, 15, 49, - 48, 217, 10, 49, 11, 157, 168, 14, 15, 14, 15, 15, 11, 11, 11, 11, - 11, 11, 157, 15, 15, 15, 15, 15, 15, 50, 22, 10, 11, 49, 11, 218, - 15, 15, 15, 15, 15, 15, 50, 22, 11, 158, 164, 11, 218, 15, 15, 15, - 15, 15, 15, 11, 11, 11, 11, 11, 11, 157, 15, 15, 15, 15, 15, 15, - 11, 11, 11, 157, 15, 15, 15, 15, 157, 15, 15, 15, 15, 15, 15, 11, - 11, 11, 11, 11, 11, 157, 15, 15, 15, 15, 15, 15, 11, 11, 11, 11, - 15, 39, 11, 11, 11, 11, 11, 11, 218, 15, 15, 15, 15, 15, 24, 15, - 33, 11, 11, 11, 11, 11, 22, 15, 15, 15, 15, 15, 15, 137, 15, 11, - 11, 11, 11, 11, 11, 218, 15, 15, 15, 15, 15, 24, 15, 33, 11, 11, - 15, 15, 137, 15, 11, 11, 11, 11, 11, 11, 218, 15, 15, 15, 15, 15, - 24, 15, 27, 96, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 44, 44, 44, 44, 44, 65, 94, 44, 44, 44, 44, 113, 0, 99, 0, 0, - 0, 113, 119, 0, 0, 0, 94, 44, 58, 44, 44, 44, 0, 0, 0, 0, - 44, 65, 44, 44, 44, 44, 92, 44, 59, 74, 65, 0, 0, 0, 0, 0, - 36, 101, 0, 0, 44, 65, 0, 0, 157, 15, 15, 15, 15, 15, 15, 15, - 15, 44, 65, 0, 7, 7, 108, 0, 36, 81, 36, 36, 36, 36, 36, 36, - 98, 77, 81, 36, 61, 36, 109, 0, 105, 97, 109, 81, 98, 77, 109, 109, - 98, 77, 61, 36, 61, 36, 81, 43, 36, 36, 95, 36, 36, 36, 36, 0, - 81, 81, 95, 36, 36, 36, 36, 0, 0, 0, 0, 0, 11, 11, 11, 11, - 11, 11, 120, 0, 11, 11, 11, 11, 11, 11, 120, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 165, 124, 0, 20, 0, 0, 0, 0, 0, 0, 0, - 44, 44, 44, 44, 0, 0, 0, 0, -}; - -static RE_UINT8 re_sentence_break_stage_5[] = { - 0, 0, 0, 0, 0, 6, 2, 6, 6, 1, 0, 0, 6, 
12, 13, 0, - 0, 0, 0, 13, 13, 13, 0, 0, 14, 14, 11, 0, 10, 10, 10, 10, - 10, 10, 14, 0, 0, 0, 0, 12, 0, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 13, 0, 13, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 13, 0, 4, 0, 0, 6, 0, 0, 0, 0, 0, 7, 13, - 0, 5, 0, 0, 0, 7, 0, 0, 8, 8, 8, 0, 8, 8, 8, 7, - 7, 7, 7, 0, 8, 7, 8, 7, 7, 8, 7, 8, 7, 7, 8, 7, - 8, 8, 7, 8, 7, 8, 7, 7, 7, 8, 8, 7, 8, 7, 8, 8, - 7, 8, 8, 8, 7, 7, 8, 8, 8, 7, 7, 7, 8, 7, 7, 9, - 9, 9, 9, 9, 9, 7, 7, 7, 7, 9, 9, 9, 7, 7, 0, 0, - 0, 0, 9, 9, 9, 9, 0, 0, 7, 0, 0, 0, 9, 0, 9, 0, - 3, 3, 3, 3, 9, 0, 8, 7, 0, 0, 7, 7, 7, 7, 0, 8, - 0, 0, 8, 0, 8, 0, 8, 8, 8, 8, 0, 8, 7, 7, 7, 8, - 8, 7, 0, 8, 8, 7, 0, 3, 3, 3, 8, 7, 0, 9, 0, 0, - 0, 14, 0, 0, 0, 12, 0, 0, 0, 3, 3, 3, 3, 3, 0, 3, - 0, 3, 3, 0, 9, 9, 9, 0, 5, 5, 5, 5, 5, 5, 0, 0, - 14, 14, 0, 0, 3, 3, 3, 0, 5, 0, 0, 12, 9, 9, 9, 3, - 10, 10, 0, 10, 10, 0, 9, 9, 3, 9, 9, 9, 12, 9, 3, 3, - 3, 5, 0, 3, 3, 9, 9, 3, 3, 0, 3, 3, 3, 3, 9, 9, - 10, 10, 9, 9, 9, 0, 0, 9, 12, 12, 12, 0, 0, 0, 0, 5, - 9, 3, 9, 9, 0, 9, 9, 9, 9, 9, 3, 3, 3, 9, 0, 0, - 14, 12, 9, 0, 3, 3, 9, 3, 9, 3, 3, 3, 3, 3, 0, 0, - 9, 0, 9, 9, 3, 3, 5, 3, 3, 9, 3, 3, 12, 12, 10, 10, - 3, 0, 0, 3, 3, 3, 9, 0, 0, 0, 0, 3, 9, 9, 0, 9, - 0, 0, 10, 10, 0, 0, 0, 9, 0, 9, 9, 0, 0, 3, 0, 0, - 9, 3, 0, 0, 9, 0, 0, 0, 0, 0, 3, 3, 0, 0, 3, 9, - 0, 9, 3, 3, 0, 0, 9, 0, 0, 0, 3, 0, 3, 0, 3, 0, - 10, 10, 0, 0, 0, 9, 0, 9, 0, 3, 0, 3, 0, 3, 13, 13, - 13, 13, 3, 3, 3, 0, 0, 0, 3, 3, 3, 9, 10, 10, 12, 12, - 10, 10, 3, 3, 0, 8, 0, 0, 0, 0, 12, 0, 12, 0, 0, 0, - 8, 8, 0, 0, 9, 0, 12, 9, 6, 9, 9, 9, 9, 9, 9, 13, - 13, 0, 0, 0, 3, 12, 12, 0, 9, 0, 3, 3, 0, 0, 14, 12, - 14, 12, 0, 3, 3, 3, 5, 0, 9, 3, 3, 9, 9, 3, 9, 0, - 12, 12, 12, 12, 0, 0, 12, 12, 9, 9, 12, 12, 3, 9, 9, 0, - 0, 8, 0, 8, 7, 0, 7, 7, 8, 0, 7, 0, 8, 0, 0, 0, - 6, 6, 6, 6, 6, 6, 6, 5, 3, 3, 5, 5, 0, 0, 0, 14, - 14, 0, 0, 0, 13, 13, 13, 13, 11, 0, 0, 0, 4, 4, 5, 5, - 5, 5, 5, 6, 0, 13, 13, 0, 12, 12, 0, 0, 0, 13, 13, 12, - 0, 0, 0, 6, 5, 0, 5, 5, 0, 13, 13, 7, 0, 0, 0, 8, 
- 0, 0, 7, 8, 8, 8, 7, 7, 8, 0, 8, 0, 8, 8, 0, 7, - 9, 7, 0, 0, 0, 8, 7, 7, 0, 0, 7, 0, 9, 9, 9, 8, - 0, 0, 8, 8, 0, 0, 13, 13, 8, 7, 7, 8, 7, 8, 7, 3, - 7, 7, 0, 7, 0, 0, 12, 9, 0, 0, 13, 0, 6, 14, 12, 0, - 0, 13, 13, 13, 9, 9, 0, 12, 9, 0, 12, 12, 8, 7, 9, 3, - 3, 3, 0, 9, 7, 7, 3, 3, 3, 3, 0, 12, 0, 0, 8, 7, - 9, 0, 0, 8, 7, 8, 7, 9, 7, 7, 7, 9, 9, 9, 3, 9, - 0, 12, 12, 12, 0, 0, 9, 3, 12, 12, 9, 9, 9, 3, 3, 0, - 3, 3, 3, 12, 0, 0, 0, 7, 0, 9, 3, 9, 9, 9, 13, 13, - 14, 14, 0, 14, 0, 14, 14, 0, 13, 0, 0, 13, 0, 14, 12, 12, - 14, 13, 13, 13, 13, 13, 13, 0, 9, 0, 0, 5, 0, 0, 14, 0, - 0, 13, 0, 13, 13, 12, 13, 13, 14, 0, 9, 9, 0, 5, 5, 5, - 0, 5, 12, 12, 3, 0, 10, 10, 9, 12, 12, 0, 3, 12, 0, 0, - 10, 10, 9, 0, 12, 12, 0, 12, 12, 0, 3, 0, 9, 12, 0, 0, - 9, 9, 9, 12, 3, 0, 12, 12, 0, 3, 3, 12, 3, 3, 3, 5, - 5, 5, 5, 3, 0, 8, 8, 0, 8, 0, 7, 7, -}; - -/* Sentence_Break: 6644 bytes. */ - -RE_UINT32 re_get_sentence_break(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 12; - code = ch ^ (f << 12); - pos = (RE_UINT32)re_sentence_break_stage_1[f] << 4; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_sentence_break_stage_2[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_sentence_break_stage_3[pos + f] << 3; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_sentence_break_stage_4[pos + f] << 2; - value = re_sentence_break_stage_5[pos + code]; - - return value; -} - -/* Math. 
*/ - -static RE_UINT8 re_math_stage_1[] = { - 0, 1, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, -}; - -static RE_UINT8 re_math_stage_2[] = { - 0, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 6, 1, 1, -}; - -static RE_UINT8 re_math_stage_3[] = { - 0, 1, 1, 2, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 4, 5, 6, 7, 1, 8, 9, 10, 1, 6, 6, 11, 1, 1, 1, 1, - 1, 1, 1, 12, 1, 1, 13, 14, 1, 1, 1, 1, 15, 16, 17, 18, - 1, 1, 1, 1, 1, 1, 19, 1, -}; - -static RE_UINT8 re_math_stage_4[] = { - 0, 1, 2, 3, 0, 4, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, - 9, 10, 11, 12, 13, 0, 14, 15, 16, 17, 18, 0, 19, 20, 21, 22, - 23, 23, 23, 23, 23, 23, 23, 23, 24, 25, 0, 26, 27, 28, 29, 30, - 0, 0, 0, 0, 0, 31, 32, 33, 34, 0, 35, 36, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 23, 23, 0, 19, 37, 0, 0, 0, 0, 0, - 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 0, - 1, 3, 3, 0, 0, 0, 0, 40, 23, 23, 41, 23, 42, 43, 44, 23, - 45, 46, 47, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 48, 23, 23, - 23, 23, 23, 23, 23, 23, 49, 23, 44, 50, 51, 52, 53, 54, 0, 55, -}; - -static RE_UINT8 re_math_stage_5[] = { - 0, 0, 0, 0, 0, 8, 0, 112, 0, 0, 0, 64, 0, 0, 0, 80, - 0, 16, 2, 0, 0, 0, 128, 0, 0, 0, 39, 0, 0, 0, 115, 0, - 192, 1, 0, 0, 0, 0, 64, 0, 0, 0, 28, 0, 17, 0, 4, 0, - 30, 0, 0, 124, 0, 124, 0, 0, 0, 0, 255, 31, 98, 248, 0, 0, - 132, 252, 47, 63, 16, 179, 251, 241, 255, 11, 0, 0, 0, 0, 255, 255, - 255, 126, 195, 240, 255, 255, 255, 47, 48, 0, 240, 255, 255, 255, 255, 255, - 0, 15, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 248, - 255, 255, 191, 0, 0, 0, 1, 240, 7, 0, 0, 0, 3, 192, 255, 240, - 195, 140, 15, 0, 148, 31, 0, 255, 96, 0, 0, 0, 5, 0, 0, 0, - 15, 224, 0, 0, 159, 31, 0, 0, 0, 2, 0, 0, 126, 1, 0, 0, - 4, 30, 0, 0, 255, 255, 223, 255, 255, 255, 255, 223, 100, 222, 255, 235, - 239, 255, 
255, 255, 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, - 63, 255, 255, 255, 255, 207, 255, 255, 150, 254, 247, 10, 132, 234, 150, 170, - 150, 247, 247, 94, 255, 251, 255, 15, 238, 251, 255, 15, 0, 0, 3, 0, -}; - -/* Math: 538 bytes. */ - -RE_UINT32 re_get_math(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_math_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_math_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_math_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_math_stage_4[pos + f] << 5; - pos += code; - value = (re_math_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Alphabetic. */ - -static RE_UINT8 re_alphabetic_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, -}; - -static RE_UINT8 re_alphabetic_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 18, 19, 13, 20, 13, 21, 13, 13, 13, 13, 22, 7, 7, - 23, 24, 13, 13, 13, 13, 25, 26, 13, 13, 27, 13, 28, 29, 30, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 31, 7, 32, 33, 7, 34, 13, 13, 13, 13, 13, 35, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, -}; - -static RE_UINT8 re_alphabetic_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, - 29, 30, 31, 31, 32, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 31, - 36, 37, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39, - 1, 1, 1, 1, 40, 1, 41, 42, 43, 44, 45, 46, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 47, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 48, 49, 1, 50, 51, 52, 53, 54, 55, 56, 57, 58, 1, 59, - 60, 61, 62, 63, 64, 
31, 31, 31, 65, 66, 67, 68, 69, 70, 71, 72, - 73, 31, 74, 31, 75, 31, 31, 31, 1, 1, 1, 76, 77, 78, 31, 31, - 1, 1, 1, 1, 79, 31, 31, 31, 31, 31, 31, 31, 1, 1, 80, 31, - 1, 1, 81, 82, 31, 31, 31, 83, 1, 1, 1, 1, 1, 1, 1, 84, - 1, 1, 85, 31, 31, 31, 31, 31, 86, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 87, 31, 31, 31, 31, 31, 31, 31, 88, 89, 90, 91, - 92, 31, 31, 31, 31, 31, 31, 31, 93, 94, 31, 31, 31, 31, 95, 31, - 31, 96, 31, 31, 31, 31, 31, 31, 1, 1, 1, 1, 1, 1, 97, 1, - 1, 1, 1, 1, 1, 1, 1, 98, 99, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 100, 31, 1, 1, 101, 31, 31, 31, 31, 31, -}; - -static RE_UINT8 re_alphabetic_stage_4[] = { - 0, 0, 1, 1, 0, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 5, 6, 0, 0, 7, 8, 9, 10, 4, 11, - 4, 4, 4, 4, 12, 4, 4, 4, 4, 13, 14, 15, 16, 17, 18, 19, - 20, 4, 21, 22, 4, 4, 23, 24, 25, 4, 26, 4, 4, 27, 28, 29, - 30, 31, 32, 0, 0, 33, 34, 35, 4, 36, 37, 38, 39, 40, 41, 42, - 43, 44, 45, 46, 47, 48, 49, 50, 51, 48, 52, 53, 54, 55, 56, 0, - 57, 58, 59, 60, 57, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, - 15, 72, 73, 0, 74, 75, 76, 0, 77, 0, 78, 79, 80, 81, 0, 0, - 4, 82, 25, 83, 84, 4, 85, 86, 4, 4, 87, 4, 88, 89, 90, 4, - 91, 4, 92, 0, 93, 4, 4, 94, 15, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 95, 1, 4, 4, 96, 97, 98, 98, 99, 4, 100, 101, 0, - 0, 4, 4, 102, 4, 103, 4, 104, 105, 106, 25, 107, 4, 108, 109, 0, - 110, 4, 105, 111, 0, 112, 0, 0, 4, 113, 114, 0, 4, 115, 4, 116, - 4, 104, 117, 118, 119, 0, 0, 120, 4, 4, 4, 4, 4, 4, 0, 121, - 94, 4, 122, 118, 4, 123, 124, 125, 0, 0, 0, 126, 127, 0, 0, 0, - 128, 129, 130, 4, 119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 131, 4, 109, 4, 132, 105, 4, 4, 4, 4, 133, - 4, 85, 4, 134, 135, 136, 136, 4, 0, 137, 0, 0, 0, 0, 0, 0, - 138, 139, 15, 4, 140, 15, 4, 86, 141, 142, 4, 4, 143, 72, 0, 25, - 4, 4, 4, 4, 4, 104, 0, 0, 4, 4, 4, 4, 4, 4, 104, 0, - 4, 4, 4, 4, 31, 0, 25, 118, 144, 145, 4, 146, 4, 4, 4, 93, - 147, 148, 4, 4, 149, 150, 0, 147, 151, 16, 4, 98, 4, 4, 152, 153, - 28, 103, 154, 
81, 4, 155, 137, 156, 4, 135, 157, 158, 4, 105, 159, 160, - 161, 162, 86, 163, 4, 4, 4, 164, 4, 4, 4, 4, 4, 165, 166, 110, - 4, 4, 4, 167, 4, 4, 168, 0, 169, 170, 171, 4, 4, 27, 172, 4, - 4, 118, 25, 4, 173, 4, 16, 174, 0, 0, 0, 175, 4, 4, 4, 81, - 0, 1, 1, 176, 4, 105, 177, 0, 178, 179, 180, 0, 4, 4, 4, 72, - 0, 0, 4, 181, 0, 0, 0, 0, 0, 0, 0, 0, 81, 4, 182, 0, - 4, 25, 103, 72, 118, 4, 183, 0, 4, 4, 4, 4, 118, 25, 184, 110, - 4, 185, 4, 60, 0, 0, 0, 0, 4, 135, 104, 16, 0, 0, 0, 0, - 186, 187, 104, 135, 105, 0, 0, 188, 104, 168, 0, 0, 4, 189, 0, 0, - 190, 98, 0, 81, 81, 0, 78, 191, 4, 104, 104, 154, 27, 0, 0, 0, - 4, 4, 119, 0, 4, 154, 4, 154, 4, 4, 192, 0, 148, 32, 25, 119, - 4, 154, 25, 193, 4, 4, 194, 0, 195, 196, 0, 0, 197, 198, 4, 119, - 39, 48, 199, 60, 0, 0, 0, 0, 4, 4, 200, 0, 4, 4, 201, 0, - 0, 0, 0, 0, 4, 202, 203, 0, 4, 105, 204, 0, 4, 104, 0, 0, - 205, 164, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 206, - 0, 0, 0, 0, 0, 0, 4, 32, 207, 208, 77, 209, 173, 210, 0, 0, - 4, 4, 4, 4, 168, 0, 0, 0, 4, 4, 4, 143, 4, 4, 4, 4, - 4, 4, 60, 0, 0, 0, 0, 0, 4, 143, 0, 0, 0, 0, 0, 0, - 4, 4, 211, 0, 0, 0, 0, 0, 4, 32, 105, 0, 0, 0, 25, 157, - 4, 135, 60, 212, 93, 0, 0, 0, 4, 4, 213, 105, 172, 0, 0, 77, - 4, 4, 4, 4, 4, 4, 4, 31, 4, 4, 4, 4, 4, 4, 4, 154, - 214, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 215, 216, 0, 0, 0, - 4, 4, 217, 4, 218, 219, 220, 4, 221, 222, 223, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 224, 225, 86, 217, 217, 132, 132, 207, 207, 226, 0, - 227, 228, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 191, 0, - 4, 4, 229, 0, 0, 0, 0, 0, 220, 230, 231, 232, 233, 234, 0, 0, - 0, 25, 235, 235, 109, 0, 0, 0, 4, 4, 4, 4, 4, 4, 135, 0, - 4, 181, 4, 4, 4, 4, 4, 4, 118, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 214, 0, 0, 118, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_alphabetic_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 255, 255, 127, 255, - 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, 32, 0, 0, 0, - 0, 0, 223, 188, 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 
255, - 3, 252, 255, 255, 255, 255, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, - 255, 0, 0, 0, 0, 0, 255, 191, 182, 0, 255, 255, 255, 7, 7, 0, - 0, 0, 255, 7, 255, 255, 255, 254, 0, 192, 255, 255, 255, 255, 239, 31, - 254, 225, 0, 156, 0, 0, 255, 255, 0, 224, 255, 255, 255, 255, 3, 0, - 0, 252, 255, 255, 255, 7, 48, 4, 255, 255, 255, 252, 255, 31, 0, 0, - 255, 255, 255, 1, 255, 255, 223, 63, 0, 0, 240, 255, 248, 3, 255, 255, - 255, 255, 255, 239, 255, 223, 225, 255, 15, 0, 254, 255, 239, 159, 249, 255, - 255, 253, 197, 227, 159, 89, 128, 176, 15, 0, 3, 0, 238, 135, 249, 255, - 255, 253, 109, 195, 135, 25, 2, 94, 0, 0, 63, 0, 238, 191, 251, 255, - 255, 253, 237, 227, 191, 27, 1, 0, 15, 0, 0, 2, 238, 159, 249, 255, - 159, 25, 192, 176, 15, 0, 2, 0, 236, 199, 61, 214, 24, 199, 255, 195, - 199, 29, 129, 0, 239, 223, 253, 255, 255, 253, 255, 227, 223, 29, 96, 7, - 15, 0, 0, 0, 255, 253, 239, 227, 223, 29, 96, 64, 15, 0, 6, 0, - 238, 223, 253, 255, 255, 255, 255, 231, 223, 93, 240, 128, 15, 0, 0, 252, - 236, 255, 127, 252, 255, 255, 251, 47, 127, 128, 95, 255, 0, 0, 12, 0, - 255, 255, 255, 7, 127, 32, 0, 0, 150, 37, 240, 254, 174, 236, 255, 59, - 95, 32, 0, 240, 1, 0, 0, 0, 255, 254, 255, 255, 255, 31, 254, 255, - 3, 255, 255, 254, 255, 255, 255, 31, 255, 255, 127, 249, 231, 193, 255, 255, - 127, 64, 0, 48, 191, 32, 255, 255, 255, 255, 255, 247, 255, 61, 127, 61, - 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, 255, 255, 61, 255, - 255, 255, 255, 135, 255, 255, 0, 0, 255, 255, 63, 63, 255, 159, 255, 255, - 255, 199, 255, 1, 255, 223, 15, 0, 255, 255, 15, 0, 255, 223, 13, 0, - 255, 255, 207, 255, 255, 1, 128, 16, 255, 255, 255, 0, 255, 7, 255, 255, - 255, 255, 63, 0, 255, 255, 255, 127, 255, 15, 255, 1, 255, 63, 31, 0, - 255, 15, 255, 255, 255, 3, 0, 0, 255, 255, 255, 15, 254, 255, 31, 0, - 128, 0, 0, 0, 255, 255, 239, 255, 239, 15, 0, 0, 255, 243, 0, 252, - 191, 255, 3, 0, 0, 224, 0, 252, 255, 255, 255, 63, 255, 1, 0, 0, - 0, 222, 111, 0, 128, 255, 31, 0, 
63, 63, 255, 170, 255, 255, 223, 95, - 220, 31, 207, 15, 255, 31, 220, 31, 0, 0, 2, 128, 0, 0, 255, 31, - 132, 252, 47, 62, 80, 189, 255, 243, 224, 67, 0, 0, 0, 0, 192, 255, - 255, 127, 255, 255, 31, 120, 12, 0, 255, 128, 0, 0, 255, 255, 127, 0, - 127, 127, 127, 127, 0, 128, 0, 0, 224, 0, 0, 0, 254, 3, 62, 31, - 255, 255, 127, 224, 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, - 255, 31, 255, 255, 0, 12, 0, 0, 255, 127, 240, 143, 0, 0, 128, 255, - 252, 255, 255, 255, 255, 249, 255, 255, 255, 127, 255, 0, 187, 247, 255, 255, - 47, 0, 0, 0, 0, 0, 252, 40, 255, 255, 7, 0, 255, 255, 247, 255, - 223, 255, 0, 124, 255, 63, 0, 0, 255, 255, 127, 196, 5, 0, 0, 56, - 255, 255, 60, 0, 126, 126, 126, 0, 127, 127, 255, 255, 63, 0, 255, 255, - 255, 7, 0, 0, 15, 0, 255, 255, 127, 248, 255, 255, 255, 63, 255, 255, - 255, 255, 255, 3, 127, 0, 248, 224, 255, 253, 127, 95, 219, 255, 255, 255, - 0, 0, 248, 255, 255, 255, 252, 255, 0, 0, 255, 15, 0, 0, 223, 255, - 192, 255, 255, 255, 252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, - 255, 63, 255, 63, 255, 255, 31, 0, 255, 255, 1, 0, 15, 255, 62, 0, - 255, 255, 15, 255, 255, 0, 255, 255, 63, 253, 255, 255, 255, 255, 191, 145, - 255, 255, 55, 0, 255, 255, 255, 192, 111, 240, 239, 254, 31, 0, 0, 0, - 63, 0, 0, 0, 255, 255, 71, 0, 30, 0, 0, 20, 255, 255, 251, 255, - 255, 255, 159, 64, 127, 189, 255, 191, 255, 1, 255, 255, 159, 25, 129, 224, - 187, 7, 0, 0, 179, 0, 0, 0, 255, 255, 63, 127, 0, 0, 0, 63, - 17, 0, 0, 0, 255, 255, 255, 227, 0, 0, 0, 128, 255, 253, 255, 255, - 255, 255, 127, 127, 0, 0, 252, 255, 255, 254, 127, 0, 127, 0, 0, 0, - 248, 255, 255, 224, 31, 0, 255, 255, 3, 0, 0, 0, 255, 7, 255, 31, - 255, 1, 255, 67, 255, 255, 223, 255, 255, 255, 255, 223, 100, 222, 255, 235, - 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, - 63, 255, 255, 255, 253, 255, 255, 247, 247, 15, 0, 0, 127, 255, 255, 249, - 219, 7, 0, 0, 143, 0, 0, 0, 150, 254, 247, 10, 132, 234, 150, 170, - 150, 247, 247, 
94, 255, 251, 255, 15, 238, 251, 255, 15, 255, 3, 255, 255, -}; - -/* Alphabetic: 2193 bytes. */ - -RE_UINT32 re_get_alphabetic(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_alphabetic_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_alphabetic_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_alphabetic_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_alphabetic_stage_4[pos + f] << 5; - pos += code; - value = (re_alphabetic_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Lowercase. */ - -static RE_UINT8 re_lowercase_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_lowercase_stage_2[] = { - 0, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 6, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8, - 9, 10, 1, 11, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 12, 1, 1, 13, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_lowercase_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, 10, 11, - 12, 13, 6, 6, 14, 6, 6, 6, 6, 6, 6, 6, 15, 16, 6, 6, - 6, 6, 6, 6, 6, 6, 17, 18, 6, 6, 6, 19, 6, 6, 6, 6, - 6, 6, 6, 20, 6, 6, 6, 21, 6, 6, 6, 6, 22, 6, 6, 6, - 6, 6, 6, 6, 23, 6, 6, 6, 24, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 25, 26, 27, 28, 6, 29, 6, 6, 6, 6, 6, 6, -}; - -static RE_UINT8 re_lowercase_stage_4[] = { - 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 5, 13, 14, 15, 16, 17, 18, 19, 0, 0, 20, 21, 22, 23, 24, 25, - 0, 26, 15, 5, 27, 5, 28, 5, 5, 29, 0, 30, 31, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, - 0, 0, 0, 0, 33, 0, 0, 0, 15, 15, 15, 15, 15, 15, 0, 0, - 5, 5, 5, 5, 34, 5, 5, 5, 35, 36, 37, 38, 36, 39, 40, 41, - 0, 0, 0, 42, 43, 0, 0, 0, 44, 
45, 46, 26, 47, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 26, 48, 0, 26, 49, 50, 5, 5, 5, 51, - 15, 52, 0, 0, 0, 0, 0, 0, 0, 0, 5, 53, 54, 0, 0, 0, - 0, 55, 5, 56, 57, 58, 0, 59, 0, 26, 60, 61, 15, 15, 0, 0, - 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 63, 64, 0, 0, 0, 65, 66, 0, 0, 0, 0, 0, 0, 15, 67, - 0, 0, 0, 0, 0, 0, 15, 0, 68, 69, 70, 31, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 68, 69, 80, 31, 71, 81, 64, 74, 82, 83, 84, - 85, 81, 86, 26, 87, 74, 88, 0, 0, 89, 90, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_lowercase_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 0, 0, 0, 128, - 255, 255, 127, 255, 170, 170, 170, 170, 170, 170, 170, 85, 85, 171, 170, 170, - 170, 170, 170, 212, 41, 49, 36, 78, 42, 45, 81, 230, 64, 82, 85, 181, - 170, 170, 41, 170, 170, 170, 250, 147, 133, 170, 255, 255, 255, 255, 255, 255, - 255, 255, 239, 255, 255, 255, 255, 1, 3, 0, 0, 0, 31, 0, 0, 0, - 32, 0, 0, 0, 0, 0, 138, 60, 0, 0, 1, 0, 0, 240, 255, 255, - 255, 127, 227, 170, 170, 170, 47, 25, 0, 0, 255, 255, 2, 168, 170, 170, - 84, 213, 170, 170, 170, 170, 0, 0, 254, 255, 255, 255, 255, 0, 0, 0, - 0, 0, 0, 63, 255, 1, 0, 0, 170, 170, 234, 191, 255, 0, 63, 0, - 255, 0, 255, 0, 63, 0, 255, 0, 255, 0, 255, 63, 255, 0, 223, 64, - 220, 0, 207, 0, 255, 0, 220, 0, 0, 0, 2, 128, 0, 0, 255, 31, - 0, 196, 8, 0, 0, 128, 16, 50, 192, 67, 0, 0, 16, 0, 0, 0, - 255, 3, 0, 0, 255, 255, 255, 127, 98, 21, 218, 63, 26, 80, 8, 0, - 191, 32, 0, 0, 170, 42, 0, 0, 170, 170, 170, 58, 168, 170, 171, 170, - 170, 170, 255, 149, 170, 80, 186, 170, 170, 2, 160, 0, 0, 0, 0, 7, - 255, 255, 255, 247, 63, 0, 255, 255, 127, 0, 248, 0, 0, 255, 255, 255, - 255, 255, 0, 0, 0, 0, 0, 255, 255, 255, 255, 15, 255, 255, 7, 0, - 0, 0, 0, 252, 255, 255, 15, 0, 0, 192, 223, 255, 252, 255, 255, 15, - 0, 0, 192, 235, 239, 255, 0, 0, 0, 252, 255, 255, 15, 0, 0, 192, - 255, 255, 255, 0, 0, 0, 252, 255, 255, 15, 0, 0, 192, 255, 255, 255, - 0, 192, 255, 255, 0, 0, 192, 255, 63, 0, 0, 0, 252, 255, 255, 247, - 3, 0, 0, 240, 255, 
255, 223, 15, 255, 127, 63, 0, 255, 253, 0, 0, - 247, 11, 0, 0, 252, 255, 255, 255, 15, 0, 0, 0, -}; - -/* Lowercase: 829 bytes. */ - -RE_UINT32 re_get_lowercase(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_lowercase_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_lowercase_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_lowercase_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_lowercase_stage_4[pos + f] << 5; - pos += code; - value = (re_lowercase_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Uppercase. */ - -static RE_UINT8 re_uppercase_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_uppercase_stage_2[] = { - 0, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, - 8, 9, 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 11, 1, 1, 12, 13, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_uppercase_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 6, 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 10, - 6, 11, 6, 6, 12, 6, 6, 6, 6, 6, 6, 6, 13, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 14, 15, 6, 6, 6, 6, 6, 6, 6, 16, - 6, 6, 6, 6, 17, 6, 6, 6, 6, 6, 6, 6, 18, 6, 6, 6, - 19, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 20, 21, 22, 23, - 6, 24, 6, 6, 6, 6, 6, 6, 6, 25, 6, 6, 6, 6, 6, 6, -}; - -static RE_UINT8 re_uppercase_stage_4[] = { - 0, 0, 1, 0, 0, 0, 2, 0, 3, 4, 5, 6, 7, 8, 9, 10, - 3, 11, 12, 0, 0, 0, 0, 0, 0, 0, 0, 13, 14, 15, 16, 17, - 18, 19, 0, 3, 20, 3, 21, 3, 3, 22, 23, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 24, 0, - 0, 0, 0, 0, 0, 18, 18, 25, 3, 3, 3, 3, 26, 3, 3, 3, - 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 19, 37, 0, 0, 0, - 0, 0, 0, 0, 
0, 38, 19, 0, 18, 39, 0, 40, 3, 3, 3, 41, - 0, 0, 3, 42, 43, 0, 0, 0, 0, 44, 3, 45, 46, 47, 0, 0, - 0, 1, 0, 0, 0, 0, 0, 0, 18, 48, 0, 0, 0, 49, 50, 0, - 0, 0, 0, 0, 18, 51, 0, 0, 0, 0, 0, 0, 0, 18, 0, 0, - 52, 53, 54, 55, 56, 57, 49, 58, 59, 60, 61, 62, 63, 52, 53, 54, - 55, 64, 25, 49, 58, 55, 65, 66, 67, 68, 38, 39, 49, 69, 70, 0, - 18, 71, 0, 0, 0, 0, 0, 0, 0, 49, 72, 72, 58, 0, 0, 0, -}; - -static RE_UINT8 re_uppercase_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 255, 255, 127, 127, 85, 85, 85, 85, - 85, 85, 85, 170, 170, 84, 85, 85, 85, 85, 85, 43, 214, 206, 219, 177, - 213, 210, 174, 17, 144, 164, 170, 74, 85, 85, 210, 85, 85, 85, 5, 108, - 122, 85, 0, 0, 0, 0, 69, 128, 64, 215, 254, 255, 251, 15, 0, 0, - 0, 128, 28, 85, 85, 85, 144, 230, 255, 255, 255, 255, 255, 255, 0, 0, - 1, 84, 85, 85, 171, 42, 85, 85, 85, 85, 254, 255, 255, 255, 127, 0, - 191, 32, 0, 0, 255, 255, 63, 0, 85, 85, 21, 64, 0, 255, 0, 63, - 0, 255, 0, 255, 0, 63, 0, 170, 0, 255, 0, 0, 0, 0, 0, 15, - 0, 15, 0, 15, 0, 31, 0, 15, 132, 56, 39, 62, 80, 61, 15, 192, - 32, 0, 0, 0, 8, 0, 0, 0, 0, 0, 192, 255, 255, 127, 0, 0, - 157, 234, 37, 192, 5, 40, 4, 0, 85, 21, 0, 0, 85, 85, 85, 5, - 84, 85, 84, 85, 85, 85, 0, 106, 85, 40, 69, 85, 85, 125, 95, 0, - 255, 0, 0, 0, 0, 0, 255, 255, 255, 255, 15, 0, 255, 255, 7, 0, - 255, 255, 255, 3, 0, 0, 240, 255, 255, 63, 0, 0, 0, 255, 255, 255, - 3, 0, 0, 208, 100, 222, 63, 0, 255, 3, 0, 0, 176, 231, 223, 31, - 0, 0, 0, 123, 95, 252, 1, 0, 0, 240, 255, 255, 63, 0, 0, 0, - 3, 0, 0, 240, 1, 0, 0, 0, 252, 255, 255, 7, 0, 0, 0, 240, - 255, 255, 31, 0, 255, 1, 0, 0, 0, 4, 0, 0, 3, 0, 0, 0, - 255, 3, 255, 255, -}; - -/* Uppercase: 725 bytes. 
*/ - -RE_UINT32 re_get_uppercase(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_uppercase_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_uppercase_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_uppercase_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_uppercase_stage_4[pos + f] << 5; - pos += code; - value = (re_uppercase_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Cased. */ - -static RE_UINT8 re_cased_stage_1[] = { - 0, 1, 2, 3, 4, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, -}; - -static RE_UINT8 re_cased_stage_2[] = { - 0, 1, 2, 2, 3, 2, 2, 4, 5, 6, 2, 7, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 9, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10, 11, - 2, 12, 2, 13, 2, 2, 14, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 15, 2, 2, 2, 2, 16, 2, 17, 2, 2, 2, -}; - -static RE_UINT8 re_cased_stage_3[] = { - 0, 1, 2, 3, 2, 4, 5, 6, 2, 7, 8, 9, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 10, 10, 10, 10, 10, 12, - 10, 13, 2, 14, 2, 2, 15, 16, 17, 18, 19, 20, 10, 10, 10, 10, - 10, 21, 10, 10, 10, 10, 10, 10, 22, 23, 24, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 25, 26, 27, 28, 10, 10, 10, 10, 10, 10, 29, 14, - 10, 10, 10, 10, 10, 10, 30, 10, 10, 10, 10, 10, 10, 10, 31, 10, - 32, 33, 10, 10, 10, 10, 10, 10, 10, 34, 10, 10, 10, 10, 10, 10, - 10, 35, 10, 10, 10, 10, 10, 10, 36, 37, 38, 2, 2, 39, 40, 41, - 10, 10, 42, 10, 10, 10, 10, 10, 10, 10, 43, 44, 10, 10, 10, 10, -}; - -static RE_UINT8 re_cased_stage_4[] = { - 0, 0, 1, 1, 0, 2, 3, 3, 4, 4, 4, 4, 4, 5, 6, 4, - 7, 8, 9, 10, 0, 0, 11, 12, 13, 14, 4, 15, 16, 4, 4, 4, - 4, 17, 18, 
19, 20, 0, 0, 0, 0, 0, 0, 0, 0, 4, 21, 0, - 0, 4, 4, 22, 23, 0, 0, 0, 4, 4, 0, 0, 22, 4, 24, 25, - 4, 26, 27, 28, 0, 0, 0, 29, 30, 0, 0, 0, 31, 32, 33, 4, - 34, 0, 0, 0, 0, 35, 4, 36, 4, 37, 38, 4, 4, 4, 4, 39, - 4, 21, 0, 0, 0, 0, 4, 40, 25, 0, 0, 0, 0, 41, 4, 4, - 42, 43, 0, 44, 0, 45, 5, 46, 47, 0, 0, 0, 0, 1, 1, 0, - 4, 4, 48, 0, 0, 45, 49, 50, 4, 51, 4, 51, 0, 4, 4, 0, - 4, 4, 52, 4, 53, 54, 55, 4, 56, 57, 58, 4, 4, 59, 60, 5, - 52, 52, 37, 37, 61, 61, 62, 0, 4, 4, 63, 0, 0, 45, 64, 64, - 36, 0, 0, 0, -}; - -static RE_UINT8 re_cased_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 255, 255, 127, 255, - 255, 255, 255, 255, 255, 255, 255, 247, 240, 255, 255, 255, 255, 255, 239, 255, - 255, 255, 255, 1, 3, 0, 0, 0, 31, 0, 0, 0, 32, 0, 0, 0, - 0, 0, 207, 188, 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, - 3, 252, 255, 255, 255, 255, 254, 255, 255, 255, 127, 0, 254, 255, 255, 255, - 255, 0, 0, 0, 191, 32, 0, 0, 255, 255, 63, 63, 255, 1, 0, 0, - 63, 63, 255, 170, 255, 255, 255, 63, 255, 255, 223, 95, 220, 31, 207, 15, - 255, 31, 220, 31, 0, 0, 2, 128, 0, 0, 255, 31, 132, 252, 47, 62, - 80, 189, 31, 242, 224, 67, 0, 0, 24, 0, 0, 0, 0, 0, 192, 255, - 255, 3, 0, 0, 255, 127, 255, 255, 255, 255, 255, 127, 31, 120, 12, 0, - 255, 63, 0, 0, 252, 255, 255, 255, 255, 120, 255, 255, 255, 127, 255, 0, - 0, 0, 0, 7, 0, 0, 255, 255, 63, 0, 255, 255, 127, 0, 248, 0, - 255, 255, 0, 0, 255, 255, 15, 255, 255, 255, 255, 15, 255, 255, 7, 0, - 255, 255, 223, 255, 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, - 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, - 253, 255, 255, 247, 255, 253, 255, 255, 247, 15, 0, 0, 15, 0, 0, 0, - 255, 3, 255, 255, -}; - -/* Cased: 748 bytes. 
*/ - -RE_UINT32 re_get_cased(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 14; - code = ch ^ (f << 14); - pos = (RE_UINT32)re_cased_stage_1[f] << 4; - f = code >> 10; - code ^= f << 10; - pos = (RE_UINT32)re_cased_stage_2[pos + f] << 3; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_cased_stage_3[pos + f] << 2; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_cased_stage_4[pos + f] << 5; - pos += code; - value = (re_cased_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Case_Ignorable. */ - -static RE_UINT8 re_case_ignorable_stage_1[] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, - 4, 4, -}; - -static RE_UINT8 re_case_ignorable_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 8, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, - 11, 12, 13, 14, 7, 7, 7, 7, 7, 7, 7, 7, 7, 15, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 16, 7, 7, 17, 18, 19, 20, 21, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 22, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -}; - -static RE_UINT8 re_case_ignorable_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 1, 17, 1, 1, 1, 18, 19, 20, 21, 22, 23, 24, 1, 25, - 26, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 28, 29, 1, - 30, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 31, 1, 1, 1, 32, 1, 33, 34, 35, 36, 37, 38, 1, 1, 1, 1, - 1, 1, 1, 39, 1, 1, 40, 41, 1, 42, 43, 44, 1, 1, 1, 1, - 1, 1, 45, 1, 1, 1, 1, 1, 46, 47, 48, 49, 50, 51, 52, 53, - 1, 1, 1, 1, 54, 1, 1, 1, 1, 1, 55, 56, 1, 1, 1, 57, - 1, 1, 1, 1, 58, 1, 1, 1, 1, 59, 60, 1, 1, 1, 1, 1, - 1, 1, 61, 1, 1, 1, 1, 1, 62, 1, 1, 1, 1, 1, 1, 1, - 63, 64, 1, 1, 1, 1, 1, 1, 1, 1, 1, 65, 1, 1, 1, 1, - 66, 67, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_case_ignorable_stage_4[] = { - 0, 1, 2, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 5, 6, 6, 6, 6, 6, 7, 8, 0, 0, 0, - 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 10, 0, 11, 
12, 13, 14, - 15, 0, 16, 17, 0, 0, 18, 19, 20, 5, 21, 0, 0, 22, 0, 23, - 24, 25, 26, 0, 0, 0, 27, 6, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 33, 37, 38, 36, 33, 39, 35, 32, 40, 41, 35, 42, 0, 43, 0, - 3, 44, 45, 35, 32, 40, 46, 35, 32, 0, 34, 35, 0, 0, 47, 0, - 0, 48, 49, 0, 0, 50, 51, 0, 52, 53, 0, 54, 55, 56, 57, 0, - 0, 58, 59, 60, 61, 0, 0, 33, 0, 0, 62, 0, 0, 0, 0, 0, - 63, 63, 64, 64, 0, 65, 66, 0, 67, 0, 68, 0, 69, 70, 0, 0, - 0, 71, 0, 0, 0, 0, 0, 0, 72, 0, 73, 74, 0, 75, 0, 0, - 76, 77, 42, 78, 79, 80, 0, 81, 0, 82, 0, 83, 0, 0, 84, 85, - 0, 86, 6, 87, 88, 6, 6, 89, 0, 0, 0, 0, 0, 90, 91, 92, - 93, 94, 0, 95, 96, 0, 5, 97, 0, 0, 0, 98, 0, 0, 0, 99, - 0, 0, 0, 100, 0, 0, 0, 6, 0, 101, 0, 0, 0, 0, 0, 0, - 102, 103, 0, 0, 104, 0, 0, 105, 106, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 83, 107, 0, 0, 108, 109, 0, 0, 110, - 6, 79, 0, 17, 111, 0, 0, 52, 112, 69, 0, 0, 0, 0, 113, 114, - 0, 115, 116, 0, 28, 117, 101, 69, 0, 118, 119, 120, 0, 121, 122, 123, - 0, 0, 88, 0, 0, 0, 0, 124, 2, 0, 0, 0, 0, 125, 79, 0, - 126, 127, 128, 0, 0, 0, 0, 129, 1, 2, 3, 17, 44, 0, 0, 130, - 0, 0, 0, 0, 0, 0, 0, 131, 0, 0, 0, 0, 0, 0, 0, 3, - 0, 0, 0, 132, 0, 0, 0, 0, 133, 134, 0, 0, 0, 0, 0, 69, - 32, 135, 136, 129, 79, 137, 0, 0, 28, 138, 0, 139, 79, 140, 141, 0, - 0, 142, 0, 0, 0, 0, 129, 143, 79, 33, 3, 144, 0, 0, 0, 0, - 0, 135, 145, 0, 0, 146, 147, 0, 0, 0, 0, 0, 0, 148, 149, 0, - 0, 150, 3, 0, 0, 151, 0, 0, 62, 152, 0, 0, 0, 0, 0, 0, - 0, 153, 0, 0, 125, 154, 0, 0, 0, 0, 0, 0, 0, 0, 0, 155, - 0, 156, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 157, 0, 0, 3, - 0, 0, 0, 0, 158, 76, 0, 0, 0, 0, 0, 159, 160, 161, 0, 0, - 0, 0, 162, 0, 0, 0, 0, 0, 6, 163, 6, 164, 165, 166, 0, 0, - 167, 168, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 156, 0, - 0, 0, 169, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, - 32, 6, 6, 6, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 127, -}; - -static RE_UINT8 re_case_ignorable_stage_5[] = { - 0, 0, 0, 0, 128, 64, 0, 4, 0, 0, 0, 64, 1, 0, 0, 0, - 0, 161, 144, 1, 0, 0, 255, 255, 255, 255, 255, 
255, 255, 255, 48, 4, - 176, 0, 0, 0, 248, 3, 0, 0, 0, 0, 0, 2, 0, 0, 254, 255, - 255, 255, 255, 191, 182, 0, 0, 0, 0, 0, 16, 0, 63, 0, 255, 23, - 1, 248, 255, 255, 0, 0, 1, 0, 0, 0, 192, 191, 255, 61, 0, 0, - 0, 128, 2, 0, 255, 7, 0, 0, 192, 255, 1, 0, 0, 248, 63, 4, - 0, 0, 192, 255, 255, 63, 0, 0, 0, 0, 0, 14, 0, 0, 240, 255, - 7, 0, 0, 0, 0, 0, 0, 20, 254, 33, 254, 0, 12, 0, 2, 0, - 2, 0, 0, 0, 0, 0, 0, 16, 30, 32, 0, 0, 12, 0, 0, 0, - 6, 0, 0, 0, 134, 57, 2, 0, 0, 0, 35, 0, 190, 33, 0, 0, - 0, 0, 0, 144, 30, 32, 64, 0, 4, 0, 0, 0, 1, 32, 0, 0, - 0, 0, 0, 192, 193, 61, 96, 0, 64, 48, 0, 0, 0, 4, 92, 0, - 0, 0, 242, 7, 192, 127, 0, 0, 0, 0, 242, 27, 64, 63, 0, 0, - 0, 0, 0, 3, 0, 0, 160, 2, 0, 0, 254, 127, 223, 224, 255, 254, - 255, 255, 255, 31, 64, 0, 0, 0, 0, 224, 253, 102, 0, 0, 0, 195, - 1, 0, 30, 0, 100, 32, 0, 32, 0, 0, 0, 224, 0, 0, 28, 0, - 0, 0, 12, 0, 0, 0, 176, 63, 64, 254, 143, 32, 0, 120, 0, 0, - 8, 0, 0, 0, 96, 0, 0, 0, 0, 2, 0, 0, 135, 1, 4, 14, - 0, 0, 128, 9, 0, 0, 64, 127, 229, 31, 248, 159, 128, 0, 255, 127, - 15, 0, 0, 0, 0, 0, 208, 23, 0, 248, 15, 0, 3, 0, 0, 0, - 60, 59, 0, 0, 64, 163, 3, 0, 0, 240, 207, 0, 0, 0, 0, 63, - 0, 0, 247, 255, 253, 33, 16, 3, 0, 240, 255, 255, 255, 7, 0, 1, - 0, 0, 0, 248, 255, 255, 63, 248, 0, 0, 0, 160, 3, 224, 0, 224, - 0, 224, 0, 96, 0, 248, 0, 3, 144, 124, 0, 0, 223, 255, 2, 128, - 0, 0, 255, 31, 255, 255, 1, 0, 0, 0, 0, 48, 0, 128, 3, 0, - 0, 128, 0, 128, 0, 128, 0, 0, 32, 0, 0, 0, 0, 60, 62, 8, - 0, 0, 0, 126, 0, 0, 0, 112, 0, 0, 32, 0, 0, 16, 0, 0, - 0, 128, 247, 191, 0, 0, 0, 240, 0, 0, 3, 0, 0, 7, 0, 0, - 68, 8, 0, 0, 48, 0, 0, 0, 255, 255, 3, 0, 192, 63, 0, 0, - 128, 255, 3, 0, 0, 0, 200, 19, 0, 126, 102, 0, 8, 16, 0, 0, - 0, 0, 1, 16, 0, 0, 157, 193, 2, 0, 0, 32, 0, 48, 88, 0, - 32, 33, 0, 0, 0, 0, 252, 255, 255, 255, 8, 0, 255, 255, 0, 0, - 0, 0, 36, 0, 0, 0, 0, 128, 8, 0, 0, 14, 0, 0, 0, 32, - 0, 0, 192, 7, 110, 240, 0, 0, 0, 0, 0, 135, 0, 0, 0, 255, - 127, 0, 0, 0, 0, 0, 120, 38, 128, 239, 31, 0, 
0, 0, 8, 0, - 0, 0, 192, 127, 0, 28, 0, 0, 0, 128, 211, 64, 248, 7, 0, 0, - 192, 31, 31, 0, 92, 0, 0, 0, 0, 0, 248, 133, 13, 0, 0, 0, - 0, 0, 60, 176, 1, 0, 0, 48, 0, 0, 248, 167, 0, 40, 191, 0, - 188, 15, 0, 0, 0, 0, 127, 191, 255, 252, 109, 0, 0, 0, 31, 0, - 0, 0, 127, 0, 0, 128, 255, 255, 0, 0, 0, 96, 128, 3, 248, 255, - 231, 15, 0, 0, 0, 60, 0, 0, 28, 0, 0, 0, 255, 255, 127, 248, - 255, 31, 32, 0, 16, 0, 0, 248, 254, 255, 0, 0, 127, 255, 255, 249, - 219, 7, 0, 0, 240, 7, 0, 0, -}; - -/* Case_Ignorable: 1538 bytes. */ - -RE_UINT32 re_get_case_ignorable(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_case_ignorable_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_case_ignorable_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_case_ignorable_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_case_ignorable_stage_4[pos + f] << 5; - pos += code; - value = (re_case_ignorable_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Changes_When_Lowercased. 
*/ - -static RE_UINT8 re_changes_when_lowercased_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_changes_when_lowercased_stage_2[] = { - 0, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, - 8, 9, 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 11, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_changes_when_lowercased_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 6, 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 10, - 6, 11, 6, 6, 12, 6, 6, 6, 6, 6, 6, 6, 13, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 14, 15, 6, 6, 6, 6, 6, 6, 6, 16, - 6, 6, 6, 6, 17, 6, 6, 6, 6, 6, 6, 6, 18, 6, 6, 6, - 19, 6, 6, 6, 6, 6, 6, 6, 6, 20, 6, 6, 6, 6, 6, 6, -}; - -static RE_UINT8 re_changes_when_lowercased_stage_4[] = { - 0, 0, 1, 0, 0, 0, 2, 0, 3, 4, 5, 6, 7, 8, 9, 10, - 3, 11, 12, 0, 0, 0, 0, 0, 0, 0, 0, 13, 14, 15, 16, 17, - 18, 19, 0, 3, 20, 3, 21, 3, 3, 22, 23, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 24, 0, - 0, 0, 0, 0, 0, 18, 18, 25, 3, 3, 3, 3, 26, 3, 3, 3, - 27, 28, 29, 30, 28, 31, 32, 33, 0, 34, 0, 19, 35, 0, 0, 0, - 0, 0, 0, 0, 0, 36, 19, 0, 18, 37, 0, 38, 3, 3, 3, 39, - 0, 0, 3, 40, 41, 0, 0, 0, 0, 42, 3, 43, 44, 45, 0, 0, - 0, 1, 0, 0, 0, 0, 0, 0, 18, 46, 0, 0, 0, 47, 48, 0, - 0, 0, 0, 0, 18, 49, 0, 0, 0, 0, 0, 0, 0, 18, 0, 0, - 18, 50, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_changes_when_lowercased_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 255, 255, 127, 127, 85, 85, 85, 85, - 85, 85, 85, 170, 170, 84, 85, 85, 85, 85, 85, 43, 214, 206, 219, 177, - 213, 210, 174, 17, 176, 173, 170, 74, 85, 85, 214, 85, 85, 85, 5, 108, - 122, 85, 0, 0, 0, 0, 69, 128, 64, 215, 254, 255, 251, 15, 0, 0, - 0, 128, 0, 85, 85, 85, 144, 230, 255, 255, 255, 255, 255, 255, 0, 0, - 1, 84, 85, 85, 171, 42, 85, 85, 85, 85, 254, 255, 255, 255, 127, 0, - 191, 32, 0, 
0, 255, 255, 63, 0, 85, 85, 21, 64, 0, 255, 0, 63, - 0, 255, 0, 255, 0, 63, 0, 170, 0, 255, 0, 0, 0, 255, 0, 31, - 0, 31, 0, 15, 0, 31, 0, 31, 64, 12, 4, 0, 8, 0, 0, 0, - 0, 0, 192, 255, 255, 127, 0, 0, 157, 234, 37, 192, 5, 40, 4, 0, - 85, 21, 0, 0, 85, 85, 85, 5, 84, 85, 84, 85, 85, 85, 0, 106, - 85, 40, 69, 85, 85, 125, 95, 0, 255, 0, 0, 0, 0, 0, 255, 255, - 255, 255, 15, 0, 255, 255, 7, 0, 3, 0, 0, 0, -}; - -/* Changes_When_Lowercased: 581 bytes. */ - -RE_UINT32 re_get_changes_when_lowercased(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_changes_when_lowercased_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_changes_when_lowercased_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_changes_when_lowercased_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_changes_when_lowercased_stage_4[pos + f] << 5; - pos += code; - value = (re_changes_when_lowercased_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Changes_When_Uppercased. 
*/ - -static RE_UINT8 re_changes_when_uppercased_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_changes_when_uppercased_stage_2[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 5, - 6, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 8, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -}; - -static RE_UINT8 re_changes_when_uppercased_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, 10, 11, - 6, 12, 6, 6, 13, 6, 6, 6, 6, 6, 6, 6, 14, 15, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 16, 17, 6, 6, 6, 18, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 19, 6, 6, 6, 20, - 6, 6, 6, 6, 21, 6, 6, 6, 6, 6, 6, 6, 22, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 23, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 24, 6, 6, 6, 6, 6, 6, -}; - -static RE_UINT8 re_changes_when_uppercased_stage_4[] = { - 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 5, 13, 14, 15, 16, 0, 0, 0, 0, 0, 17, 18, 19, 20, 21, 22, - 0, 23, 24, 5, 25, 5, 26, 5, 5, 27, 0, 28, 29, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, - 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, - 5, 5, 5, 5, 33, 5, 5, 5, 34, 35, 36, 37, 24, 38, 39, 40, - 0, 0, 41, 23, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 43, - 0, 23, 44, 45, 5, 5, 5, 46, 24, 47, 0, 0, 0, 0, 0, 0, - 0, 0, 5, 48, 49, 0, 0, 0, 0, 50, 5, 51, 52, 53, 0, 0, - 0, 0, 54, 23, 24, 24, 0, 0, 55, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 56, 57, 0, 0, 0, 58, 59, - 0, 0, 0, 0, 0, 0, 24, 60, 0, 0, 0, 0, 0, 0, 24, 0, - 0, 61, 62, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_changes_when_uppercased_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 0, 0, 0, 128, - 255, 255, 127, 255, 170, 170, 170, 170, 170, 170, 170, 84, 85, 171, 170, 170, - 170, 170, 170, 212, 41, 17, 36, 70, 42, 33, 81, 162, 96, 91, 85, 181, - 170, 170, 45, 170, 168, 170, 10, 144, 133, 170, 223, 26, 107, 159, 38, 32, - 137, 31, 4, 96, 32, 0, 0, 0, 0, 
0, 138, 56, 0, 0, 1, 0, - 0, 240, 255, 255, 255, 127, 227, 170, 170, 170, 47, 9, 0, 0, 255, 255, - 255, 255, 255, 255, 2, 168, 170, 170, 84, 213, 170, 170, 170, 170, 0, 0, - 254, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 63, 255, 1, 0, 0, - 0, 0, 0, 34, 170, 170, 234, 15, 255, 0, 63, 0, 255, 0, 255, 0, - 63, 0, 255, 0, 255, 0, 255, 63, 255, 255, 223, 80, 220, 16, 207, 0, - 255, 0, 220, 16, 0, 64, 0, 0, 16, 0, 0, 0, 255, 3, 0, 0, - 255, 255, 255, 127, 98, 21, 72, 0, 10, 80, 8, 0, 191, 32, 0, 0, - 170, 42, 0, 0, 170, 170, 170, 10, 168, 170, 168, 170, 170, 170, 0, 148, - 170, 16, 138, 170, 170, 2, 160, 0, 0, 0, 8, 0, 127, 0, 248, 0, - 0, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 255, 255, 255, 255, 15, - 255, 255, 7, 0, 252, 255, 255, 255, 15, 0, 0, 0, -}; - -/* Changes_When_Uppercased: 661 bytes. */ - -RE_UINT32 re_get_changes_when_uppercased(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_changes_when_uppercased_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_changes_when_uppercased_stage_2[pos + f] << 4; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_changes_when_uppercased_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_changes_when_uppercased_stage_4[pos + f] << 5; - pos += code; - value = (re_changes_when_uppercased_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Changes_When_Titlecased. 
*/ - -static RE_UINT8 re_changes_when_titlecased_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_changes_when_titlecased_stage_2[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 5, - 6, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 8, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -}; - -static RE_UINT8 re_changes_when_titlecased_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, 10, 11, - 6, 12, 6, 6, 13, 6, 6, 6, 6, 6, 6, 6, 14, 15, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 16, 17, 6, 6, 6, 18, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 19, 6, 6, 6, 20, - 6, 6, 6, 6, 21, 6, 6, 6, 6, 6, 6, 6, 22, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 23, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 24, 6, 6, 6, 6, 6, 6, -}; - -static RE_UINT8 re_changes_when_titlecased_stage_4[] = { - 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 5, 13, 14, 15, 16, 0, 0, 0, 0, 0, 17, 18, 19, 20, 21, 22, - 0, 23, 24, 5, 25, 5, 26, 5, 5, 27, 0, 28, 29, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, - 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, - 5, 5, 5, 5, 33, 5, 5, 5, 34, 35, 36, 37, 35, 38, 39, 40, - 0, 0, 41, 23, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 43, - 0, 23, 44, 45, 5, 5, 5, 46, 24, 47, 0, 0, 0, 0, 0, 0, - 0, 0, 5, 48, 49, 0, 0, 0, 0, 50, 5, 51, 52, 53, 0, 0, - 0, 0, 54, 23, 24, 24, 0, 0, 55, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 56, 57, 0, 0, 0, 58, 59, - 0, 0, 0, 0, 0, 0, 24, 60, 0, 0, 0, 0, 0, 0, 24, 0, - 0, 61, 62, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_changes_when_titlecased_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 0, 0, 0, 128, - 255, 255, 127, 255, 170, 170, 170, 170, 170, 170, 170, 84, 85, 171, 170, 170, - 170, 170, 170, 212, 41, 17, 36, 70, 42, 33, 81, 162, 208, 86, 85, 181, - 170, 170, 43, 170, 168, 170, 10, 144, 133, 170, 223, 26, 107, 159, 38, 32, - 137, 31, 4, 96, 32, 0, 0, 0, 
0, 0, 138, 56, 0, 0, 1, 0, - 0, 240, 255, 255, 255, 127, 227, 170, 170, 170, 47, 9, 0, 0, 255, 255, - 255, 255, 255, 255, 2, 168, 170, 170, 84, 213, 170, 170, 170, 170, 0, 0, - 254, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 63, 255, 1, 0, 0, - 0, 0, 0, 34, 170, 170, 234, 15, 255, 0, 63, 0, 255, 0, 255, 0, - 63, 0, 255, 0, 255, 0, 255, 63, 255, 0, 223, 64, 220, 0, 207, 0, - 255, 0, 220, 0, 0, 64, 0, 0, 16, 0, 0, 0, 255, 3, 0, 0, - 255, 255, 255, 127, 98, 21, 72, 0, 10, 80, 8, 0, 191, 32, 0, 0, - 170, 42, 0, 0, 170, 170, 170, 10, 168, 170, 168, 170, 170, 170, 0, 148, - 170, 16, 138, 170, 170, 2, 160, 0, 0, 0, 8, 0, 127, 0, 248, 0, - 0, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 255, 255, 255, 255, 15, - 255, 255, 7, 0, 252, 255, 255, 255, 15, 0, 0, 0, -}; - -/* Changes_When_Titlecased: 661 bytes. */ - -RE_UINT32 re_get_changes_when_titlecased(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_changes_when_titlecased_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_changes_when_titlecased_stage_2[pos + f] << 4; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_changes_when_titlecased_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_changes_when_titlecased_stage_4[pos + f] << 5; - pos += code; - value = (re_changes_when_titlecased_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Changes_When_Casefolded. 
*/ - -static RE_UINT8 re_changes_when_casefolded_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_changes_when_casefolded_stage_2[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 5, - 6, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 8, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -}; - -static RE_UINT8 re_changes_when_casefolded_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 6, 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 9, 6, 10, 11, - 6, 12, 6, 6, 13, 6, 6, 6, 6, 6, 6, 6, 14, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 15, 16, 6, 6, 6, 17, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 18, 6, 6, 6, 19, - 6, 6, 6, 6, 20, 6, 6, 6, 6, 6, 6, 6, 21, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 22, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 23, 6, 6, 6, 6, 6, 6, -}; - -static RE_UINT8 re_changes_when_casefolded_stage_4[] = { - 0, 0, 1, 0, 0, 2, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, - 4, 12, 13, 0, 0, 0, 0, 0, 0, 0, 14, 15, 16, 17, 18, 19, - 20, 21, 0, 4, 22, 4, 23, 4, 4, 24, 25, 0, 26, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 27, 0, - 0, 0, 0, 0, 0, 0, 0, 28, 0, 0, 0, 0, 29, 0, 0, 0, - 4, 4, 4, 4, 30, 4, 4, 4, 31, 32, 33, 34, 20, 35, 36, 37, - 0, 38, 0, 21, 39, 0, 0, 0, 0, 0, 0, 0, 0, 40, 21, 0, - 20, 41, 0, 42, 4, 4, 4, 43, 0, 0, 4, 44, 45, 0, 0, 0, - 0, 46, 4, 47, 48, 49, 0, 0, 0, 0, 0, 50, 20, 20, 0, 0, - 51, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 20, 52, 0, 0, 0, 50, 53, 0, 0, 0, 0, 0, 20, 54, 0, 0, - 0, 0, 0, 0, 0, 20, 0, 0, 20, 55, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_changes_when_casefolded_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 255, 255, 127, 255, - 85, 85, 85, 85, 85, 85, 85, 170, 170, 86, 85, 85, 85, 85, 85, 171, - 214, 206, 219, 177, 213, 210, 174, 17, 176, 173, 170, 74, 85, 85, 214, 85, - 85, 85, 5, 108, 122, 85, 0, 0, 32, 0, 0, 0, 0, 0, 69, 128, - 64, 215, 254, 255, 251, 15, 0, 0, 4, 128, 99, 85, 85, 85, 179, 230, - 255, 255, 
255, 255, 255, 255, 0, 0, 1, 84, 85, 85, 171, 42, 85, 85, - 85, 85, 254, 255, 255, 255, 127, 0, 128, 0, 0, 0, 191, 32, 0, 0, - 0, 0, 0, 63, 255, 1, 0, 0, 85, 85, 21, 76, 0, 255, 0, 63, - 0, 255, 0, 255, 0, 63, 0, 170, 0, 255, 0, 0, 255, 255, 156, 31, - 156, 31, 0, 15, 0, 31, 156, 31, 64, 12, 4, 0, 8, 0, 0, 0, - 0, 0, 192, 255, 255, 127, 0, 0, 157, 234, 37, 192, 5, 40, 4, 0, - 85, 21, 0, 0, 85, 85, 85, 5, 84, 85, 84, 85, 85, 85, 0, 106, - 85, 40, 69, 85, 85, 125, 95, 0, 0, 0, 255, 255, 127, 0, 248, 0, - 255, 0, 0, 0, 255, 255, 15, 0, 255, 255, 7, 0, 3, 0, 0, 0, -}; - -/* Changes_When_Casefolded: 625 bytes. */ - -RE_UINT32 re_get_changes_when_casefolded(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_changes_when_casefolded_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_changes_when_casefolded_stage_2[pos + f] << 4; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_changes_when_casefolded_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_changes_when_casefolded_stage_4[pos + f] << 5; - pos += code; - value = (re_changes_when_casefolded_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Changes_When_Casemapped. 
*/ - -static RE_UINT8 re_changes_when_casemapped_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_changes_when_casemapped_stage_2[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 5, - 6, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 8, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -}; - -static RE_UINT8 re_changes_when_casemapped_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 6, 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 9, 10, 11, 12, - 6, 13, 6, 6, 14, 6, 6, 6, 6, 6, 6, 6, 15, 16, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 17, 18, 6, 6, 6, 19, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 20, 6, 6, 6, 21, - 6, 6, 6, 6, 22, 6, 6, 6, 6, 6, 6, 6, 23, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 24, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 25, 6, 6, 6, 6, 6, 6, -}; - -static RE_UINT8 re_changes_when_casemapped_stage_4[] = { - 0, 0, 1, 1, 0, 2, 3, 3, 4, 5, 4, 4, 6, 7, 8, 4, - 4, 9, 10, 11, 12, 0, 0, 0, 0, 0, 13, 14, 15, 16, 17, 18, - 4, 4, 4, 4, 19, 4, 4, 4, 4, 20, 21, 22, 23, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 24, 0, - 0, 0, 0, 0, 0, 4, 4, 25, 0, 0, 0, 0, 26, 0, 0, 0, - 0, 0, 0, 27, 0, 0, 0, 0, 4, 4, 4, 4, 28, 4, 4, 4, - 25, 4, 29, 30, 4, 31, 32, 33, 0, 34, 35, 4, 36, 0, 0, 0, - 0, 0, 0, 0, 0, 37, 4, 38, 4, 39, 40, 41, 4, 4, 4, 42, - 4, 24, 0, 0, 0, 0, 0, 0, 0, 0, 4, 43, 44, 0, 0, 0, - 0, 45, 4, 46, 47, 48, 0, 0, 0, 0, 49, 50, 4, 4, 0, 0, - 51, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, - 4, 4, 52, 0, 0, 50, 53, 44, 0, 0, 0, 0, 4, 54, 4, 54, - 0, 0, 0, 0, 0, 4, 4, 0, 4, 4, 55, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_changes_when_casemapped_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 255, 255, 127, 255, - 255, 255, 255, 255, 255, 255, 255, 254, 255, 223, 255, 247, 255, 243, 255, 179, - 240, 255, 255, 255, 253, 255, 15, 252, 255, 255, 223, 26, 107, 159, 38, 32, - 137, 31, 4, 96, 32, 0, 0, 0, 0, 0, 207, 184, 64, 215, 255, 255, - 251, 255, 255, 
255, 255, 255, 227, 255, 255, 255, 191, 239, 3, 252, 255, 255, - 255, 255, 254, 255, 255, 255, 127, 0, 254, 255, 255, 255, 255, 0, 0, 0, - 191, 32, 0, 0, 255, 255, 63, 63, 255, 1, 0, 0, 0, 0, 0, 34, - 255, 255, 255, 79, 63, 63, 255, 170, 255, 255, 255, 63, 255, 255, 223, 95, - 220, 31, 207, 15, 255, 31, 220, 31, 64, 12, 4, 0, 0, 64, 0, 0, - 24, 0, 0, 0, 0, 0, 192, 255, 255, 3, 0, 0, 255, 127, 255, 255, - 255, 255, 255, 127, 255, 255, 109, 192, 15, 120, 12, 0, 255, 63, 0, 0, - 255, 255, 255, 15, 252, 255, 252, 255, 255, 255, 0, 254, 255, 56, 207, 255, - 255, 127, 255, 0, 0, 0, 8, 0, 0, 0, 255, 255, 127, 0, 248, 0, - 255, 255, 0, 0, 255, 255, 15, 255, 255, 255, 7, 0, 15, 0, 0, 0, -}; - -/* Changes_When_Casemapped: 641 bytes. */ - -RE_UINT32 re_get_changes_when_casemapped(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_changes_when_casemapped_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_changes_when_casemapped_stage_2[pos + f] << 4; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_changes_when_casemapped_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_changes_when_casemapped_stage_4[pos + f] << 5; - pos += code; - value = (re_changes_when_casemapped_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* ID_Start. 
*/ - -static RE_UINT8 re_id_start_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, -}; - -static RE_UINT8 re_id_start_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 18, 19, 13, 20, 13, 21, 13, 13, 13, 13, 22, 7, 7, - 23, 24, 13, 13, 13, 13, 25, 26, 13, 13, 27, 13, 13, 28, 13, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 29, 7, 30, 31, 7, 32, 13, 13, 13, 13, 13, 33, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, -}; - -static RE_UINT8 re_id_start_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, - 29, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 31, 31, - 34, 35, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, - 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 46, 47, 1, 48, 49, 50, 51, 52, 53, 54, 55, 56, 1, 57, - 58, 59, 60, 61, 62, 31, 31, 31, 63, 64, 65, 66, 67, 68, 69, 70, - 71, 31, 72, 31, 73, 31, 31, 31, 1, 1, 1, 74, 75, 76, 31, 31, - 1, 1, 1, 1, 77, 31, 31, 31, 31, 31, 31, 31, 1, 1, 78, 31, - 1, 1, 79, 80, 31, 31, 31, 81, 1, 1, 1, 1, 1, 1, 1, 82, - 1, 1, 83, 31, 31, 31, 31, 31, 84, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 85, 31, 31, 31, 31, 31, 31, 31, 86, 87, 88, 89, - 90, 76, 31, 31, 31, 31, 91, 31, 1, 1, 1, 1, 1, 1, 92, 1, - 1, 1, 1, 1, 1, 1, 1, 93, 94, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 95, 31, 1, 1, 96, 31, 31, 31, 31, 31, -}; - -static RE_UINT8 re_id_start_stage_4[] = { - 0, 0, 1, 1, 0, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 5, 6, 0, 0, 0, 7, 8, 9, 4, 10, - 4, 4, 4, 4, 11, 4, 4, 4, 4, 12, 13, 14, 15, 0, 16, 17, - 0, 4, 18, 19, 4, 4, 20, 21, 22, 23, 24, 4, 4, 25, 26, 27, - 28, 29, 30, 0, 0, 31, 0, 0, 32, 
33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 48, 45, 49, 50, 51, 52, 46, 0, - 53, 54, 55, 56, 57, 58, 59, 60, 53, 61, 62, 63, 64, 65, 66, 0, - 14, 67, 66, 0, 68, 69, 70, 0, 71, 0, 72, 73, 74, 0, 0, 0, - 4, 75, 76, 77, 78, 4, 79, 80, 4, 4, 81, 4, 82, 83, 84, 4, - 85, 4, 86, 0, 23, 4, 4, 87, 14, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 88, 1, 4, 4, 89, 90, 91, 91, 92, 4, 93, 94, 0, - 0, 4, 4, 95, 4, 96, 4, 97, 98, 0, 16, 99, 4, 100, 101, 0, - 102, 4, 103, 0, 0, 104, 0, 0, 105, 93, 106, 0, 107, 108, 4, 109, - 4, 110, 111, 112, 113, 0, 0, 114, 4, 4, 4, 4, 4, 4, 0, 0, - 87, 4, 115, 112, 4, 116, 117, 118, 0, 0, 0, 119, 120, 0, 0, 0, - 121, 122, 123, 4, 113, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 124, 98, 4, 4, 4, 4, 125, 4, 79, 4, 126, 102, 127, 127, 0, - 128, 129, 14, 4, 130, 14, 4, 80, 105, 131, 4, 4, 132, 86, 0, 16, - 4, 4, 4, 4, 4, 97, 0, 0, 4, 4, 4, 4, 4, 4, 97, 0, - 4, 4, 4, 4, 73, 0, 16, 112, 133, 134, 4, 135, 112, 4, 4, 23, - 136, 137, 4, 4, 138, 139, 0, 136, 140, 141, 4, 93, 137, 93, 0, 142, - 26, 143, 66, 144, 32, 145, 146, 147, 4, 113, 148, 149, 4, 150, 151, 152, - 153, 154, 80, 143, 4, 4, 4, 141, 4, 4, 4, 4, 4, 155, 156, 157, - 4, 4, 4, 158, 4, 4, 159, 0, 160, 161, 162, 4, 4, 91, 163, 4, - 4, 112, 16, 4, 164, 4, 15, 165, 0, 0, 0, 166, 4, 4, 4, 144, - 0, 1, 1, 167, 4, 98, 168, 0, 169, 170, 171, 0, 4, 4, 4, 86, - 0, 0, 4, 103, 0, 0, 0, 0, 0, 0, 0, 0, 144, 4, 172, 0, - 4, 16, 173, 97, 112, 4, 174, 0, 4, 4, 4, 4, 112, 16, 175, 157, - 4, 176, 4, 110, 0, 0, 0, 0, 4, 102, 97, 15, 0, 0, 0, 0, - 177, 178, 97, 102, 98, 0, 0, 179, 97, 159, 0, 0, 4, 180, 0, 0, - 181, 93, 0, 144, 144, 0, 72, 182, 4, 97, 97, 145, 91, 0, 0, 0, - 4, 4, 113, 0, 4, 145, 4, 145, 107, 95, 0, 0, 107, 23, 16, 113, - 107, 66, 16, 183, 107, 145, 184, 0, 185, 186, 0, 0, 187, 188, 98, 0, - 48, 45, 189, 56, 0, 0, 0, 0, 4, 103, 190, 0, 4, 23, 191, 0, - 0, 0, 0, 0, 4, 132, 192, 0, 4, 23, 193, 0, 4, 18, 0, 0, - 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 194, - 0, 0, 0, 0, 0, 0, 4, 30, 195, 132, 
71, 196, 23, 0, 0, 0, - 4, 4, 4, 4, 159, 0, 0, 0, 4, 4, 4, 132, 4, 4, 4, 4, - 4, 4, 110, 0, 0, 0, 0, 0, 4, 132, 0, 0, 0, 0, 0, 0, - 4, 4, 66, 0, 0, 0, 0, 0, 4, 30, 98, 0, 0, 0, 16, 197, - 4, 23, 110, 198, 23, 0, 0, 0, 4, 4, 199, 0, 163, 0, 0, 71, - 4, 4, 4, 4, 4, 4, 4, 73, 4, 4, 4, 4, 4, 4, 4, 145, - 56, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 200, 201, 0, 0, 0, - 4, 4, 202, 4, 203, 204, 205, 4, 206, 207, 208, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 209, 210, 80, 202, 202, 124, 124, 195, 195, 148, 0, - 4, 4, 4, 4, 4, 4, 182, 0, 205, 211, 212, 213, 214, 215, 0, 0, - 4, 4, 4, 4, 4, 4, 102, 0, 4, 103, 4, 4, 4, 4, 4, 4, - 112, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 56, 0, 0, - 112, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_id_start_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 255, 255, 127, 255, - 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, 0, 0, 223, 188, - 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, 3, 252, 255, 255, - 255, 255, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, 255, 0, 0, 0, - 0, 0, 255, 255, 255, 7, 7, 0, 255, 7, 0, 0, 0, 192, 254, 255, - 255, 255, 47, 0, 96, 192, 0, 156, 0, 0, 253, 255, 255, 255, 0, 0, - 0, 224, 255, 255, 63, 0, 2, 0, 0, 252, 255, 255, 255, 7, 48, 4, - 255, 255, 63, 4, 16, 1, 0, 0, 255, 255, 255, 1, 255, 255, 223, 63, - 240, 255, 255, 255, 255, 255, 255, 35, 0, 0, 1, 255, 3, 0, 254, 255, - 225, 159, 249, 255, 255, 253, 197, 35, 0, 64, 0, 176, 3, 0, 3, 0, - 224, 135, 249, 255, 255, 253, 109, 3, 0, 0, 0, 94, 0, 0, 28, 0, - 224, 191, 251, 255, 255, 253, 237, 35, 0, 0, 1, 0, 3, 0, 0, 2, - 224, 159, 249, 255, 0, 0, 0, 176, 3, 0, 2, 0, 232, 199, 61, 214, - 24, 199, 255, 3, 224, 223, 253, 255, 255, 253, 255, 35, 0, 0, 0, 7, - 3, 0, 0, 0, 225, 223, 253, 255, 255, 253, 239, 35, 0, 0, 0, 64, - 3, 0, 6, 0, 255, 255, 255, 39, 0, 64, 112, 128, 3, 0, 0, 252, - 224, 255, 127, 252, 255, 255, 251, 47, 127, 0, 0, 0, 255, 255, 13, 0, - 150, 37, 240, 254, 174, 236, 13, 32, 95, 0, 0, 240, 1, 0, 0, 0, - 255, 254, 255, 255, 255, 31, 0, 
0, 0, 31, 0, 0, 255, 7, 0, 128, - 0, 0, 63, 60, 98, 192, 225, 255, 3, 64, 0, 0, 191, 32, 255, 255, - 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, - 61, 255, 127, 255, 255, 255, 61, 255, 255, 255, 255, 7, 255, 255, 63, 63, - 255, 159, 255, 255, 255, 199, 255, 1, 255, 223, 3, 0, 255, 255, 3, 0, - 255, 223, 1, 0, 255, 255, 15, 0, 0, 0, 128, 16, 255, 255, 255, 0, - 255, 5, 255, 255, 255, 255, 63, 0, 255, 255, 255, 127, 255, 63, 31, 0, - 255, 15, 255, 255, 255, 3, 0, 0, 255, 255, 127, 0, 255, 255, 31, 0, - 128, 0, 0, 0, 224, 255, 255, 255, 224, 15, 0, 0, 248, 255, 255, 255, - 1, 192, 0, 252, 63, 0, 0, 0, 15, 0, 0, 0, 0, 224, 0, 252, - 255, 255, 255, 63, 255, 1, 0, 0, 0, 222, 99, 0, 63, 63, 255, 170, - 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, 0, 0, 2, 128, - 0, 0, 255, 31, 132, 252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 0, - 255, 127, 255, 255, 31, 120, 12, 0, 255, 128, 0, 0, 127, 127, 127, 127, - 224, 0, 0, 0, 254, 3, 62, 31, 255, 255, 127, 248, 255, 63, 254, 255, - 255, 127, 0, 0, 255, 31, 255, 255, 0, 12, 0, 0, 255, 127, 0, 128, - 0, 0, 128, 255, 252, 255, 255, 255, 255, 249, 255, 255, 255, 127, 255, 0, - 187, 247, 255, 255, 7, 0, 0, 0, 0, 0, 252, 40, 63, 0, 255, 255, - 255, 255, 255, 31, 255, 255, 7, 0, 0, 128, 0, 0, 223, 255, 0, 124, - 247, 15, 0, 0, 255, 255, 127, 196, 255, 255, 98, 62, 5, 0, 0, 56, - 255, 7, 28, 0, 126, 126, 126, 0, 127, 127, 255, 255, 15, 0, 255, 255, - 127, 248, 255, 255, 255, 255, 255, 15, 255, 63, 255, 255, 255, 255, 255, 3, - 127, 0, 248, 160, 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, - 255, 255, 252, 255, 0, 0, 255, 15, 0, 0, 223, 255, 192, 255, 255, 255, - 252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, - 255, 255, 1, 0, 255, 7, 255, 255, 15, 255, 62, 0, 255, 255, 15, 255, - 255, 0, 255, 255, 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 55, 0, - 255, 255, 255, 192, 1, 0, 239, 254, 31, 0, 0, 0, 255, 255, 71, 0, - 30, 0, 0, 20, 255, 255, 251, 255, 255, 
15, 0, 0, 127, 189, 255, 191, - 255, 1, 255, 255, 0, 0, 1, 224, 128, 7, 0, 0, 176, 0, 0, 0, - 0, 0, 0, 15, 16, 0, 0, 0, 0, 0, 0, 128, 255, 253, 255, 255, - 0, 0, 252, 255, 255, 63, 0, 0, 248, 255, 255, 224, 31, 0, 1, 0, - 255, 7, 255, 31, 255, 1, 255, 3, 255, 255, 223, 255, 255, 255, 255, 223, - 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, - 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, 150, 254, 247, 10, - 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, 238, 251, 255, 15, -}; - -/* ID_Start: 2057 bytes. */ - -RE_UINT32 re_get_id_start(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_id_start_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_id_start_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_id_start_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_id_start_stage_4[pos + f] << 5; - pos += code; - value = (re_id_start_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* ID_Continue. 
*/ - -static RE_UINT8 re_id_continue_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, - 6, 6, -}; - -static RE_UINT8 re_id_continue_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 18, 19, 13, 20, 13, 21, 13, 13, 13, 13, 22, 7, 7, - 23, 24, 13, 13, 13, 13, 25, 26, 13, 13, 27, 28, 29, 30, 13, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 31, 7, 32, 33, 7, 34, 13, 13, 13, 13, 13, 35, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 36, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, -}; - -static RE_UINT8 re_id_continue_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, - 29, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 31, 31, - 34, 35, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, - 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 46, 47, 1, 48, 49, 50, 51, 52, 53, 54, 55, 56, 1, 57, - 58, 59, 60, 61, 62, 31, 31, 31, 63, 64, 65, 66, 67, 68, 69, 70, - 71, 31, 72, 31, 73, 31, 31, 31, 1, 1, 1, 74, 75, 76, 31, 31, - 1, 1, 1, 1, 77, 31, 31, 31, 31, 31, 31, 31, 1, 1, 78, 31, - 1, 1, 79, 80, 31, 31, 31, 81, 1, 1, 1, 1, 1, 1, 1, 82, - 1, 1, 83, 31, 31, 31, 31, 31, 84, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 85, 31, 31, 31, 31, 86, 87, 31, 88, 89, 90, 91, - 31, 31, 92, 31, 31, 31, 31, 31, 93, 31, 31, 31, 31, 31, 31, 31, - 94, 95, 31, 31, 31, 31, 96, 31, 1, 1, 1, 1, 1, 1, 97, 1, - 1, 1, 1, 1, 1, 1, 1, 98, 99, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 100, 31, 1, 1, 101, 31, 31, 31, 31, 31, - 31, 102, 31, 31, 31, 31, 31, 31, -}; - -static RE_UINT8 re_id_continue_stage_4[] = { - 0, 1, 2, 3, 0, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 7, 8, 6, 6, 6, 
9, 10, 11, 6, 12, - 6, 6, 6, 6, 13, 6, 6, 6, 6, 14, 15, 16, 17, 18, 19, 20, - 21, 6, 6, 22, 6, 6, 23, 24, 25, 6, 26, 6, 6, 27, 6, 28, - 6, 29, 30, 0, 0, 31, 32, 11, 6, 6, 6, 33, 34, 35, 36, 37, - 38, 39, 40, 41, 42, 43, 44, 45, 46, 43, 47, 48, 49, 50, 51, 52, - 53, 54, 55, 56, 53, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, - 16, 68, 69, 0, 70, 71, 72, 0, 73, 74, 75, 76, 77, 78, 79, 0, - 6, 6, 80, 6, 81, 6, 82, 83, 6, 6, 84, 6, 85, 86, 87, 6, - 88, 6, 61, 89, 90, 6, 6, 91, 16, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 92, 3, 6, 6, 93, 94, 95, 96, 97, 6, 6, 98, 99, - 100, 6, 6, 101, 6, 102, 6, 103, 104, 105, 106, 107, 6, 108, 109, 0, - 30, 6, 104, 110, 111, 112, 0, 0, 6, 6, 113, 114, 6, 6, 6, 96, - 6, 101, 115, 81, 116, 0, 117, 118, 6, 6, 6, 6, 6, 6, 6, 119, - 91, 6, 120, 81, 6, 121, 122, 123, 0, 124, 125, 126, 127, 0, 127, 128, - 129, 130, 131, 6, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 6, 132, 104, 6, 6, 6, 6, 133, 6, 82, 6, 134, 135, 136, 136, 6, - 137, 138, 16, 6, 139, 16, 6, 83, 140, 141, 6, 6, 142, 68, 0, 25, - 6, 6, 6, 6, 6, 103, 0, 0, 6, 6, 6, 6, 6, 6, 103, 0, - 6, 6, 6, 6, 143, 0, 25, 81, 144, 145, 6, 146, 6, 6, 6, 27, - 147, 148, 6, 6, 149, 150, 0, 147, 6, 151, 6, 96, 6, 6, 152, 153, - 6, 154, 96, 78, 6, 6, 155, 104, 6, 135, 156, 157, 6, 6, 158, 159, - 160, 161, 83, 162, 6, 6, 6, 163, 6, 6, 6, 6, 6, 164, 165, 30, - 6, 6, 6, 154, 6, 6, 166, 0, 167, 168, 169, 6, 6, 27, 170, 6, - 6, 81, 25, 6, 171, 6, 151, 172, 90, 173, 174, 175, 6, 6, 6, 78, - 1, 2, 3, 106, 6, 104, 176, 0, 177, 178, 179, 0, 6, 6, 6, 68, - 0, 0, 6, 95, 0, 0, 0, 180, 0, 0, 0, 0, 78, 6, 181, 182, - 6, 25, 102, 68, 81, 6, 183, 0, 6, 6, 6, 6, 81, 80, 184, 30, - 6, 185, 6, 186, 0, 0, 0, 0, 6, 135, 103, 151, 0, 0, 0, 0, - 187, 188, 103, 135, 104, 0, 0, 189, 103, 166, 0, 0, 6, 190, 0, 0, - 191, 192, 0, 78, 78, 0, 75, 193, 6, 103, 103, 194, 27, 0, 0, 0, - 6, 6, 116, 0, 6, 194, 6, 194, 6, 6, 193, 195, 6, 68, 25, 196, - 6, 197, 25, 198, 6, 6, 199, 0, 200, 201, 0, 0, 202, 203, 6, 204, - 34, 43, 205, 206, 0, 0, 0, 
0, 6, 6, 204, 0, 6, 6, 207, 0, - 0, 0, 0, 0, 6, 208, 209, 0, 6, 6, 210, 0, 6, 101, 99, 0, - 211, 113, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 212, - 0, 0, 0, 0, 0, 0, 6, 213, 214, 5, 215, 216, 171, 217, 0, 0, - 6, 6, 6, 6, 166, 0, 0, 0, 6, 6, 6, 142, 6, 6, 6, 6, - 6, 6, 186, 0, 0, 0, 0, 0, 6, 142, 0, 0, 0, 0, 0, 0, - 6, 6, 193, 0, 0, 0, 0, 0, 6, 213, 104, 99, 0, 0, 25, 107, - 6, 135, 218, 219, 90, 0, 0, 0, 6, 6, 220, 104, 221, 0, 0, 182, - 6, 6, 6, 6, 6, 6, 6, 143, 6, 6, 6, 6, 6, 6, 6, 194, - 222, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 223, 224, 0, 0, 0, - 0, 0, 0, 225, 226, 227, 0, 0, 0, 0, 228, 0, 0, 0, 0, 0, - 6, 6, 197, 6, 229, 230, 231, 6, 232, 233, 234, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 235, 236, 83, 197, 197, 132, 132, 214, 214, 237, 6, - 6, 238, 6, 239, 240, 241, 0, 0, 242, 243, 0, 0, 0, 0, 0, 0, - 6, 6, 6, 6, 6, 6, 244, 0, 6, 6, 204, 0, 0, 0, 0, 0, - 231, 245, 246, 247, 248, 249, 0, 0, 6, 6, 6, 6, 6, 6, 135, 0, - 6, 95, 6, 6, 6, 6, 6, 6, 81, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 222, 0, 0, 81, 0, 0, 0, 0, 0, 0, 0, - 6, 6, 6, 6, 6, 6, 6, 90, -}; - -static RE_UINT8 re_id_continue_stage_5[] = { - 0, 0, 0, 0, 0, 0, 255, 3, 254, 255, 255, 135, 254, 255, 255, 7, - 0, 4, 160, 4, 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, - 31, 80, 0, 0, 255, 255, 223, 188, 192, 215, 255, 255, 251, 255, 255, 255, - 255, 255, 191, 255, 251, 252, 255, 255, 255, 255, 254, 255, 255, 255, 127, 2, - 254, 255, 255, 255, 255, 0, 254, 255, 255, 255, 255, 191, 182, 0, 255, 255, - 255, 7, 7, 0, 0, 0, 255, 7, 255, 195, 255, 255, 255, 255, 239, 159, - 255, 253, 255, 159, 0, 0, 255, 255, 255, 231, 255, 255, 255, 255, 3, 0, - 255, 255, 63, 4, 255, 63, 0, 0, 255, 255, 255, 15, 255, 255, 223, 63, - 0, 0, 240, 255, 207, 255, 254, 255, 239, 159, 249, 255, 255, 253, 197, 243, - 159, 121, 128, 176, 207, 255, 3, 0, 238, 135, 249, 255, 255, 253, 109, 211, - 135, 57, 2, 94, 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 243, - 191, 59, 1, 0, 207, 255, 0, 2, 238, 159, 249, 255, 159, 57, 192, 176, - 
207, 255, 2, 0, 236, 199, 61, 214, 24, 199, 255, 195, 199, 61, 129, 0, - 192, 255, 0, 0, 239, 223, 253, 255, 255, 253, 255, 227, 223, 61, 96, 7, - 207, 255, 0, 0, 255, 253, 239, 243, 223, 61, 96, 64, 207, 255, 6, 0, - 238, 223, 253, 255, 255, 255, 255, 231, 223, 125, 240, 128, 207, 255, 0, 252, - 236, 255, 127, 252, 255, 255, 251, 47, 127, 132, 95, 255, 192, 255, 12, 0, - 255, 255, 255, 7, 255, 127, 255, 3, 150, 37, 240, 254, 174, 236, 255, 59, - 95, 63, 255, 243, 1, 0, 0, 3, 255, 3, 160, 194, 255, 254, 255, 255, - 255, 31, 254, 255, 223, 255, 255, 254, 255, 255, 255, 31, 64, 0, 0, 0, - 255, 3, 255, 255, 255, 255, 255, 63, 191, 32, 255, 255, 255, 255, 255, 247, - 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, - 255, 255, 61, 255, 0, 254, 3, 0, 255, 255, 0, 0, 255, 255, 63, 63, - 255, 159, 255, 255, 255, 199, 255, 1, 255, 223, 31, 0, 255, 255, 31, 0, - 255, 255, 15, 0, 255, 223, 13, 0, 255, 255, 143, 48, 255, 3, 0, 0, - 0, 56, 255, 3, 255, 255, 255, 0, 255, 7, 255, 255, 255, 255, 63, 0, - 255, 255, 255, 127, 255, 15, 255, 15, 192, 255, 255, 255, 255, 63, 31, 0, - 255, 15, 255, 255, 255, 3, 255, 7, 255, 255, 255, 159, 255, 3, 255, 3, - 128, 0, 255, 63, 255, 15, 255, 3, 0, 248, 15, 0, 255, 227, 255, 255, - 255, 1, 0, 0, 0, 0, 247, 255, 255, 255, 127, 3, 255, 255, 63, 248, - 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, - 0, 0, 0, 128, 1, 0, 16, 0, 0, 0, 2, 128, 0, 0, 255, 31, - 226, 255, 1, 0, 132, 252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 0, - 255, 127, 255, 255, 31, 248, 15, 0, 255, 128, 0, 128, 255, 255, 127, 0, - 127, 127, 127, 127, 224, 0, 0, 0, 254, 255, 62, 31, 255, 255, 127, 254, - 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 0, 0, - 255, 31, 255, 255, 255, 15, 0, 0, 255, 255, 240, 191, 0, 0, 128, 255, - 252, 255, 255, 255, 255, 249, 255, 255, 255, 127, 255, 0, 255, 0, 0, 0, - 63, 0, 255, 3, 255, 255, 255, 40, 255, 63, 255, 255, 1, 128, 255, 3, - 255, 63, 255, 3, 255, 255, 127, 252, 7, 
0, 0, 56, 255, 255, 124, 0, - 126, 126, 126, 0, 127, 127, 255, 255, 63, 0, 255, 255, 255, 55, 255, 3, - 15, 0, 255, 255, 127, 248, 255, 255, 255, 255, 255, 3, 127, 0, 248, 224, - 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, - 0, 0, 255, 15, 255, 255, 24, 0, 0, 224, 0, 0, 0, 0, 223, 255, - 252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, - 0, 0, 0, 32, 255, 255, 1, 0, 1, 0, 0, 0, 15, 255, 62, 0, - 255, 255, 15, 255, 255, 0, 255, 255, 15, 0, 0, 0, 63, 253, 255, 255, - 255, 255, 191, 145, 255, 255, 55, 0, 255, 255, 255, 192, 111, 240, 239, 254, - 255, 255, 15, 135, 127, 0, 0, 0, 255, 255, 7, 0, 192, 255, 0, 128, - 255, 1, 255, 3, 255, 255, 223, 255, 255, 255, 79, 0, 31, 28, 255, 23, - 255, 255, 251, 255, 255, 255, 255, 64, 127, 189, 255, 191, 255, 1, 255, 255, - 255, 7, 255, 3, 159, 57, 129, 224, 207, 31, 31, 0, 191, 0, 255, 3, - 255, 255, 63, 255, 1, 0, 0, 63, 17, 0, 255, 3, 255, 255, 255, 227, - 255, 3, 0, 128, 255, 255, 255, 1, 255, 253, 255, 255, 1, 0, 255, 3, - 0, 0, 252, 255, 255, 254, 127, 0, 15, 0, 255, 3, 248, 255, 255, 224, - 31, 0, 255, 255, 0, 128, 255, 255, 3, 0, 0, 0, 255, 7, 255, 31, - 255, 1, 255, 99, 224, 227, 7, 248, 231, 15, 0, 0, 0, 60, 0, 0, - 28, 0, 0, 0, 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, - 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, - 253, 255, 255, 247, 247, 207, 255, 255, 255, 255, 127, 248, 255, 31, 32, 0, - 16, 0, 0, 248, 254, 255, 0, 0, 127, 255, 255, 249, 219, 7, 0, 0, - 31, 0, 127, 0, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, - 255, 251, 255, 15, 238, 251, 255, 15, -}; - -/* ID_Continue: 2282 bytes. 
*/ - -RE_UINT32 re_get_id_continue(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_id_continue_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_id_continue_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_id_continue_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_id_continue_stage_4[pos + f] << 5; - pos += code; - value = (re_id_continue_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* XID_Start. */ - -static RE_UINT8 re_xid_start_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, -}; - -static RE_UINT8 re_xid_start_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 18, 19, 13, 20, 13, 21, 13, 13, 13, 13, 22, 7, 7, - 23, 24, 13, 13, 13, 13, 25, 26, 13, 13, 27, 13, 13, 28, 13, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 29, 7, 30, 31, 7, 32, 13, 13, 13, 13, 13, 33, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, -}; - -static RE_UINT8 re_xid_start_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, - 29, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 31, 31, - 34, 35, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, - 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 1, 58, - 59, 60, 61, 62, 63, 31, 31, 31, 64, 65, 66, 67, 68, 69, 70, 71, - 72, 31, 73, 31, 74, 31, 31, 31, 1, 1, 1, 75, 76, 77, 31, 31, - 1, 1, 1, 1, 78, 31, 31, 31, 31, 31, 31, 31, 1, 1, 79, 31, - 1, 1, 80, 81, 31, 31, 31, 82, 1, 1, 1, 1, 
1, 1, 1, 83, - 1, 1, 84, 31, 31, 31, 31, 31, 85, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 86, 31, 31, 31, 31, 31, 31, 31, 87, 88, 89, 90, - 91, 77, 31, 31, 31, 31, 92, 31, 1, 1, 1, 1, 1, 1, 93, 1, - 1, 1, 1, 1, 1, 1, 1, 94, 95, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 96, 31, 1, 1, 97, 31, 31, 31, 31, 31, -}; - -static RE_UINT8 re_xid_start_stage_4[] = { - 0, 0, 1, 1, 0, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 5, 6, 0, 0, 0, 7, 8, 9, 4, 10, - 4, 4, 4, 4, 11, 4, 4, 4, 4, 12, 13, 14, 15, 0, 16, 17, - 0, 4, 18, 19, 4, 4, 20, 21, 22, 23, 24, 4, 4, 25, 26, 27, - 28, 29, 30, 0, 0, 31, 0, 0, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 48, 45, 49, 50, 51, 52, 46, 0, - 53, 54, 55, 56, 57, 58, 59, 60, 53, 61, 62, 63, 64, 65, 66, 0, - 14, 67, 66, 0, 68, 69, 70, 0, 71, 0, 72, 73, 74, 0, 0, 0, - 4, 75, 76, 77, 78, 4, 79, 80, 4, 4, 81, 4, 82, 83, 84, 4, - 85, 4, 86, 0, 23, 4, 4, 87, 14, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 88, 1, 4, 4, 89, 90, 91, 91, 92, 4, 93, 94, 0, - 0, 4, 4, 95, 4, 96, 4, 97, 98, 0, 16, 99, 4, 100, 101, 0, - 102, 4, 103, 0, 0, 104, 0, 0, 105, 93, 106, 0, 107, 108, 4, 109, - 4, 110, 111, 112, 113, 0, 0, 114, 4, 4, 4, 4, 4, 4, 0, 0, - 87, 4, 115, 112, 4, 116, 117, 118, 0, 0, 0, 119, 120, 0, 0, 0, - 121, 122, 123, 4, 113, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 124, 98, 4, 4, 4, 4, 125, 4, 79, 4, 126, 102, 127, 127, 0, - 128, 129, 14, 4, 130, 14, 4, 80, 105, 131, 4, 4, 132, 86, 0, 16, - 4, 4, 4, 4, 4, 97, 0, 0, 4, 4, 4, 4, 4, 4, 97, 0, - 4, 4, 4, 4, 73, 0, 16, 112, 133, 134, 4, 135, 112, 4, 4, 23, - 136, 137, 4, 4, 138, 139, 0, 136, 140, 141, 4, 93, 137, 93, 0, 142, - 26, 143, 66, 144, 32, 145, 146, 147, 4, 113, 148, 149, 4, 150, 151, 152, - 153, 154, 80, 143, 4, 4, 4, 141, 4, 4, 4, 4, 4, 155, 156, 157, - 4, 4, 4, 158, 4, 4, 159, 0, 160, 161, 162, 4, 4, 91, 163, 4, - 4, 4, 112, 32, 4, 4, 4, 4, 4, 112, 16, 4, 164, 4, 15, 165, - 0, 0, 0, 166, 4, 4, 4, 144, 0, 1, 1, 167, 112, 98, 168, 0, - 169, 170, 171, 0, 4, 4, 4, 86, 0, 0, 4, 
103, 0, 0, 0, 0, - 0, 0, 0, 0, 144, 4, 172, 0, 4, 16, 173, 97, 112, 4, 174, 0, - 4, 4, 4, 4, 112, 16, 175, 157, 4, 176, 4, 110, 0, 0, 0, 0, - 4, 102, 97, 15, 0, 0, 0, 0, 177, 178, 97, 102, 98, 0, 0, 179, - 97, 159, 0, 0, 4, 180, 0, 0, 181, 93, 0, 144, 144, 0, 72, 182, - 4, 97, 97, 145, 91, 0, 0, 0, 4, 4, 113, 0, 4, 145, 4, 145, - 107, 95, 0, 0, 107, 23, 16, 113, 107, 66, 16, 183, 107, 145, 184, 0, - 185, 186, 0, 0, 187, 188, 98, 0, 48, 45, 189, 56, 0, 0, 0, 0, - 4, 103, 190, 0, 4, 23, 191, 0, 0, 0, 0, 0, 4, 132, 192, 0, - 4, 23, 193, 0, 4, 18, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 4, 4, 194, 0, 0, 0, 0, 0, 0, 4, 30, - 195, 132, 71, 196, 23, 0, 0, 0, 4, 4, 4, 4, 159, 0, 0, 0, - 4, 4, 4, 132, 4, 4, 4, 4, 4, 4, 110, 0, 0, 0, 0, 0, - 4, 132, 0, 0, 0, 0, 0, 0, 4, 4, 66, 0, 0, 0, 0, 0, - 4, 30, 98, 0, 0, 0, 16, 197, 4, 23, 110, 198, 23, 0, 0, 0, - 4, 4, 199, 0, 163, 0, 0, 71, 4, 4, 4, 4, 4, 4, 4, 73, - 4, 4, 4, 4, 4, 4, 4, 145, 56, 0, 0, 0, 0, 0, 0, 0, - 4, 4, 4, 200, 201, 0, 0, 0, 4, 4, 202, 4, 203, 204, 205, 4, - 206, 207, 208, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 209, 210, 80, - 202, 202, 124, 124, 195, 195, 148, 0, 4, 4, 4, 4, 4, 4, 182, 0, - 205, 211, 212, 213, 214, 215, 0, 0, 4, 4, 4, 4, 4, 4, 102, 0, - 4, 103, 4, 4, 4, 4, 4, 4, 112, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 56, 0, 0, 112, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_xid_start_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 255, 255, 127, 255, - 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, 0, 0, 223, 184, - 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, 3, 252, 255, 255, - 255, 255, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, 255, 0, 0, 0, - 0, 0, 255, 255, 255, 7, 7, 0, 255, 7, 0, 0, 0, 192, 254, 255, - 255, 255, 47, 0, 96, 192, 0, 156, 0, 0, 253, 255, 255, 255, 0, 0, - 0, 224, 255, 255, 63, 0, 2, 0, 0, 252, 255, 255, 255, 7, 48, 4, - 255, 255, 63, 4, 16, 1, 0, 0, 255, 255, 255, 1, 255, 255, 223, 63, - 240, 255, 255, 255, 255, 255, 255, 35, 0, 0, 1, 255, 3, 0, 
254, 255, - 225, 159, 249, 255, 255, 253, 197, 35, 0, 64, 0, 176, 3, 0, 3, 0, - 224, 135, 249, 255, 255, 253, 109, 3, 0, 0, 0, 94, 0, 0, 28, 0, - 224, 191, 251, 255, 255, 253, 237, 35, 0, 0, 1, 0, 3, 0, 0, 2, - 224, 159, 249, 255, 0, 0, 0, 176, 3, 0, 2, 0, 232, 199, 61, 214, - 24, 199, 255, 3, 224, 223, 253, 255, 255, 253, 255, 35, 0, 0, 0, 7, - 3, 0, 0, 0, 225, 223, 253, 255, 255, 253, 239, 35, 0, 0, 0, 64, - 3, 0, 6, 0, 255, 255, 255, 39, 0, 64, 112, 128, 3, 0, 0, 252, - 224, 255, 127, 252, 255, 255, 251, 47, 127, 0, 0, 0, 255, 255, 5, 0, - 150, 37, 240, 254, 174, 236, 5, 32, 95, 0, 0, 240, 1, 0, 0, 0, - 255, 254, 255, 255, 255, 31, 0, 0, 0, 31, 0, 0, 255, 7, 0, 128, - 0, 0, 63, 60, 98, 192, 225, 255, 3, 64, 0, 0, 191, 32, 255, 255, - 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, - 61, 255, 127, 255, 255, 255, 61, 255, 255, 255, 255, 7, 255, 255, 63, 63, - 255, 159, 255, 255, 255, 199, 255, 1, 255, 223, 3, 0, 255, 255, 3, 0, - 255, 223, 1, 0, 255, 255, 15, 0, 0, 0, 128, 16, 255, 255, 255, 0, - 255, 5, 255, 255, 255, 255, 63, 0, 255, 255, 255, 127, 255, 63, 31, 0, - 255, 15, 255, 255, 255, 3, 0, 0, 255, 255, 127, 0, 255, 255, 31, 0, - 128, 0, 0, 0, 224, 255, 255, 255, 224, 15, 0, 0, 248, 255, 255, 255, - 1, 192, 0, 252, 63, 0, 0, 0, 15, 0, 0, 0, 0, 224, 0, 252, - 255, 255, 255, 63, 255, 1, 0, 0, 0, 222, 99, 0, 63, 63, 255, 170, - 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, 0, 0, 2, 128, - 0, 0, 255, 31, 132, 252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 0, - 255, 127, 255, 255, 31, 120, 12, 0, 255, 128, 0, 0, 127, 127, 127, 127, - 224, 0, 0, 0, 254, 3, 62, 31, 255, 255, 127, 224, 255, 63, 254, 255, - 255, 127, 0, 0, 255, 31, 255, 255, 0, 12, 0, 0, 255, 127, 0, 128, - 0, 0, 128, 255, 252, 255, 255, 255, 255, 249, 255, 255, 255, 127, 255, 0, - 187, 247, 255, 255, 7, 0, 0, 0, 0, 0, 252, 40, 63, 0, 255, 255, - 255, 255, 255, 31, 255, 255, 7, 0, 0, 128, 0, 0, 223, 255, 0, 124, - 247, 15, 0, 0, 255, 255, 127, 196, 255, 255, 98, 62, 
5, 0, 0, 56, - 255, 7, 28, 0, 126, 126, 126, 0, 127, 127, 255, 255, 15, 0, 255, 255, - 127, 248, 255, 255, 255, 255, 255, 15, 255, 63, 255, 255, 255, 255, 255, 3, - 127, 0, 248, 160, 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, - 255, 255, 252, 255, 0, 0, 255, 3, 0, 0, 138, 170, 192, 255, 255, 255, - 252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, - 255, 255, 1, 0, 255, 7, 255, 255, 15, 255, 62, 0, 255, 255, 15, 255, - 255, 0, 255, 255, 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 55, 0, - 255, 255, 255, 192, 1, 0, 239, 254, 31, 0, 0, 0, 255, 255, 71, 0, - 30, 0, 0, 20, 255, 255, 251, 255, 255, 15, 0, 0, 127, 189, 255, 191, - 255, 1, 255, 255, 0, 0, 1, 224, 128, 7, 0, 0, 176, 0, 0, 0, - 0, 0, 0, 15, 16, 0, 0, 0, 0, 0, 0, 128, 255, 253, 255, 255, - 0, 0, 252, 255, 255, 63, 0, 0, 248, 255, 255, 224, 31, 0, 1, 0, - 255, 7, 255, 31, 255, 1, 255, 3, 255, 255, 223, 255, 255, 255, 255, 223, - 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, - 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, 150, 254, 247, 10, - 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, 238, 251, 255, 15, -}; - -/* XID_Start: 2065 bytes. */ - -RE_UINT32 re_get_xid_start(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_xid_start_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_xid_start_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_xid_start_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_xid_start_stage_4[pos + f] << 5; - pos += code; - value = (re_xid_start_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* XID_Continue. 
*/ - -static RE_UINT8 re_xid_continue_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, - 6, 6, -}; - -static RE_UINT8 re_xid_continue_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 18, 19, 13, 20, 13, 21, 13, 13, 13, 13, 22, 7, 7, - 23, 24, 13, 13, 13, 13, 25, 26, 13, 13, 27, 28, 29, 30, 13, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 31, 7, 32, 33, 7, 34, 13, 13, 13, 13, 13, 35, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 36, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, -}; - -static RE_UINT8 re_xid_continue_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, - 29, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 31, 31, - 34, 35, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, - 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 1, 58, - 59, 60, 61, 62, 63, 31, 31, 31, 64, 65, 66, 67, 68, 69, 70, 71, - 72, 31, 73, 31, 74, 31, 31, 31, 1, 1, 1, 75, 76, 77, 31, 31, - 1, 1, 1, 1, 78, 31, 31, 31, 31, 31, 31, 31, 1, 1, 79, 31, - 1, 1, 80, 81, 31, 31, 31, 82, 1, 1, 1, 1, 1, 1, 1, 83, - 1, 1, 84, 31, 31, 31, 31, 31, 85, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 86, 31, 31, 31, 31, 87, 88, 31, 89, 90, 91, 92, - 31, 31, 93, 31, 31, 31, 31, 31, 94, 31, 31, 31, 31, 31, 31, 31, - 95, 96, 31, 31, 31, 31, 97, 31, 1, 1, 1, 1, 1, 1, 98, 1, - 1, 1, 1, 1, 1, 1, 1, 99, 100, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 101, 31, 1, 1, 102, 31, 31, 31, 31, 31, - 31, 103, 31, 31, 31, 31, 31, 31, -}; - -static RE_UINT8 re_xid_continue_stage_4[] = { - 0, 1, 2, 3, 0, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 7, 8, 6, 
6, 6, 9, 10, 11, 6, 12, - 6, 6, 6, 6, 13, 6, 6, 6, 6, 14, 15, 16, 17, 18, 19, 20, - 21, 6, 6, 22, 6, 6, 23, 24, 25, 6, 26, 6, 6, 27, 6, 28, - 6, 29, 30, 0, 0, 31, 32, 11, 6, 6, 6, 33, 34, 35, 36, 37, - 38, 39, 40, 41, 42, 43, 44, 45, 46, 43, 47, 48, 49, 50, 51, 52, - 53, 54, 55, 56, 53, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, - 16, 68, 69, 0, 70, 71, 72, 0, 73, 74, 75, 76, 77, 78, 79, 0, - 6, 6, 80, 6, 81, 6, 82, 83, 6, 6, 84, 6, 85, 86, 87, 6, - 88, 6, 61, 89, 90, 6, 6, 91, 16, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 92, 3, 6, 6, 93, 94, 95, 96, 97, 6, 6, 98, 99, - 100, 6, 6, 101, 6, 102, 6, 103, 104, 105, 106, 107, 6, 108, 109, 0, - 30, 6, 104, 110, 111, 112, 0, 0, 6, 6, 113, 114, 6, 6, 6, 96, - 6, 101, 115, 81, 116, 0, 117, 118, 6, 6, 6, 6, 6, 6, 6, 119, - 91, 6, 120, 81, 6, 121, 122, 123, 0, 124, 125, 126, 127, 0, 127, 128, - 129, 130, 131, 6, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 6, 132, 104, 6, 6, 6, 6, 133, 6, 82, 6, 134, 135, 136, 136, 6, - 137, 138, 16, 6, 139, 16, 6, 83, 140, 141, 6, 6, 142, 68, 0, 25, - 6, 6, 6, 6, 6, 103, 0, 0, 6, 6, 6, 6, 6, 6, 103, 0, - 6, 6, 6, 6, 143, 0, 25, 81, 144, 145, 6, 146, 6, 6, 6, 27, - 147, 148, 6, 6, 149, 150, 0, 147, 6, 151, 6, 96, 6, 6, 152, 153, - 6, 154, 96, 78, 6, 6, 155, 104, 6, 135, 156, 157, 6, 6, 158, 159, - 160, 161, 83, 162, 6, 6, 6, 163, 6, 6, 6, 6, 6, 164, 165, 30, - 6, 6, 6, 154, 6, 6, 166, 0, 167, 168, 169, 6, 6, 27, 170, 6, - 6, 6, 81, 171, 6, 6, 6, 6, 6, 81, 25, 6, 172, 6, 151, 1, - 90, 173, 174, 175, 6, 6, 6, 78, 1, 2, 3, 106, 6, 104, 176, 0, - 177, 178, 179, 0, 6, 6, 6, 68, 0, 0, 6, 95, 0, 0, 0, 180, - 0, 0, 0, 0, 78, 6, 181, 182, 6, 25, 102, 68, 81, 6, 183, 0, - 6, 6, 6, 6, 81, 80, 184, 30, 6, 185, 6, 186, 0, 0, 0, 0, - 6, 135, 103, 151, 0, 0, 0, 0, 187, 188, 103, 135, 104, 0, 0, 189, - 103, 166, 0, 0, 6, 190, 0, 0, 191, 192, 0, 78, 78, 0, 75, 193, - 6, 103, 103, 194, 27, 0, 0, 0, 6, 6, 116, 0, 6, 194, 6, 194, - 6, 6, 193, 195, 6, 68, 25, 196, 6, 197, 25, 198, 6, 6, 199, 0, - 200, 201, 0, 0, 202, 203, 6, 
204, 34, 43, 205, 206, 0, 0, 0, 0, - 6, 6, 204, 0, 6, 6, 207, 0, 0, 0, 0, 0, 6, 208, 209, 0, - 6, 6, 210, 0, 6, 101, 99, 0, 211, 113, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 6, 6, 212, 0, 0, 0, 0, 0, 0, 6, 213, - 214, 5, 215, 216, 172, 217, 0, 0, 6, 6, 6, 6, 166, 0, 0, 0, - 6, 6, 6, 142, 6, 6, 6, 6, 6, 6, 186, 0, 0, 0, 0, 0, - 6, 142, 0, 0, 0, 0, 0, 0, 6, 6, 193, 0, 0, 0, 0, 0, - 6, 213, 104, 99, 0, 0, 25, 107, 6, 135, 218, 219, 90, 0, 0, 0, - 6, 6, 220, 104, 221, 0, 0, 182, 6, 6, 6, 6, 6, 6, 6, 143, - 6, 6, 6, 6, 6, 6, 6, 194, 222, 0, 0, 0, 0, 0, 0, 0, - 6, 6, 6, 223, 224, 0, 0, 0, 0, 0, 0, 225, 226, 227, 0, 0, - 0, 0, 228, 0, 0, 0, 0, 0, 6, 6, 197, 6, 229, 230, 231, 6, - 232, 233, 234, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 235, 236, 83, - 197, 197, 132, 132, 214, 214, 237, 6, 6, 238, 6, 239, 240, 241, 0, 0, - 242, 243, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 244, 0, - 6, 6, 204, 0, 0, 0, 0, 0, 231, 245, 246, 247, 248, 249, 0, 0, - 6, 6, 6, 6, 6, 6, 135, 0, 6, 95, 6, 6, 6, 6, 6, 6, - 81, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 222, 0, 0, - 81, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 90, -}; - -static RE_UINT8 re_xid_continue_stage_5[] = { - 0, 0, 0, 0, 0, 0, 255, 3, 254, 255, 255, 135, 254, 255, 255, 7, - 0, 4, 160, 4, 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, - 31, 80, 0, 0, 255, 255, 223, 184, 192, 215, 255, 255, 251, 255, 255, 255, - 255, 255, 191, 255, 251, 252, 255, 255, 255, 255, 254, 255, 255, 255, 127, 2, - 254, 255, 255, 255, 255, 0, 254, 255, 255, 255, 255, 191, 182, 0, 255, 255, - 255, 7, 7, 0, 0, 0, 255, 7, 255, 195, 255, 255, 255, 255, 239, 159, - 255, 253, 255, 159, 0, 0, 255, 255, 255, 231, 255, 255, 255, 255, 3, 0, - 255, 255, 63, 4, 255, 63, 0, 0, 255, 255, 255, 15, 255, 255, 223, 63, - 0, 0, 240, 255, 207, 255, 254, 255, 239, 159, 249, 255, 255, 253, 197, 243, - 159, 121, 128, 176, 207, 255, 3, 0, 238, 135, 249, 255, 255, 253, 109, 211, - 135, 57, 2, 94, 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 243, - 191, 59, 1, 0, 207, 255, 0, 2, 238, 
159, 249, 255, 159, 57, 192, 176, - 207, 255, 2, 0, 236, 199, 61, 214, 24, 199, 255, 195, 199, 61, 129, 0, - 192, 255, 0, 0, 239, 223, 253, 255, 255, 253, 255, 227, 223, 61, 96, 7, - 207, 255, 0, 0, 255, 253, 239, 243, 223, 61, 96, 64, 207, 255, 6, 0, - 238, 223, 253, 255, 255, 255, 255, 231, 223, 125, 240, 128, 207, 255, 0, 252, - 236, 255, 127, 252, 255, 255, 251, 47, 127, 132, 95, 255, 192, 255, 12, 0, - 255, 255, 255, 7, 255, 127, 255, 3, 150, 37, 240, 254, 174, 236, 255, 59, - 95, 63, 255, 243, 1, 0, 0, 3, 255, 3, 160, 194, 255, 254, 255, 255, - 255, 31, 254, 255, 223, 255, 255, 254, 255, 255, 255, 31, 64, 0, 0, 0, - 255, 3, 255, 255, 255, 255, 255, 63, 191, 32, 255, 255, 255, 255, 255, 247, - 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, - 255, 255, 61, 255, 0, 254, 3, 0, 255, 255, 0, 0, 255, 255, 63, 63, - 255, 159, 255, 255, 255, 199, 255, 1, 255, 223, 31, 0, 255, 255, 31, 0, - 255, 255, 15, 0, 255, 223, 13, 0, 255, 255, 143, 48, 255, 3, 0, 0, - 0, 56, 255, 3, 255, 255, 255, 0, 255, 7, 255, 255, 255, 255, 63, 0, - 255, 255, 255, 127, 255, 15, 255, 15, 192, 255, 255, 255, 255, 63, 31, 0, - 255, 15, 255, 255, 255, 3, 255, 7, 255, 255, 255, 159, 255, 3, 255, 3, - 128, 0, 255, 63, 255, 15, 255, 3, 0, 248, 15, 0, 255, 227, 255, 255, - 255, 1, 0, 0, 0, 0, 247, 255, 255, 255, 127, 3, 255, 255, 63, 248, - 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, - 0, 0, 0, 128, 1, 0, 16, 0, 0, 0, 2, 128, 0, 0, 255, 31, - 226, 255, 1, 0, 132, 252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 0, - 255, 127, 255, 255, 31, 248, 15, 0, 255, 128, 0, 128, 255, 255, 127, 0, - 127, 127, 127, 127, 224, 0, 0, 0, 254, 255, 62, 31, 255, 255, 127, 230, - 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 0, 0, - 255, 31, 255, 255, 255, 15, 0, 0, 255, 255, 240, 191, 0, 0, 128, 255, - 252, 255, 255, 255, 255, 249, 255, 255, 255, 127, 255, 0, 255, 0, 0, 0, - 63, 0, 255, 3, 255, 255, 255, 40, 255, 63, 255, 255, 1, 128, 255, 3, - 
255, 63, 255, 3, 255, 255, 127, 252, 7, 0, 0, 56, 255, 255, 124, 0, - 126, 126, 126, 0, 127, 127, 255, 255, 63, 0, 255, 255, 255, 55, 255, 3, - 15, 0, 255, 255, 127, 248, 255, 255, 255, 255, 255, 3, 127, 0, 248, 224, - 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, 240, 255, 255, 255, - 255, 255, 252, 255, 255, 255, 24, 0, 0, 224, 0, 0, 0, 0, 138, 170, - 252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, - 0, 0, 0, 32, 255, 255, 1, 0, 1, 0, 0, 0, 15, 255, 62, 0, - 255, 255, 15, 255, 255, 0, 255, 255, 15, 0, 0, 0, 63, 253, 255, 255, - 255, 255, 191, 145, 255, 255, 55, 0, 255, 255, 255, 192, 111, 240, 239, 254, - 255, 255, 15, 135, 127, 0, 0, 0, 255, 255, 7, 0, 192, 255, 0, 128, - 255, 1, 255, 3, 255, 255, 223, 255, 255, 255, 79, 0, 31, 28, 255, 23, - 255, 255, 251, 255, 255, 255, 255, 64, 127, 189, 255, 191, 255, 1, 255, 255, - 255, 7, 255, 3, 159, 57, 129, 224, 207, 31, 31, 0, 191, 0, 255, 3, - 255, 255, 63, 255, 1, 0, 0, 63, 17, 0, 255, 3, 255, 255, 255, 227, - 255, 3, 0, 128, 255, 255, 255, 1, 255, 253, 255, 255, 1, 0, 255, 3, - 0, 0, 252, 255, 255, 254, 127, 0, 15, 0, 255, 3, 248, 255, 255, 224, - 31, 0, 255, 255, 0, 128, 255, 255, 3, 0, 0, 0, 255, 7, 255, 31, - 255, 1, 255, 99, 224, 227, 7, 248, 231, 15, 0, 0, 0, 60, 0, 0, - 28, 0, 0, 0, 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, - 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, - 253, 255, 255, 247, 247, 207, 255, 255, 255, 255, 127, 248, 255, 31, 32, 0, - 16, 0, 0, 248, 254, 255, 0, 0, 127, 255, 255, 249, 219, 7, 0, 0, - 31, 0, 127, 0, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, - 255, 251, 255, 15, 238, 251, 255, 15, -}; - -/* XID_Continue: 2290 bytes. 
*/ - -RE_UINT32 re_get_xid_continue(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_xid_continue_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_xid_continue_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_xid_continue_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_xid_continue_stage_4[pos + f] << 5; - pos += code; - value = (re_xid_continue_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Default_Ignorable_Code_Point. */ - -static RE_UINT8 re_default_ignorable_code_point_stage_1[] = { - 0, 1, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 2, 2, 2, - 2, 2, -}; - -static RE_UINT8 re_default_ignorable_code_point_stage_2[] = { - 0, 1, 2, 3, 4, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 8, 1, 1, 1, 1, 1, - 9, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_default_ignorable_code_point_stage_3[] = { - 0, 1, 1, 2, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 4, 1, 1, 1, 1, 1, 5, 6, 1, 1, 1, 1, 1, 1, 1, - 7, 1, 1, 1, 1, 1, 1, 1, 1, 8, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 9, 10, 1, 1, 1, 1, 11, 1, 1, 1, - 1, 12, 1, 1, 1, 1, 1, 1, 13, 13, 13, 13, 13, 13, 13, 13, -}; - -static RE_UINT8 re_default_ignorable_code_point_stage_4[] = { - 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, - 7, 0, 0, 0, 0, 0, 0, 0, 8, 9, 0, 10, 0, 0, 0, 0, - 0, 0, 0, 11, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 4, - 0, 0, 0, 0, 0, 5, 0, 12, 0, 0, 0, 0, 0, 13, 0, 0, - 0, 0, 0, 14, 0, 0, 0, 0, 15, 15, 15, 15, 15, 15, 15, 15, -}; - -static RE_UINT8 re_default_ignorable_code_point_stage_5[] = { - 0, 0, 0, 0, 0, 32, 0, 0, 0, 128, 
0, 0, 0, 0, 0, 16, - 0, 0, 0, 128, 1, 0, 0, 0, 0, 0, 48, 0, 0, 120, 0, 0, - 0, 248, 0, 0, 0, 124, 0, 0, 255, 255, 0, 0, 16, 0, 0, 0, - 0, 0, 255, 1, 15, 0, 0, 0, 0, 0, 248, 7, 255, 255, 255, 255, -}; - -/* Default_Ignorable_Code_Point: 370 bytes. */ - -RE_UINT32 re_get_default_ignorable_code_point(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_default_ignorable_code_point_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_default_ignorable_code_point_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_default_ignorable_code_point_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_default_ignorable_code_point_stage_4[pos + f] << 5; - pos += code; - value = (re_default_ignorable_code_point_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Grapheme_Extend. */ - -static RE_UINT8 re_grapheme_extend_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, - 2, -}; - -static RE_UINT8 re_grapheme_extend_stage_2[] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 6, - 7, 8, 4, 4, 4, 4, 9, 4, 4, 4, 4, 10, 4, 11, 12, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 13, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -}; - -static RE_UINT8 re_grapheme_extend_stage_3[] = { - 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 0, 0, 15, 0, 0, 0, 16, 17, 18, 19, 20, 21, 22, 0, 0, - 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 25, 0, 0, - 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 27, 0, 28, 29, 30, 31, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 33, 34, - 0, 35, 36, 37, 0, 0, 0, 0, 0, 0, 38, 0, 0, 0, 0, 0, - 39, 40, 41, 42, 43, 44, 45, 46, 0, 0, 0, 0, 47, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 49, 0, 0, 0, 50, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, - 0, 52, 53, 
0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, - 55, 0, 0, 0, 0, 0, 0, 0, 56, 57, 0, 0, 0, 0, 0, 0, - 58, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_grapheme_extend_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 0, 0, 0, 0, - 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 0, - 7, 0, 8, 9, 0, 0, 10, 11, 12, 13, 14, 0, 0, 15, 0, 16, - 17, 18, 19, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 25, - 29, 30, 31, 32, 29, 30, 33, 25, 26, 34, 35, 25, 36, 37, 38, 0, - 39, 40, 41, 25, 26, 42, 43, 25, 26, 37, 28, 25, 0, 0, 44, 0, - 0, 45, 46, 0, 0, 47, 48, 0, 49, 50, 0, 51, 52, 53, 54, 0, - 0, 55, 56, 57, 58, 0, 0, 0, 0, 0, 59, 0, 0, 0, 0, 0, - 60, 60, 61, 61, 0, 62, 63, 0, 64, 0, 0, 0, 65, 66, 0, 0, - 0, 67, 0, 0, 0, 0, 0, 0, 68, 0, 69, 70, 0, 71, 0, 0, - 72, 73, 36, 16, 74, 75, 0, 76, 0, 77, 0, 0, 0, 0, 78, 79, - 0, 0, 0, 0, 0, 0, 1, 80, 81, 0, 0, 0, 0, 0, 13, 82, - 0, 0, 0, 0, 0, 0, 0, 83, 0, 0, 0, 84, 0, 0, 0, 1, - 0, 85, 0, 0, 86, 0, 0, 0, 0, 0, 0, 87, 40, 0, 0, 88, - 89, 65, 0, 0, 0, 0, 90, 91, 0, 92, 93, 0, 22, 94, 0, 95, - 0, 96, 97, 30, 0, 98, 26, 99, 0, 0, 0, 0, 0, 0, 0, 100, - 37, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 101, - 0, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 102, 0, 0, 0, 0, - 103, 104, 0, 0, 0, 0, 0, 65, 26, 105, 106, 84, 74, 107, 0, 0, - 22, 108, 0, 109, 74, 110, 111, 0, 0, 112, 0, 0, 0, 0, 84, 113, - 74, 27, 114, 115, 0, 0, 0, 0, 0, 105, 116, 0, 0, 117, 118, 0, - 0, 0, 0, 0, 0, 119, 120, 0, 0, 121, 39, 0, 0, 122, 0, 0, - 59, 123, 0, 0, 0, 0, 0, 0, 0, 124, 0, 0, 125, 126, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 127, 0, 128, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 129, 0, 0, 0, 0, 0, 0, 0, 130, 0, 0, 0, - 0, 0, 0, 131, 132, 133, 0, 0, 0, 0, 134, 0, 0, 0, 0, 0, - 1, 135, 1, 136, 137, 138, 0, 0, 139, 140, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 141, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, -}; - -static RE_UINT8 re_grapheme_extend_stage_5[] = { - 0, 0, 0, 0, 255, 255, 
255, 255, 255, 255, 0, 0, 248, 3, 0, 0, - 0, 0, 254, 255, 255, 255, 255, 191, 182, 0, 0, 0, 0, 0, 255, 7, - 0, 248, 255, 255, 0, 0, 1, 0, 0, 0, 192, 159, 159, 61, 0, 0, - 0, 0, 2, 0, 0, 0, 255, 255, 255, 7, 0, 0, 192, 255, 1, 0, - 0, 248, 15, 0, 0, 0, 192, 251, 239, 62, 0, 0, 0, 0, 0, 14, - 0, 0, 240, 255, 251, 255, 255, 255, 7, 0, 0, 0, 0, 0, 0, 20, - 254, 33, 254, 0, 12, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 80, - 30, 32, 128, 0, 6, 0, 0, 0, 0, 0, 0, 16, 134, 57, 2, 0, - 0, 0, 35, 0, 190, 33, 0, 0, 0, 0, 0, 208, 30, 32, 192, 0, - 4, 0, 0, 0, 0, 0, 0, 64, 1, 32, 128, 0, 1, 0, 0, 0, - 0, 0, 0, 192, 193, 61, 96, 0, 0, 0, 0, 144, 68, 48, 96, 0, - 0, 132, 92, 128, 0, 0, 242, 7, 128, 127, 0, 0, 0, 0, 242, 27, - 0, 63, 0, 0, 0, 0, 0, 3, 0, 0, 160, 2, 0, 0, 254, 127, - 223, 224, 255, 254, 255, 255, 255, 31, 64, 0, 0, 0, 0, 224, 253, 102, - 0, 0, 0, 195, 1, 0, 30, 0, 100, 32, 0, 32, 0, 0, 0, 224, - 0, 0, 28, 0, 0, 0, 12, 0, 0, 0, 176, 63, 64, 254, 15, 32, - 0, 56, 0, 0, 96, 0, 0, 0, 0, 2, 0, 0, 135, 1, 4, 14, - 0, 0, 128, 9, 0, 0, 64, 127, 229, 31, 248, 159, 0, 0, 255, 127, - 15, 0, 0, 0, 0, 0, 208, 23, 3, 0, 0, 0, 60, 59, 0, 0, - 64, 163, 3, 0, 0, 240, 207, 0, 0, 0, 247, 255, 253, 33, 16, 3, - 255, 255, 63, 248, 0, 16, 0, 0, 255, 255, 1, 0, 0, 128, 3, 0, - 0, 0, 0, 128, 0, 252, 0, 0, 0, 0, 0, 6, 0, 128, 247, 63, - 0, 0, 3, 0, 68, 8, 0, 0, 48, 0, 0, 0, 255, 255, 3, 0, - 192, 63, 0, 0, 128, 255, 3, 0, 0, 0, 200, 19, 32, 0, 0, 0, - 0, 126, 102, 0, 8, 16, 0, 0, 0, 0, 157, 193, 0, 48, 64, 0, - 32, 33, 0, 0, 0, 0, 0, 32, 0, 0, 192, 7, 110, 240, 0, 0, - 0, 0, 0, 135, 0, 0, 0, 255, 127, 0, 0, 0, 0, 0, 120, 6, - 128, 239, 31, 0, 0, 0, 8, 0, 0, 0, 192, 127, 0, 28, 0, 0, - 0, 128, 211, 64, 248, 7, 0, 0, 1, 0, 128, 0, 192, 31, 31, 0, - 92, 0, 0, 0, 0, 0, 249, 165, 13, 0, 0, 0, 0, 128, 60, 176, - 1, 0, 0, 48, 0, 0, 248, 167, 0, 40, 191, 0, 188, 15, 0, 0, - 0, 0, 127, 191, 0, 0, 252, 255, 255, 252, 109, 0, 0, 0, 31, 0, - 0, 0, 127, 0, 0, 128, 7, 0, 0, 0, 0, 96, 160, 195, 7, 248, - 231, 15, 0, 
0, 0, 60, 0, 0, 28, 0, 0, 0, 255, 255, 127, 248, - 255, 31, 32, 0, 16, 0, 0, 248, 254, 255, 0, 0, 127, 255, 255, 249, - 219, 7, 0, 0, 240, 7, 0, 0, -}; - -/* Grapheme_Extend: 1353 bytes. */ - -RE_UINT32 re_get_grapheme_extend(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_grapheme_extend_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_grapheme_extend_stage_2[pos + f] << 4; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_grapheme_extend_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_grapheme_extend_stage_4[pos + f] << 5; - pos += code; - value = (re_grapheme_extend_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Grapheme_Base. */ - -static RE_UINT8 re_grapheme_base_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, -}; - -static RE_UINT8 re_grapheme_base_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 13, 13, - 13, 13, 13, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 15, 13, 16, 17, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 21, - 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 19, 19, 13, 32, 19, 19, - 19, 33, 19, 19, 19, 19, 19, 19, 19, 19, 34, 35, 13, 13, 13, 13, - 13, 36, 37, 19, 19, 19, 19, 19, 19, 19, 19, 19, 38, 19, 19, 39, - 19, 19, 19, 19, 40, 41, 42, 19, 19, 19, 43, 44, 45, 46, 47, 19, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 48, 13, 13, 13, 49, 50, 13, - 13, 13, 13, 51, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 52, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, -}; - -static RE_UINT8 
re_grapheme_base_stage_3[] = { - 0, 1, 2, 2, 2, 2, 3, 4, 2, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, - 28, 29, 2, 2, 30, 31, 32, 33, 2, 2, 2, 2, 2, 34, 35, 36, - 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 2, 47, 2, 2, 48, 49, - 50, 51, 2, 52, 2, 2, 2, 53, 54, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 55, 56, 57, 58, 59, 60, 61, 62, 2, 63, - 64, 65, 66, 67, 68, 53, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 69, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 70, - 2, 71, 2, 2, 72, 73, 2, 74, 75, 76, 77, 78, 79, 80, 81, 82, - 2, 2, 2, 2, 2, 2, 2, 83, 84, 84, 84, 84, 84, 84, 84, 84, - 84, 84, 2, 2, 85, 86, 87, 88, 2, 2, 89, 90, 91, 92, 93, 94, - 95, 96, 97, 98, 84, 99, 100, 101, 2, 102, 103, 84, 2, 2, 104, 84, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 84, 84, 115, 84, 84, 84, - 116, 117, 118, 119, 120, 121, 122, 84, 123, 124, 84, 125, 126, 127, 128, 84, - 84, 129, 84, 84, 84, 130, 84, 84, 131, 132, 84, 84, 84, 84, 84, 84, - 2, 2, 2, 2, 2, 2, 2, 133, 134, 2, 135, 84, 84, 84, 84, 84, - 136, 84, 84, 84, 84, 84, 84, 84, 2, 2, 2, 2, 137, 84, 84, 84, - 2, 2, 2, 2, 138, 139, 140, 141, 84, 84, 84, 84, 84, 84, 142, 143, - 2, 2, 2, 2, 2, 2, 2, 144, 2, 2, 2, 2, 2, 145, 84, 84, - 146, 84, 84, 84, 84, 84, 84, 84, 147, 148, 84, 84, 84, 84, 84, 84, - 2, 149, 150, 151, 152, 84, 153, 84, 154, 155, 156, 2, 2, 157, 2, 158, - 2, 2, 2, 2, 159, 160, 84, 84, 2, 161, 162, 84, 84, 84, 84, 84, - 84, 84, 84, 84, 163, 164, 84, 84, 165, 166, 167, 168, 169, 84, 2, 2, - 2, 2, 2, 2, 2, 170, 171, 172, 173, 174, 175, 176, 84, 84, 84, 84, - 2, 2, 2, 2, 2, 177, 2, 2, 2, 2, 2, 2, 2, 2, 178, 2, - 179, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 180, 84, 84, - 2, 2, 2, 2, 181, 84, 84, 84, -}; - -static RE_UINT8 re_grapheme_base_stage_4[] = { - 0, 0, 1, 1, 1, 1, 1, 2, 0, 0, 3, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 4, - 5, 1, 6, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 8, 1, 9, 8, 1, 10, 0, 0, 11, 12, 1, 13, 14, - 15, 16, 1, 1, 13, 0, 1, 8, 1, 1, 
1, 1, 1, 17, 18, 1, - 19, 20, 1, 0, 21, 1, 1, 1, 1, 1, 22, 23, 1, 1, 13, 24, - 1, 25, 26, 2, 1, 27, 0, 0, 0, 0, 1, 28, 0, 0, 0, 0, - 29, 1, 1, 30, 31, 32, 33, 1, 34, 35, 36, 37, 38, 39, 40, 41, - 42, 35, 36, 43, 44, 45, 15, 46, 47, 6, 36, 48, 49, 44, 40, 50, - 51, 35, 36, 52, 53, 39, 40, 54, 55, 56, 57, 58, 59, 44, 15, 13, - 60, 20, 36, 61, 62, 63, 40, 64, 65, 20, 36, 66, 67, 11, 40, 68, - 69, 20, 1, 70, 71, 72, 40, 1, 73, 74, 1, 75, 76, 77, 15, 46, - 8, 1, 1, 78, 79, 41, 0, 0, 80, 81, 82, 83, 84, 85, 0, 0, - 1, 4, 1, 86, 87, 1, 88, 89, 90, 0, 0, 91, 92, 13, 0, 0, - 1, 1, 88, 93, 1, 94, 8, 95, 96, 3, 1, 1, 97, 1, 1, 1, - 1, 1, 1, 1, 98, 99, 1, 1, 98, 1, 1, 100, 101, 102, 1, 1, - 1, 101, 1, 1, 1, 13, 1, 88, 1, 103, 1, 1, 1, 1, 1, 104, - 1, 88, 1, 1, 1, 1, 1, 105, 3, 106, 1, 107, 1, 106, 3, 44, - 1, 1, 1, 108, 109, 110, 103, 103, 13, 103, 1, 1, 1, 1, 1, 54, - 111, 1, 112, 1, 1, 1, 1, 22, 1, 2, 113, 114, 115, 1, 19, 14, - 1, 1, 41, 1, 103, 116, 1, 1, 1, 117, 1, 1, 1, 118, 119, 120, - 103, 103, 19, 0, 0, 0, 0, 0, 121, 1, 1, 122, 123, 1, 13, 110, - 124, 1, 125, 1, 1, 1, 126, 127, 1, 1, 41, 128, 129, 1, 1, 1, - 105, 0, 0, 0, 54, 130, 131, 132, 1, 1, 1, 1, 0, 0, 0, 0, - 1, 104, 1, 1, 104, 133, 1, 19, 1, 1, 1, 134, 134, 135, 1, 136, - 13, 1, 137, 1, 1, 1, 0, 33, 2, 88, 1, 2, 0, 0, 0, 0, - 41, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, - 1, 1, 76, 0, 13, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 138, - 1, 139, 1, 129, 36, 106, 140, 0, 1, 1, 2, 1, 1, 2, 1, 1, - 1, 1, 1, 1, 1, 1, 2, 141, 1, 1, 97, 1, 1, 1, 137, 44, - 1, 76, 142, 142, 142, 142, 0, 0, 1, 1, 1, 1, 14, 0, 0, 0, - 1, 143, 1, 1, 1, 1, 1, 144, 1, 1, 1, 1, 1, 22, 0, 41, - 1, 1, 103, 1, 8, 1, 1, 1, 1, 145, 1, 1, 1, 1, 1, 1, - 146, 1, 19, 8, 1, 1, 1, 1, 2, 1, 1, 13, 1, 1, 144, 1, - 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 22, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 22, 0, 0, 88, 1, 1, 1, 76, 1, 1, 1, - 1, 1, 41, 0, 1, 1, 2, 147, 1, 19, 1, 1, 1, 1, 1, 148, - 1, 1, 2, 54, 0, 0, 0, 149, 150, 1, 151, 103, 1, 1, 1, 54, - 1, 1, 1, 1, 152, 103, 0, 153, 
1, 1, 154, 1, 76, 155, 1, 88, - 29, 1, 1, 156, 157, 158, 134, 2, 1, 1, 159, 160, 161, 85, 1, 162, - 1, 1, 1, 163, 164, 165, 166, 22, 167, 168, 142, 1, 1, 1, 22, 1, - 1, 1, 1, 1, 1, 1, 169, 103, 1, 1, 144, 1, 145, 1, 1, 41, - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 19, 1, - 1, 1, 1, 1, 1, 103, 0, 0, 76, 170, 1, 171, 172, 1, 1, 1, - 1, 1, 1, 1, 106, 29, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, - 1, 124, 1, 1, 54, 0, 0, 19, 0, 103, 0, 1, 1, 173, 174, 134, - 1, 1, 1, 1, 1, 1, 1, 88, 8, 1, 1, 1, 1, 1, 1, 1, - 1, 19, 1, 2, 175, 176, 142, 177, 162, 1, 102, 178, 19, 19, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 13, 179, 1, 1, 180, 1, 1, 1, 1, - 2, 41, 44, 0, 0, 1, 1, 88, 1, 88, 1, 1, 1, 44, 8, 41, - 1, 1, 144, 1, 13, 1, 1, 22, 1, 157, 1, 1, 181, 22, 0, 0, - 1, 19, 103, 1, 1, 181, 1, 41, 1, 1, 54, 1, 1, 1, 182, 0, - 1, 1, 1, 76, 1, 22, 54, 0, 183, 1, 1, 184, 1, 185, 1, 1, - 1, 2, 149, 0, 0, 0, 1, 186, 1, 187, 1, 58, 0, 0, 0, 0, - 1, 1, 1, 188, 1, 124, 1, 1, 44, 189, 1, 144, 54, 105, 1, 1, - 1, 1, 0, 0, 1, 1, 190, 76, 1, 1, 1, 191, 1, 139, 1, 192, - 1, 193, 194, 0, 0, 0, 0, 0, 1, 1, 1, 1, 105, 0, 0, 0, - 1, 1, 1, 120, 1, 1, 1, 7, 0, 0, 0, 0, 0, 0, 1, 2, - 20, 1, 1, 54, 195, 124, 1, 0, 124, 1, 1, 196, 106, 1, 105, 103, - 29, 1, 197, 15, 144, 1, 1, 198, 124, 1, 1, 199, 61, 1, 8, 14, - 1, 6, 2, 200, 0, 0, 0, 0, 201, 157, 103, 1, 1, 2, 120, 103, - 51, 35, 36, 202, 203, 204, 144, 0, 1, 1, 1, 54, 205, 206, 0, 0, - 1, 1, 1, 207, 208, 103, 0, 0, 1, 1, 2, 209, 8, 41, 0, 0, - 1, 1, 1, 210, 62, 103, 88, 0, 1, 1, 211, 212, 103, 0, 0, 0, - 1, 103, 213, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 214, - 0, 0, 0, 0, 1, 1, 1, 105, 36, 1, 1, 11, 22, 1, 88, 1, - 1, 0, 215, 216, 0, 0, 0, 0, 1, 103, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 2, 14, 1, 1, 1, 1, 144, 0, 0, 0, - 1, 1, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 76, 0, 0, 0, - 1, 1, 1, 105, 1, 2, 158, 0, 0, 0, 0, 0, 0, 1, 19, 217, - 1, 1, 1, 149, 22, 143, 6, 218, 1, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 14, 1, 1, 2, 0, 29, 0, 0, 0, 0, 44, 0, - 1, 1, 1, 1, 1, 1, 88, 0, 1, 1, 1, 1, 1, 
1, 1, 120, - 106, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 13, 88, - 105, 219, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 22, - 1, 1, 9, 1, 1, 1, 220, 0, 221, 1, 158, 1, 1, 1, 105, 0, - 1, 1, 1, 1, 222, 0, 0, 0, 1, 1, 1, 1, 1, 76, 1, 106, - 1, 1, 1, 1, 1, 134, 1, 1, 1, 3, 223, 30, 224, 1, 1, 1, - 225, 226, 1, 227, 228, 20, 1, 1, 1, 1, 139, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 166, 1, 1, 1, 0, 0, 0, 229, 0, 0, 21, 134, - 230, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 111, 0, 0, 0, - 1, 1, 1, 1, 144, 158, 0, 0, 224, 1, 231, 232, 233, 234, 235, 236, - 143, 41, 237, 41, 0, 0, 0, 106, 1, 1, 41, 1, 1, 1, 1, 1, - 1, 144, 2, 8, 8, 8, 1, 22, 88, 1, 2, 1, 1, 1, 41, 1, - 1, 1, 88, 0, 0, 0, 15, 1, 120, 1, 1, 41, 105, 106, 0, 0, - 1, 1, 1, 1, 1, 120, 88, 76, 1, 1, 1, 1, 1, 1, 1, 144, - 1, 1, 1, 1, 1, 14, 0, 0, 41, 1, 1, 1, 54, 103, 1, 1, - 54, 1, 19, 0, 0, 0, 0, 0, 0, 2, 54, 238, 41, 2, 0, 0, - 1, 106, 0, 0, 44, 0, 0, 0, 1, 1, 1, 1, 1, 76, 0, 0, - 1, 1, 1, 14, 1, 1, 1, 1, 1, 19, 1, 1, 1, 1, 1, 1, - 1, 1, 106, 0, 0, 0, 0, 0, 1, 19, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_grapheme_base_stage_5[] = { - 0, 0, 255, 255, 255, 127, 255, 223, 255, 252, 240, 215, 251, 255, 7, 252, - 254, 255, 127, 254, 255, 230, 0, 64, 73, 0, 255, 7, 31, 0, 192, 255, - 0, 200, 63, 64, 96, 194, 255, 63, 253, 255, 0, 224, 63, 0, 2, 0, - 240, 7, 63, 4, 16, 1, 255, 65, 223, 63, 248, 255, 255, 235, 1, 222, - 1, 255, 243, 255, 237, 159, 249, 255, 255, 253, 197, 163, 129, 89, 0, 176, - 195, 255, 255, 15, 232, 135, 109, 195, 1, 0, 0, 94, 28, 0, 232, 191, - 237, 227, 1, 26, 3, 2, 236, 159, 237, 35, 129, 25, 255, 0, 232, 199, - 61, 214, 24, 199, 255, 131, 198, 29, 238, 223, 255, 35, 30, 0, 0, 7, - 0, 255, 237, 223, 239, 99, 155, 13, 6, 0, 236, 223, 255, 167, 193, 221, - 112, 255, 236, 255, 127, 252, 251, 47, 127, 0, 3, 127, 13, 128, 127, 128, - 150, 37, 240, 254, 174, 236, 13, 32, 95, 0, 255, 243, 95, 253, 255, 254, - 255, 31, 0, 128, 32, 31, 0, 192, 191, 223, 2, 153, 255, 60, 225, 255, - 155, 223, 191, 32, 255, 61, 127, 61, 61, 
127, 61, 255, 127, 255, 255, 3, - 63, 63, 255, 1, 3, 0, 99, 0, 79, 192, 191, 1, 240, 31, 159, 255, - 255, 5, 120, 14, 251, 1, 241, 255, 255, 199, 127, 198, 191, 0, 26, 224, - 7, 0, 240, 255, 47, 232, 251, 15, 252, 255, 195, 196, 191, 92, 12, 240, - 48, 248, 255, 227, 8, 0, 2, 222, 111, 0, 255, 170, 223, 255, 207, 239, - 220, 127, 255, 128, 207, 255, 63, 255, 0, 240, 12, 254, 127, 127, 255, 251, - 15, 0, 127, 248, 224, 255, 8, 192, 252, 0, 128, 255, 187, 247, 159, 15, - 15, 192, 252, 63, 63, 192, 12, 128, 55, 236, 255, 191, 255, 195, 255, 129, - 25, 0, 247, 47, 255, 239, 98, 62, 5, 0, 0, 248, 255, 207, 126, 126, - 126, 0, 223, 30, 248, 160, 127, 95, 219, 255, 247, 255, 127, 15, 252, 252, - 252, 28, 0, 48, 255, 183, 135, 255, 143, 255, 15, 255, 15, 128, 63, 253, - 191, 145, 191, 255, 55, 248, 255, 143, 255, 240, 239, 254, 31, 248, 63, 254, - 7, 255, 3, 30, 0, 254, 128, 63, 135, 217, 127, 16, 119, 0, 63, 128, - 44, 63, 127, 189, 237, 163, 158, 57, 1, 224, 163, 255, 255, 43, 6, 90, - 242, 0, 3, 79, 7, 88, 255, 215, 64, 0, 67, 0, 7, 128, 0, 2, - 18, 0, 32, 0, 255, 224, 255, 147, 95, 60, 24, 240, 35, 0, 100, 222, - 239, 255, 191, 231, 223, 223, 255, 123, 95, 252, 128, 7, 239, 15, 150, 254, - 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, 238, 251, 249, 127, -}; - -/* Grapheme_Base: 2616 bytes. */ - -RE_UINT32 re_get_grapheme_base(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_grapheme_base_stage_1[f] << 5; - f = code >> 10; - code ^= f << 10; - pos = (RE_UINT32)re_grapheme_base_stage_2[pos + f] << 3; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_grapheme_base_stage_3[pos + f] << 3; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_grapheme_base_stage_4[pos + f] << 4; - pos += code; - value = (re_grapheme_base_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Grapheme_Link. 
*/ - -static RE_UINT8 re_grapheme_link_stage_1[] = { - 0, 1, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, -}; - -static RE_UINT8 re_grapheme_link_stage_2[] = { - 0, 0, 1, 2, 3, 4, 5, 0, 0, 0, 0, 6, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, - 0, 0, 8, 0, 9, 10, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_grapheme_link_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 2, 3, 0, 0, 4, 5, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 6, 7, 0, 0, 0, 0, 8, 0, 9, 10, - 0, 0, 11, 0, 0, 0, 0, 0, 12, 9, 13, 14, 0, 15, 0, 16, - 0, 0, 0, 0, 17, 0, 0, 0, 18, 19, 20, 14, 21, 22, 1, 0, - 23, 23, 0, 17, 17, 24, 25, 0, 17, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_grapheme_link_stage_4[] = { - 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 3, 0, 0, - 4, 0, 0, 0, 0, 5, 0, 0, 6, 6, 0, 0, 0, 0, 7, 0, - 0, 0, 0, 8, 0, 0, 4, 0, 0, 9, 0, 10, 0, 0, 0, 11, - 12, 0, 0, 0, 0, 0, 13, 0, 0, 0, 8, 0, 0, 0, 0, 14, - 0, 0, 0, 1, 0, 11, 0, 0, 0, 0, 12, 11, 0, 15, 0, 0, - 0, 16, 0, 0, 0, 17, 0, 0, 0, 0, 0, 2, 0, 0, 18, 0, - 0, 14, 0, 0, 0, 19, 0, 0, -}; - -static RE_UINT8 re_grapheme_link_stage_5[] = { - 0, 0, 0, 0, 0, 32, 0, 0, 0, 4, 0, 0, 0, 0, 0, 4, - 16, 0, 0, 0, 0, 0, 0, 6, 0, 0, 16, 0, 0, 0, 4, 0, - 1, 0, 0, 0, 0, 12, 0, 0, 0, 0, 12, 0, 0, 0, 0, 128, - 64, 0, 0, 0, 0, 0, 8, 0, 0, 0, 64, 0, 0, 0, 0, 2, - 0, 0, 24, 0, 0, 0, 32, 0, 4, 0, 0, 0, 0, 8, 0, 0, -}; - -/* Grapheme_Link: 412 bytes. 
*/ - -RE_UINT32 re_get_grapheme_link(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 14; - code = ch ^ (f << 14); - pos = (RE_UINT32)re_grapheme_link_stage_1[f] << 4; - f = code >> 10; - code ^= f << 10; - pos = (RE_UINT32)re_grapheme_link_stage_2[pos + f] << 3; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_grapheme_link_stage_3[pos + f] << 2; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_grapheme_link_stage_4[pos + f] << 5; - pos += code; - value = (re_grapheme_link_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* White_Space. */ - -static RE_UINT8 re_white_space_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_white_space_stage_2[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_white_space_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, - 3, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_white_space_stage_4[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 3, 1, 1, 1, 1, 1, 4, 5, 1, 1, 1, 1, 1, 1, - 3, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_white_space_stage_5[] = { - 0, 62, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 32, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 255, 7, 0, 0, 0, 131, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, -}; - -/* White_Space: 169 bytes. 
*/ - -RE_UINT32 re_get_white_space(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_white_space_stage_1[f] << 3; - f = code >> 13; - code ^= f << 13; - pos = (RE_UINT32)re_white_space_stage_2[pos + f] << 4; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_white_space_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_white_space_stage_4[pos + f] << 6; - pos += code; - value = (re_white_space_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Bidi_Control. */ - -static RE_UINT8 re_bidi_control_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_bidi_control_stage_2[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_bidi_control_stage_3[] = { - 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_bidi_control_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 2, 3, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_bidi_control_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, - 0, 192, 0, 0, 0, 124, 0, 0, 0, 0, 0, 0, 192, 3, 0, 0, -}; - -/* Bidi_Control: 129 bytes. */ - -RE_UINT32 re_get_bidi_control(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_bidi_control_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_bidi_control_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_bidi_control_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_bidi_control_stage_4[pos + f] << 6; - pos += code; - value = (re_bidi_control_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Join_Control. 
*/ - -static RE_UINT8 re_join_control_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_join_control_stage_2[] = { - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_join_control_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_join_control_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_join_control_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, -}; - -/* Join_Control: 97 bytes. */ - -RE_UINT32 re_get_join_control(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_join_control_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_join_control_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_join_control_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_join_control_stage_4[pos + f] << 6; - pos += code; - value = (re_join_control_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Dash. 
*/ - -static RE_UINT8 re_dash_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_dash_stage_2[] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -}; - -static RE_UINT8 re_dash_stage_3[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 3, 1, 4, 1, 1, 1, - 5, 6, 1, 1, 1, 1, 1, 7, 8, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, -}; - -static RE_UINT8 re_dash_stage_4[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1, - 4, 1, 1, 1, 1, 1, 1, 1, 5, 6, 7, 1, 1, 1, 1, 1, - 8, 1, 1, 1, 1, 1, 1, 1, 9, 3, 1, 1, 1, 1, 1, 1, - 10, 1, 11, 1, 1, 1, 1, 1, 12, 13, 1, 1, 14, 1, 1, 1, -}; - -static RE_UINT8 re_dash_stage_5[] = { - 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 4, 0, 0, 0, 0, 0, 64, 1, 0, 0, 0, 0, 0, 0, 0, - 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, 0, 0, - 0, 0, 8, 0, 0, 0, 0, 8, 0, 8, 0, 0, 0, 0, 0, 0, - 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 128, 4, 0, 0, 0, 12, - 0, 0, 0, 16, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 1, 8, 0, 0, 0, - 0, 32, 0, 0, 0, 0, 0, 0, -}; - -/* Dash: 297 bytes. */ - -RE_UINT32 re_get_dash(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_dash_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_dash_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_dash_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_dash_stage_4[pos + f] << 6; - pos += code; - value = (re_dash_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Hyphen. 
*/ - -static RE_UINT8 re_hyphen_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_hyphen_stage_2[] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -}; - -static RE_UINT8 re_hyphen_stage_3[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, - 4, 1, 1, 1, 1, 1, 1, 5, 6, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, -}; - -static RE_UINT8 re_hyphen_stage_4[] = { - 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1, - 4, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 6, 1, 1, 1, 1, 1, 7, 1, 1, 8, 9, 1, 1, -}; - -static RE_UINT8 re_hyphen_stage_5[] = { - 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 4, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, - 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, -}; - -/* Hyphen: 241 bytes. */ - -RE_UINT32 re_get_hyphen(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_hyphen_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_hyphen_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_hyphen_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_hyphen_stage_4[pos + f] << 6; - pos += code; - value = (re_hyphen_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Quotation_Mark. 
*/ - -static RE_UINT8 re_quotation_mark_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_quotation_mark_stage_2[] = { - 0, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_quotation_mark_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 1, 1, 1, 1, 1, 1, 3, 4, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 5, -}; - -static RE_UINT8 re_quotation_mark_stage_4[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 3, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, - 5, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 1, 7, 8, 1, 1, -}; - -static RE_UINT8 re_quotation_mark_stage_5[] = { - 0, 0, 0, 0, 132, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 8, 0, 8, 0, 0, 0, 255, 0, 0, 0, 6, - 4, 0, 0, 0, 0, 0, 0, 0, 0, 240, 0, 224, 0, 0, 0, 0, - 30, 0, 0, 0, 0, 0, 0, 0, 132, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 12, 0, 0, 0, -}; - -/* Quotation_Mark: 209 bytes. */ - -RE_UINT32 re_get_quotation_mark(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_quotation_mark_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_quotation_mark_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_quotation_mark_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_quotation_mark_stage_4[pos + f] << 6; - pos += code; - value = (re_quotation_mark_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Terminal_Punctuation. 
*/ - -static RE_UINT8 re_terminal_punctuation_stage_1[] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, -}; - -static RE_UINT8 re_terminal_punctuation_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 11, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 12, 13, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 14, - 15, 9, 16, 9, 17, 18, 9, 19, 9, 20, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 21, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 22, - 9, 9, 9, 9, 9, 9, 23, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, -}; - -static RE_UINT8 re_terminal_punctuation_stage_3[] = { - 0, 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 4, 5, 6, 7, 8, - 9, 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 11, 1, 12, 1, - 13, 1, 1, 1, 1, 1, 14, 1, 1, 1, 1, 1, 15, 16, 17, 18, - 19, 1, 20, 1, 1, 21, 22, 1, 23, 1, 1, 1, 1, 1, 1, 1, - 24, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 25, 1, 1, 1, 26, 1, 1, 1, 1, 1, 1, 1, - 1, 27, 1, 1, 28, 29, 1, 1, 30, 31, 32, 33, 34, 35, 1, 36, - 1, 1, 1, 1, 37, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 39, - 40, 1, 41, 1, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 1, 1, - 52, 1, 1, 53, 54, 1, 55, 1, 56, 1, 1, 1, 1, 1, 1, 1, - 57, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 58, 59, 60, 1, - 1, 41, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 61, 1, 1, -}; - -static RE_UINT8 re_terminal_punctuation_stage_4[] = { - 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, - 4, 0, 5, 0, 6, 0, 0, 0, 0, 0, 7, 0, 8, 0, 0, 0, - 0, 0, 0, 9, 0, 10, 2, 0, 0, 0, 0, 11, 0, 0, 12, 0, - 13, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 15, 0, 0, 0, 16, - 0, 0, 0, 17, 0, 18, 0, 0, 0, 0, 19, 0, 20, 0, 0, 0, - 0, 0, 11, 0, 0, 21, 0, 0, 0, 0, 22, 0, 0, 23, 0, 24, - 0, 25, 26, 0, 0, 27, 28, 0, 29, 0, 0, 0, 0, 0, 0, 24, - 30, 0, 0, 0, 0, 0, 0, 31, 0, 0, 0, 32, 0, 0, 33, 0, - 0, 34, 0, 0, 0, 0, 26, 0, 0, 0, 35, 0, 0, 0, 36, 37, 
- 0, 0, 0, 38, 0, 0, 39, 0, 1, 0, 0, 40, 36, 0, 41, 0, - 0, 0, 42, 0, 36, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 43, - 0, 44, 0, 0, 45, 0, 0, 0, 0, 0, 46, 0, 0, 24, 47, 0, - 0, 0, 48, 0, 0, 0, 49, 0, 0, 50, 0, 0, 0, 4, 0, 0, - 0, 0, 51, 0, 0, 0, 52, 0, 0, 0, 29, 0, 0, 53, 0, 0, - 0, 0, 48, 54, 0, 0, 0, 55, 0, 0, 0, 33, 0, 0, 0, 56, - 0, 57, 58, 0, 59, 0, 0, 0, -}; - -static RE_UINT8 re_terminal_punctuation_stage_5[] = { - 0, 0, 0, 0, 2, 80, 0, 140, 0, 0, 0, 64, 128, 0, 0, 0, - 0, 2, 0, 0, 8, 0, 0, 0, 0, 16, 0, 136, 0, 0, 16, 0, - 255, 23, 0, 0, 0, 0, 0, 3, 0, 0, 255, 127, 48, 0, 0, 0, - 0, 0, 0, 12, 0, 225, 7, 0, 0, 12, 0, 0, 254, 1, 0, 0, - 0, 96, 0, 0, 0, 56, 0, 0, 0, 0, 96, 0, 0, 0, 112, 4, - 60, 3, 0, 0, 0, 15, 0, 0, 0, 0, 0, 236, 0, 0, 0, 248, - 0, 0, 0, 192, 0, 0, 0, 48, 128, 3, 0, 0, 0, 64, 0, 16, - 2, 0, 0, 0, 6, 0, 0, 0, 0, 224, 0, 0, 0, 0, 248, 0, - 0, 0, 192, 0, 0, 192, 0, 0, 0, 128, 0, 0, 0, 0, 0, 224, - 0, 0, 0, 128, 0, 0, 3, 0, 0, 8, 0, 0, 0, 0, 247, 0, - 18, 0, 0, 0, 0, 0, 1, 0, 0, 0, 128, 0, 0, 0, 63, 0, - 0, 0, 0, 252, 0, 0, 0, 30, 128, 63, 0, 0, 3, 0, 0, 0, - 14, 0, 0, 0, 96, 32, 0, 192, 0, 0, 0, 31, 0, 56, 0, 8, - 60, 254, 255, 0, 0, 0, 0, 112, 0, 0, 2, 0, 0, 0, 31, 0, - 0, 0, 32, 0, 0, 0, 128, 3, 16, 0, 0, 0, 128, 7, 0, 0, -}; - -/* Terminal_Punctuation: 874 bytes. */ - -RE_UINT32 re_get_terminal_punctuation(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_terminal_punctuation_stage_1[f] << 5; - f = code >> 10; - code ^= f << 10; - pos = (RE_UINT32)re_terminal_punctuation_stage_2[pos + f] << 3; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_terminal_punctuation_stage_3[pos + f] << 2; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_terminal_punctuation_stage_4[pos + f] << 5; - pos += code; - value = (re_terminal_punctuation_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Other_Math. 
*/ - -static RE_UINT8 re_other_math_stage_1[] = { - 0, 1, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, -}; - -static RE_UINT8 re_other_math_stage_2[] = { - 0, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 6, 1, 1, -}; - -static RE_UINT8 re_other_math_stage_3[] = { - 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 3, 4, 1, 5, 1, 6, 7, 8, 1, 9, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 10, 11, 1, 1, 1, 1, 12, 13, 14, 15, - 1, 1, 1, 1, 1, 1, 16, 1, -}; - -static RE_UINT8 re_other_math_stage_4[] = { - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 7, 8, 0, 9, 10, - 11, 12, 13, 0, 14, 15, 16, 17, 18, 0, 0, 0, 0, 19, 20, 21, - 0, 0, 0, 0, 0, 22, 23, 24, 25, 0, 26, 27, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 25, 28, 0, 0, 0, 0, 29, 0, 30, 31, - 0, 0, 0, 32, 0, 0, 0, 0, 0, 33, 0, 0, 0, 0, 0, 0, - 34, 34, 35, 34, 36, 37, 38, 34, 39, 40, 41, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 42, 43, 44, 35, 35, 45, 45, 46, 46, 47, 34, - 38, 48, 49, 50, 51, 52, 0, 0, -}; - -static RE_UINT8 re_other_math_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 39, 0, 0, 0, 51, 0, - 0, 0, 64, 0, 0, 0, 28, 0, 1, 0, 0, 0, 30, 0, 0, 96, - 0, 96, 0, 0, 0, 0, 255, 31, 98, 248, 0, 0, 132, 252, 47, 62, - 16, 179, 251, 241, 224, 3, 0, 0, 0, 0, 224, 243, 182, 62, 195, 240, - 255, 63, 235, 47, 48, 0, 0, 0, 0, 15, 0, 0, 0, 0, 176, 0, - 0, 0, 1, 0, 4, 0, 0, 0, 3, 192, 127, 240, 193, 140, 15, 0, - 148, 31, 0, 0, 96, 0, 0, 0, 5, 0, 0, 0, 15, 96, 0, 0, - 192, 255, 0, 0, 248, 255, 255, 1, 0, 0, 0, 15, 0, 0, 0, 48, - 10, 1, 0, 0, 0, 0, 0, 80, 255, 255, 255, 255, 255, 255, 223, 255, - 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, - 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, - 255, 255, 255, 247, 255, 127, 255, 255, 255, 253, 255, 
255, 247, 207, 255, 255, - 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, - 238, 251, 255, 15, -}; - -/* Other_Math: 502 bytes. */ - -RE_UINT32 re_get_other_math(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_other_math_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_other_math_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_other_math_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_other_math_stage_4[pos + f] << 5; - pos += code; - value = (re_other_math_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Hex_Digit. */ - -static RE_UINT8 re_hex_digit_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_hex_digit_stage_2[] = { - 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_hex_digit_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 2, -}; - -static RE_UINT8 re_hex_digit_stage_4[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 2, 1, -}; - -static RE_UINT8 re_hex_digit_stage_5[] = { - 0, 0, 0, 0, 0, 0, 255, 3, 126, 0, 0, 0, 126, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 255, 3, 126, 0, 0, 0, 126, 0, 0, 0, 0, 0, 0, 0, -}; - -/* Hex_Digit: 129 bytes. 
*/ - -RE_UINT32 re_get_hex_digit(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_hex_digit_stage_1[f] << 3; - f = code >> 13; - code ^= f << 13; - pos = (RE_UINT32)re_hex_digit_stage_2[pos + f] << 3; - f = code >> 10; - code ^= f << 10; - pos = (RE_UINT32)re_hex_digit_stage_3[pos + f] << 3; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_hex_digit_stage_4[pos + f] << 7; - pos += code; - value = (re_hex_digit_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* ASCII_Hex_Digit. */ - -static RE_UINT8 re_ascii_hex_digit_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_ascii_hex_digit_stage_2[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_ascii_hex_digit_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_ascii_hex_digit_stage_4[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_ascii_hex_digit_stage_5[] = { - 0, 0, 0, 0, 0, 0, 255, 3, 126, 0, 0, 0, 126, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -/* ASCII_Hex_Digit: 97 bytes. */ - -RE_UINT32 re_get_ascii_hex_digit(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_ascii_hex_digit_stage_1[f] << 3; - f = code >> 13; - code ^= f << 13; - pos = (RE_UINT32)re_ascii_hex_digit_stage_2[pos + f] << 3; - f = code >> 10; - code ^= f << 10; - pos = (RE_UINT32)re_ascii_hex_digit_stage_3[pos + f] << 3; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_ascii_hex_digit_stage_4[pos + f] << 7; - pos += code; - value = (re_ascii_hex_digit_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Other_Alphabetic. 
*/ - -static RE_UINT8 re_other_alphabetic_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_other_alphabetic_stage_2[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 5, - 6, 7, 3, 3, 3, 3, 8, 3, 3, 3, 3, 9, 3, 3, 10, 11, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -}; - -static RE_UINT8 re_other_alphabetic_stage_3[] = { - 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 0, 0, 14, 0, 0, 0, 15, 16, 17, 18, 19, 20, 21, 0, 0, - 0, 0, 0, 0, 22, 0, 0, 0, 0, 0, 0, 0, 0, 23, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 24, 0, 25, 26, 27, 28, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29, 0, 0, 0, 0, - 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, - 32, 33, 34, 35, 36, 37, 38, 39, 0, 0, 0, 0, 40, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 0, 0, 42, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43, 0, 0, 0, - 44, 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 0, 0, 0, 0, 0, - 0, 46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_other_alphabetic_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 3, 0, 4, 0, 5, 6, 0, 0, 7, 8, - 9, 10, 0, 0, 0, 11, 0, 0, 12, 13, 0, 0, 0, 0, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 19, 20, 21, 23, 24, 20, 21, 25, 19, - 20, 21, 26, 19, 27, 21, 28, 0, 16, 21, 29, 19, 20, 21, 29, 19, - 20, 21, 30, 19, 19, 0, 31, 32, 0, 33, 34, 0, 0, 35, 34, 0, - 0, 0, 0, 36, 37, 38, 0, 0, 0, 39, 40, 41, 42, 0, 0, 0, - 0, 0, 43, 0, 0, 0, 0, 0, 32, 32, 32, 32, 0, 44, 45, 0, - 0, 0, 0, 0, 46, 47, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, - 49, 0, 50, 51, 0, 0, 0, 0, 52, 53, 16, 0, 54, 55, 0, 56, - 0, 57, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 58, - 0, 0, 0, 0, 0, 44, 59, 60, 0, 0, 0, 0, 0, 0, 0, 59, - 0, 0, 0, 61, 21, 0, 0, 0, 0, 62, 0, 0, 63, 14, 64, 0, - 0, 65, 66, 0, 16, 14, 0, 0, 0, 67, 68, 0, 0, 69, 0, 70, - 0, 0, 0, 0, 0, 0, 0, 71, 72, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 73, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, - 54, 75, 76, 0, 27, 77, 0, 
0, 54, 66, 0, 0, 54, 78, 0, 0, - 0, 79, 0, 0, 0, 0, 43, 45, 16, 21, 22, 19, 0, 0, 0, 0, - 0, 53, 80, 0, 0, 10, 63, 0, 0, 0, 0, 0, 0, 81, 82, 0, - 0, 83, 84, 0, 0, 85, 0, 0, 86, 87, 0, 0, 0, 0, 0, 0, - 0, 88, 0, 0, 89, 90, 0, 0, 0, 91, 0, 0, 0, 0, 0, 0, - 0, 0, 36, 92, 0, 0, 0, 0, 0, 0, 0, 0, 72, 0, 0, 0, - 93, 94, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 0, 0, 0, 0, - 0, 10, 96, 96, 60, 0, 0, 0, -}; - -static RE_UINT8 re_other_alphabetic_stage_5[] = { - 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 255, 191, 182, 0, 0, 0, - 0, 0, 255, 7, 0, 248, 255, 254, 0, 0, 1, 0, 0, 0, 192, 31, - 158, 33, 0, 0, 0, 0, 2, 0, 0, 0, 255, 255, 192, 255, 1, 0, - 0, 0, 192, 248, 239, 30, 0, 0, 0, 0, 240, 255, 248, 3, 255, 255, - 15, 0, 0, 0, 0, 0, 0, 204, 255, 223, 224, 0, 12, 0, 0, 0, - 14, 0, 0, 0, 0, 0, 0, 192, 159, 25, 128, 0, 135, 25, 2, 0, - 0, 0, 35, 0, 191, 27, 0, 0, 159, 25, 192, 0, 4, 0, 0, 0, - 199, 29, 128, 0, 223, 29, 96, 0, 223, 29, 128, 0, 0, 128, 95, 255, - 0, 0, 12, 0, 0, 0, 242, 7, 0, 32, 0, 0, 0, 0, 242, 27, - 0, 0, 254, 255, 3, 224, 255, 254, 255, 255, 255, 31, 0, 248, 127, 121, - 0, 0, 192, 195, 133, 1, 30, 0, 124, 0, 0, 48, 0, 0, 0, 128, - 0, 0, 192, 255, 255, 1, 0, 0, 96, 0, 0, 0, 0, 2, 0, 0, - 255, 15, 255, 1, 0, 0, 128, 15, 0, 0, 224, 127, 254, 255, 31, 0, - 31, 0, 0, 0, 0, 0, 224, 255, 7, 0, 0, 0, 254, 51, 0, 0, - 128, 255, 3, 0, 240, 255, 63, 0, 128, 255, 31, 0, 255, 255, 255, 255, - 255, 3, 0, 0, 0, 0, 240, 15, 248, 0, 0, 0, 3, 0, 0, 0, - 47, 0, 0, 0, 192, 7, 0, 0, 128, 255, 7, 0, 0, 254, 127, 0, - 8, 48, 0, 0, 0, 0, 157, 65, 0, 248, 32, 0, 248, 7, 0, 0, - 0, 0, 0, 64, 0, 0, 192, 7, 110, 240, 0, 0, 0, 0, 0, 255, - 63, 0, 0, 0, 0, 0, 255, 1, 0, 0, 248, 255, 0, 240, 159, 64, - 59, 0, 0, 0, 0, 128, 63, 127, 0, 0, 0, 48, 0, 0, 255, 127, - 1, 0, 0, 0, 0, 248, 63, 0, 0, 0, 0, 224, 255, 7, 0, 0, - 0, 128, 127, 127, 0, 0, 252, 255, 255, 254, 127, 0, 0, 0, 127, 0, - 255, 255, 255, 127, 127, 255, 255, 249, 219, 7, 0, 0, 128, 0, 0, 0, - 255, 3, 255, 255, -}; - -/* Other_Alphabetic: 1021 
bytes. */ - -RE_UINT32 re_get_other_alphabetic(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_other_alphabetic_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_other_alphabetic_stage_2[pos + f] << 4; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_other_alphabetic_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_other_alphabetic_stage_4[pos + f] << 5; - pos += code; - value = (re_other_alphabetic_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Ideographic. */ - -static RE_UINT8 re_ideographic_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, -}; - -static RE_UINT8 re_ideographic_stage_2[] = { - 0, 0, 0, 1, 2, 3, 3, 3, 3, 4, 0, 0, 0, 0, 0, 5, - 0, 0, 0, 0, 0, 0, 0, 3, 6, 0, 0, 0, 0, 0, 0, 0, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 8, 9, 0, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_ideographic_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 4, 0, 0, 0, 0, 5, 6, 0, 0, - 2, 2, 2, 7, 2, 8, 0, 0, 2, 2, 2, 9, 2, 2, 2, 2, - 2, 2, 2, 10, 11, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 12, - 0, 0, 0, 0, 2, 13, 0, 0, -}; - -static RE_UINT8 re_ideographic_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 0, - 2, 2, 2, 2, 2, 2, 2, 4, 0, 0, 0, 0, 2, 2, 2, 2, - 2, 5, 2, 6, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 7, - 2, 2, 2, 8, 0, 0, 0, 0, 2, 2, 2, 9, 2, 2, 2, 2, - 2, 2, 2, 2, 10, 2, 2, 2, 11, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 12, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_ideographic_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 192, 0, 0, 0, 254, 3, 0, 7, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 63, 0, - 255, 255, 63, 0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 63, 255, 
255, - 255, 255, 255, 3, 0, 0, 0, 0, 255, 255, 255, 255, 255, 31, 0, 0, - 255, 255, 255, 255, 255, 255, 7, 0, 255, 255, 127, 0, 0, 0, 0, 0, - 255, 255, 255, 255, 255, 255, 31, 0, 255, 255, 255, 63, 255, 255, 255, 255, - 255, 255, 255, 255, 3, 0, 0, 0, 255, 255, 255, 63, 0, 0, 0, 0, -}; - -/* Ideographic: 393 bytes. */ - -RE_UINT32 re_get_ideographic(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_ideographic_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_ideographic_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_ideographic_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_ideographic_stage_4[pos + f] << 6; - pos += code; - value = (re_ideographic_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Diacritic. */ - -static RE_UINT8 re_diacritic_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_diacritic_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 7, 8, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, - 10, 11, 12, 13, 4, 4, 4, 4, 4, 4, 4, 4, 4, 14, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 15, 4, 4, 16, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -}; - -static RE_UINT8 re_diacritic_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 1, 1, 1, 1, 1, 17, 1, 18, 19, 20, 21, 22, 1, 23, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 24, 1, 25, 1, - 26, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 28, - 29, 30, 31, 32, 1, 1, 1, 1, 1, 1, 1, 33, 1, 1, 34, 35, - 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 37, 1, 1, 1, 1, 1, - 38, 39, 40, 41, 42, 43, 44, 45, 1, 1, 1, 1, 46, 1, 1, 1, - 1, 1, 47, 1, 1, 1, 1, 48, 1, 49, 1, 1, 1, 1, 1, 1, - 50, 51, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_diacritic_stage_4[] = { - 0, 0, 1, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, - 0, 0, 0, 0, 0, 4, 5, 5, 5, 5, 6, 7, 8, 0, 0, 0, - 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 10, 0, 11, 12, 13, 0, - 0, 0, 14, 0, 0, 0, 15, 16, 0, 4, 17, 0, 0, 18, 0, 19, - 20, 0, 0, 0, 0, 0, 0, 21, 0, 22, 23, 24, 0, 22, 25, 0, - 0, 22, 25, 0, 0, 22, 25, 0, 0, 22, 25, 0, 0, 0, 25, 0, - 0, 0, 25, 0, 0, 22, 25, 0, 0, 0, 25, 0, 0, 0, 26, 0, - 0, 0, 27, 0, 0, 0, 28, 0, 20, 29, 0, 0, 30, 0, 31, 0, - 0, 32, 0, 0, 33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34, 0, - 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 0, 37, 0, 0, - 0, 38, 39, 40, 0, 41, 0, 0, 0, 42, 0, 43, 0, 0, 4, 44, - 0, 45, 5, 17, 0, 0, 46, 47, 0, 0, 0, 0, 0, 48, 49, 50, - 0, 0, 0, 0, 0, 0, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, - 0, 53, 0, 0, 54, 0, 0, 22, 0, 0, 0, 55, 56, 0, 0, 57, - 58, 59, 0, 0, 60, 0, 0, 20, 0, 0, 0, 0, 0, 0, 39, 61, - 0, 62, 63, 0, 0, 63, 2, 64, 0, 0, 0, 65, 0, 15, 66, 67, - 0, 0, 68, 0, 0, 0, 0, 69, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 70, 0, 0, 0, 0, 0, 0, 0, 1, 2, 71, 72, 0, 0, 73, - 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 74, - 0, 0, 0, 0, 0, 75, 0, 0, 0, 76, 0, 63, 0, 0, 77, 0, - 0, 78, 0, 0, 0, 0, 0, 79, 0, 22, 25, 80, 0, 0, 0, 0, - 0, 0, 81, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 15, 2, 0, - 0, 15, 0, 0, 0, 42, 0, 0, 0, 83, 0, 0, 0, 0, 0, 0, - 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 84, - 0, 0, 0, 0, 85, 0, 0, 0, 0, 0, 0, 86, 87, 88, 0, 0, - 0, 0, 0, 0, 0, 0, 89, 0, 0, 0, 90, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_diacritic_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 64, 1, 0, 0, 0, 0, 129, 144, 1, - 0, 0, 255, 255, 255, 255, 255, 255, 255, 127, 255, 224, 7, 0, 48, 4, - 48, 0, 0, 0, 248, 0, 0, 0, 0, 0, 0, 2, 0, 0, 254, 255, - 251, 255, 255, 191, 22, 0, 0, 0, 0, 248, 135, 1, 0, 0, 0, 128, - 97, 28, 0, 0, 255, 7, 0, 0, 192, 255, 1, 0, 0, 248, 63, 0, - 0, 0, 0, 3, 248, 255, 255, 127, 0, 0, 0, 16, 0, 32, 30, 0, - 0, 0, 2, 0, 0, 32, 0, 0, 0, 4, 0, 0, 128, 95, 0, 0, - 0, 31, 0, 0, 0, 0, 160, 194, 220, 0, 0, 0, 64, 0, 0, 0, - 0, 0, 128, 6, 128, 191, 0, 12, 0, 254, 15, 32, 0, 0, 0, 14, - 0, 0, 224, 159, 0, 0, 255, 63, 0, 0, 
16, 0, 16, 0, 0, 0, - 0, 248, 15, 0, 0, 12, 0, 0, 0, 0, 192, 0, 0, 0, 0, 63, - 255, 33, 16, 3, 0, 240, 255, 255, 240, 255, 0, 0, 0, 0, 32, 224, - 0, 0, 0, 160, 3, 224, 0, 224, 0, 224, 0, 96, 0, 128, 3, 0, - 0, 128, 0, 0, 0, 252, 0, 0, 0, 0, 0, 30, 0, 128, 0, 176, - 0, 0, 0, 48, 0, 0, 3, 0, 0, 0, 128, 255, 3, 0, 0, 0, - 0, 1, 0, 0, 255, 255, 3, 0, 0, 120, 0, 0, 0, 0, 8, 0, - 32, 0, 0, 0, 0, 0, 0, 56, 7, 0, 0, 0, 0, 0, 64, 0, - 0, 0, 0, 248, 0, 48, 0, 0, 255, 255, 0, 0, 0, 0, 1, 0, - 0, 0, 0, 192, 8, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 6, - 0, 0, 24, 0, 1, 28, 0, 0, 0, 0, 96, 0, 0, 6, 0, 0, - 192, 31, 31, 0, 68, 0, 0, 0, 12, 0, 0, 0, 0, 8, 0, 0, - 0, 0, 31, 0, 0, 128, 255, 255, 128, 227, 7, 248, 231, 15, 0, 0, - 0, 60, 0, 0, 0, 0, 127, 0, 112, 7, 0, 0, -}; - -/* Diacritic: 1029 bytes. */ - -RE_UINT32 re_get_diacritic(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_diacritic_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_diacritic_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_diacritic_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_diacritic_stage_4[pos + f] << 5; - pos += code; - value = (re_diacritic_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Extender. 
*/ - -static RE_UINT8 re_extender_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_extender_stage_2[] = { - 0, 1, 2, 3, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 5, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, - 2, 2, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_extender_stage_3[] = { - 0, 1, 2, 1, 1, 1, 3, 4, 1, 1, 1, 1, 1, 1, 5, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 7, 1, 8, 1, 1, 1, - 9, 1, 1, 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, 11, 1, - 1, 12, 13, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 14, - 1, 1, 1, 15, 1, 16, 1, 1, 1, 1, 1, 17, 1, 1, 1, 18, - 1, 19, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_extender_stage_4[] = { - 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 5, 0, 0, 0, 5, 0, - 6, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, - 0, 9, 0, 10, 0, 0, 0, 0, 11, 12, 0, 0, 13, 0, 0, 14, - 15, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 17, 5, 0, 0, 0, 18, 0, 0, 19, 20, - 0, 0, 0, 18, 0, 0, 0, 0, 0, 0, 19, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 21, 0, 0, 0, 22, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 23, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_extender_stage_5[] = { - 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 3, 0, 1, 0, 0, 0, - 0, 0, 0, 4, 64, 0, 0, 0, 0, 4, 0, 0, 8, 0, 0, 0, - 128, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 8, 32, 0, 0, 0, - 0, 0, 62, 0, 0, 0, 0, 96, 0, 0, 0, 112, 0, 0, 32, 0, - 0, 16, 0, 0, 0, 128, 0, 0, 0, 0, 1, 0, 0, 0, 0, 32, - 0, 0, 24, 0, 192, 1, 0, 0, 12, 0, 0, 0, 112, 0, 0, 0, -}; - -/* Extender: 457 bytes. 
*/ - -RE_UINT32 re_get_extender(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_extender_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_extender_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_extender_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_extender_stage_4[pos + f] << 5; - pos += code; - value = (re_extender_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Other_Lowercase. */ - -static RE_UINT8 re_other_lowercase_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_other_lowercase_stage_2[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -}; - -static RE_UINT8 re_other_lowercase_stage_3[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, - 4, 2, 5, 2, 2, 2, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 7, 2, 8, 2, 2, -}; - -static RE_UINT8 re_other_lowercase_stage_4[] = { - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 4, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, 0, - 0, 8, 9, 0, 0, 10, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, - 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 14, 0, 15, - 0, 0, 0, 0, 0, 16, 0, 0, -}; - -static RE_UINT8 re_other_lowercase_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 4, - 0, 0, 0, 0, 0, 0, 255, 1, 3, 0, 0, 0, 31, 0, 0, 0, - 32, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 240, 255, 255, - 255, 255, 255, 255, 255, 7, 0, 1, 0, 0, 0, 248, 255, 255, 255, 255, - 0, 0, 0, 0, 0, 0, 2, 128, 0, 0, 255, 31, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 255, 255, 255, 3, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 48, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 3, - 0, 0, 0, 240, 0, 0, 0, 0, -}; - -/* Other_Lowercase: 297 bytes. 
*/ - -RE_UINT32 re_get_other_lowercase(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_other_lowercase_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_other_lowercase_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_other_lowercase_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_other_lowercase_stage_4[pos + f] << 6; - pos += code; - value = (re_other_lowercase_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Other_Uppercase. */ - -static RE_UINT8 re_other_uppercase_stage_1[] = { - 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, -}; - -static RE_UINT8 re_other_uppercase_stage_2[] = { - 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -}; - -static RE_UINT8 re_other_uppercase_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, - 0, 3, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_other_uppercase_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 1, 0, 0, 3, 4, 4, 5, 0, 0, 0, -}; - -static RE_UINT8 re_other_uppercase_stage_5[] = { - 0, 0, 0, 0, 255, 255, 0, 0, 0, 0, 192, 255, 0, 0, 255, 255, - 255, 3, 255, 255, 255, 3, 0, 0, -}; - -/* Other_Uppercase: 162 bytes. 
*/ - -RE_UINT32 re_get_other_uppercase(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_other_uppercase_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_other_uppercase_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_other_uppercase_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_other_uppercase_stage_4[pos + f] << 5; - pos += code; - value = (re_other_uppercase_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Noncharacter_Code_Point. */ - -static RE_UINT8 re_noncharacter_code_point_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_noncharacter_code_point_stage_2[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -}; - -static RE_UINT8 re_noncharacter_code_point_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, - 0, 0, 0, 0, 0, 0, 0, 2, -}; - -static RE_UINT8 re_noncharacter_code_point_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 2, -}; - -static RE_UINT8 re_noncharacter_code_point_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 192, -}; - -/* Noncharacter_Code_Point: 121 bytes. 
*/ - -RE_UINT32 re_get_noncharacter_code_point(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_noncharacter_code_point_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_noncharacter_code_point_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_noncharacter_code_point_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_noncharacter_code_point_stage_4[pos + f] << 6; - pos += code; - value = (re_noncharacter_code_point_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Other_Grapheme_Extend. */ - -static RE_UINT8 re_other_grapheme_extend_stage_1[] = { - 0, 1, 1, 2, 3, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, -}; - -static RE_UINT8 re_other_grapheme_extend_stage_2[] = { - 0, 1, 0, 0, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 5, 0, 0, 0, 0, 0, - 0, 0, 6, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_other_grapheme_extend_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 3, 4, 0, 0, - 5, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 8, 9, 10, 0, 0, - 0, 11, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_other_grapheme_extend_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, - 0, 1, 2, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, - 0, 1, 2, 0, 0, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, - 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, - 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, - 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, - 0, 11, 11, 11, 0, 0, 0, 0, -}; - -static RE_UINT8 re_other_grapheme_extend_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 128, 0, 4, 0, 96, 0, - 0, 128, 0, 128, 0, 16, 0, 0, 0, 192, 0, 
0, 0, 0, 0, 192, - 0, 0, 1, 32, 0, 128, 0, 0, 32, 192, 7, 0, 255, 255, 255, 255, -}; - -/* Other_Grapheme_Extend: 332 bytes. */ - -RE_UINT32 re_get_other_grapheme_extend(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 14; - code = ch ^ (f << 14); - pos = (RE_UINT32)re_other_grapheme_extend_stage_1[f] << 3; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_other_grapheme_extend_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_other_grapheme_extend_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_other_grapheme_extend_stage_4[pos + f] << 5; - pos += code; - value = (re_other_grapheme_extend_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* IDS_Binary_Operator. */ - -static RE_UINT8 re_ids_binary_operator_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_ids_binary_operator_stage_2[] = { - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_ids_binary_operator_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -}; - -static RE_UINT8 re_ids_binary_operator_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -}; - -static RE_UINT8 re_ids_binary_operator_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 243, 15, -}; - -/* IDS_Binary_Operator: 97 bytes. 
*/ - -RE_UINT32 re_get_ids_binary_operator(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_ids_binary_operator_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_ids_binary_operator_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_ids_binary_operator_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_ids_binary_operator_stage_4[pos + f] << 6; - pos += code; - value = (re_ids_binary_operator_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* IDS_Trinary_Operator. */ - -static RE_UINT8 re_ids_trinary_operator_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_ids_trinary_operator_stage_2[] = { - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_ids_trinary_operator_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -}; - -static RE_UINT8 re_ids_trinary_operator_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -}; - -static RE_UINT8 re_ids_trinary_operator_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, -}; - -/* IDS_Trinary_Operator: 97 bytes. */ - -RE_UINT32 re_get_ids_trinary_operator(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_ids_trinary_operator_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_ids_trinary_operator_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_ids_trinary_operator_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_ids_trinary_operator_stage_4[pos + f] << 6; - pos += code; - value = (re_ids_trinary_operator_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Radical. 
*/ - -static RE_UINT8 re_radical_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_radical_stage_2[] = { - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_radical_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -}; - -static RE_UINT8 re_radical_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 2, 2, 3, 2, 2, 2, 2, 2, 2, 4, 0, -}; - -static RE_UINT8 re_radical_stage_5[] = { - 0, 0, 0, 0, 255, 255, 255, 251, 255, 255, 255, 255, 255, 255, 15, 0, - 255, 255, 63, 0, -}; - -/* Radical: 117 bytes. */ - -RE_UINT32 re_get_radical(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_radical_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_radical_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_radical_stage_3[pos + f] << 4; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_radical_stage_4[pos + f] << 5; - pos += code; - value = (re_radical_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Unified_Ideograph. 
*/ - -static RE_UINT8 re_unified_ideograph_stage_1[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_unified_ideograph_stage_2[] = { - 0, 0, 0, 1, 2, 3, 3, 3, 3, 4, 0, 0, 0, 0, 0, 5, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 6, 7, 8, 0, 0, 0, -}; - -static RE_UINT8 re_unified_ideograph_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 3, 0, 0, 0, 0, 0, 4, 0, 0, - 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 6, 7, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 8, -}; - -static RE_UINT8 re_unified_ideograph_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 3, - 4, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 5, 1, 1, 1, 1, - 1, 1, 1, 1, 6, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 8, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_unified_ideograph_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 63, 0, 255, 255, 63, 0, 0, 0, 0, 0, - 0, 192, 26, 128, 154, 3, 0, 0, 255, 255, 127, 0, 0, 0, 0, 0, - 255, 255, 255, 255, 255, 255, 31, 0, 255, 255, 255, 63, 255, 255, 255, 255, - 255, 255, 255, 255, 3, 0, 0, 0, -}; - -/* Unified_Ideograph: 281 bytes. */ - -RE_UINT32 re_get_unified_ideograph(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_unified_ideograph_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_unified_ideograph_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_unified_ideograph_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_unified_ideograph_stage_4[pos + f] << 6; - pos += code; - value = (re_unified_ideograph_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Other_Default_Ignorable_Code_Point. 
*/ - -static RE_UINT8 re_other_default_ignorable_code_point_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, - 1, -}; - -static RE_UINT8 re_other_default_ignorable_code_point_stage_2[] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -}; - -static RE_UINT8 re_other_default_ignorable_code_point_stage_3[] = { - 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 0, 0, 0, 0, - 4, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, - 7, 8, 8, 8, 8, 8, 8, 8, -}; - -static RE_UINT8 re_other_default_ignorable_code_point_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, - 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, - 0, 0, 0, 0, 0, 0, 6, 7, 8, 0, 9, 9, 0, 0, 0, 10, - 9, 9, 9, 9, 9, 9, 9, 9, -}; - -static RE_UINT8 re_other_default_ignorable_code_point_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, - 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 1, - 253, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, - 0, 0, 0, 0, 0, 0, 255, 255, -}; - -/* Other_Default_Ignorable_Code_Point: 281 bytes. 
*/ - -RE_UINT32 re_get_other_default_ignorable_code_point(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_other_default_ignorable_code_point_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_other_default_ignorable_code_point_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_other_default_ignorable_code_point_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_other_default_ignorable_code_point_stage_4[pos + f] << 6; - pos += code; - value = (re_other_default_ignorable_code_point_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Deprecated. */ - -static RE_UINT8 re_deprecated_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, - 1, 1, -}; - -static RE_UINT8 re_deprecated_stage_2[] = { - 0, 1, 2, 3, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -}; - -static RE_UINT8 re_deprecated_stage_3[] = { - 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, - 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, - 5, 0, 0, 6, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_deprecated_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, - 0, 6, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_deprecated_stage_5[] = { - 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 8, 0, 0, 0, 128, 2, - 24, 0, 0, 0, 0, 252, 0, 0, 0, 6, 0, 0, 2, 0, 0, 0, -}; - -/* Deprecated: 226 bytes. 
*/ - -RE_UINT32 re_get_deprecated(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_deprecated_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_deprecated_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_deprecated_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_deprecated_stage_4[pos + f] << 5; - pos += code; - value = (re_deprecated_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Soft_Dotted. */ - -static RE_UINT8 re_soft_dotted_stage_1[] = { - 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, -}; - -static RE_UINT8 re_soft_dotted_stage_2[] = { - 0, 1, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_soft_dotted_stage_3[] = { - 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 6, 7, 5, 8, 9, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 10, 5, 5, 5, 5, 5, 5, 5, 11, 12, 13, 5, -}; - -static RE_UINT8 re_soft_dotted_stage_4[] = { - 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, - 0, 0, 3, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, - 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 9, 10, 11, 0, 0, 0, 12, 0, 0, 0, 0, 13, 0, - 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, - 0, 0, 0, 16, 0, 0, 0, 0, 0, 17, 18, 0, 19, 20, 0, 21, - 0, 22, 23, 0, 24, 0, 17, 18, 0, 19, 20, 0, 21, 0, 0, 0, -}; - -static RE_UINT8 re_soft_dotted_stage_5[] = { - 0, 0, 0, 0, 0, 6, 0, 0, 0, 128, 0, 0, 0, 2, 0, 0, - 0, 1, 0, 0, 0, 0, 0, 32, 0, 0, 4, 0, 0, 0, 8, 0, - 0, 0, 64, 1, 4, 0, 0, 0, 0, 0, 64, 0, 16, 1, 0, 0, - 0, 32, 0, 0, 0, 8, 0, 0, 0, 0, 2, 0, 0, 3, 0, 0, - 0, 0, 0, 16, 12, 0, 0, 0, 0, 0, 192, 0, 0, 12, 0, 0, - 0, 0, 0, 192, 0, 0, 12, 0, 192, 0, 0, 0, 0, 0, 0, 12, - 0, 192, 0, 0, -}; - -/* 
Soft_Dotted: 342 bytes. */ - -RE_UINT32 re_get_soft_dotted(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_soft_dotted_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_soft_dotted_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_soft_dotted_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_soft_dotted_stage_4[pos + f] << 5; - pos += code; - value = (re_soft_dotted_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Logical_Order_Exception. */ - -static RE_UINT8 re_logical_order_exception_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_logical_order_exception_stage_2[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_logical_order_exception_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, -}; - -static RE_UINT8 re_logical_order_exception_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_logical_order_exception_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 224, 4, 0, 0, 0, 0, 0, 0, 96, 26, -}; - -/* Logical_Order_Exception: 145 bytes. 
*/ - -RE_UINT32 re_get_logical_order_exception(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_logical_order_exception_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_logical_order_exception_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_logical_order_exception_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_logical_order_exception_stage_4[pos + f] << 6; - pos += code; - value = (re_logical_order_exception_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Other_ID_Start. */ - -static RE_UINT8 re_other_id_start_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_other_id_start_stage_2[] = { - 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_other_id_start_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_other_id_start_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_other_id_start_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, 64, 0, 0, 0, 0, 0, 24, 0, 0, 0, 0, -}; - -/* Other_ID_Start: 145 bytes. 
*/ - -RE_UINT32 re_get_other_id_start(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_other_id_start_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_other_id_start_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_other_id_start_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_other_id_start_stage_4[pos + f] << 6; - pos += code; - value = (re_other_id_start_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Other_ID_Continue. */ - -static RE_UINT8 re_other_id_continue_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_other_id_continue_stage_2[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_other_id_continue_stage_3[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 4, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_other_id_continue_stage_4[] = { - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 4, -}; - -static RE_UINT8 re_other_id_continue_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, - 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 254, 3, 0, - 0, 0, 0, 4, 0, 0, 0, 0, -}; - -/* Other_ID_Continue: 145 bytes. 
*/ - -RE_UINT32 re_get_other_id_continue(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_other_id_continue_stage_1[f] << 3; - f = code >> 13; - code ^= f << 13; - pos = (RE_UINT32)re_other_id_continue_stage_2[pos + f] << 4; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_other_id_continue_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_other_id_continue_stage_4[pos + f] << 6; - pos += code; - value = (re_other_id_continue_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Sentence_Terminal. */ - -static RE_UINT8 re_sentence_terminal_stage_1[] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, -}; - -static RE_UINT8 re_sentence_terminal_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 3, 3, 9, 10, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 11, 12, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 13, - 3, 3, 14, 3, 15, 16, 3, 17, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 18, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 19, - 3, 3, 3, 3, 3, 3, 20, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -}; - -static RE_UINT8 re_sentence_terminal_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, - 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 8, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 10, 1, 11, 1, - 12, 1, 13, 1, 1, 14, 15, 1, 16, 1, 1, 1, 1, 1, 1, 1, - 17, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 18, 1, 1, 1, - 19, 1, 1, 1, 1, 1, 1, 1, 1, 20, 1, 1, 21, 22, 1, 1, - 23, 24, 25, 26, 27, 28, 1, 29, 1, 1, 1, 1, 30, 1, 31, 1, - 1, 1, 1, 1, 32, 1, 1, 1, 33, 34, 35, 36, 37, 38, 1, 1, - 39, 1, 1, 40, 41, 1, 42, 1, 41, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 43, 44, 45, 1, 1, 3, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 
1, 46, 1, 1, -}; - -static RE_UINT8 re_sentence_terminal_stage_4[] = { - 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, - 0, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 7, - 0, 0, 8, 0, 0, 0, 0, 9, 0, 0, 0, 10, 0, 11, 0, 0, - 12, 0, 0, 0, 0, 0, 7, 0, 0, 13, 0, 0, 0, 0, 14, 0, - 0, 15, 0, 16, 0, 17, 18, 0, 0, 19, 0, 0, 20, 0, 0, 0, - 0, 0, 0, 3, 21, 0, 0, 0, 0, 0, 0, 22, 0, 0, 0, 23, - 0, 0, 21, 0, 0, 24, 0, 0, 0, 0, 25, 0, 0, 0, 26, 0, - 0, 0, 0, 27, 0, 0, 0, 28, 0, 0, 29, 0, 1, 0, 0, 30, - 0, 0, 23, 0, 0, 0, 31, 0, 0, 16, 32, 0, 0, 0, 33, 0, - 0, 0, 34, 0, 0, 35, 0, 0, 0, 2, 0, 0, 0, 0, 36, 0, - 0, 0, 37, 0, 0, 0, 38, 0, 0, 39, 0, 0, 0, 0, 0, 21, - 0, 0, 0, 40, 0, 41, 42, 0, 43, 0, 0, 0, -}; - -static RE_UINT8 re_sentence_terminal_stage_5[] = { - 0, 0, 0, 0, 2, 64, 0, 128, 0, 2, 0, 0, 0, 0, 0, 128, - 0, 0, 16, 0, 7, 0, 0, 0, 0, 0, 0, 2, 48, 0, 0, 0, - 0, 12, 0, 0, 132, 1, 0, 0, 0, 64, 0, 0, 0, 0, 96, 0, - 8, 2, 0, 0, 0, 15, 0, 0, 0, 0, 0, 204, 0, 0, 0, 24, - 0, 0, 0, 192, 0, 0, 0, 48, 128, 3, 0, 0, 0, 64, 0, 16, - 4, 0, 0, 0, 0, 192, 0, 0, 0, 0, 136, 0, 0, 0, 192, 0, - 0, 128, 0, 0, 0, 3, 0, 0, 0, 0, 0, 224, 0, 0, 3, 0, - 0, 8, 0, 0, 0, 0, 196, 0, 2, 0, 0, 0, 128, 1, 0, 0, - 3, 0, 0, 0, 14, 0, 0, 0, 96, 32, 0, 192, 0, 0, 0, 27, - 0, 24, 0, 0, 12, 254, 255, 0, 6, 0, 0, 0, 0, 0, 0, 112, - 0, 0, 32, 0, 0, 0, 128, 1, 16, 0, 0, 0, 0, 1, 0, 0, -}; - -/* Sentence_Terminal: 726 bytes. 
*/ - -RE_UINT32 re_get_sentence_terminal(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_sentence_terminal_stage_1[f] << 5; - f = code >> 10; - code ^= f << 10; - pos = (RE_UINT32)re_sentence_terminal_stage_2[pos + f] << 3; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_sentence_terminal_stage_3[pos + f] << 2; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_sentence_terminal_stage_4[pos + f] << 5; - pos += code; - value = (re_sentence_terminal_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Variation_Selector. */ - -static RE_UINT8 re_variation_selector_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, - 1, -}; - -static RE_UINT8 re_variation_selector_stage_2[] = { - 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_variation_selector_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_variation_selector_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 4, -}; - -static RE_UINT8 re_variation_selector_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 0, 0, 0, 0, 0, 0, - 255, 255, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 0, 0, -}; - -/* Variation_Selector: 169 bytes. 
*/ - -RE_UINT32 re_get_variation_selector(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_variation_selector_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_variation_selector_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_variation_selector_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_variation_selector_stage_4[pos + f] << 6; - pos += code; - value = (re_variation_selector_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Pattern_White_Space. */ - -static RE_UINT8 re_pattern_white_space_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_pattern_white_space_stage_2[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_pattern_white_space_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_pattern_white_space_stage_4[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 3, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_pattern_white_space_stage_5[] = { - 0, 62, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 32, 0, 0, 0, 0, 0, 0, 0, 0, 192, 0, 0, 0, 3, 0, 0, -}; - -/* Pattern_White_Space: 129 bytes. 
*/ - -RE_UINT32 re_get_pattern_white_space(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_pattern_white_space_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_pattern_white_space_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_pattern_white_space_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_pattern_white_space_stage_4[pos + f] << 6; - pos += code; - value = (re_pattern_white_space_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Pattern_Syntax. */ - -static RE_UINT8 re_pattern_syntax_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_pattern_syntax_stage_2[] = { - 0, 1, 1, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_pattern_syntax_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 3, 4, 4, 5, 4, 4, 6, 4, 4, 4, 4, 1, 1, 7, 1, - 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 10, 1, -}; - -static RE_UINT8 re_pattern_syntax_stage_4[] = { - 0, 1, 2, 2, 0, 3, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, - 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, - 8, 8, 8, 9, 10, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, - 11, 12, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, - 0, 0, 14, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_pattern_syntax_stage_5[] = { - 0, 0, 0, 0, 254, 255, 0, 252, 1, 0, 0, 120, 254, 90, 67, 136, - 0, 0, 128, 0, 0, 0, 255, 255, 255, 0, 255, 127, 254, 255, 239, 127, - 255, 255, 255, 255, 255, 255, 63, 0, 0, 0, 240, 255, 14, 255, 255, 255, - 1, 0, 1, 0, 0, 0, 0, 192, 96, 0, 0, 0, -}; - -/* Pattern_Syntax: 277 bytes. 
*/ - -RE_UINT32 re_get_pattern_syntax(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_pattern_syntax_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_pattern_syntax_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_pattern_syntax_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_pattern_syntax_stage_4[pos + f] << 5; - pos += code; - value = (re_pattern_syntax_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Prepended_Concatenation_Mark. */ - -static RE_UINT8 re_prepended_concatenation_mark_stage_1[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, -}; - -static RE_UINT8 re_prepended_concatenation_mark_stage_2[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_prepended_concatenation_mark_stage_3[] = { - 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 3, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_prepended_concatenation_mark_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 3, 0, 0, 0, - 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_prepended_concatenation_mark_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 32, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, -}; - -/* Prepended_Concatenation_Mark: 162 bytes. 
*/ - -RE_UINT32 re_get_prepended_concatenation_mark(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_prepended_concatenation_mark_stage_1[f] << 3; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_prepended_concatenation_mark_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_prepended_concatenation_mark_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_prepended_concatenation_mark_stage_4[pos + f] << 6; - pos += code; - value = (re_prepended_concatenation_mark_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Hangul_Syllable_Type. */ - -static RE_UINT8 re_hangul_syllable_type_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_hangul_syllable_type_stage_2[] = { - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_hangul_syllable_type_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 3, 0, 0, 0, 0, 0, 4, 5, 6, 7, 8, 9, 10, 4, - 5, 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, - 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, - 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, - 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, - 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 11, -}; - -static RE_UINT8 re_hangul_syllable_type_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 4, - 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, - 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, - 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, - 6, 5, 6, 6, 
7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, - 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, - 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, - 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, - 6, 5, 6, 6, 8, 0, 2, 2, 9, 10, 3, 3, 3, 3, 3, 11, -}; - -static RE_UINT8 re_hangul_syllable_type_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 1, 1, 1, 1, 1, 0, 0, 0, 4, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, - 5, 5, 5, 5, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, - 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, -}; - -/* Hangul_Syllable_Type: 497 bytes. */ - -RE_UINT32 re_get_hangul_syllable_type(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_hangul_syllable_type_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_hangul_syllable_type_stage_2[pos + f] << 4; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_hangul_syllable_type_stage_3[pos + f] << 4; - f = code >> 3; - code ^= f << 3; - pos = (RE_UINT32)re_hangul_syllable_type_stage_4[pos + f] << 3; - value = re_hangul_syllable_type_stage_5[pos + code]; - - return value; -} - -/* Bidi_Class. 
*/ - -static RE_UINT8 re_bidi_class_stage_1[] = { - 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 7, - 8, 9, 5, 5, 5, 5, 10, 5, 5, 5, 5, 11, 5, 12, 13, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, - 16, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, -}; - -static RE_UINT8 re_bidi_class_stage_2[] = { - 0, 1, 2, 2, 2, 3, 4, 5, 2, 6, 2, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, - 28, 29, 2, 2, 2, 2, 30, 31, 32, 2, 2, 2, 2, 33, 34, 35, - 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 2, 46, 2, 2, 2, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 53, 53, 53, 58, 53, 53, - 2, 2, 53, 53, 53, 53, 59, 60, 2, 61, 62, 63, 64, 65, 53, 66, - 67, 68, 2, 69, 70, 71, 72, 73, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 74, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 75, 2, 2, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 2, 86, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 87, 88, 88, 88, 89, 90, 91, 92, 93, 94, - 2, 2, 95, 96, 2, 97, 98, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 99, 99, 100, 99, 101, 102, 103, 99, 99, 99, 99, 99, 104, 99, 99, 99, - 
105, 106, 107, 108, 109, 110, 111, 2, 112, 113, 2, 114, 115, 116, 117, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 118, 119, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 120, 121, 2, 2, 2, 2, 2, 2, 2, 2, 122, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 123, 2, 2, 2, 2, 2, 2, - 2, 2, 124, 125, 126, 2, 127, 2, 2, 2, 2, 2, 2, 128, 129, 130, - 2, 2, 2, 2, 131, 132, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 133, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 99, 134, 135, 99, 99, 99, 99, 99, 99, 99, 99, 99, 88, 136, 99, 99, - 137, 138, 139, 2, 2, 2, 53, 53, 53, 53, 53, 53, 53, 140, 141, 142, - 143, 144, 145, 146, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 147, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 147, - 148, 148, 149, 150, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, - 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, -}; - -static RE_UINT8 re_bidi_class_stage_3[] = { - 0, 1, 2, 3, 4, 5, 4, 6, 7, 8, 9, 10, 11, 12, 11, 12, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 13, 14, 14, 15, 16, - 17, 17, 17, 17, 17, 17, 17, 18, 19, 11, 11, 11, 11, 11, 11, 20, - 21, 11, 11, 11, 11, 11, 11, 11, 22, 23, 17, 24, 25, 26, 26, 26, - 27, 28, 29, 29, 30, 17, 31, 32, 29, 29, 29, 29, 29, 33, 34, 35, - 29, 36, 29, 17, 28, 29, 29, 29, 29, 29, 37, 32, 26, 26, 38, 39, - 26, 40, 41, 26, 26, 42, 26, 26, 26, 26, 29, 29, 29, 43, 44, 17, - 45, 11, 11, 46, 47, 48, 49, 11, 50, 11, 11, 51, 52, 11, 49, 53, - 54, 11, 11, 51, 55, 50, 11, 56, 54, 11, 11, 51, 57, 11, 49, 58, - 50, 11, 11, 59, 52, 60, 49, 11, 61, 11, 11, 11, 62, 11, 11, 63, - 64, 11, 11, 65, 66, 67, 49, 68, 50, 11, 11, 51, 69, 11, 49, 11, - 50, 11, 11, 11, 52, 11, 49, 11, 11, 11, 11, 11, 70, 71, 11, 11, - 11, 11, 11, 72, 73, 11, 11, 11, 11, 11, 11, 74, 75, 11, 11, 11, - 11, 76, 11, 77, 11, 11, 11, 78, 79, 80, 17, 81, 60, 11, 11, 11, - 11, 11, 82, 83, 11, 84, 64, 85, 86, 87, 11, 11, 11, 11, 11, 11, 
- 11, 11, 11, 11, 11, 82, 11, 11, 11, 88, 11, 11, 11, 11, 11, 11, - 4, 11, 11, 11, 11, 11, 11, 11, 89, 90, 11, 11, 11, 11, 11, 11, - 11, 91, 11, 91, 11, 49, 11, 49, 11, 11, 11, 92, 93, 94, 11, 88, - 95, 11, 11, 11, 11, 11, 11, 11, 67, 11, 96, 11, 11, 11, 11, 11, - 11, 11, 97, 98, 99, 11, 11, 11, 11, 11, 11, 11, 11, 100, 16, 16, - 11, 101, 11, 11, 11, 102, 103, 104, 11, 11, 11, 105, 11, 11, 11, 11, - 106, 11, 11, 107, 61, 11, 108, 106, 109, 11, 110, 11, 11, 11, 111, 109, - 11, 11, 112, 113, 11, 11, 11, 11, 11, 11, 11, 11, 11, 114, 115, 116, - 11, 11, 11, 11, 17, 17, 17, 117, 11, 11, 11, 118, 119, 120, 120, 121, - 122, 16, 123, 124, 125, 126, 127, 128, 129, 11, 130, 130, 130, 17, 17, 64, - 131, 132, 133, 134, 135, 16, 11, 11, 136, 16, 16, 16, 16, 16, 16, 16, - 16, 137, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 138, 11, 11, 11, 5, 16, 139, 16, 16, 16, 16, 16, 140, - 16, 16, 141, 11, 142, 11, 16, 16, 143, 144, 11, 11, 11, 11, 145, 16, - 16, 16, 146, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 147, - 16, 148, 16, 149, 150, 151, 152, 11, 11, 11, 11, 11, 11, 11, 153, 154, - 11, 11, 11, 11, 11, 11, 11, 155, 11, 11, 11, 11, 11, 11, 17, 17, - 16, 16, 16, 16, 156, 11, 11, 11, 16, 157, 16, 16, 16, 16, 16, 158, - 16, 16, 16, 16, 16, 138, 11, 159, 160, 16, 161, 162, 11, 11, 11, 11, - 11, 163, 4, 11, 11, 11, 11, 164, 11, 11, 11, 11, 16, 16, 158, 11, - 11, 121, 11, 11, 11, 16, 11, 165, 11, 11, 11, 166, 152, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 167, 11, 11, 11, 11, 11, 100, 11, 168, - 11, 11, 11, 11, 16, 16, 16, 16, 11, 16, 16, 16, 141, 11, 11, 11, - 120, 11, 11, 11, 11, 11, 155, 169, 11, 65, 11, 11, 11, 11, 11, 109, - 16, 16, 151, 11, 11, 11, 11, 11, 170, 11, 11, 11, 11, 11, 11, 11, - 171, 11, 172, 173, 11, 11, 11, 174, 11, 11, 11, 11, 175, 11, 17, 109, - 11, 11, 176, 11, 177, 109, 11, 11, 45, 11, 11, 178, 11, 11, 179, 11, - 11, 11, 180, 181, 182, 11, 11, 51, 11, 11, 11, 183, 50, 11, 69, 60, - 11, 11, 11, 11, 11, 11, 184, 11, 11, 185, 186, 26, 
26, 29, 29, 29, - 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 187, 29, 29, 29, 29, - 29, 29, 29, 29, 29, 8, 8, 188, 17, 88, 17, 16, 16, 189, 190, 29, - 29, 29, 29, 29, 29, 29, 29, 191, 192, 3, 4, 5, 4, 5, 138, 11, - 11, 11, 11, 11, 11, 11, 193, 194, 195, 11, 11, 11, 16, 16, 16, 16, - 196, 159, 4, 11, 11, 11, 11, 87, 11, 11, 11, 11, 11, 11, 197, 144, - 11, 11, 11, 11, 11, 11, 11, 198, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 199, 26, 26, 26, 26, 26, 26, 200, 26, 26, 201, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 202, 26, 26, 26, 26, 203, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 204, 205, 50, 11, 11, 206, 207, 14, 138, 155, - 109, 11, 11, 208, 11, 11, 11, 11, 45, 11, 209, 210, 11, 11, 11, 211, - 109, 11, 11, 212, 213, 11, 11, 11, 11, 11, 155, 214, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 155, 215, 11, 109, 11, 11, 51, 64, 11, 216, 210, - 11, 11, 11, 206, 71, 11, 11, 11, 11, 11, 11, 217, 218, 11, 11, 11, - 11, 11, 11, 219, 64, 69, 11, 11, 11, 11, 11, 220, 64, 11, 196, 11, - 11, 11, 221, 222, 11, 11, 11, 11, 11, 82, 223, 11, 11, 11, 11, 11, - 11, 11, 11, 224, 11, 11, 11, 11, 11, 225, 226, 227, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 210, 11, 11, 11, 207, 11, 11, 11, 11, - 155, 45, 11, 11, 11, 11, 11, 11, 11, 228, 229, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 230, 231, 232, 11, 233, 11, 11, 11, 11, 11, - 16, 16, 16, 16, 234, 11, 11, 11, 16, 16, 16, 16, 16, 141, 11, 11, - 11, 11, 11, 11, 11, 164, 11, 11, 11, 235, 11, 11, 168, 11, 11, 11, - 236, 11, 11, 11, 237, 238, 238, 238, 17, 17, 17, 239, 17, 17, 81, 179, - 240, 108, 241, 11, 11, 11, 11, 11, 242, 243, 244, 11, 11, 11, 11, 11, - 26, 26, 26, 26, 26, 245, 26, 26, 26, 26, 26, 26, 246, 26, 26, 26, - 29, 29, 29, 29, 29, 29, 29, 247, 16, 16, 159, 16, 16, 16, 16, 16, - 16, 158, 140, 166, 166, 166, 16, 138, 248, 11, 11, 11, 11, 11, 134, 11, - 16, 16, 16, 16, 16, 249, 196, 141, 16, 16, 16, 16, 16, 16, 16, 158, - 16, 16, 16, 16, 16, 156, 11, 11, 159, 16, 16, 16, 250, 88, 16, 16, - 250, 16, 251, 11, 11, 11, 11, 11, 11, 
140, 250, 252, 159, 140, 11, 11, - 16, 151, 11, 11, 4, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 253, - 8, 8, 8, 8, 8, 8, 8, 8, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 8, -}; - -static RE_UINT8 re_bidi_class_stage_4[] = { - 0, 0, 1, 2, 0, 0, 0, 3, 4, 5, 6, 7, 8, 8, 9, 10, - 11, 12, 12, 12, 12, 12, 13, 10, 12, 12, 13, 14, 0, 15, 0, 0, - 0, 0, 0, 0, 16, 5, 17, 18, 19, 20, 21, 10, 12, 12, 12, 12, - 12, 13, 12, 12, 12, 12, 22, 12, 23, 10, 10, 10, 12, 24, 10, 17, - 10, 10, 10, 10, 25, 25, 25, 25, 12, 26, 12, 27, 12, 17, 12, 12, - 12, 27, 12, 12, 28, 25, 29, 12, 12, 12, 27, 30, 31, 25, 25, 25, - 25, 25, 25, 32, 33, 32, 34, 34, 34, 34, 34, 34, 35, 36, 37, 38, - 25, 25, 39, 40, 40, 40, 40, 40, 40, 40, 41, 25, 35, 35, 42, 43, - 44, 40, 40, 40, 40, 45, 25, 46, 25, 47, 48, 49, 8, 8, 50, 40, - 51, 40, 40, 40, 40, 45, 25, 25, 34, 34, 52, 25, 25, 53, 54, 34, - 34, 55, 32, 25, 25, 31, 31, 56, 34, 34, 31, 34, 40, 25, 25, 25, - 57, 25, 25, 25, 58, 12, 12, 12, 12, 12, 59, 60, 61, 25, 60, 62, - 61, 25, 12, 12, 63, 12, 12, 12, 62, 12, 12, 12, 12, 12, 12, 60, - 61, 60, 12, 62, 64, 12, 65, 12, 66, 12, 12, 12, 66, 28, 67, 29, - 29, 62, 12, 12, 61, 68, 60, 62, 69, 12, 12, 12, 12, 12, 12, 67, - 12, 59, 12, 12, 59, 12, 12, 12, 60, 12, 12, 62, 13, 10, 70, 12, - 60, 12, 12, 12, 12, 12, 12, 63, 60, 63, 71, 29, 12, 66, 12, 12, - 12, 12, 10, 72, 12, 12, 12, 29, 12, 12, 59, 12, 63, 73, 12, 12, - 62, 25, 58, 65, 12, 28, 25, 58, 62, 25, 68, 60, 12, 12, 25, 29, - 12, 12, 29, 12, 12, 74, 75, 26, 61, 25, 25, 58, 25, 71, 12, 61, - 25, 25, 61, 25, 25, 25, 25, 60, 12, 12, 12, 61, 71, 25, 66, 66, - 12, 12, 29, 63, 61, 60, 12, 12, 59, 66, 12, 62, 12, 12, 12, 62, - 10, 10, 26, 12, 76, 12, 12, 12, 12, 12, 13, 11, 63, 60, 12, 12, - 12, 68, 25, 29, 12, 59, 61, 25, 25, 12, 65, 62, 10, 10, 77, 78, - 12, 12, 62, 12, 58, 28, 60, 12, 59, 12, 61, 12, 11, 26, 12, 12, - 12, 12, 12, 23, 12, 28, 67, 12, 12, 59, 25, 58, 73, 61, 25, 60, - 28, 25, 25, 67, 25, 25, 25, 58, 25, 12, 12, 12, 12, 71, 58, 60, 
- 12, 12, 28, 25, 29, 12, 12, 12, 63, 29, 68, 29, 12, 59, 29, 74, - 12, 12, 12, 25, 25, 63, 12, 12, 58, 25, 25, 25, 71, 25, 60, 62, - 12, 60, 29, 12, 25, 29, 28, 25, 12, 12, 12, 79, 26, 12, 12, 24, - 12, 12, 12, 24, 12, 12, 12, 22, 80, 80, 81, 82, 10, 10, 83, 84, - 85, 86, 10, 10, 10, 87, 10, 10, 10, 10, 10, 88, 0, 89, 90, 0, - 91, 8, 92, 72, 8, 8, 92, 72, 85, 85, 85, 85, 17, 72, 26, 12, - 12, 20, 11, 23, 10, 79, 93, 94, 12, 12, 23, 12, 10, 11, 23, 26, - 12, 12, 24, 12, 95, 10, 10, 10, 10, 26, 12, 12, 10, 20, 10, 10, - 10, 10, 10, 72, 10, 72, 12, 12, 10, 10, 72, 12, 10, 10, 8, 8, - 8, 8, 8, 12, 12, 12, 23, 10, 10, 10, 10, 24, 10, 23, 10, 10, - 10, 26, 10, 10, 10, 10, 26, 24, 10, 10, 20, 10, 26, 12, 12, 12, - 12, 12, 12, 10, 12, 24, 72, 28, 29, 12, 24, 10, 12, 12, 12, 28, - 10, 11, 12, 12, 10, 10, 17, 10, 10, 12, 12, 12, 10, 10, 10, 12, - 96, 11, 10, 10, 11, 12, 63, 29, 11, 23, 12, 24, 12, 12, 97, 11, - 12, 12, 13, 12, 12, 12, 12, 72, 24, 10, 10, 10, 12, 13, 72, 12, - 12, 12, 12, 13, 98, 25, 25, 99, 12, 12, 11, 12, 59, 59, 28, 12, - 12, 66, 10, 12, 12, 12, 100, 12, 12, 10, 12, 12, 12, 29, 12, 12, - 12, 63, 25, 29, 12, 28, 25, 25, 28, 63, 29, 60, 12, 62, 12, 12, - 12, 12, 61, 58, 66, 66, 12, 12, 28, 12, 12, 60, 71, 67, 60, 63, - 12, 62, 60, 62, 12, 12, 12, 101, 34, 34, 102, 34, 40, 40, 40, 103, - 40, 40, 40, 104, 105, 106, 10, 107, 108, 72, 109, 12, 40, 40, 40, 110, - 30, 5, 6, 7, 5, 111, 10, 72, 0, 0, 112, 113, 93, 12, 12, 12, - 10, 10, 10, 11, 114, 8, 8, 8, 12, 63, 58, 12, 34, 34, 34, 115, - 31, 33, 34, 25, 34, 34, 116, 52, 34, 33, 34, 34, 34, 34, 117, 10, - 35, 35, 35, 35, 35, 35, 35, 118, 12, 12, 25, 25, 25, 58, 12, 12, - 28, 58, 66, 12, 12, 28, 25, 61, 25, 60, 12, 12, 28, 12, 12, 12, - 12, 63, 25, 58, 12, 12, 63, 60, 29, 71, 12, 59, 28, 25, 58, 12, - 12, 63, 25, 60, 28, 25, 73, 28, 71, 12, 12, 12, 63, 29, 12, 68, - 28, 25, 58, 74, 12, 12, 28, 62, 25, 68, 12, 12, 63, 68, 25, 12, - 25, 58, 25, 29, 63, 25, 25, 25, 25, 25, 63, 25, 71, 66, 12, 12, - 12, 12, 12, 66, 
0, 12, 12, 12, 12, 28, 29, 12, 119, 0, 120, 25, - 58, 61, 25, 12, 12, 12, 63, 29, 121, 122, 12, 12, 12, 93, 12, 12, - 12, 12, 93, 12, 13, 12, 12, 123, 8, 8, 8, 8, 25, 58, 28, 25, - 12, 60, 12, 12, 61, 25, 25, 25, 25, 58, 25, 25, 25, 25, 67, 25, - 68, 71, 58, 12, 25, 116, 34, 34, 34, 25, 116, 34, 124, 40, 40, 40, - 8, 8, 125, 11, 72, 12, 12, 12, 10, 10, 12, 12, 10, 10, 10, 26, - 126, 10, 10, 72, 12, 12, 12, 127, -}; - -static RE_UINT8 re_bidi_class_stage_5[] = { - 11, 11, 11, 11, 11, 8, 7, 8, 9, 7, 11, 11, 7, 7, 7, 8, - 9, 10, 10, 4, 4, 4, 10, 10, 10, 10, 10, 3, 6, 3, 6, 6, - 2, 2, 2, 2, 2, 2, 6, 10, 10, 10, 10, 10, 10, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 11, 11, 7, 11, 11, - 6, 10, 4, 4, 10, 10, 0, 10, 10, 11, 10, 10, 4, 4, 2, 2, - 10, 0, 10, 10, 10, 2, 0, 10, 0, 10, 10, 0, 0, 0, 10, 10, - 0, 10, 10, 10, 12, 12, 12, 12, 10, 10, 0, 0, 0, 0, 10, 0, - 0, 0, 0, 12, 12, 12, 0, 0, 0, 10, 10, 4, 1, 12, 12, 12, - 12, 12, 1, 12, 1, 12, 12, 1, 1, 1, 1, 1, 5, 5, 5, 5, - 5, 5, 10, 10, 13, 4, 4, 13, 6, 13, 10, 10, 12, 12, 12, 13, - 13, 13, 13, 13, 13, 13, 13, 12, 5, 5, 4, 5, 5, 13, 13, 13, - 12, 13, 13, 13, 13, 13, 12, 12, 12, 5, 10, 12, 12, 13, 13, 12, - 12, 10, 12, 12, 12, 12, 13, 13, 2, 2, 13, 13, 13, 12, 13, 13, - 1, 1, 1, 12, 1, 1, 10, 10, 10, 10, 1, 1, 1, 1, 12, 12, - 12, 12, 1, 1, 12, 12, 5, 12, 12, 12, 12, 0, 0, 0, 12, 0, - 12, 0, 0, 0, 0, 12, 12, 12, 0, 12, 0, 0, 0, 0, 12, 12, - 0, 0, 4, 4, 0, 0, 0, 4, 0, 12, 12, 0, 12, 0, 0, 12, - 12, 12, 0, 12, 0, 4, 0, 0, 10, 4, 10, 0, 12, 0, 12, 12, - 10, 10, 10, 0, 12, 0, 12, 0, 0, 12, 0, 12, 0, 12, 10, 10, - 9, 0, 0, 0, 10, 10, 10, 12, 12, 12, 11, 0, 0, 10, 0, 10, - 9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 0, 1, 9, 7, 16, 17, - 18, 14, 15, 6, 4, 4, 4, 4, 4, 10, 10, 10, 6, 10, 10, 10, - 10, 10, 10, 9, 11, 11, 19, 20, 21, 22, 11, 11, 2, 0, 0, 0, - 2, 2, 3, 3, 0, 10, 0, 0, 0, 0, 4, 0, 10, 10, 3, 4, - 9, 10, 10, 10, 0, 12, 12, 10, 12, 12, 12, 10, 12, 12, 10, 10, - 4, 4, 0, 0, 0, 1, 12, 1, 1, 3, 1, 1, 13, 13, 10, 10, - 13, 10, 13, 
13, 6, 10, 6, 0, 10, 6, 10, 10, 10, 10, 10, 4, - 10, 10, 3, 3, 10, 4, 4, 10, 13, 13, 13, 11, 10, 4, 4, 0, - 11, 10, 10, 10, 10, 10, 11, 11, 12, 2, 2, 2, 1, 1, 1, 10, - 12, 12, 12, 1, 1, 10, 10, 10, 5, 5, 5, 1, 0, 0, 0, 11, - 11, 11, 11, 12, 10, 10, 12, 12, 12, 10, 0, 0, 0, 0, 2, 2, - 10, 10, 13, 13, 2, 2, 2, 10, 10, 0, 0, 10, 0, 0, 11, 11, -}; - -/* Bidi_Class: 3552 bytes. */ - -RE_UINT32 re_get_bidi_class(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 12; - code = ch ^ (f << 12); - pos = (RE_UINT32)re_bidi_class_stage_1[f] << 5; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_bidi_class_stage_2[pos + f] << 3; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_bidi_class_stage_3[pos + f] << 2; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_bidi_class_stage_4[pos + f] << 2; - value = re_bidi_class_stage_5[pos + code]; - - return value; -} - -/* Canonical_Combining_Class. */ - -static RE_UINT8 re_canonical_combining_class_stage_1[] = { - 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 6, 2, 7, 8, 9, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_canonical_combining_class_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 10, 11, 12, 13, 0, - 14, 0, 0, 0, 0, 0, 15, 0, 16, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 17, 18, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, 21, - 22, 23, 0, 0, 0, 24, 0, 0, 25, 26, 27, 28, 0, 0, 29, 0, - 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 32, 33, 0, 0, 0, 0, 0, 0, - 34, 0, 
0, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_canonical_combining_class_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0, - 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 0, - 9, 0, 10, 11, 0, 0, 12, 13, 14, 15, 16, 0, 0, 0, 0, 17, - 18, 19, 20, 0, 0, 0, 21, 22, 0, 23, 24, 0, 0, 23, 25, 0, - 0, 23, 25, 0, 0, 23, 25, 0, 0, 23, 25, 0, 0, 0, 25, 0, - 0, 0, 26, 0, 0, 23, 25, 0, 0, 0, 25, 0, 0, 0, 27, 0, - 0, 28, 29, 0, 0, 30, 31, 0, 32, 33, 0, 34, 35, 0, 36, 0, - 0, 37, 0, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 40, 40, 0, 0, 0, 0, 41, 0, - 0, 0, 0, 0, 0, 42, 0, 0, 0, 43, 0, 0, 0, 0, 0, 0, - 44, 0, 0, 45, 0, 46, 0, 0, 0, 47, 48, 49, 0, 50, 0, 51, - 0, 52, 0, 0, 0, 0, 53, 54, 0, 0, 0, 0, 0, 0, 55, 56, - 0, 0, 0, 0, 0, 0, 57, 58, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 59, 0, 0, 0, 60, 0, 0, 0, 61, - 0, 62, 0, 0, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 64, 65, 0, 0, 66, 0, 0, 0, 0, 0, 0, 0, 0, - 67, 0, 0, 0, 0, 0, 48, 68, 0, 69, 70, 0, 0, 71, 72, 0, - 0, 0, 0, 0, 0, 73, 74, 75, 0, 0, 0, 0, 0, 0, 0, 25, - 0, 0, 0, 0, 0, 0, 0, 0, 76, 0, 0, 0, 0, 0, 0, 0, - 0, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 78, - 0, 0, 0, 0, 0, 0, 0, 79, 0, 0, 0, 80, 0, 0, 0, 0, - 81, 82, 0, 0, 0, 0, 0, 83, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 67, 60, 0, 84, 0, 0, 85, 86, 0, 71, 0, 0, 87, 0, - 0, 88, 0, 0, 0, 0, 0, 89, 0, 23, 25, 90, 0, 0, 0, 0, - 0, 0, 91, 0, 0, 0, 92, 0, 0, 0, 0, 0, 0, 60, 93, 0, - 0, 60, 0, 0, 0, 94, 0, 0, 0, 95, 0, 0, 0, 0, 0, 0, - 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 96, 0, 97, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 98, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99, 100, 101, 0, 0, - 0, 0, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 103, 104, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 
0, 105, 0, 0, 0, 106, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_canonical_combining_class_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 4, - 5, 6, 7, 4, 4, 8, 9, 10, 1, 11, 12, 13, 14, 15, 16, 17, - 18, 1, 1, 1, 0, 0, 0, 0, 19, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 20, 21, 22, 1, 23, 4, 21, 24, 25, 26, 27, 28, - 29, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 31, 0, - 0, 0, 32, 33, 34, 35, 1, 36, 0, 0, 0, 0, 37, 0, 0, 0, - 0, 0, 0, 0, 0, 38, 1, 39, 14, 39, 40, 41, 0, 0, 0, 0, - 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 43, 36, 44, 45, - 21, 45, 46, 0, 0, 0, 0, 0, 0, 0, 19, 1, 21, 0, 0, 0, - 0, 0, 0, 0, 0, 38, 47, 1, 1, 48, 48, 49, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 50, 0, 0, 0, 0, 0, 0, 1, 1, 1, - 51, 21, 43, 52, 53, 21, 35, 1, 0, 0, 0, 0, 0, 0, 0, 54, - 0, 0, 0, 55, 56, 57, 0, 0, 0, 0, 0, 55, 0, 0, 0, 0, - 0, 0, 0, 55, 0, 58, 0, 0, 0, 0, 59, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 60, 0, 0, 0, 61, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 63, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 65, 66, 0, - 0, 0, 0, 0, 67, 68, 69, 70, 71, 72, 0, 0, 0, 0, 0, 0, - 0, 73, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 74, 75, 0, - 0, 0, 0, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, - 0, 0, 0, 0, 0, 77, 0, 0, 0, 0, 0, 0, 59, 0, 0, 78, - 0, 0, 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, 0, - 0, 0, 0, 0, 0, 19, 81, 0, 77, 0, 0, 0, 0, 48, 1, 82, - 0, 0, 0, 0, 1, 52, 15, 41, 0, 0, 0, 0, 0, 54, 0, 0, - 0, 77, 0, 0, 0, 0, 0, 0, 0, 0, 19, 10, 1, 0, 0, 0, - 0, 0, 83, 0, 0, 0, 0, 0, 0, 84, 0, 0, 83, 0, 0, 0, - 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 85, 9, 12, 4, - 86, 8, 87, 76, 0, 57, 49, 0, 21, 1, 21, 88, 89, 1, 1, 1, - 1, 1, 1, 1, 1, 49, 19, 90, 0, 0, 0, 0, 91, 1, 92, 57, - 78, 93, 94, 4, 57, 0, 0, 0, 0, 0, 0, 19, 49, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 95, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 96, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 98, 0, - 0, 0, 0, 19, 0, 1, 1, 49, 0, 0, 0, 0, 0, 0, 0, 38, - 0, 0, 0, 0, 49, 0, 0, 0, 0, 59, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 49, 0, 0, 0, 0, 0, 99, 64, 0, 0, 0, 0, - 0, 
0, 0, 0, 95, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, - 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 100, 101, 57, 38, - 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 102, 1, 14, 4, 12, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 76, 81, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 38, 85, 0, 0, 0, 0, 103, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 104, 95, 0, 105, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 106, 0, 85, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 95, 77, 0, 0, 77, 0, 84, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 106, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, - 0, 38, 1, 57, 1, 57, 0, 0, 59, 84, 0, 0, 0, 0, 0, 0, - 108, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 108, 0, 0, 0, 0, 95, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 8, 87, 0, 0, 0, 0, 0, 0, 1, 85, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 109, 0, 110, 111, 112, 113, 0, 99, 4, - 114, 48, 23, 0, 0, 0, 0, 0, 0, 0, 38, 49, 0, 0, 0, 0, - 38, 57, 0, 0, 0, 0, 0, 0, 1, 85, 1, 1, 1, 1, 39, 1, - 47, 100, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 114, 0, 0, - 0, 1, 115, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_canonical_combining_class_stage_5[] = { - 0, 0, 0, 0, 50, 50, 50, 50, 50, 51, 45, 45, 45, 45, 51, 43, - 45, 45, 45, 45, 45, 41, 41, 45, 45, 45, 45, 41, 41, 45, 45, 45, - 1, 1, 1, 1, 1, 45, 45, 45, 45, 50, 50, 50, 50, 54, 50, 45, - 45, 45, 50, 50, 50, 45, 45, 0, 50, 50, 50, 45, 45, 45, 45, 50, - 51, 45, 45, 50, 52, 53, 53, 52, 53, 53, 52, 50, 0, 0, 0, 50, - 0, 45, 50, 50, 50, 50, 45, 50, 50, 50, 46, 45, 50, 50, 45, 45, - 50, 46, 49, 50, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 14, 15, - 16, 17, 0, 18, 0, 19, 20, 0, 50, 45, 0, 13, 25, 26, 27, 0, - 0, 0, 0, 22, 23, 24, 25, 26, 27, 28, 29, 50, 50, 45, 45, 50, - 45, 50, 50, 45, 30, 0, 0, 0, 0, 0, 50, 50, 50, 0, 0, 50, - 50, 0, 45, 50, 50, 45, 0, 0, 0, 31, 0, 0, 50, 45, 50, 50, - 45, 45, 50, 45, 45, 50, 45, 50, 45, 50, 50, 0, 50, 50, 0, 50, - 0, 50, 50, 50, 50, 50, 0, 0, 0, 45, 45, 45, 50, 50, 0, 45, - 50, 45, 45, 45, 22, 23, 24, 50, 2, 0, 0, 0, 0, 4, 0, 0, - 0, 50, 45, 50, 50, 0, 0, 0, 0, 32, 33, 0, 0, 0, 4, 0, - 34, 
34, 4, 0, 35, 35, 35, 35, 36, 36, 0, 0, 37, 37, 37, 37, - 45, 45, 0, 0, 0, 45, 0, 45, 0, 43, 0, 0, 0, 38, 39, 0, - 40, 0, 0, 0, 0, 0, 39, 39, 39, 39, 0, 0, 39, 0, 50, 50, - 4, 0, 50, 50, 0, 0, 45, 0, 0, 0, 0, 2, 0, 4, 4, 0, - 0, 45, 0, 0, 4, 0, 0, 0, 0, 50, 0, 0, 0, 49, 0, 0, - 0, 46, 50, 45, 45, 0, 0, 0, 50, 0, 0, 45, 0, 0, 4, 4, - 0, 0, 2, 0, 50, 50, 50, 0, 50, 0, 1, 1, 1, 0, 0, 0, - 50, 53, 42, 45, 41, 50, 50, 50, 52, 45, 50, 45, 50, 50, 1, 1, - 1, 1, 1, 50, 0, 1, 1, 50, 45, 50, 1, 1, 0, 0, 0, 4, - 0, 0, 44, 49, 51, 46, 47, 47, 0, 3, 3, 0, 0, 0, 0, 45, - 50, 0, 50, 50, 45, 0, 0, 50, 0, 0, 21, 0, 0, 45, 0, 50, - 50, 1, 45, 0, 0, 50, 45, 0, 0, 4, 2, 0, 0, 2, 4, 0, - 0, 0, 4, 2, 0, 0, 1, 0, 0, 43, 43, 1, 1, 1, 0, 0, - 0, 48, 43, 43, 43, 43, 43, 0, 45, 45, 45, 0, 50, 50, 2, 0, -}; - -/* Canonical_Combining_Class: 2192 bytes. */ - -RE_UINT32 re_get_canonical_combining_class(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_canonical_combining_class_stage_1[f] << 4; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_canonical_combining_class_stage_2[pos + f] << 4; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_canonical_combining_class_stage_3[pos + f] << 3; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_canonical_combining_class_stage_4[pos + f] << 2; - value = re_canonical_combining_class_stage_5[pos + code]; - - return value; -} - -/* Decomposition_Type. 
*/ - -static RE_UINT8 re_decomposition_type_stage_1[] = { - 0, 1, 2, 2, 2, 3, 4, 5, 6, 2, 2, 2, 2, 2, 7, 8, - 2, 2, 2, 2, 2, 2, 2, 9, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_decomposition_type_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 16, 7, 17, 18, 19, - 20, 21, 22, 23, 24, 7, 7, 7, 7, 7, 25, 7, 26, 27, 28, 29, - 30, 31, 32, 33, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 34, 35, 7, 7, 7, 36, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 38, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 37, 39, 40, 41, 42, 43, 44, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 45, 46, 7, 47, 48, 49, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 50, 7, 7, 51, 52, 53, 54, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 55, 7, - 7, 56, 57, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 37, 37, 58, 7, 7, 7, 7, 7, -}; - -static RE_UINT8 re_decomposition_type_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 3, 5, - 6, 7, 8, 9, 10, 11, 8, 12, 0, 0, 13, 14, 15, 16, 17, 18, - 6, 19, 20, 21, 0, 0, 0, 0, 0, 0, 0, 22, 0, 23, 24, 0, - 0, 0, 0, 0, 25, 0, 0, 26, 27, 14, 28, 14, 29, 30, 0, 31, - 32, 33, 0, 33, 0, 32, 0, 34, 0, 0, 0, 0, 35, 36, 37, 38, - 0, 0, 0, 0, 0, 0, 
0, 0, 39, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 40, 0, 0, 0, 0, 41, 0, 0, 0, 0, 42, 43, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 33, 44, 0, 45, 0, 0, 0, 0, 0, 0, 46, 47, 0, 0, - 0, 0, 0, 48, 0, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 50, 51, 0, 0, 0, 52, 0, 0, 53, 0, 0, 0, - 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 55, 0, 0, 0, - 0, 0, 0, 0, 53, 0, 0, 0, 0, 0, 0, 0, 0, 56, 0, 0, - 0, 0, 0, 57, 0, 0, 0, 0, 0, 0, 0, 57, 0, 58, 0, 0, - 59, 0, 0, 0, 60, 61, 33, 62, 63, 60, 61, 33, 0, 0, 0, 0, - 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, - 66, 67, 0, 68, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 70, 71, 72, 73, 74, 75, 0, 76, 73, 73, 0, 0, 0, 0, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 77, 6, 6, 6, 6, 6, 78, - 6, 79, 6, 6, 79, 80, 6, 81, 6, 6, 6, 82, 83, 84, 6, 85, - 86, 87, 88, 89, 90, 91, 0, 92, 93, 94, 95, 0, 0, 0, 0, 0, - 96, 97, 98, 99, 100, 101, 102, 102, 103, 104, 105, 0, 106, 0, 0, 0, - 107, 0, 108, 109, 110, 0, 111, 112, 112, 0, 113, 0, 0, 0, 114, 0, - 0, 0, 115, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 116, 117, 102, 102, 102, 118, 116, 116, 119, 0, - 120, 0, 0, 0, 0, 0, 0, 121, 0, 0, 0, 0, 0, 122, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 124, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 125, 0, 0, 0, 0, 0, 57, - 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 126, 0, 0, - 127, 0, 0, 128, 129, 130, 131, 132, 0, 133, 129, 130, 131, 132, 0, 134, - 0, 0, 0, 135, 102, 102, 102, 102, 136, 137, 0, 0, 0, 0, 0, 0, - 102, 136, 102, 102, 138, 139, 116, 140, 116, 116, 116, 116, 141, 116, 116, 140, - 142, 142, 142, 142, 142, 143, 102, 144, 142, 142, 142, 142, 142, 142, 102, 145, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 146, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 147, 0, 0, 0, 0, 0, 0, 0, 148, - 0, 0, 0, 0, 0, 149, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 21, 0, 0, 0, 0, 0, - 81, 150, 151, 6, 6, 6, 81, 6, 6, 6, 6, 
6, 6, 78, 0, 0, - 152, 153, 154, 155, 156, 157, 158, 158, 159, 158, 160, 161, 0, 162, 163, 164, - 165, 165, 165, 165, 165, 165, 166, 167, 167, 168, 169, 169, 169, 170, 171, 172, - 165, 173, 174, 175, 0, 176, 177, 178, 179, 180, 167, 181, 182, 0, 0, 183, - 0, 184, 0, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 194, 195, 196, - 197, 198, 198, 198, 198, 198, 199, 200, 200, 200, 200, 201, 202, 203, 204, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 205, 206, 0, 0, 0, 0, 0, - 0, 0, 207, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 208, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 104, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 207, 209, 0, 0, 0, 0, 210, 14, 0, 0, 0, - 211, 211, 211, 211, 211, 212, 211, 211, 211, 213, 214, 215, 216, 211, 211, 211, - 217, 218, 211, 219, 220, 221, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, - 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 222, 211, 211, 211, 211, 211, - 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 223, 211, 211, 211, - 216, 211, 224, 225, 226, 227, 228, 229, 230, 231, 232, 231, 0, 0, 0, 0, - 233, 102, 234, 142, 142, 0, 235, 0, 0, 236, 0, 0, 0, 0, 0, 0, - 237, 142, 142, 238, 239, 240, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 6, 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_decomposition_type_stage_4[] = { - 0, 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 8, 8, - 10, 11, 10, 12, 10, 11, 10, 9, 8, 8, 8, 8, 13, 8, 8, 8, - 8, 12, 8, 8, 14, 8, 10, 15, 16, 8, 17, 8, 12, 8, 8, 8, - 8, 8, 8, 15, 12, 0, 0, 18, 19, 0, 0, 0, 0, 20, 20, 21, - 8, 8, 8, 22, 8, 13, 8, 8, 23, 12, 8, 8, 8, 8, 8, 13, - 0, 13, 8, 8, 8, 0, 0, 0, 24, 24, 25, 0, 0, 0, 20, 5, - 24, 25, 0, 0, 9, 19, 0, 0, 0, 19, 26, 27, 0, 21, 11, 22, - 0, 0, 13, 8, 0, 0, 13, 11, 28, 29, 0, 0, 30, 5, 31, 0, - 9, 18, 0, 11, 0, 0, 32, 0, 0, 13, 0, 0, 33, 0, 0, 0, - 8, 13, 13, 8, 13, 8, 13, 8, 8, 12, 12, 0, 0, 3, 0, 0, - 13, 11, 0, 0, 0, 34, 35, 0, 36, 0, 0, 0, 18, 0, 0, 0, - 32, 19, 0, 0, 
0, 0, 8, 8, 0, 0, 18, 19, 0, 0, 0, 9, - 18, 27, 0, 0, 0, 0, 10, 27, 0, 0, 37, 19, 0, 0, 0, 12, - 0, 19, 0, 0, 0, 0, 13, 19, 0, 0, 19, 0, 19, 18, 22, 0, - 0, 0, 27, 11, 3, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 1, - 18, 0, 0, 32, 27, 18, 0, 19, 18, 38, 17, 0, 32, 0, 0, 0, - 0, 27, 0, 0, 0, 0, 0, 25, 0, 27, 36, 36, 27, 0, 0, 0, - 0, 0, 18, 32, 9, 0, 0, 0, 0, 0, 0, 39, 24, 24, 39, 24, - 24, 24, 24, 40, 24, 24, 24, 24, 41, 42, 43, 0, 0, 0, 25, 0, - 0, 0, 44, 24, 8, 8, 45, 0, 8, 8, 12, 0, 8, 12, 8, 12, - 8, 8, 46, 46, 8, 8, 8, 12, 8, 22, 8, 47, 21, 22, 8, 8, - 8, 13, 8, 10, 13, 22, 8, 48, 49, 50, 30, 0, 51, 3, 0, 0, - 0, 30, 0, 52, 3, 53, 0, 54, 0, 3, 5, 0, 0, 3, 0, 3, - 55, 24, 24, 24, 42, 42, 42, 43, 42, 42, 42, 56, 0, 0, 35, 0, - 57, 34, 58, 59, 59, 60, 61, 62, 63, 64, 65, 66, 66, 67, 68, 59, - 69, 61, 62, 0, 70, 70, 70, 70, 20, 20, 20, 20, 0, 0, 71, 0, - 0, 0, 13, 0, 0, 0, 0, 27, 0, 0, 0, 10, 0, 19, 32, 19, - 0, 36, 0, 72, 35, 0, 0, 0, 32, 37, 32, 0, 36, 0, 0, 10, - 12, 12, 12, 0, 0, 0, 0, 8, 8, 0, 13, 12, 0, 0, 33, 0, - 73, 73, 73, 73, 73, 20, 20, 20, 20, 74, 73, 73, 73, 73, 75, 0, - 0, 0, 0, 35, 0, 30, 0, 0, 0, 0, 0, 19, 0, 0, 0, 76, - 0, 0, 0, 44, 0, 0, 0, 3, 20, 5, 0, 0, 77, 0, 0, 0, - 0, 26, 30, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 46, 32, 0, - 9, 22, 33, 12, 0, 19, 3, 78, 0, 37, 11, 79, 34, 20, 20, 20, - 20, 20, 20, 30, 4, 24, 24, 24, 20, 73, 0, 0, 80, 73, 73, 73, - 73, 73, 73, 75, 20, 20, 20, 81, 81, 81, 81, 81, 81, 81, 20, 20, - 82, 81, 81, 81, 20, 20, 20, 83, 0, 0, 0, 55, 25, 0, 0, 0, - 0, 0, 55, 0, 0, 0, 0, 24, 36, 10, 8, 11, 36, 33, 13, 8, - 20, 30, 0, 0, 3, 20, 0, 46, 59, 59, 84, 8, 8, 11, 8, 36, - 9, 22, 8, 15, 85, 86, 86, 86, 86, 86, 86, 86, 86, 85, 85, 85, - 87, 85, 86, 86, 88, 0, 0, 0, 89, 90, 91, 92, 85, 87, 86, 85, - 85, 85, 93, 87, 94, 94, 94, 94, 94, 95, 95, 95, 95, 95, 95, 95, - 95, 96, 97, 97, 97, 97, 97, 97, 97, 97, 97, 98, 99, 99, 99, 99, - 99, 100, 94, 94, 101, 95, 95, 95, 95, 95, 95, 102, 97, 99, 99, 103, - 104, 97, 105, 106, 107, 105, 108, 105, 104, 
96, 95, 105, 96, 109, 110, 97, - 111, 106, 112, 105, 95, 106, 113, 95, 96, 106, 0, 0, 94, 94, 94, 114, - 115, 115, 116, 0, 115, 115, 115, 115, 115, 117, 118, 20, 119, 120, 120, 120, - 120, 119, 120, 0, 121, 122, 123, 123, 124, 91, 125, 126, 90, 125, 127, 127, - 127, 127, 126, 91, 125, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 126, - 125, 126, 91, 128, 129, 130, 130, 130, 130, 130, 130, 130, 131, 132, 132, 132, - 132, 132, 132, 132, 132, 132, 132, 133, 134, 132, 134, 132, 134, 132, 134, 135, - 130, 136, 132, 133, 0, 0, 27, 19, 0, 0, 18, 0, 0, 0, 0, 13, - 0, 0, 18, 36, 8, 19, 0, 0, 0, 0, 18, 8, 59, 59, 59, 59, - 59, 137, 59, 59, 59, 59, 59, 137, 138, 139, 61, 137, 59, 59, 66, 61, - 59, 61, 59, 59, 59, 66, 140, 61, 59, 137, 59, 137, 59, 59, 66, 140, - 59, 141, 142, 59, 137, 59, 59, 59, 59, 62, 59, 59, 59, 59, 59, 142, - 139, 143, 61, 59, 140, 59, 144, 0, 138, 145, 144, 61, 139, 143, 144, 144, - 139, 143, 140, 59, 140, 59, 61, 141, 59, 59, 66, 59, 59, 59, 59, 0, - 61, 61, 66, 59, 20, 20, 30, 0, 20, 20, 146, 75, 0, 0, 4, 0, - 147, 0, 0, 0, 148, 0, 0, 0, 81, 81, 81, 0, 20, 20, 35, 0, - 149, 0, 0, 0, -}; - -static RE_UINT8 re_decomposition_type_stage_5[] = { - 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 10, 0, 0, 0, 0, 2, - 0, 0, 10, 10, 2, 2, 0, 0, 2, 10, 10, 0, 17, 17, 17, 0, - 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, - 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 1, 1, 1, 2, - 2, 0, 0, 1, 1, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, - 2, 2, 2, 2, 2, 1, 1, 1, 1, 0, 1, 1, 1, 2, 2, 2, - 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, - 2, 2, 2, 1, 1, 2, 2, 0, 2, 2, 2, 0, 0, 2, 0, 0, - 0, 1, 0, 0, 0, 1, 1, 0, 0, 2, 2, 2, 2, 0, 0, 0, - 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 2, 10, 10, 10, 0, - 10, 10, 0, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, - 0, 0, 0, 10, 1, 1, 2, 1, 0, 1, 0, 1, 1, 2, 1, 2, - 1, 1, 2, 0, 1, 1, 2, 2, 2, 2, 2, 4, 0, 4, 0, 0, - 0, 0, 0, 4, 2, 0, 2, 2, 2, 0, 2, 0, 10, 10, 0, 0, - 11, 0, 0, 0, 2, 2, 3, 2, 0, 2, 3, 3, 3, 3, 3, 3, - 0, 3, 2, 0, 0, 3, 3, 3, 3, 3, 0, 0, 
10, 2, 10, 0, - 3, 0, 1, 0, 3, 0, 1, 1, 3, 3, 0, 3, 3, 2, 2, 2, - 2, 3, 0, 2, 3, 0, 0, 0, 17, 17, 17, 17, 0, 17, 0, 0, - 2, 2, 0, 2, 9, 9, 9, 9, 2, 2, 9, 9, 9, 9, 9, 0, - 11, 10, 0, 0, 13, 0, 0, 0, 2, 0, 1, 12, 0, 0, 1, 12, - 16, 9, 9, 9, 16, 16, 16, 16, 2, 16, 16, 16, 2, 2, 2, 16, - 3, 3, 1, 1, 8, 7, 8, 7, 5, 6, 8, 7, 8, 7, 5, 6, - 8, 7, 0, 0, 0, 0, 0, 8, 7, 5, 6, 8, 7, 8, 7, 8, - 7, 8, 8, 7, 5, 8, 7, 5, 8, 8, 8, 8, 7, 7, 7, 7, - 7, 7, 7, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, - 6, 8, 8, 8, 8, 7, 7, 7, 7, 5, 5, 5, 7, 8, 0, 0, - 5, 7, 5, 5, 7, 5, 7, 7, 5, 5, 7, 7, 5, 5, 7, 5, - 5, 7, 7, 5, 7, 7, 5, 7, 5, 5, 5, 7, 0, 0, 5, 5, - 5, 7, 7, 7, 5, 7, 5, 7, 8, 0, 0, 0, 12, 12, 12, 12, - 12, 12, 0, 0, 12, 0, 0, 12, 12, 2, 2, 2, 15, 15, 15, 0, - 15, 15, 15, 15, 8, 6, 8, 0, 8, 0, 8, 6, 8, 6, 8, 6, - 8, 8, 7, 8, 7, 8, 7, 5, 6, 8, 7, 8, 6, 8, 7, 5, - 7, 0, 0, 0, 0, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 14, 14, 14, 0, 0, 0, - 13, 13, 13, 0, 3, 0, 3, 3, 0, 0, 3, 0, 0, 3, 3, 0, - 3, 3, 3, 0, 3, 0, 3, 0, 0, 0, 3, 3, 3, 0, 0, 3, - 0, 3, 0, 3, 0, 0, 0, 3, 2, 2, 2, 9, 16, 0, 0, 0, - 16, 16, 16, 0, 9, 9, 0, 0, -}; - -/* Decomposition_Type: 2964 bytes. */ - -RE_UINT32 re_get_decomposition_type(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_decomposition_type_stage_1[f] << 5; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_decomposition_type_stage_2[pos + f] << 4; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_decomposition_type_stage_3[pos + f] << 2; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_decomposition_type_stage_4[pos + f] << 2; - value = re_decomposition_type_stage_5[pos + code]; - - return value; -} - -/* East_Asian_Width. 
*/ - -static RE_UINT8 re_east_asian_width_stage_1[] = { - 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 5, 5, 7, 8, 9, - 10, 10, 10, 10, 10, 10, 11, 5, 12, 10, 10, 13, 10, 10, 10, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 16, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 17, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 17, -}; - -static RE_UINT8 re_east_asian_width_stage_2[] = { - 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 7, 8, 9, 10, 11, 12, 13, 14, 5, 15, 5, 16, 5, 5, 17, 18, - 19, 20, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 24, 5, 5, 5, 5, 25, 5, 5, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 26, 5, 5, 5, 5, 5, 5, 5, 5, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 22, 22, 5, 5, 5, 28, 29, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 30, - 22, 22, 22, 22, 22, 22, 22, 31, 22, 22, 32, 5, 5, 5, 5, 5, - 33, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 34, 35, 36, 37, 38, 
39, 40, 5, 5, 41, 5, 5, 5, 5, 5, 5, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 42, - 5, 43, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 44, -}; - -static RE_UINT8 re_east_asian_width_stage_3[] = { - 0, 0, 1, 1, 1, 1, 1, 2, 0, 0, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 11, 0, 0, 0, 0, 0, 15, 16, 0, 0, - 0, 0, 0, 0, 0, 9, 9, 0, 0, 0, 0, 0, 17, 18, 0, 0, - 19, 19, 19, 19, 19, 19, 19, 0, 0, 20, 21, 20, 21, 0, 0, 0, - 9, 19, 19, 19, 19, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 22, 22, 22, 22, 22, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 23, 24, 25, 0, 0, 0, 26, 27, 0, 28, 0, 0, 0, 0, 0, - 29, 30, 31, 0, 0, 32, 33, 34, 35, 34, 0, 36, 0, 37, 38, 0, - 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 0, 0, 0, 0, - 0, 50, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 53, - 0, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, 19, 19, 54, 19, - 19, 19, 19, 19, 33, 19, 19, 55, 19, 56, 21, 57, 58, 59, 60, 61, - 62, 63, 0, 0, 64, 65, 66, 67, 0, 68, 69, 70, 71, 72, 73, 74, - 75, 0, 76, 77, 78, 79, 0, 80, 0, 81, 0, 82, 0, 0, 83, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 84, 0, 0, 0, 0, 0, 0, 0, - 0, 85, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 22, 87, 22, 22, 22, 22, 22, 65, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 88, 0, 89, - 90, 22, 22, 91, 92, 22, 22, 22, 22, 93, 22, 22, 22, 22, 22, 22, - 94, 22, 95, 92, 22, 22, 22, 22, 91, 22, 22, 96, 22, 22, 65, 22, - 22, 91, 22, 22, 97, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 91, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 0, 0, 0, 0, - 22, 22, 22, 22, 22, 22, 22, 22, 98, 22, 22, 22, 99, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 22, 98, 0, 0, 0, 0, 0, 0, 0, 0, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 65, 0, 0, 0, 0, 0, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 100, 0, 22, 22, 101, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 103, 
104, 104, 104, 104, 104, 105, 106, 106, 106, 106, 107, 108, 109, 110, 77, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 111, 0, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 98, 0, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 112, - 113, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, - 115, 19, 116, 19, 19, 19, 34, 19, 117, 118, 119, 0, 0, 0, 0, 0, - 112, 22, 22, 89, 120, 113, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 22, 22, 121, 122, 22, 22, 22, 123, 22, 65, 22, 22, 124, 65, 22, 125, - 22, 22, 22, 91, 126, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 127, - 22, 22, 22, 95, 128, 22, 129, 130, 0, 131, 114, 0, 0, 0, 0, 132, - 22, 22, 22, 22, 22, 0, 0, 0, 22, 22, 22, 22, 133, 112, 85, 134, - 0, 91, 129, 135, 89, 91, 0, 0, 22, 113, 0, 0, 111, 0, 0, 0, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 95, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 116, -}; - -static RE_UINT8 re_east_asian_width_stage_4[] = { - 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, - 7, 8, 9, 7, 0, 10, 0, 0, 11, 12, 11, 13, 14, 10, 9, 14, - 8, 12, 9, 5, 15, 0, 0, 0, 16, 0, 12, 0, 0, 13, 12, 0, - 17, 0, 11, 12, 9, 11, 7, 15, 13, 0, 0, 0, 0, 0, 0, 10, - 5, 5, 5, 11, 0, 18, 17, 15, 11, 0, 7, 16, 7, 7, 7, 7, - 17, 7, 7, 7, 19, 7, 14, 0, 20, 20, 20, 20, 18, 9, 14, 14, - 9, 7, 0, 0, 8, 15, 12, 10, 0, 11, 0, 12, 17, 11, 0, 0, - 0, 0, 21, 11, 12, 15, 15, 0, 12, 10, 0, 0, 22, 10, 12, 0, - 12, 11, 12, 9, 7, 7, 7, 0, 7, 7, 14, 0, 0, 0, 15, 0, - 0, 0, 14, 0, 10, 11, 0, 0, 0, 12, 0, 0, 8, 12, 18, 12, - 15, 15, 10, 17, 18, 16, 7, 5, 0, 7, 0, 14, 0, 0, 11, 11, - 10, 0, 0, 0, 14, 7, 13, 13, 13, 13, 0, 0, 0, 15, 15, 0, - 0, 15, 0, 0, 0, 0, 0, 12, 10, 0, 23, 0, 0, 0, 24, 0, - 0, 0, 25, 26, 27, 0, 0, 0, 7, 7, 19, 7, 7, 0, 0, 0, - 13, 14, 0, 0, 13, 13, 0, 14, 14, 13, 18, 13, 14, 0, 0, 0, - 13, 14, 0, 12, 0, 0, 0, 24, 0, 22, 15, 13, 0, 28, 0, 5, - 5, 0, 20, 
20, 20, 0, 0, 0, 19, 19, 9, 19, 0, 0, 0, 29, - 29, 0, 0, 13, 30, 0, 23, 0, 0, 0, 0, 31, 0, 32, 7, 33, - 7, 34, 7, 7, 19, 0, 33, 7, 35, 36, 33, 36, 0, 30, 23, 0, - 0, 0, 26, 0, 0, 0, 0, 15, 0, 0, 0, 37, 29, 38, 0, 0, - 0, 13, 7, 7, 0, 25, 0, 0, 26, 0, 0, 29, 0, 39, 1, 40, - 0, 41, 0, 0, 0, 0, 29, 26, 26, 42, 14, 0, 20, 20, 38, 20, - 20, 28, 0, 0, 20, 20, 20, 0, 43, 20, 20, 20, 20, 20, 20, 44, - 25, 20, 20, 20, 20, 44, 25, 20, 0, 25, 20, 20, 20, 20, 20, 28, - 20, 20, 44, 0, 20, 20, 7, 7, 20, 20, 20, 26, 20, 44, 0, 0, - 20, 20, 28, 0, 44, 20, 20, 20, 20, 44, 20, 0, 45, 46, 46, 46, - 46, 46, 46, 46, 47, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 49, - 50, 48, 50, 48, 50, 48, 50, 51, 46, 52, 48, 49, 26, 0, 0, 0, - 44, 0, 0, 0, 28, 0, 0, 0, 0, 26, 0, 0, 7, 7, 9, 0, - 7, 7, 7, 14, 7, 7, 7, 33, 53, 20, 54, 7, 7, 7, 7, 11, - 20, 20, 26, 0, 26, 0, 0, 25, 20, 38, 20, 20, 20, 20, 20, 55, - 20, 20, 44, 29, 26, 26, 20, 20, 55, 20, 20, 20, 20, 20, 20, 27, - 0, 0, 29, 44, 20, 20, 0, 0, 0, 0, 56, 0, 0, 24, 0, 0, - 0, 0, 29, 20, 20, 28, 0, 26, 0, 44, 0, 0, 27, 20, 20, 44, -}; - -static RE_UINT8 re_east_asian_width_stage_5[] = { - 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 0, 0, 1, 5, 5, - 1, 5, 5, 1, 1, 0, 1, 0, 5, 1, 1, 5, 1, 1, 1, 1, - 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, - 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, - 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, - 3, 3, 3, 3, 0, 2, 0, 0, 0, 1, 1, 0, 0, 0, 3, 3, - 0, 3, 3, 0, 0, 3, 3, 3, 3, 0, 0, 0, 3, 0, 0, 3, - 3, 3, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 3, 3, 1, - 3, 3, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 3, 3, - 1, 3, 1, 1, 3, 0, 3, 0, 3, 3, 0, 3, 0, 0, 5, 5, - 5, 5, 0, 0, 0, 5, 5, 0, 0, 3, 1, 1, 4, 3, 3, 3, - 3, 3, 3, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 0, 0, 0, - 4, 4, 4, 0, 1, 3, 3, 3, 3, 3, 3, 1, 3, 0, 3, 3, - 0, 0, 3, 0, -}; - -/* East_Asian_Width: 2052 bytes. 
*/ - -RE_UINT32 re_get_east_asian_width(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 12; - code = ch ^ (f << 12); - pos = (RE_UINT32)re_east_asian_width_stage_1[f] << 4; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_east_asian_width_stage_2[pos + f] << 4; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_east_asian_width_stage_3[pos + f] << 2; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_east_asian_width_stage_4[pos + f] << 2; - value = re_east_asian_width_stage_5[pos + code]; - - return value; -} - -/* Joining_Group. */ - -static RE_UINT8 re_joining_group_stage_1[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, -}; - -static RE_UINT8 re_joining_group_stage_2[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_joining_group_stage_3[] = { - 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_joining_group_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 2, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 0, 14, 15, 0, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 23, 24, 0, -}; - -static RE_UINT8 re_joining_group_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 45, 0, 3, 3, 43, 3, 45, 3, 4, 41, 4, 4, 13, 13, 13, 6, - 6, 31, 31, 35, 35, 33, 33, 39, 39, 1, 1, 11, 11, 55, 55, 55, - 0, 9, 29, 19, 22, 24, 26, 16, 43, 45, 45, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 29, - 0, 3, 3, 3, 0, 3, 43, 43, 45, 4, 4, 4, 4, 4, 4, 4, - 4, 13, 13, 13, 13, 13, 13, 13, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 31, 31, 31, 31, 31, 31, 31, 31, 31, 35, 35, 35, 33, 33, 39, - 1, 9, 9, 9, 9, 9, 9, 29, 29, 11, 38, 11, 19, 19, 19, 11, - 11, 11, 
11, 11, 11, 22, 22, 22, 22, 26, 26, 26, 26, 56, 21, 13, - 41, 17, 17, 14, 43, 43, 43, 43, 43, 43, 43, 43, 55, 47, 55, 43, - 45, 45, 46, 46, 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 31, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 33, 1, 0, 0, 21, - 2, 0, 5, 12, 12, 7, 7, 15, 44, 50, 18, 42, 42, 48, 49, 20, - 23, 25, 27, 36, 10, 8, 28, 32, 34, 30, 7, 37, 40, 5, 12, 7, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 52, 53, - 4, 4, 4, 4, 4, 4, 4, 13, 13, 6, 6, 31, 35, 1, 1, 1, - 9, 9, 11, 11, 11, 24, 24, 26, 26, 26, 22, 31, 31, 35, 13, 13, - 35, 31, 13, 3, 3, 55, 55, 45, 43, 43, 54, 54, 13, 35, 35, 19, - 4, 4, 13, 39, 9, 29, 22, 24, 45, 45, 31, 43, 57, 0, 6, 33, - 11, 58, 31, 1, 19, 0, 4, 4, 4, 31, 45, 86, 87, 88, 0, 0, - 59, 61, 61, 65, 65, 62, 0, 83, 0, 85, 85, 0, 0, 66, 80, 84, - 68, 68, 68, 69, 63, 81, 70, 71, 77, 60, 60, 73, 73, 76, 74, 74, - 74, 75, 0, 0, 78, 0, 0, 0, 0, 0, 0, 72, 64, 79, 82, 67, -}; - -/* Joining_Group: 586 bytes. */ - -RE_UINT32 re_get_joining_group(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_joining_group_stage_1[f] << 3; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_joining_group_stage_2[pos + f] << 4; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_joining_group_stage_3[pos + f] << 4; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_joining_group_stage_4[pos + f] << 4; - value = re_joining_group_stage_5[pos + code]; - - return value; -} - -/* Joining_Type. 
*/ - -static RE_UINT8 re_joining_type_stage_1[] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 6, - 7, 8, 4, 4, 4, 4, 9, 4, 4, 4, 4, 10, 4, 11, 12, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 13, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -}; - -static RE_UINT8 re_joining_type_stage_2[] = { - 0, 1, 0, 0, 0, 0, 2, 0, 0, 3, 0, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 26, 0, 0, 0, 0, 27, 0, 0, 0, 0, 0, 0, 0, 28, 29, - 30, 31, 32, 0, 33, 34, 35, 36, 37, 38, 0, 39, 0, 0, 0, 0, - 40, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 43, 44, 0, 0, 0, 0, - 45, 46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 47, 48, 0, 0, - 49, 50, 51, 52, 53, 54, 0, 55, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 56, 0, 0, 0, 0, 0, 57, 43, 0, 58, - 0, 0, 0, 59, 0, 60, 61, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 62, 63, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, - 65, 66, 67, 68, 69, 70, 71, 0, 72, 73, 0, 74, 75, 76, 77, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 78, 79, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 
80, 81, 0, 0, 0, 0, 0, 0, 0, 0, 82, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 83, 0, 0, 0, 0, 0, 0, - 0, 0, 84, 85, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 87, 88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 90, 91, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 92, 0, 93, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_joining_type_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, - 0, 4, 2, 5, 6, 0, 0, 0, 0, 7, 8, 9, 10, 2, 11, 12, - 13, 14, 15, 15, 16, 17, 18, 19, 20, 21, 22, 2, 23, 24, 25, 26, - 0, 0, 27, 28, 29, 15, 30, 31, 0, 32, 33, 0, 34, 35, 0, 0, - 0, 0, 36, 37, 0, 38, 39, 2, 40, 0, 0, 41, 42, 43, 44, 0, - 45, 0, 0, 46, 47, 0, 44, 0, 48, 0, 0, 46, 49, 45, 0, 50, - 48, 0, 0, 46, 51, 0, 44, 0, 45, 0, 0, 52, 47, 53, 44, 0, - 54, 0, 0, 0, 55, 0, 0, 0, 28, 0, 0, 56, 57, 58, 44, 0, - 45, 0, 0, 52, 59, 0, 44, 0, 45, 0, 0, 0, 47, 0, 44, 0, - 0, 0, 0, 0, 60, 61, 0, 0, 0, 0, 0, 62, 63, 0, 0, 0, - 0, 0, 0, 64, 65, 0, 0, 0, 0, 66, 0, 67, 0, 0, 0, 68, - 69, 70, 2, 71, 53, 0, 0, 0, 0, 0, 72, 73, 0, 74, 28, 75, - 76, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72, 0, 0, - 0, 77, 0, 77, 0, 44, 0, 44, 0, 0, 0, 78, 79, 80, 0, 0, - 81, 0, 15, 15, 15, 15, 15, 82, 83, 15, 84, 0, 0, 0, 0, 0, - 0, 0, 85, 86, 0, 0, 0, 0, 0, 87, 0, 0, 0, 88, 89, 90, - 0, 0, 0, 91, 0, 0, 0, 0, 92, 0, 0, 93, 54, 0, 94, 92, - 95, 0, 96, 0, 0, 0, 97, 95, 0, 0, 98, 99, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 100, 101, 102, 0, 0, 0, 0, 2, 2, 2, 103, - 104, 0, 105, 0, 0, 0, 106, 0, 0, 0, 0, 0, 0, 2, 2, 28, - 0, 0, 0, 0, 0, 0, 20, 95, 0, 0, 0, 0, 0, 0, 0, 20, - 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 107, 0, 0, 0, 0, 0, - 0, 108, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 109, - 0, 56, 0, 0, 0, 0, 0, 95, 110, 0, 58, 0, 15, 15, 15, 111, - 0, 0, 0, 0, 112, 0, 2, 95, 0, 0, 113, 0, 114, 95, 0, 0, - 40, 0, 0, 115, 0, 0, 116, 0, 
0, 0, 117, 118, 119, 0, 0, 46, - 0, 0, 0, 120, 45, 0, 121, 53, 0, 0, 0, 0, 0, 0, 122, 0, - 0, 123, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 124, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 28, 0, 0, 0, 0, 0, 0, 0, 0, 125, - 126, 0, 0, 127, 0, 0, 0, 0, 0, 0, 0, 0, 128, 129, 130, 0, - 131, 132, 133, 0, 0, 0, 0, 0, 45, 0, 0, 134, 135, 0, 0, 20, - 95, 0, 0, 136, 0, 0, 0, 0, 40, 0, 137, 138, 0, 0, 0, 139, - 95, 0, 0, 140, 141, 0, 0, 0, 0, 0, 20, 142, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 20, 143, 0, 95, 0, 0, 46, 28, 0, 144, 138, - 0, 0, 0, 134, 61, 0, 0, 0, 0, 0, 0, 145, 146, 0, 0, 0, - 0, 0, 0, 147, 28, 121, 0, 0, 0, 0, 0, 148, 28, 0, 0, 0, - 0, 0, 149, 150, 0, 0, 0, 0, 0, 72, 151, 0, 0, 0, 0, 0, - 0, 0, 0, 152, 0, 0, 0, 0, 0, 153, 154, 155, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 138, 0, 0, 0, 135, 0, 0, 0, 0, - 20, 40, 0, 0, 0, 0, 0, 0, 0, 156, 92, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 157, 158, 159, 0, 107, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 77, 0, 0, 0, 2, 2, 2, 160, 2, 2, 71, 116, - 161, 94, 4, 0, 0, 0, 0, 0, 162, 163, 164, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 135, 0, 0, 15, 15, 15, 15, 165, 0, 0, 0, - 45, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_joining_type_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 2, 4, 0, - 5, 2, 2, 2, 2, 2, 2, 6, 7, 6, 0, 0, 2, 2, 8, 9, - 10, 11, 12, 13, 14, 15, 15, 15, 16, 15, 17, 2, 0, 0, 0, 18, - 19, 20, 15, 15, 15, 15, 21, 21, 21, 21, 22, 15, 15, 15, 15, 15, - 23, 21, 21, 24, 25, 26, 2, 27, 2, 27, 28, 29, 0, 0, 18, 30, - 0, 0, 0, 3, 31, 32, 22, 33, 15, 15, 34, 23, 2, 2, 8, 35, - 15, 15, 32, 15, 15, 15, 13, 36, 24, 36, 22, 15, 0, 37, 2, 2, - 9, 0, 0, 0, 0, 0, 18, 15, 15, 15, 38, 2, 2, 0, 39, 0, - 0, 37, 6, 2, 2, 5, 5, 4, 36, 25, 12, 15, 15, 40, 5, 0, - 15, 15, 25, 41, 42, 43, 12, 44, 0, 2, 2, 2, 6, 2, 2, 2, - 8, 0, 0, 0, 0, 0, 45, 9, 5, 2, 9, 1, 5, 2, 0, 0, - 37, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 9, 5, 9, 0, 1, - 7, 0, 0, 0, 7, 3, 27, 4, 4, 1, 0, 0, 5, 6, 9, 1, - 0, 0, 0, 27, 0, 45, 0, 0, 45, 0, 0, 0, 9, 
0, 0, 1, - 0, 0, 0, 37, 9, 37, 28, 4, 0, 7, 0, 0, 0, 45, 0, 4, - 0, 0, 45, 0, 37, 46, 0, 0, 1, 2, 8, 0, 0, 3, 2, 8, - 1, 2, 6, 9, 0, 0, 2, 4, 0, 0, 4, 0, 0, 47, 1, 0, - 5, 2, 2, 8, 2, 28, 0, 5, 2, 2, 5, 2, 2, 2, 2, 9, - 0, 0, 0, 5, 28, 2, 7, 7, 0, 0, 4, 37, 5, 9, 0, 0, - 45, 7, 0, 1, 37, 9, 0, 0, 0, 6, 2, 4, 0, 45, 5, 2, - 2, 0, 0, 1, 0, 48, 49, 4, 15, 15, 0, 0, 0, 50, 15, 15, - 15, 15, 51, 0, 8, 3, 9, 0, 45, 0, 5, 0, 0, 3, 27, 0, - 0, 45, 2, 8, 46, 5, 2, 9, 3, 2, 2, 27, 2, 2, 2, 8, - 2, 0, 0, 0, 0, 28, 8, 9, 0, 0, 3, 2, 4, 0, 0, 0, - 37, 4, 6, 4, 0, 45, 4, 47, 0, 0, 0, 2, 2, 37, 0, 0, - 8, 2, 2, 2, 28, 2, 9, 1, 0, 9, 4, 0, 2, 4, 3, 2, - 0, 0, 3, 52, 0, 0, 37, 8, 2, 9, 37, 2, 0, 0, 37, 4, - 0, 0, 7, 0, 8, 2, 2, 4, 45, 45, 3, 0, 53, 0, 0, 0, - 0, 4, 0, 0, 0, 37, 2, 4, 0, 3, 2, 2, 3, 37, 4, 9, - 0, 1, 0, 0, 0, 0, 5, 8, 7, 7, 0, 0, 3, 0, 0, 9, - 28, 27, 9, 37, 0, 0, 0, 4, 0, 1, 9, 1, 0, 0, 0, 45, - 0, 0, 5, 0, 0, 37, 8, 0, 5, 7, 0, 2, 0, 0, 8, 3, - 15, 54, 55, 56, 14, 57, 15, 12, 58, 59, 48, 13, 24, 22, 12, 60, - 58, 0, 0, 0, 0, 0, 20, 61, 0, 0, 2, 2, 2, 8, 0, 0, - 3, 8, 7, 1, 0, 3, 2, 5, 2, 9, 0, 0, 3, 0, 0, 0, - 0, 37, 2, 8, 0, 0, 37, 9, 4, 28, 0, 45, 3, 2, 8, 0, - 0, 37, 2, 9, 3, 2, 46, 3, 28, 0, 0, 0, 37, 4, 0, 6, - 3, 2, 8, 47, 0, 0, 3, 1, 2, 6, 0, 0, 37, 6, 2, 0, - 2, 8, 2, 6, 37, 2, 2, 2, 2, 2, 37, 2, 28, 7, 0, 0, - 0, 0, 0, 7, 0, 3, 4, 0, 3, 2, 2, 2, 8, 5, 2, 0, - 2, 8, 3, 2, 0, 9, 0, 0, 2, 8, 2, 2, 2, 2, 27, 2, - 6, 28, 8, 0, 15, 2, 8, 0, -}; - -static RE_UINT8 re_joining_type_stage_5[] = { - 0, 0, 0, 0, 0, 5, 0, 0, 5, 5, 5, 5, 0, 0, 0, 5, - 5, 5, 0, 0, 0, 5, 5, 5, 5, 5, 0, 5, 0, 5, 5, 0, - 5, 5, 5, 0, 5, 0, 0, 0, 2, 0, 3, 3, 3, 3, 2, 3, - 2, 3, 2, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 2, - 1, 2, 2, 2, 3, 2, 2, 5, 0, 0, 2, 2, 5, 3, 3, 3, - 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 2, 2, 3, - 2, 3, 2, 3, 2, 2, 3, 3, 0, 3, 5, 5, 5, 0, 0, 5, - 5, 0, 5, 5, 5, 5, 3, 3, 2, 0, 0, 2, 3, 5, 2, 2, - 2, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 2, 0, 3, 2, 2, - 3, 2, 2, 2, 0, 0, 5, 5, 
2, 2, 2, 5, 0, 0, 1, 0, - 3, 2, 0, 0, 3, 0, 3, 2, 2, 3, 3, 2, 2, 0, 2, 2, - 2, 2, 0, 0, 0, 0, 5, 0, 5, 0, 5, 0, 0, 5, 0, 5, - 0, 0, 0, 2, 0, 0, 1, 5, 0, 5, 5, 2, 2, 5, 2, 0, - 0, 1, 5, 5, 2, 2, 4, 0, 2, 3, 0, 3, 0, 3, 3, 0, - 0, 4, 3, 3, 2, 2, 2, 4, 2, 3, 0, 0, 3, 5, 5, 0, - 3, 2, 3, 3, 3, 2, 2, 0, -}; - -/* Joining_Type: 2384 bytes. */ - -RE_UINT32 re_get_joining_type(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 12; - code = ch ^ (f << 12); - pos = (RE_UINT32)re_joining_type_stage_1[f] << 5; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_joining_type_stage_2[pos + f] << 3; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_joining_type_stage_3[pos + f] << 2; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_joining_type_stage_4[pos + f] << 2; - value = re_joining_type_stage_5[pos + code]; - - return value; -} - -/* Line_Break. */ - -static RE_UINT8 re_line_break_stage_1[] = { - 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 16, 10, 17, 5, 18, 10, 10, 19, 10, 20, 21, 22, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 23, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 23, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 24, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, -}; - -static RE_UINT8 re_line_break_stage_2[] = { - 0, 1, 2, 2, 2, 3, 4, 5, 2, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, - 29, 30, 31, 32, 33, 34, 35, 36, 37, 2, 2, 2, 2, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 2, 51, 2, 2, 52, 53, - 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, - 2, 2, 2, 70, 2, 2, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, - 81, 82, 83, 84, 85, 86, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 87, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, - 88, 79, 79, 79, 79, 79, 79, 79, 79, 89, 2, 2, 90, 91, 2, 92, - 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 101, - 102, 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, - 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, - 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, - 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, - 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 108, - 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, - 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, - 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, - 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, - 110, 110, 79, 79, 79, 79, 111, 112, 2, 2, 113, 114, 115, 116, 117, 118, - 119, 120, 121, 122, 110, 123, 124, 125, 2, 126, 127, 110, 2, 2, 128, 110, - 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 110, 110, 139, 110, 110, 110, - 140, 141, 142, 143, 144, 
145, 146, 110, 147, 148, 110, 149, 150, 151, 152, 110, - 110, 153, 110, 110, 110, 154, 110, 110, 155, 156, 110, 110, 110, 110, 110, 110, - 2, 2, 2, 2, 2, 2, 2, 157, 158, 2, 159, 110, 110, 110, 110, 110, - 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, - 2, 2, 2, 2, 160, 161, 162, 2, 163, 110, 110, 110, 110, 110, 110, 110, - 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, - 110, 110, 110, 110, 110, 110, 110, 110, 2, 2, 2, 164, 165, 110, 110, 110, - 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, - 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, - 2, 2, 2, 2, 166, 167, 168, 169, 110, 110, 110, 110, 110, 110, 170, 171, - 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 172, - 79, 79, 79, 79, 79, 173, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, - 174, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, - 110, 110, 110, 110, 110, 110, 110, 110, 175, 176, 110, 110, 110, 110, 110, 110, - 2, 177, 178, 179, 180, 110, 181, 110, 182, 183, 184, 2, 2, 185, 2, 186, - 2, 2, 2, 2, 187, 188, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, - 189, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, - 2, 190, 191, 110, 110, 110, 110, 110, 110, 110, 110, 110, 192, 193, 110, 110, - 79, 79, 194, 195, 79, 79, 79, 196, 197, 198, 199, 200, 201, 202, 203, 204, - 205, 206, 207, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 208, - 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 208, - 209, 110, 210, 211, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, - 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, -}; - -static RE_UINT16 re_line_break_stage_3[] = { - 0, 1, 2, 3, 4, 5, 4, 6, 7, 1, 8, 9, 4, 10, 4, 10, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 11, 12, 4, 4, - 1, 1, 1, 1, 13, 14, 15, 
16, 17, 4, 18, 4, 4, 4, 4, 4, - 19, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 20, 4, 21, 20, 4, - 22, 23, 1, 24, 25, 26, 27, 28, 29, 30, 4, 4, 31, 1, 32, 33, - 4, 4, 4, 4, 4, 34, 35, 36, 37, 38, 4, 1, 39, 4, 4, 4, - 4, 4, 40, 41, 36, 4, 31, 42, 4, 43, 44, 45, 4, 46, 47, 47, - 47, 47, 4, 48, 47, 49, 50, 1, 51, 4, 4, 52, 1, 53, 54, 4, - 55, 56, 57, 58, 59, 60, 61, 62, 63, 56, 57, 64, 65, 66, 67, 68, - 69, 18, 57, 70, 71, 72, 61, 73, 74, 56, 57, 70, 75, 76, 61, 77, - 78, 79, 80, 81, 82, 83, 67, 84, 85, 86, 57, 87, 88, 89, 61, 90, - 91, 86, 57, 92, 88, 93, 61, 94, 95, 86, 4, 96, 97, 98, 61, 99, - 100, 101, 4, 102, 103, 104, 67, 105, 106, 107, 107, 108, 109, 110, 47, 47, - 111, 112, 113, 114, 115, 116, 47, 47, 117, 118, 36, 119, 120, 4, 121, 122, - 123, 124, 1, 125, 126, 127, 47, 47, 107, 107, 107, 107, 128, 107, 107, 107, - 107, 129, 4, 4, 130, 4, 4, 4, 131, 131, 131, 131, 131, 131, 132, 132, - 132, 132, 133, 134, 134, 134, 134, 134, 4, 4, 4, 4, 135, 136, 4, 4, - 135, 4, 4, 137, 138, 139, 4, 4, 4, 138, 4, 4, 4, 140, 141, 121, - 4, 142, 4, 4, 4, 4, 4, 143, 144, 4, 4, 4, 4, 4, 4, 4, - 144, 145, 4, 4, 4, 4, 146, 147, 148, 149, 4, 150, 4, 151, 148, 152, - 107, 107, 107, 107, 107, 153, 154, 142, 155, 154, 4, 4, 4, 4, 4, 77, - 156, 4, 157, 4, 4, 4, 4, 158, 4, 45, 159, 159, 160, 107, 161, 162, - 107, 107, 163, 107, 164, 165, 4, 4, 4, 166, 107, 107, 107, 167, 107, 168, - 154, 154, 161, 169, 47, 47, 47, 47, 170, 4, 4, 171, 172, 173, 174, 175, - 176, 4, 177, 36, 4, 4, 40, 178, 4, 4, 171, 179, 180, 36, 4, 181, - 147, 47, 47, 47, 77, 182, 183, 184, 4, 4, 4, 4, 1, 1, 1, 185, - 4, 143, 4, 4, 143, 186, 4, 187, 4, 4, 4, 188, 188, 189, 4, 190, - 191, 192, 193, 194, 195, 196, 197, 198, 199, 121, 200, 201, 202, 1, 1, 203, - 204, 205, 206, 4, 4, 207, 208, 209, 210, 209, 4, 4, 4, 211, 4, 4, - 212, 213, 214, 215, 216, 217, 218, 4, 219, 220, 221, 222, 4, 4, 223, 4, - 224, 225, 226, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 227, - 4, 4, 228, 47, 229, 47, 230, 230, 230, 230, 230, 230, 230, 230, 230, 231, - 230, 
230, 230, 230, 208, 230, 230, 232, 230, 233, 234, 235, 236, 237, 238, 4, - 239, 240, 4, 241, 242, 4, 243, 244, 4, 245, 4, 246, 247, 248, 249, 250, - 251, 4, 4, 4, 4, 252, 253, 254, 230, 255, 4, 4, 256, 4, 257, 4, - 258, 259, 4, 4, 4, 224, 4, 260, 4, 4, 4, 4, 4, 261, 4, 262, - 4, 263, 4, 264, 57, 265, 266, 47, 4, 4, 45, 4, 4, 45, 4, 4, - 4, 4, 4, 4, 4, 4, 267, 268, 4, 4, 130, 4, 4, 4, 269, 270, - 4, 228, 271, 271, 271, 271, 1, 1, 272, 273, 274, 275, 276, 47, 47, 47, - 277, 278, 277, 277, 277, 277, 277, 279, 277, 277, 277, 277, 277, 277, 277, 277, - 277, 277, 277, 277, 277, 280, 47, 281, 282, 283, 284, 285, 286, 277, 287, 277, - 288, 289, 290, 277, 287, 277, 288, 291, 292, 277, 293, 294, 277, 277, 277, 277, - 295, 277, 277, 296, 277, 277, 279, 297, 277, 295, 277, 277, 298, 277, 277, 277, - 277, 277, 277, 277, 277, 277, 277, 295, 277, 277, 277, 277, 4, 4, 4, 4, - 277, 299, 277, 277, 277, 277, 277, 277, 300, 277, 277, 277, 301, 4, 4, 181, - 302, 4, 303, 47, 4, 4, 267, 304, 4, 305, 4, 4, 4, 4, 4, 306, - 4, 4, 45, 77, 47, 47, 47, 307, 308, 4, 309, 310, 4, 4, 4, 311, - 312, 4, 4, 171, 313, 154, 1, 314, 36, 4, 315, 4, 316, 317, 131, 318, - 51, 4, 4, 319, 320, 321, 107, 322, 4, 4, 323, 324, 325, 326, 107, 107, - 107, 107, 107, 107, 327, 328, 31, 329, 330, 331, 271, 4, 4, 4, 158, 4, - 4, 4, 4, 4, 4, 4, 332, 154, 333, 334, 335, 336, 335, 337, 335, 333, - 334, 335, 336, 335, 337, 335, 333, 334, 335, 336, 335, 337, 335, 333, 334, 335, - 336, 335, 337, 335, 333, 334, 335, 336, 335, 337, 335, 333, 334, 335, 336, 335, - 337, 335, 333, 334, 335, 336, 335, 337, 335, 333, 334, 335, 336, 335, 337, 335, - 336, 335, 338, 132, 339, 134, 134, 340, 341, 341, 341, 341, 341, 341, 341, 341, - 47, 47, 47, 47, 47, 47, 47, 47, 228, 342, 343, 344, 345, 4, 4, 4, - 4, 4, 4, 4, 265, 346, 4, 4, 4, 4, 4, 347, 47, 4, 4, 4, - 4, 348, 4, 4, 77, 47, 47, 349, 1, 350, 1, 351, 352, 353, 354, 188, - 4, 4, 4, 4, 4, 4, 4, 355, 356, 357, 277, 358, 277, 359, 360, 361, - 277, 362, 277, 295, 363, 364, 365, 366, 
367, 4, 139, 368, 187, 187, 47, 47, - 4, 4, 4, 4, 4, 4, 4, 229, 369, 4, 4, 370, 4, 4, 4, 4, - 45, 371, 72, 47, 47, 4, 4, 372, 4, 121, 4, 4, 4, 72, 33, 371, - 4, 4, 373, 4, 229, 4, 4, 374, 4, 375, 4, 4, 376, 377, 47, 47, - 4, 187, 154, 4, 4, 376, 4, 371, 4, 4, 77, 4, 4, 4, 378, 47, - 4, 4, 4, 228, 4, 158, 77, 47, 379, 4, 4, 380, 4, 381, 4, 4, - 4, 45, 307, 47, 47, 47, 4, 382, 4, 383, 4, 384, 47, 47, 47, 47, - 4, 4, 4, 385, 4, 348, 4, 4, 386, 387, 4, 388, 77, 389, 4, 4, - 4, 4, 47, 47, 4, 4, 390, 391, 4, 4, 4, 392, 4, 263, 4, 393, - 4, 394, 395, 47, 47, 47, 47, 47, 4, 4, 4, 4, 147, 47, 47, 47, - 4, 4, 4, 396, 4, 4, 4, 397, 47, 47, 47, 47, 47, 47, 4, 45, - 176, 4, 4, 398, 399, 348, 400, 401, 176, 4, 4, 402, 403, 4, 147, 154, - 176, 4, 316, 404, 405, 4, 4, 406, 176, 4, 4, 319, 407, 408, 20, 409, - 4, 18, 410, 411, 47, 47, 47, 47, 412, 37, 413, 4, 4, 267, 414, 154, - 415, 56, 57, 70, 75, 416, 417, 418, 4, 4, 4, 419, 420, 421, 47, 47, - 4, 4, 4, 1, 422, 154, 47, 47, 4, 4, 267, 423, 424, 425, 47, 47, - 4, 4, 4, 1, 426, 154, 427, 47, 4, 4, 31, 428, 154, 47, 47, 47, - 107, 429, 163, 430, 47, 47, 47, 47, 47, 47, 4, 4, 4, 4, 36, 431, - 47, 47, 47, 47, 4, 4, 4, 147, 57, 4, 267, 432, 433, 36, 121, 434, - 4, 435, 124, 324, 47, 47, 47, 47, 4, 142, 47, 47, 47, 47, 47, 47, - 4, 4, 4, 4, 4, 4, 45, 436, 4, 4, 4, 4, 373, 47, 47, 47, - 4, 4, 4, 4, 4, 437, 4, 4, 438, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 439, 4, 4, 45, 47, 47, 47, 47, 47, - 4, 4, 4, 4, 440, 4, 4, 4, 4, 4, 4, 4, 228, 47, 47, 47, - 4, 4, 4, 147, 4, 45, 441, 47, 47, 47, 47, 47, 47, 4, 187, 442, - 4, 4, 4, 443, 444, 445, 18, 446, 4, 47, 47, 47, 47, 47, 47, 47, - 4, 4, 4, 4, 409, 447, 1, 169, 401, 176, 47, 47, 47, 47, 448, 47, - 277, 277, 277, 277, 277, 277, 300, 47, 277, 277, 277, 277, 277, 277, 277, 449, - 450, 47, 47, 47, 47, 47, 47, 47, 4, 4, 4, 4, 4, 4, 229, 121, - 147, 451, 452, 47, 47, 47, 47, 47, 4, 4, 4, 4, 4, 4, 4, 158, - 4, 4, 21, 4, 4, 4, 453, 1, 454, 4, 455, 4, 4, 4, 147, 47, - 4, 4, 4, 4, 456, 47, 47, 47, 
4, 4, 4, 4, 4, 228, 4, 265, - 4, 4, 4, 4, 4, 188, 4, 4, 4, 148, 457, 458, 459, 4, 4, 4, - 460, 461, 4, 462, 463, 86, 4, 4, 4, 4, 263, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 464, 465, 465, 465, 1, 1, 1, 466, 1, 1, 467, 468, - 469, 470, 23, 47, 47, 47, 47, 47, 432, 471, 472, 47, 47, 47, 47, 47, - 4, 4, 4, 4, 473, 324, 47, 47, 4, 4, 4, 4, 474, 475, 47, 47, - 459, 4, 476, 477, 478, 479, 480, 481, 482, 371, 483, 371, 47, 47, 47, 265, - 484, 230, 485, 230, 230, 230, 486, 230, 230, 230, 484, 277, 277, 277, 487, 488, - 489, 490, 277, 491, 492, 277, 277, 493, 277, 277, 277, 277, 494, 495, 496, 497, - 498, 277, 499, 500, 277, 277, 277, 277, 501, 502, 503, 504, 505, 277, 277, 506, - 277, 507, 277, 277, 277, 508, 277, 509, 277, 277, 277, 277, 510, 4, 4, 511, - 277, 277, 512, 513, 495, 277, 277, 277, 4, 4, 4, 4, 4, 4, 4, 514, - 4, 4, 4, 4, 4, 503, 277, 277, 515, 4, 4, 4, 516, 505, 4, 4, - 516, 4, 517, 277, 277, 277, 277, 277, 277, 518, 519, 520, 277, 277, 277, 277, - 277, 277, 277, 277, 277, 277, 277, 293, 521, 47, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 47, -}; - -static RE_UINT8 re_line_break_stage_4[] = { - 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 12, 12, 12, 13, 14, 15, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 16, 17, 14, - 14, 14, 14, 14, 14, 16, 18, 19, 0, 0, 20, 0, 0, 0, 0, 0, - 21, 22, 23, 24, 25, 26, 27, 14, 22, 28, 29, 28, 28, 26, 28, 30, - 14, 14, 14, 24, 14, 14, 14, 14, 14, 14, 14, 24, 31, 28, 31, 14, - 25, 14, 14, 14, 28, 28, 24, 32, 0, 0, 0, 0, 0, 0, 0, 33, - 0, 0, 0, 0, 0, 0, 34, 34, 34, 35, 0, 0, 0, 0, 0, 0, - 14, 14, 14, 14, 36, 14, 14, 37, 36, 36, 14, 14, 14, 38, 38, 14, - 14, 39, 14, 14, 14, 14, 14, 14, 14, 19, 0, 0, 0, 14, 14, 14, - 39, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 38, 39, 14, 14, 14, - 14, 14, 14, 14, 40, 41, 39, 9, 42, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 43, 19, 44, 0, 45, 36, 36, 36, 36, - 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 36, 36, - 46, 48, 38, 
36, 36, 36, 36, 36, 14, 14, 14, 14, 49, 50, 13, 14, - 0, 0, 0, 0, 0, 51, 52, 53, 14, 14, 14, 14, 14, 19, 0, 0, - 12, 12, 12, 12, 12, 54, 55, 14, 44, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 56, 0, 0, 0, 44, 19, 0, 0, 44, 19, 44, 0, 0, 14, - 12, 12, 12, 12, 12, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 39, - 19, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 52, 39, 14, - 14, 14, 14, 0, 0, 0, 0, 0, 44, 36, 36, 36, 36, 36, 36, 36, - 0, 0, 14, 14, 57, 38, 36, 36, 14, 14, 14, 0, 0, 19, 0, 0, - 0, 0, 19, 0, 19, 0, 0, 36, 14, 14, 14, 14, 14, 14, 14, 38, - 14, 14, 14, 14, 19, 0, 36, 38, 36, 36, 36, 36, 36, 36, 36, 36, - 14, 14, 38, 14, 14, 14, 14, 36, 36, 36, 0, 0, 0, 0, 0, 0, - 0, 19, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 0, 44, 0, 19, 0, 0, 0, 14, 14, 14, 14, - 14, 0, 58, 12, 12, 12, 12, 12, 19, 0, 39, 14, 14, 14, 38, 39, - 38, 39, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 38, 14, 14, 14, - 38, 38, 36, 14, 14, 36, 44, 0, 0, 0, 52, 42, 52, 42, 0, 38, - 36, 36, 36, 42, 36, 36, 14, 39, 14, 0, 36, 12, 12, 12, 12, 12, - 14, 50, 14, 14, 49, 9, 36, 36, 42, 0, 39, 14, 14, 38, 36, 39, - 38, 14, 39, 38, 14, 36, 52, 0, 0, 52, 36, 42, 52, 42, 0, 36, - 42, 36, 36, 36, 39, 14, 38, 38, 36, 36, 36, 12, 12, 12, 12, 12, - 0, 14, 19, 36, 36, 36, 36, 36, 42, 0, 39, 14, 14, 14, 14, 39, - 38, 14, 39, 14, 14, 36, 44, 0, 0, 0, 0, 42, 0, 42, 0, 36, - 38, 36, 36, 36, 36, 36, 36, 36, 9, 36, 36, 36, 39, 36, 36, 36, - 42, 0, 39, 14, 14, 14, 38, 39, 0, 0, 52, 42, 52, 42, 0, 36, - 36, 36, 36, 0, 36, 36, 14, 39, 14, 14, 14, 14, 36, 36, 36, 36, - 36, 44, 39, 14, 14, 38, 36, 14, 38, 14, 14, 36, 39, 38, 38, 14, - 36, 39, 38, 36, 14, 38, 36, 14, 14, 14, 14, 14, 14, 36, 36, 0, - 0, 52, 36, 0, 52, 0, 0, 36, 38, 36, 36, 42, 36, 36, 36, 36, - 14, 14, 14, 14, 9, 38, 36, 36, 0, 0, 39, 14, 14, 14, 38, 14, - 38, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 36, 39, 0, - 0, 0, 52, 0, 52, 0, 0, 36, 36, 36, 42, 52, 14, 38, 36, 36, - 36, 36, 36, 36, 14, 14, 14, 14, 19, 0, 39, 14, 14, 14, 38, 
14, - 14, 14, 39, 14, 14, 36, 44, 0, 36, 36, 42, 52, 36, 36, 36, 38, - 39, 38, 36, 36, 36, 36, 36, 36, 42, 0, 39, 14, 14, 14, 38, 14, - 14, 14, 14, 14, 14, 38, 39, 0, 0, 0, 52, 0, 52, 0, 0, 14, - 36, 36, 14, 19, 14, 14, 14, 14, 14, 14, 14, 14, 49, 14, 14, 14, - 36, 0, 39, 14, 14, 14, 14, 14, 14, 14, 14, 38, 36, 14, 14, 14, - 14, 39, 14, 14, 14, 14, 39, 36, 14, 14, 14, 38, 36, 52, 36, 42, - 0, 0, 52, 52, 0, 0, 0, 0, 36, 0, 38, 36, 36, 36, 36, 36, - 59, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, - 60, 60, 60, 60, 60, 61, 36, 62, 60, 60, 60, 60, 60, 60, 60, 63, - 12, 12, 12, 12, 12, 58, 36, 36, 59, 61, 61, 59, 61, 61, 59, 36, - 36, 36, 60, 60, 59, 60, 60, 60, 59, 60, 59, 59, 36, 60, 59, 60, - 60, 60, 60, 60, 60, 59, 60, 36, 60, 60, 61, 61, 60, 60, 60, 36, - 12, 12, 12, 12, 12, 36, 60, 60, 32, 64, 29, 64, 65, 66, 67, 53, - 53, 68, 56, 14, 0, 14, 14, 14, 14, 14, 43, 19, 19, 69, 69, 0, - 14, 14, 14, 14, 39, 14, 14, 14, 14, 14, 14, 14, 14, 14, 38, 36, - 42, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 14, 14, 19, 0, - 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 58, - 14, 14, 14, 44, 14, 14, 38, 14, 64, 70, 14, 14, 71, 72, 36, 36, - 12, 12, 12, 12, 12, 58, 14, 14, 12, 12, 12, 12, 12, 60, 60, 60, - 14, 14, 14, 39, 36, 36, 39, 36, 73, 73, 73, 73, 73, 73, 73, 73, - 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, 14, 14, 14, 14, 38, 14, 14, 36, - 14, 14, 14, 38, 38, 14, 14, 36, 38, 14, 14, 36, 14, 14, 14, 38, - 38, 14, 14, 36, 14, 14, 14, 14, 14, 14, 14, 38, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 38, 42, 0, 27, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 36, 36, 36, 14, 14, 14, 36, 14, 14, 14, 36, - 76, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 16, 77, 36, - 14, 14, 14, 14, 14, 27, 58, 14, 14, 14, 14, 14, 38, 36, 36, 36, - 14, 14, 14, 14, 14, 14, 38, 14, 14, 0, 52, 36, 36, 36, 36, 36, - 14, 0, 1, 41, 36, 36, 36, 36, 14, 0, 36, 36, 36, 36, 36, 36, - 38, 0, 36, 36, 36, 36, 36, 36, 60, 60, 58, 78, 76, 79, 60, 
36, - 12, 12, 12, 12, 12, 36, 36, 36, 14, 53, 58, 29, 53, 19, 0, 72, - 14, 14, 19, 44, 14, 14, 14, 14, 14, 14, 14, 14, 19, 38, 36, 36, - 14, 14, 14, 36, 36, 36, 36, 36, 0, 0, 0, 0, 0, 0, 36, 36, - 38, 36, 53, 12, 12, 12, 12, 12, 60, 60, 60, 60, 60, 60, 60, 36, - 60, 60, 61, 36, 36, 36, 36, 36, 60, 60, 60, 60, 60, 60, 36, 36, - 60, 60, 60, 60, 60, 36, 36, 36, 12, 12, 12, 12, 12, 61, 36, 60, - 14, 14, 14, 19, 0, 0, 36, 14, 60, 60, 60, 60, 60, 60, 60, 61, - 60, 60, 60, 60, 60, 60, 61, 42, 0, 0, 0, 0, 0, 0, 0, 52, - 0, 0, 44, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, - 0, 0, 44, 14, 14, 14, 36, 36, 12, 12, 12, 12, 12, 58, 27, 58, - 76, 14, 14, 14, 14, 19, 0, 0, 0, 0, 14, 14, 14, 14, 38, 36, - 0, 44, 14, 14, 14, 14, 14, 14, 19, 0, 0, 0, 0, 0, 0, 14, - 0, 0, 36, 36, 36, 36, 14, 14, 0, 0, 0, 0, 36, 80, 58, 58, - 12, 12, 12, 12, 12, 36, 39, 14, 14, 14, 14, 14, 14, 14, 14, 58, - 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 14, 19, 14, - 14, 0, 44, 38, 0, 36, 36, 36, 0, 0, 0, 36, 36, 42, 0, 0, - 14, 14, 14, 14, 39, 39, 39, 39, 14, 14, 14, 14, 14, 14, 14, 36, - 14, 14, 38, 14, 14, 14, 14, 14, 14, 14, 36, 14, 14, 14, 39, 14, - 36, 14, 38, 14, 14, 14, 32, 38, 58, 58, 58, 81, 58, 82, 83, 0, - 81, 58, 84, 25, 85, 86, 85, 86, 28, 14, 87, 88, 89, 0, 0, 33, - 50, 50, 50, 50, 7, 90, 91, 14, 14, 14, 92, 93, 91, 14, 14, 14, - 14, 14, 14, 76, 58, 58, 27, 58, 94, 14, 38, 0, 0, 0, 0, 0, - 14, 36, 25, 14, 14, 14, 16, 95, 24, 28, 25, 14, 14, 14, 16, 77, - 23, 23, 23, 6, 23, 23, 23, 23, 23, 23, 23, 22, 23, 6, 23, 22, - 23, 23, 23, 23, 23, 23, 23, 23, 52, 36, 36, 36, 36, 36, 36, 36, - 14, 49, 24, 14, 49, 14, 14, 14, 14, 24, 14, 96, 14, 14, 14, 14, - 24, 25, 14, 14, 14, 24, 14, 14, 14, 14, 28, 14, 14, 24, 14, 25, - 28, 28, 28, 28, 28, 28, 14, 14, 28, 28, 28, 28, 28, 14, 14, 14, - 14, 14, 14, 14, 24, 14, 36, 36, 14, 25, 25, 14, 14, 14, 14, 14, - 25, 28, 14, 24, 25, 24, 14, 24, 24, 23, 24, 14, 14, 25, 24, 28, - 25, 24, 24, 24, 28, 28, 25, 25, 14, 14, 28, 28, 14, 14, 28, 14, - 14, 14, 14, 14, 25, 
14, 25, 14, 14, 25, 14, 14, 14, 14, 14, 14, - 28, 14, 28, 28, 14, 28, 14, 28, 14, 28, 14, 28, 14, 14, 14, 14, - 14, 14, 24, 14, 24, 14, 14, 14, 14, 14, 24, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 24, 14, 14, 14, 14, 14, 14, 14, 97, - 14, 14, 14, 14, 69, 69, 14, 14, 14, 25, 14, 14, 14, 98, 14, 14, - 14, 14, 14, 14, 16, 99, 14, 14, 98, 98, 14, 14, 14, 14, 14, 38, - 14, 14, 14, 38, 36, 36, 36, 36, 14, 14, 14, 14, 14, 38, 36, 36, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 25, - 28, 28, 25, 14, 14, 14, 14, 14, 14, 28, 28, 14, 14, 14, 14, 14, - 28, 24, 28, 28, 28, 14, 14, 14, 14, 28, 14, 28, 14, 14, 28, 14, - 28, 14, 14, 28, 25, 24, 14, 28, 28, 14, 14, 14, 14, 14, 14, 14, - 14, 28, 28, 14, 14, 14, 14, 24, 98, 98, 24, 25, 24, 14, 14, 28, - 14, 14, 98, 28, 100, 98, 101, 98, 14, 14, 14, 14, 102, 98, 14, 14, - 25, 25, 14, 14, 14, 14, 14, 14, 28, 24, 28, 24, 103, 25, 28, 24, - 14, 14, 14, 14, 14, 14, 14, 102, 14, 14, 14, 14, 14, 14, 14, 28, - 14, 14, 14, 14, 14, 14, 102, 98, 98, 98, 98, 98, 103, 28, 104, 102, - 98, 104, 103, 28, 98, 28, 103, 104, 98, 24, 14, 14, 28, 103, 28, 28, - 104, 98, 98, 104, 101, 103, 104, 98, 98, 98, 100, 14, 98, 105, 105, 14, - 14, 14, 14, 24, 14, 7, 85, 85, 5, 53, 100, 14, 69, 69, 69, 69, - 69, 69, 69, 28, 28, 28, 28, 28, 28, 28, 14, 14, 14, 14, 14, 14, - 14, 14, 16, 99, 14, 14, 14, 14, 14, 14, 14, 69, 69, 69, 69, 69, - 14, 16, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 99, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 69, 14, 14, 14, 24, 28, 28, 14, 14, 14, - 14, 14, 36, 14, 14, 14, 14, 14, 14, 14, 14, 36, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 36, 39, 14, 14, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 14, 14, 14, 14, 14, 14, 14, 14, 14, 19, - 0, 14, 36, 36, 107, 58, 76, 108, 14, 14, 14, 14, 36, 36, 36, 39, - 41, 36, 36, 36, 36, 36, 36, 42, 14, 14, 14, 38, 14, 14, 14, 38, - 85, 85, 85, 85, 85, 85, 85, 58, 58, 58, 58, 27, 109, 14, 85, 14, - 85, 69, 69, 69, 69, 58, 58, 56, 58, 27, 76, 14, 14, 110, 58, 76, - 58, 
109, 41, 36, 36, 36, 36, 36, 98, 98, 98, 98, 98, 98, 98, 98, - 98, 98, 98, 98, 98, 111, 98, 98, 98, 98, 36, 36, 36, 36, 36, 36, - 98, 98, 98, 36, 36, 36, 36, 36, 98, 98, 98, 98, 98, 98, 36, 36, - 18, 112, 113, 98, 69, 69, 69, 69, 69, 98, 69, 69, 69, 69, 114, 115, - 98, 98, 98, 98, 98, 0, 0, 0, 98, 98, 116, 98, 98, 113, 117, 98, - 118, 119, 119, 119, 119, 98, 98, 98, 98, 119, 98, 98, 98, 98, 98, 98, - 98, 119, 119, 119, 98, 98, 98, 120, 98, 98, 119, 121, 42, 122, 91, 117, - 123, 119, 119, 119, 119, 98, 98, 98, 98, 98, 119, 120, 98, 113, 124, 117, - 36, 36, 111, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 36, - 111, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 125, - 98, 98, 98, 98, 98, 125, 36, 36, 126, 126, 126, 126, 126, 126, 126, 126, - 98, 98, 98, 98, 28, 28, 28, 28, 98, 98, 113, 98, 98, 98, 98, 98, - 98, 98, 98, 98, 98, 98, 125, 36, 98, 98, 98, 125, 36, 36, 36, 36, - 14, 14, 14, 14, 14, 14, 27, 108, 12, 12, 12, 12, 12, 14, 36, 36, - 0, 44, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 0, - 0, 27, 58, 58, 36, 36, 36, 36, 36, 36, 36, 39, 14, 14, 14, 14, - 14, 44, 14, 44, 14, 19, 14, 14, 14, 19, 0, 0, 14, 14, 36, 36, - 14, 14, 14, 14, 127, 36, 36, 36, 14, 14, 64, 53, 36, 36, 36, 36, - 0, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 36, 36, 36, 36, 58, - 0, 14, 14, 14, 14, 14, 29, 36, 14, 14, 14, 0, 0, 0, 0, 58, - 14, 14, 14, 19, 0, 0, 0, 0, 0, 0, 36, 36, 36, 36, 36, 39, - 73, 73, 73, 73, 73, 73, 128, 36, 14, 19, 0, 0, 0, 0, 0, 0, - 44, 14, 14, 27, 58, 14, 14, 39, 12, 12, 12, 12, 12, 36, 36, 14, - 12, 12, 12, 12, 12, 60, 60, 61, 14, 14, 14, 14, 19, 0, 0, 0, - 0, 0, 0, 52, 36, 36, 36, 36, 14, 19, 14, 14, 14, 14, 0, 36, - 12, 12, 12, 12, 12, 36, 27, 58, 60, 61, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 59, 60, 60, 58, 14, 19, 52, 36, 36, 36, 36, - 39, 14, 14, 38, 39, 14, 14, 38, 39, 14, 14, 38, 36, 36, 36, 36, - 14, 19, 0, 0, 0, 1, 0, 36, 129, 130, 130, 130, 130, 130, 130, 130, - 130, 130, 130, 130, 130, 130, 129, 130, 130, 130, 130, 130, 130, 130, 130, 
130, - 130, 130, 130, 130, 129, 130, 130, 130, 130, 130, 129, 130, 130, 130, 130, 130, - 130, 130, 36, 36, 36, 36, 36, 36, 74, 74, 74, 131, 36, 132, 75, 75, - 75, 75, 75, 75, 75, 75, 36, 36, 133, 133, 133, 133, 133, 133, 133, 133, - 36, 39, 14, 14, 36, 36, 134, 135, 46, 46, 46, 46, 48, 46, 46, 46, - 46, 46, 46, 47, 46, 46, 47, 47, 46, 134, 47, 46, 46, 46, 46, 46, - 36, 39, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 106, - 36, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 127, 36, - 136, 137, 57, 138, 139, 36, 36, 36, 98, 98, 140, 106, 106, 106, 106, 106, - 106, 106, 112, 140, 112, 98, 98, 98, 112, 77, 91, 53, 140, 106, 106, 112, - 98, 98, 98, 125, 141, 142, 36, 36, 14, 14, 14, 14, 14, 14, 38, 143, - 107, 98, 6, 98, 69, 98, 112, 112, 98, 98, 98, 98, 98, 91, 98, 144, - 98, 98, 98, 98, 98, 140, 145, 98, 98, 98, 98, 98, 98, 140, 145, 140, - 115, 69, 93, 119, 126, 126, 126, 126, 120, 98, 98, 98, 98, 98, 98, 98, - 98, 98, 98, 98, 98, 98, 98, 91, 36, 98, 98, 98, 36, 98, 98, 98, - 36, 98, 98, 98, 36, 98, 125, 36, 22, 98, 141, 146, 14, 14, 14, 38, - 36, 36, 36, 36, 42, 0, 147, 36, 14, 14, 14, 14, 14, 14, 39, 14, - 14, 14, 14, 14, 14, 38, 14, 39, 58, 41, 36, 39, 14, 14, 14, 14, - 14, 14, 36, 39, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 36, 36, - 14, 14, 14, 14, 14, 14, 19, 36, 14, 14, 36, 36, 36, 36, 36, 36, - 14, 14, 14, 0, 0, 52, 36, 36, 14, 14, 14, 14, 14, 14, 14, 80, - 14, 14, 36, 36, 14, 14, 14, 14, 76, 14, 14, 36, 36, 36, 36, 36, - 14, 14, 36, 36, 36, 36, 36, 39, 14, 14, 14, 36, 38, 14, 14, 14, - 14, 14, 14, 39, 38, 36, 38, 39, 14, 14, 14, 80, 14, 14, 14, 14, - 14, 38, 14, 36, 36, 39, 14, 14, 14, 14, 14, 14, 14, 14, 36, 80, - 14, 14, 14, 14, 14, 36, 36, 39, 14, 14, 14, 14, 36, 36, 14, 14, - 19, 0, 42, 52, 36, 36, 0, 0, 14, 14, 39, 14, 39, 14, 14, 14, - 14, 14, 36, 36, 0, 52, 36, 42, 58, 58, 58, 58, 38, 36, 36, 36, - 14, 14, 19, 52, 36, 39, 14, 14, 58, 58, 58, 148, 36, 36, 36, 36, - 14, 14, 14, 36, 80, 58, 58, 58, 14, 38, 36, 36, 14, 14, 14, 14, - 14, 36, 
36, 36, 39, 14, 38, 36, 36, 36, 36, 36, 39, 14, 14, 14, - 14, 38, 36, 36, 36, 36, 36, 36, 14, 38, 36, 36, 36, 14, 14, 14, - 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 1, 76, 14, 14, 36, - 14, 14, 14, 12, 12, 12, 12, 12, 36, 36, 36, 36, 36, 36, 36, 42, - 0, 0, 0, 0, 0, 44, 14, 58, 58, 36, 36, 36, 36, 36, 36, 36, - 0, 0, 52, 12, 12, 12, 12, 12, 58, 58, 36, 36, 36, 36, 36, 36, - 14, 19, 32, 38, 36, 36, 36, 36, 44, 14, 27, 76, 76, 0, 44, 36, - 12, 12, 12, 12, 12, 32, 27, 58, 14, 14, 38, 36, 36, 36, 36, 36, - 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 58, 27, 76, 52, - 14, 14, 14, 38, 38, 14, 14, 39, 14, 14, 14, 14, 27, 36, 36, 36, - 0, 0, 0, 0, 0, 52, 36, 36, 0, 0, 39, 14, 14, 14, 38, 39, - 38, 36, 36, 42, 36, 36, 39, 14, 14, 0, 36, 0, 0, 0, 52, 36, - 0, 0, 52, 36, 36, 36, 36, 36, 14, 14, 19, 0, 0, 0, 0, 0, - 0, 0, 0, 44, 14, 27, 58, 76, 12, 12, 12, 12, 12, 80, 39, 36, - 0, 0, 14, 14, 36, 36, 36, 36, 0, 0, 0, 36, 0, 0, 0, 0, - 149, 58, 53, 14, 27, 58, 58, 58, 58, 58, 58, 58, 14, 14, 0, 36, - 1, 76, 38, 36, 36, 36, 36, 36, 64, 64, 64, 64, 64, 64, 150, 36, - 0, 0, 0, 0, 36, 36, 36, 36, 60, 60, 60, 60, 60, 36, 59, 60, - 12, 12, 12, 12, 12, 60, 58, 151, 14, 38, 36, 36, 36, 36, 36, 39, - 0, 0, 0, 52, 0, 0, 0, 0, 27, 58, 58, 36, 36, 36, 36, 36, - 152, 14, 14, 14, 14, 14, 14, 14, 36, 0, 0, 0, 0, 0, 0, 0, - 58, 58, 41, 36, 36, 36, 36, 36, 14, 14, 14, 14, 153, 69, 115, 14, - 14, 99, 14, 69, 69, 14, 14, 14, 14, 14, 14, 14, 16, 115, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 69, 12, 12, 12, 12, 12, 36, 36, 58, - 0, 0, 1, 36, 36, 36, 36, 36, 0, 0, 0, 1, 58, 14, 14, 14, - 14, 14, 76, 36, 36, 36, 36, 36, 12, 12, 12, 12, 12, 39, 14, 14, - 14, 14, 14, 14, 36, 36, 39, 14, 19, 0, 0, 0, 0, 0, 0, 0, - 154, 36, 36, 36, 36, 36, 36, 36, 98, 125, 36, 36, 36, 36, 36, 36, - 98, 36, 36, 36, 36, 36, 36, 36, 14, 14, 14, 14, 14, 36, 19, 1, - 0, 0, 36, 36, 36, 36, 36, 36, 14, 14, 19, 0, 0, 14, 19, 0, - 0, 44, 19, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 0, 0, 14, - 14, 0, 44, 36, 36, 36, 36, 36, 36, 38, 39, 38, 39, 
14, 38, 14, - 14, 14, 14, 14, 14, 39, 39, 14, 14, 14, 39, 14, 14, 14, 14, 14, - 14, 14, 14, 39, 14, 38, 39, 14, 14, 14, 38, 14, 14, 14, 38, 14, - 14, 14, 14, 14, 14, 39, 14, 38, 14, 14, 38, 38, 36, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 0, 0, 0, 44, 14, 19, 0, 0, 0, 0, 0, 0, 0, 0, 44, 14, - 14, 14, 19, 14, 14, 14, 14, 14, 14, 14, 44, 27, 58, 76, 36, 36, - 36, 36, 36, 36, 36, 42, 0, 0, 0, 0, 0, 0, 52, 42, 0, 0, - 0, 42, 52, 0, 0, 52, 36, 36, 14, 14, 38, 39, 14, 14, 14, 14, - 14, 14, 0, 0, 0, 52, 36, 36, 12, 12, 12, 12, 12, 36, 36, 153, - 39, 38, 38, 39, 39, 14, 14, 14, 14, 38, 14, 14, 39, 39, 36, 36, - 36, 38, 36, 39, 39, 39, 39, 14, 39, 38, 38, 39, 39, 39, 39, 39, - 39, 38, 38, 39, 14, 38, 14, 14, 14, 38, 14, 14, 39, 14, 38, 38, - 14, 14, 14, 14, 14, 39, 14, 14, 39, 14, 39, 14, 14, 39, 14, 14, - 28, 28, 28, 28, 28, 28, 104, 98, 28, 28, 28, 28, 28, 28, 28, 102, - 28, 28, 28, 28, 28, 14, 98, 98, 98, 98, 98, 155, 155, 155, 155, 155, - 155, 155, 155, 155, 155, 155, 155, 155, 98, 98, 101, 98, 98, 98, 98, 98, - 98, 98, 98, 98, 98, 98, 14, 98, 98, 98, 100, 102, 98, 98, 102, 98, - 98, 101, 156, 98, 98, 105, 98, 98, 98, 98, 98, 98, 98, 157, 158, 158, - 98, 105, 98, 105, 105, 105, 105, 105, 156, 98, 98, 98, 98, 98, 98, 98, - 98, 98, 98, 105, 105, 98, 98, 156, 105, 105, 105, 105, 156, 98, 156, 98, - 101, 105, 101, 105, 98, 98, 98, 98, 102, 102, 102, 98, 98, 156, 98, 100, - 100, 102, 98, 98, 98, 98, 98, 98, 14, 14, 14, 102, 98, 98, 98, 98, - 98, 98, 98, 100, 14, 14, 14, 14, 14, 14, 102, 98, 98, 98, 98, 98, - 98, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 98, 98, 98, - 98, 98, 101, 98, 98, 156, 98, 98, 156, 98, 101, 156, 98, 98, 98, 98, - 98, 98, 14, 14, 14, 14, 98, 98, 98, 98, 14, 14, 14, 98, 98, 98, - 98, 98, 101, 105, 98, 101, 105, 105, 14, 14, 14, 85, 159, 91, 14, 14, - 98, 101, 98, 98, 98, 98, 98, 98, 98, 98, 105, 156, 98, 98, 98, 98, - 14, 14, 98, 98, 98, 98, 98, 98, 14, 14, 14, 14, 14, 14, 98, 98, - 14, 14, 14, 14, 98, 98, 
98, 98, 14, 14, 14, 14, 14, 14, 14, 98, - 98, 98, 98, 98, 105, 105, 105, 156, 98, 98, 98, 156, 98, 98, 98, 98, - 156, 101, 105, 105, 105, 98, 105, 156, 42, 36, 36, 36, 36, 36, 36, 36, -}; - -static RE_UINT8 re_line_break_stage_5[] = { - 16, 16, 16, 18, 22, 20, 20, 21, 19, 6, 3, 12, 9, 10, 12, 3, - 1, 36, 12, 9, 8, 15, 8, 7, 11, 11, 8, 8, 12, 12, 12, 6, - 12, 1, 9, 36, 18, 2, 12, 16, 16, 29, 4, 1, 10, 9, 9, 9, - 12, 25, 25, 12, 25, 3, 12, 18, 25, 25, 17, 12, 25, 1, 17, 25, - 12, 17, 16, 4, 4, 4, 4, 16, 0, 0, 8, 12, 12, 0, 0, 12, - 0, 8, 18, 0, 0, 16, 18, 16, 16, 12, 6, 16, 37, 37, 37, 0, - 37, 12, 12, 10, 10, 10, 16, 6, 16, 0, 6, 6, 10, 11, 11, 12, - 6, 12, 8, 6, 18, 18, 0, 24, 24, 24, 24, 0, 0, 9, 24, 12, - 17, 17, 4, 17, 17, 18, 4, 6, 4, 12, 1, 2, 18, 17, 12, 4, - 4, 0, 31, 31, 32, 32, 33, 33, 18, 12, 2, 0, 5, 24, 18, 9, - 0, 18, 18, 4, 18, 28, 16, 42, 26, 25, 3, 3, 1, 3, 14, 14, - 14, 18, 20, 20, 3, 25, 5, 5, 8, 1, 2, 5, 30, 12, 2, 25, - 9, 12, 12, 14, 13, 13, 2, 12, 13, 12, 13, 40, 12, 13, 13, 25, - 25, 13, 40, 40, 2, 1, 0, 6, 6, 18, 1, 18, 26, 26, 0, 13, - 2, 13, 13, 5, 5, 1, 2, 2, 13, 16, 5, 13, 0, 38, 13, 38, - 38, 13, 38, 0, 16, 5, 5, 38, 38, 5, 13, 0, 38, 38, 10, 12, - 31, 0, 34, 35, 35, 35, 32, 0, 0, 33, 27, 27, 0, 37, 16, 37, - 8, 2, 2, 8, 6, 1, 2, 14, 13, 1, 13, 9, 10, 13, 0, 30, - 13, 6, 13, 2, 9, 0, 23, 25, 14, 0, 16, 17, 17, 0, 18, 24, - 17, 6, 1, 1, 5, 0, 39, 39, 40, 13, 13, 41, 41, 41, 3, 5, -}; - -/* Line_Break: 8960 bytes. 
*/ - -RE_UINT32 re_get_line_break(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 12; - code = ch ^ (f << 12); - pos = (RE_UINT32)re_line_break_stage_1[f] << 5; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_line_break_stage_2[pos + f] << 3; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_line_break_stage_3[pos + f] << 3; - f = code >> 1; - code ^= f << 1; - pos = (RE_UINT32)re_line_break_stage_4[pos + f] << 1; - value = re_line_break_stage_5[pos + code]; - - return value; -} - -/* Numeric_Type. */ - -static RE_UINT8 re_numeric_type_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11, 12, - 13, 14, 15, 11, 11, 11, 16, 11, 11, 11, 11, 11, 11, 17, 18, 19, - 20, 11, 21, 22, 11, 11, 23, 11, 11, 11, 11, 11, 11, 11, 11, 24, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, -}; - -static RE_UINT8 re_numeric_type_stage_2[] = { - 0, 1, 1, 1, 1, 1, 2, 3, 1, 4, 5, 6, 7, 8, 9, 10, - 11, 1, 1, 12, 1, 1, 13, 14, 15, 16, 17, 18, 19, 1, 1, 1, - 20, 21, 1, 1, 22, 1, 1, 23, 1, 1, 1, 1, 24, 1, 1, 1, - 25, 26, 27, 1, 28, 1, 1, 1, 29, 1, 1, 30, 1, 
1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 31, 32, - 1, 33, 1, 34, 1, 1, 35, 1, 36, 1, 1, 1, 1, 1, 37, 38, - 1, 1, 39, 40, 1, 1, 1, 41, 1, 1, 1, 1, 1, 1, 1, 42, - 1, 1, 1, 43, 1, 1, 44, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 45, 1, 1, 1, 46, 1, 1, 1, 1, 1, 1, 1, 47, 48, 1, 1, - 1, 1, 1, 1, 1, 1, 49, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 50, 1, 51, 52, 53, 54, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 55, 1, 1, 1, 1, 1, 15, - 1, 56, 57, 58, 59, 1, 1, 1, 60, 61, 62, 63, 64, 1, 65, 1, - 66, 67, 54, 1, 9, 1, 68, 69, 70, 1, 1, 1, 71, 1, 1, 1, - 1, 1, 1, 1, 72, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 73, 74, 1, 1, 1, 1, - 1, 1, 1, 75, 1, 1, 1, 76, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 77, 53, 1, 1, 1, 1, 1, 1, - 1, 78, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 79, 80, 1, 1, 1, 1, 1, 1, 1, 81, 82, 83, 1, 1, 1, 1, - 1, 1, 1, 84, 1, 1, 1, 1, 1, 85, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 86, 1, 1, 1, 1, - 1, 1, 87, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 84, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_numeric_type_stage_3[] = { - 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, - 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 4, - 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 8, 0, 0, 0, 4, - 0, 0, 9, 10, 0, 0, 0, 4, 0, 0, 1, 0, 0, 0, 1, 0, - 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, - 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, - 0, 0, 0, 0, 0, 0, 0, 14, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 0, 0, 16, 0, 0, 0, - 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, - 0, 0, 0, 17, 18, 0, 0, 0, 0, 0, 19, 20, 21, 0, 0, 0, - 0, 0, 0, 22, 23, 0, 0, 24, 0, 0, 0, 25, 26, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 27, 28, 29, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 30, 0, 0, 0, 0, 31, 32, 0, 31, 33, 0, 0, - 34, 0, 0, 0, 35, 0, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, - 0, 0, 37, 0, 0, 0, 0, 0, 38, 0, 27, 0, 39, 
40, 41, 42, - 37, 0, 0, 43, 0, 0, 0, 0, 44, 0, 45, 46, 0, 0, 0, 0, - 0, 0, 47, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 49, 0, - 0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 0, 51, 0, 0, 0, 52, - 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 54, - 0, 0, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 0, - 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, 0, - 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 0, 0, - 0, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 0, 0, - 0, 43, 0, 0, 0, 0, 0, 0, 0, 59, 60, 61, 0, 0, 0, 57, - 0, 3, 0, 0, 0, 0, 0, 62, 0, 63, 0, 0, 0, 0, 1, 0, - 3, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 64, 0, 56, 65, 27, - 66, 67, 20, 68, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 70, - 0, 71, 72, 0, 0, 0, 73, 0, 0, 0, 0, 0, 0, 3, 0, 0, - 0, 0, 74, 75, 0, 76, 0, 77, 78, 0, 0, 0, 0, 79, 80, 20, - 0, 0, 81, 82, 83, 0, 0, 84, 0, 0, 74, 74, 0, 85, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 87, 0, 0, 0, 0, - 0, 0, 88, 89, 0, 0, 0, 1, 0, 90, 0, 0, 0, 0, 1, 91, - 0, 0, 1, 0, 0, 0, 3, 0, 0, 92, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 93, 0, 0, 94, 95, 0, 0, 0, 0, - 20, 20, 20, 96, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, - 0, 0, 97, 98, 0, 0, 0, 0, 0, 0, 0, 99, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 100, 101, 0, 0, 0, 0, 0, 0, 76, 0, - 102, 0, 0, 0, 0, 0, 0, 0, 59, 0, 0, 44, 0, 0, 0, 103, - 0, 59, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 104, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 105, 106, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 43, 0, 0, 0, 0, 0, 0, 0, 61, 0, 0, 0, - 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37, 0, 0, 0, 0, -}; - -static RE_UINT8 re_numeric_type_stage_4[] = { - 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 1, 2, 0, 0, - 5, 1, 0, 0, 5, 1, 6, 7, 5, 1, 8, 0, 5, 1, 9, 0, - 5, 1, 0, 10, 0, 0, 0, 10, 5, 1, 11, 12, 1, 13, 14, 0, - 0, 15, 16, 17, 0, 18, 12, 0, 1, 2, 11, 7, 0, 0, 1, 19, - 1, 2, 1, 2, 0, 0, 20, 21, 22, 21, 0, 0, 0, 0, 11, 11, - 11, 11, 11, 11, 23, 7, 0, 0, 22, 24, 25, 26, 11, 22, 24, 14, - 0, 27, 28, 29, 0, 0, 30, 31, 22, 32, 33, 0, 0, 0, 0, 34, - 35, 0, 0, 0, 36, 7, 0, 9, 0, 0, 37, 0, 11, 7, 
0, 0, - 0, 11, 36, 11, 0, 0, 36, 11, 34, 0, 0, 0, 38, 0, 0, 0, - 0, 39, 0, 0, 0, 34, 0, 0, 40, 41, 0, 0, 0, 42, 43, 0, - 0, 0, 0, 35, 12, 0, 0, 35, 0, 12, 0, 0, 0, 0, 12, 0, - 42, 0, 0, 0, 44, 0, 0, 0, 0, 45, 0, 0, 46, 42, 0, 0, - 47, 0, 0, 0, 0, 0, 0, 38, 0, 0, 41, 41, 0, 0, 0, 39, - 0, 0, 0, 18, 0, 48, 12, 0, 0, 0, 0, 44, 0, 42, 0, 0, - 0, 0, 39, 0, 0, 0, 44, 0, 0, 44, 38, 0, 41, 0, 0, 0, - 44, 42, 0, 0, 0, 0, 0, 12, 18, 11, 0, 0, 0, 0, 49, 0, - 0, 38, 38, 12, 0, 0, 50, 0, 35, 11, 11, 11, 11, 11, 14, 0, - 11, 11, 11, 12, 0, 51, 0, 0, 36, 11, 11, 14, 14, 0, 0, 0, - 41, 39, 0, 0, 0, 0, 52, 0, 0, 0, 0, 11, 0, 0, 0, 36, - 35, 11, 0, 0, 0, 0, 0, 53, 0, 0, 18, 14, 0, 0, 0, 54, - 11, 11, 8, 11, 55, 0, 0, 0, 0, 0, 0, 56, 0, 0, 0, 57, - 0, 53, 0, 0, 0, 36, 0, 0, 0, 0, 0, 8, 22, 24, 11, 10, - 0, 0, 58, 59, 60, 1, 0, 0, 0, 0, 5, 1, 36, 11, 17, 0, - 0, 0, 1, 61, 1, 13, 9, 0, 0, 0, 1, 13, 11, 17, 0, 0, - 11, 10, 0, 0, 0, 0, 1, 62, 7, 0, 0, 0, 11, 11, 7, 0, - 0, 5, 1, 1, 1, 1, 1, 1, 22, 63, 0, 0, 39, 0, 0, 0, - 38, 42, 0, 42, 0, 39, 0, 34, 0, 0, 0, 41, -}; - -static RE_UINT8 re_numeric_type_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, - 0, 2, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 3, 3, - 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 0, 0, 0, 0, 0, 0, 0, 3, 3, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1, 1, 3, 3, 2, 0, 0, 0, 0, 0, - 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 1, 1, 1, - 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 2, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, - 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, - 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 1, - 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, - 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, - 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, - 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, - 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, - 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, - 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 0, 0, 0, 0, - 3, 3, 0, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 0, 0, 0, -}; - -/* Numeric_Type: 2316 bytes. */ - -RE_UINT32 re_get_numeric_type(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 12; - code = ch ^ (f << 12); - pos = (RE_UINT32)re_numeric_type_stage_1[f] << 4; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_numeric_type_stage_2[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_numeric_type_stage_3[pos + f] << 2; - f = code >> 3; - code ^= f << 3; - pos = (RE_UINT32)re_numeric_type_stage_4[pos + f] << 3; - value = re_numeric_type_stage_5[pos + code]; - - return value; -} - -/* Numeric_Value. 
*/ - -static RE_UINT8 re_numeric_value_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11, 12, - 13, 14, 15, 11, 11, 11, 16, 11, 11, 11, 11, 11, 11, 17, 18, 19, - 20, 11, 21, 22, 11, 11, 23, 11, 11, 11, 11, 11, 11, 11, 11, 24, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, -}; - -static RE_UINT8 re_numeric_value_stage_2[] = { - 0, 1, 1, 1, 1, 1, 2, 3, 1, 4, 5, 6, 7, 8, 9, 10, - 11, 1, 1, 12, 1, 1, 13, 14, 15, 16, 17, 18, 19, 1, 1, 1, - 20, 21, 1, 1, 22, 1, 1, 23, 1, 1, 1, 1, 24, 1, 1, 1, - 25, 26, 27, 1, 28, 1, 1, 1, 29, 1, 1, 30, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 31, 32, - 1, 33, 1, 34, 1, 1, 35, 1, 36, 1, 1, 1, 1, 1, 37, 38, - 1, 1, 39, 40, 1, 1, 1, 41, 1, 1, 1, 1, 1, 1, 1, 42, - 1, 1, 1, 43, 1, 1, 44, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 45, 1, 1, 1, 46, 1, 1, 1, 1, 1, 1, 1, 47, 48, 1, 1, - 1, 1, 1, 1, 1, 1, 49, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 50, 1, 51, 52, 53, 54, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 55, 1, 1, 1, 1, 1, 15, - 1, 56, 57, 58, 59, 1, 1, 1, 60, 61, 62, 63, 64, 1, 65, 1, - 66, 67, 54, 1, 9, 1, 
68, 69, 70, 1, 1, 1, 71, 1, 1, 1, - 1, 1, 1, 1, 72, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 73, 74, 1, 1, 1, 1, - 1, 1, 1, 75, 1, 1, 1, 76, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 77, 53, 1, 1, 1, 1, 1, 1, - 1, 78, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 79, 80, 1, 1, 1, 1, 1, 1, 1, 81, 82, 83, 1, 1, 1, 1, - 1, 1, 1, 84, 1, 1, 1, 1, 1, 85, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 86, 1, 1, 1, 1, - 1, 1, 87, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 88, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_numeric_value_stage_3[] = { - 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, - 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 4, - 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 8, 0, 0, 0, 4, - 0, 0, 9, 10, 0, 0, 0, 4, 0, 0, 1, 0, 0, 0, 1, 0, - 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, - 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, - 0, 0, 0, 0, 0, 0, 0, 14, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 0, 0, 14, 0, 0, 0, - 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, - 0, 0, 0, 16, 3, 0, 0, 0, 0, 0, 17, 18, 19, 0, 0, 0, - 0, 0, 0, 20, 21, 0, 0, 22, 0, 0, 0, 23, 24, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 25, 26, 27, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 28, 0, 0, 0, 0, 29, 30, 0, 29, 31, 0, 0, - 32, 0, 0, 0, 33, 0, 0, 0, 0, 34, 0, 0, 0, 0, 0, 0, - 0, 0, 35, 0, 0, 0, 0, 0, 36, 0, 37, 0, 38, 39, 40, 41, - 42, 0, 0, 43, 0, 0, 0, 0, 44, 0, 45, 46, 0, 0, 0, 0, - 0, 0, 47, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 49, 0, - 0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 0, 51, 0, 0, 0, 52, - 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 54, - 0, 0, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 0, - 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 0, 0, 0, - 0, 0, 0, 59, 0, 0, 0, 0, 0, 0, 0, 0, 60, 0, 0, 0, - 0, 61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, - 0, 63, 0, 0, 0, 0, 0, 0, 0, 64, 65, 66, 0, 0, 0, 67, - 0, 3, 0, 0, 0, 0, 0, 68, 0, 69, 0, 0, 0, 0, 1, 0, - 3, 0, 0, 0, 0, 0, 1, 1, 0, 
0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 70, 0, 71, 72, 73, - 74, 75, 76, 77, 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 79, - 0, 80, 81, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 3, 0, 0, - 0, 0, 83, 84, 0, 85, 0, 86, 87, 0, 0, 0, 0, 88, 89, 90, - 0, 0, 91, 92, 93, 0, 0, 94, 0, 0, 95, 95, 0, 96, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 0, - 0, 0, 99, 100, 0, 0, 0, 1, 0, 101, 0, 0, 0, 0, 1, 102, - 0, 0, 1, 0, 0, 0, 3, 0, 0, 103, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 104, 0, 0, 105, 106, 0, 0, 0, 0, - 107, 108, 109, 110, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, - 0, 0, 111, 112, 0, 0, 0, 0, 0, 0, 0, 113, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 114, 115, 0, 0, 0, 0, 0, 0, 116, 0, - 117, 0, 0, 0, 0, 0, 0, 0, 118, 0, 0, 119, 0, 0, 0, 120, - 0, 121, 0, 0, 0, 0, 0, 0, 0, 122, 0, 0, 123, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 124, 125, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 63, 0, 0, 0, 0, 0, 0, 0, 126, 0, 0, 0, - 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, - 0, 0, 0, 0, 129, 0, 0, 0, -}; - -static RE_UINT8 re_numeric_value_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, - 0, 0, 0, 0, 4, 0, 5, 6, 1, 2, 3, 0, 0, 0, 0, 0, - 0, 7, 8, 9, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 0, - 0, 7, 8, 9, 12, 13, 0, 0, 0, 7, 8, 9, 14, 0, 0, 0, - 0, 7, 8, 9, 0, 0, 1, 15, 0, 0, 0, 0, 0, 0, 16, 17, - 0, 7, 8, 9, 18, 19, 20, 0, 1, 2, 21, 22, 23, 0, 0, 0, - 0, 0, 24, 2, 25, 26, 27, 28, 0, 0, 0, 29, 30, 0, 0, 0, - 1, 2, 3, 0, 1, 2, 3, 0, 0, 0, 0, 0, 1, 2, 31, 0, - 0, 0, 0, 0, 32, 2, 3, 0, 0, 0, 0, 0, 33, 34, 35, 36, - 37, 38, 39, 40, 37, 38, 39, 40, 41, 42, 43, 0, 0, 0, 0, 0, - 37, 38, 39, 44, 45, 37, 38, 39, 44, 45, 37, 38, 39, 44, 45, 0, - 0, 0, 46, 47, 48, 49, 2, 50, 0, 0, 0, 0, 0, 51, 52, 53, - 37, 38, 54, 52, 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, - 0, 56, 0, 0, 0, 0, 0, 0, 24, 2, 3, 0, 0, 0, 57, 0, - 0, 0, 0, 0, 51, 58, 0, 0, 37, 38, 59, 0, 0, 0, 0, 0, - 0, 0, 60, 61, 62, 63, 64, 65, 0, 0, 0, 0, 66, 67, 68, 69, - 0, 70, 0, 0, 0, 0, 0, 0, 71, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 72, 0, 0, 
0, 0, 0, 0, 0, 0, 73, 0, 0, 0, 0, - 74, 75, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, - 0, 0, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, - 81, 0, 0, 0, 0, 0, 0, 82, 0, 0, 83, 0, 0, 0, 0, 0, - 0, 0, 0, 70, 0, 0, 0, 0, 0, 0, 0, 0, 84, 0, 0, 0, - 0, 85, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, - 0, 0, 87, 88, 0, 0, 0, 0, 89, 90, 0, 91, 0, 0, 0, 0, - 92, 83, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 93, 0, - 0, 0, 0, 0, 5, 0, 5, 0, 0, 0, 0, 0, 0, 0, 94, 0, - 0, 0, 0, 0, 0, 0, 0, 95, 0, 0, 0, 15, 78, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 97, 0, 0, 0, 0, - 0, 0, 0, 0, 98, 0, 0, 0, 0, 98, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 100, - 0, 101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 0, 0, 0, 0, - 0, 0, 0, 102, 71, 0, 0, 0, 0, 0, 0, 0, 78, 0, 0, 0, - 103, 0, 0, 0, 0, 0, 0, 0, 0, 104, 0, 84, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 105, 0, 0, 0, 0, 0, 0, 106, 0, 0, - 0, 51, 52, 107, 0, 0, 0, 0, 0, 0, 0, 0, 108, 109, 0, 0, - 0, 0, 110, 0, 111, 0, 78, 0, 0, 0, 0, 0, 106, 0, 0, 0, - 0, 0, 0, 0, 112, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 113, - 0, 114, 8, 9, 60, 61, 115, 116, 117, 118, 119, 120, 121, 0, 0, 0, - 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 125, 134, 135, 0, - 0, 0, 136, 0, 0, 0, 0, 0, 24, 2, 25, 26, 27, 137, 138, 0, - 139, 0, 0, 0, 0, 0, 0, 0, 140, 0, 141, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 142, 143, 0, 0, 0, 0, 0, 0, 0, 0, 144, 145, - 0, 0, 0, 0, 0, 0, 24, 146, 0, 114, 147, 148, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 114, 148, 0, 0, 0, 0, 0, 149, 150, 0, - 0, 0, 0, 0, 0, 0, 0, 151, 37, 38, 152, 153, 154, 155, 156, 157, - 158, 159, 160, 161, 162, 163, 164, 165, 37, 166, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 167, 0, 0, 0, 0, 0, 0, 0, 168, - 0, 0, 114, 148, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37, 166, - 0, 0, 24, 169, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 170, 171, - 37, 38, 152, 153, 172, 155, 173, 174, 0, 0, 0, 0, 51, 52, 53, 175, - 176, 177, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 9, - 24, 2, 25, 26, 27, 178, 0, 0, 0, 0, 0, 0, 1, 2, 25, 0, - 1, 2, 25, 26, 179, 0, 
0, 0, 0, 0, 0, 0, 1, 2, 180, 52, - 53, 175, 176, 84, 0, 0, 0, 0, 8, 9, 52, 181, 38, 182, 2, 180, - 183, 184, 9, 185, 186, 185, 187, 188, 189, 190, 191, 192, 147, 193, 194, 195, - 196, 197, 198, 199, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 200, 201, - 202, 0, 0, 0, 0, 0, 0, 0, 37, 38, 152, 153, 203, 0, 0, 0, - 0, 0, 0, 7, 8, 9, 1, 2, 204, 8, 9, 1, 2, 204, 8, 9, - 0, 114, 8, 9, 0, 0, 0, 0, 205, 52, 107, 32, 0, 0, 0, 0, - 73, 0, 0, 0, 0, 0, 0, 0, 0, 206, 0, 0, 0, 0, 0, 0, - 101, 0, 0, 0, 0, 0, 0, 0, 70, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 94, 0, 0, 0, 0, 0, 207, 0, 0, 91, 0, 0, 0, 91, - 0, 0, 104, 0, 0, 0, 0, 76, 0, 0, 0, 0, 0, 0, 76, 0, - 0, 0, 0, 0, 0, 0, 83, 0, 0, 0, 0, 0, 0, 0, 110, 0, - 0, 0, 0, 208, 0, 0, 0, 0, 0, 0, 0, 0, 209, 0, 0, 0, -}; - -static RE_UINT8 re_numeric_value_stage_5[] = { - 0, 0, 0, 0, 2, 32, 34, 36, 38, 40, 42, 44, 46, 48, 0, 0, - 0, 0, 34, 36, 0, 32, 0, 0, 17, 22, 27, 0, 0, 0, 2, 32, - 34, 36, 38, 40, 42, 44, 46, 48, 7, 11, 15, 17, 27, 55, 0, 0, - 0, 0, 17, 22, 27, 7, 11, 15, 49, 94, 103, 0, 32, 34, 36, 0, - 3, 4, 5, 6, 9, 13, 16, 0, 49, 94, 103, 17, 22, 27, 7, 11, - 15, 0, 0, 0, 46, 48, 22, 33, 35, 37, 39, 41, 43, 45, 47, 1, - 0, 32, 34, 36, 46, 48, 49, 59, 69, 79, 89, 90, 91, 92, 93, 94, - 112, 0, 0, 0, 0, 0, 56, 57, 58, 0, 0, 0, 46, 48, 32, 0, - 2, 0, 0, 0, 12, 10, 9, 18, 26, 16, 20, 24, 28, 14, 29, 11, - 19, 25, 30, 32, 32, 34, 36, 38, 40, 42, 44, 46, 48, 49, 50, 51, - 89, 94, 98, 103, 103, 107, 112, 0, 0, 42, 89, 116, 121, 2, 0, 0, - 52, 53, 54, 55, 56, 57, 58, 59, 0, 0, 2, 50, 51, 52, 53, 54, - 55, 56, 57, 58, 59, 32, 34, 36, 46, 48, 49, 2, 0, 0, 32, 34, - 36, 38, 40, 42, 44, 46, 48, 49, 48, 49, 32, 34, 0, 22, 0, 0, - 0, 0, 0, 2, 49, 59, 69, 0, 36, 38, 0, 0, 48, 49, 0, 0, - 49, 59, 69, 79, 89, 90, 91, 92, 0, 60, 61, 62, 63, 64, 65, 66, - 67, 68, 69, 70, 71, 72, 73, 74, 0, 75, 76, 77, 78, 79, 80, 81, - 82, 83, 84, 85, 86, 87, 88, 89, 0, 40, 0, 0, 0, 0, 0, 34, - 0, 0, 40, 0, 0, 44, 0, 0, 32, 0, 0, 44, 0, 0, 0, 112, - 0, 36, 0, 0, 0, 48, 0, 0, 34, 0, 0, 0, 
40, 0, 38, 0, - 0, 0, 0, 133, 49, 0, 0, 0, 0, 0, 0, 103, 36, 0, 0, 0, - 94, 0, 0, 0, 133, 0, 0, 0, 0, 0, 135, 0, 0, 34, 0, 46, - 0, 42, 0, 0, 0, 49, 0, 103, 59, 69, 0, 0, 79, 0, 0, 0, - 0, 36, 36, 36, 0, 0, 0, 38, 0, 0, 32, 0, 0, 0, 48, 59, - 0, 0, 49, 0, 46, 0, 0, 0, 0, 0, 44, 0, 0, 0, 48, 0, - 0, 0, 94, 0, 0, 0, 38, 0, 0, 0, 34, 0, 0, 103, 0, 0, - 0, 0, 42, 0, 42, 0, 0, 0, 0, 0, 2, 0, 44, 46, 48, 2, - 17, 22, 27, 7, 11, 15, 0, 0, 0, 0, 0, 36, 0, 0, 0, 49, - 0, 42, 0, 42, 0, 49, 0, 0, 0, 0, 0, 32, 93, 94, 95, 96, - 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, - 113, 114, 115, 116, 117, 118, 119, 120, 17, 22, 32, 40, 89, 98, 107, 116, - 40, 49, 89, 94, 98, 103, 107, 40, 49, 89, 94, 98, 103, 112, 116, 49, - 32, 32, 32, 34, 34, 34, 34, 40, 49, 49, 49, 49, 49, 69, 89, 89, - 89, 89, 94, 96, 98, 98, 98, 98, 89, 22, 22, 26, 27, 0, 0, 0, - 0, 0, 2, 17, 95, 96, 97, 98, 99, 100, 101, 102, 32, 40, 49, 89, - 0, 93, 0, 0, 0, 0, 102, 0, 0, 32, 34, 49, 59, 94, 0, 0, - 32, 34, 36, 49, 59, 94, 103, 112, 38, 40, 49, 59, 34, 36, 38, 38, - 40, 49, 59, 94, 0, 0, 32, 49, 59, 94, 34, 36, 31, 22, 0, 0, - 48, 49, 59, 69, 79, 89, 90, 91, 0, 0, 94, 95, 96, 97, 98, 99, - 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - 116, 117, 118, 119, 120, 121, 122, 124, 125, 127, 128, 129, 130, 131, 8, 14, - 17, 18, 21, 22, 23, 26, 27, 29, 49, 59, 94, 103, 0, 32, 89, 0, - 0, 32, 49, 59, 38, 49, 59, 94, 0, 0, 32, 40, 49, 89, 94, 103, - 92, 93, 94, 95, 100, 101, 102, 22, 17, 18, 26, 0, 59, 69, 79, 89, - 90, 91, 92, 93, 94, 103, 2, 32, 103, 0, 0, 0, 91, 92, 93, 0, - 46, 48, 32, 34, 44, 46, 48, 38, 48, 32, 34, 36, 36, 38, 40, 34, - 36, 36, 38, 40, 32, 34, 36, 36, 38, 40, 123, 126, 38, 40, 36, 36, - 38, 38, 38, 38, 42, 44, 44, 44, 46, 46, 48, 48, 48, 48, 34, 36, - 38, 40, 42, 32, 40, 40, 34, 36, 32, 34, 18, 26, 29, 18, 26, 11, - 17, 14, 17, 17, 22, 18, 26, 79, 89, 38, 40, 42, 44, 46, 48, 0, - 46, 48, 0, 49, 94, 112, 132, 133, 134, 135, 0, 0, 
92, 93, 0, 0, - 46, 48, 2, 32, 2, 2, 32, 34, 38, 0, 0, 0, 0, 0, 0, 69, - 0, 38, 0, 0, 48, 0, 0, 0, -}; - -/* Numeric_Value: 3264 bytes. */ - -RE_UINT32 re_get_numeric_value(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 12; - code = ch ^ (f << 12); - pos = (RE_UINT32)re_numeric_value_stage_1[f] << 4; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_numeric_value_stage_2[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_numeric_value_stage_3[pos + f] << 3; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_numeric_value_stage_4[pos + f] << 2; - value = re_numeric_value_stage_5[pos + code]; - - return value; -} - -/* Bidi_Mirrored. */ - -static RE_UINT8 re_bidi_mirrored_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_bidi_mirrored_stage_2[] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -}; - -static RE_UINT8 re_bidi_mirrored_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 3, 1, 1, 1, 1, - 4, 5, 1, 6, 7, 8, 1, 9, 10, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 11, - 1, 1, 1, 12, 1, 1, 1, 1, -}; - -static RE_UINT8 re_bidi_mirrored_stage_4[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, - 6, 7, 8, 3, 3, 9, 3, 3, 10, 11, 12, 13, 14, 3, 3, 3, - 3, 3, 3, 3, 3, 15, 3, 16, 3, 3, 3, 3, 3, 3, 17, 18, - 19, 20, 21, 22, 3, 3, 3, 3, 23, 3, 3, 3, 3, 3, 3, 3, - 24, 3, 3, 3, 3, 3, 3, 3, 3, 25, 3, 3, 26, 27, 3, 3, - 3, 3, 3, 28, 29, 30, 31, 32, -}; - -static RE_UINT8 re_bidi_mirrored_stage_5[] = { - 0, 0, 0, 0, 0, 3, 0, 80, 0, 0, 0, 40, 0, 0, 0, 40, - 0, 0, 0, 0, 0, 8, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 60, 0, 0, 0, 24, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 6, 96, 0, 0, 0, 0, 0, 0, 96, - 0, 96, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 30, 63, 98, 
188, 87, 248, 15, 250, 255, 31, 60, 128, 245, 207, 255, 255, - 255, 159, 7, 1, 204, 255, 255, 193, 0, 62, 195, 255, 255, 63, 255, 255, - 0, 15, 0, 0, 3, 6, 0, 0, 0, 0, 0, 0, 0, 255, 63, 0, - 121, 59, 120, 112, 252, 255, 0, 0, 248, 255, 255, 249, 255, 255, 0, 1, - 63, 194, 55, 31, 58, 3, 240, 51, 0, 252, 255, 223, 83, 122, 48, 112, - 0, 0, 128, 1, 48, 188, 25, 254, 255, 255, 255, 255, 207, 191, 255, 255, - 255, 255, 127, 80, 124, 112, 136, 47, 60, 54, 0, 48, 255, 3, 0, 0, - 0, 255, 243, 15, 0, 0, 0, 0, 0, 0, 0, 126, 48, 0, 0, 0, - 0, 3, 0, 80, 0, 0, 0, 40, 0, 0, 0, 168, 13, 0, 0, 0, - 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, - 0, 128, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, - 8, 0, 0, 0, 0, 0, 0, 0, -}; - -/* Bidi_Mirrored: 489 bytes. */ - -RE_UINT32 re_get_bidi_mirrored(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_bidi_mirrored_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_bidi_mirrored_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_bidi_mirrored_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_bidi_mirrored_stage_4[pos + f] << 6; - pos += code; - value = (re_bidi_mirrored_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Indic_Positional_Category. 
*/ - -static RE_UINT8 re_indic_positional_category_stage_1[] = { - 0, 1, 1, 1, 1, 2, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_indic_positional_category_stage_2[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, - 8, 0, 0, 0, 0, 0, 0, 9, 0, 10, 11, 12, 13, 14, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 15, 16, 17, 18, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 0, 0, 0, 0, 0, - 20, 21, 22, 23, 24, 25, 26, 27, 0, 0, 0, 0, 28, 0, 0, 0, -}; - -static RE_UINT8 re_indic_positional_category_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 2, 3, 4, 5, 0, 6, 0, 0, 7, 8, 9, 5, 0, - 10, 0, 0, 7, 11, 0, 0, 12, 10, 0, 0, 7, 13, 0, 5, 0, - 6, 0, 0, 14, 15, 16, 5, 0, 17, 0, 0, 18, 19, 9, 0, 0, - 20, 0, 0, 21, 22, 23, 5, 0, 6, 0, 0, 14, 24, 25, 5, 0, - 6, 0, 0, 18, 26, 9, 5, 0, 27, 0, 0, 0, 28, 29, 0, 27, - 0, 0, 0, 30, 31, 0, 0, 0, 0, 0, 0, 32, 33, 0, 0, 0, - 0, 34, 0, 35, 0, 0, 0, 36, 37, 38, 39, 40, 41, 0, 0, 0, - 0, 0, 42, 43, 0, 44, 45, 46, 47, 48, 0, 0, 0, 0, 0, 0, - 0, 49, 0, 49, 0, 50, 0, 50, 0, 0, 0, 51, 52, 53, 0, 0, - 0, 0, 54, 55, 0, 0, 0, 0, 0, 0, 0, 56, 57, 0, 0, 0, - 0, 58, 0, 0, 0, 59, 60, 61, 0, 0, 0, 0, 0, 0, 0, 0, - 62, 0, 0, 63, 64, 0, 65, 66, 67, 0, 68, 0, 0, 0, 69, 70, - 0, 0, 71, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 73, 74, 75, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76, - 77, 0, 78, 0, 0, 0, 0, 0, 79, 0, 0, 80, 81, 0, 82, 83, - 0, 0, 84, 0, 85, 70, 0, 0, 1, 0, 0, 86, 87, 0, 88, 0, - 0, 0, 89, 90, 91, 
0, 0, 92, 0, 0, 0, 93, 94, 0, 95, 96, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97, 0, - 98, 0, 0, 99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 100, 0, 0, 101, 102, 0, 0, 0, 67, 0, 0, 103, 0, 0, 0, 0, - 104, 0, 105, 106, 0, 0, 0, 107, 67, 0, 0, 108, 109, 0, 0, 0, - 0, 0, 110, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 112, 113, 0, - 6, 0, 0, 18, 114, 9, 115, 116, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 117, 118, 0, 0, 0, 0, 0, 0, 119, 120, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 122, 123, 124, 0, 0, - 0, 0, 0, 125, 126, 0, 0, 0, 0, 0, 127, 128, 0, 0, 0, 0, - 0, 129, 130, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 121, 131, 0, 0, 0, 0, 0, 132, 133, 134, 0, 0, 0, 0, -}; - -static RE_UINT8 re_indic_positional_category_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 7, 1, 2, 8, 5, 9, - 10, 7, 1, 6, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, - 10, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, - 5, 6, 3, 11, 12, 13, 14, 0, 0, 0, 0, 15, 0, 0, 0, 0, - 10, 2, 0, 0, 0, 0, 0, 0, 5, 3, 0, 10, 16, 10, 17, 0, - 1, 0, 18, 0, 0, 0, 0, 0, 5, 6, 7, 10, 19, 15, 5, 0, - 0, 0, 0, 0, 0, 0, 3, 20, 5, 6, 3, 11, 21, 13, 22, 0, - 0, 0, 0, 19, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 8, 2, 23, 0, 24, 12, 25, 26, 0, - 2, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 2, 8, 23, 1, 27, 1, 1, 0, 0, 0, 10, 3, 0, 0, 0, 0, - 28, 8, 23, 19, 29, 30, 1, 0, 0, 0, 15, 23, 0, 0, 0, 0, - 8, 5, 3, 24, 12, 25, 26, 0, 0, 8, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 16, 0, 15, 8, 1, 3, 3, 4, 31, 32, 33, - 20, 8, 1, 1, 6, 3, 0, 0, 34, 34, 35, 10, 1, 1, 1, 16, - 20, 8, 1, 1, 6, 10, 3, 0, 34, 34, 36, 0, 1, 1, 1, 0, - 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 18, 18, 10, 0, 0, 4, - 18, 37, 6, 38, 38, 1, 1, 2, 37, 1, 3, 1, 0, 0, 18, 6, - 6, 6, 6, 6, 18, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 15, 20, 17, 39, 1, 1, 17, 23, 2, 18, 3, - 0, 0, 0, 8, 6, 0, 0, 6, 3, 8, 23, 15, 8, 8, 8, 0, - 10, 1, 16, 0, 0, 0, 0, 0, 0, 40, 41, 
2, 8, 8, 5, 15, - 0, 0, 0, 0, 0, 8, 20, 0, 0, 17, 3, 0, 0, 0, 0, 0, - 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 1, 17, 6, 42, - 43, 24, 25, 2, 20, 1, 1, 1, 1, 10, 0, 0, 0, 0, 10, 0, - 1, 40, 44, 45, 2, 8, 0, 0, 8, 40, 8, 8, 5, 17, 0, 0, - 8, 8, 46, 34, 8, 35, 8, 8, 23, 0, 0, 0, 8, 0, 0, 0, - 0, 0, 0, 10, 39, 20, 0, 0, 0, 0, 11, 40, 1, 17, 6, 3, - 15, 2, 20, 1, 17, 7, 40, 24, 24, 41, 1, 1, 1, 1, 16, 18, - 1, 1, 23, 0, 0, 0, 0, 0, 0, 0, 2, 1, 6, 47, 48, 24, - 25, 19, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 7, 1, - 1, 1, 0, 0, 0, 0, 0, 0, 1, 23, 0, 0, 0, 0, 0, 0, - 15, 6, 17, 9, 1, 23, 6, 0, 0, 0, 0, 2, 1, 8, 20, 20, - 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 8, 4, 49, 8, 7, 1, - 1, 1, 24, 17, 0, 0, 0, 0, 1, 16, 50, 6, 6, 1, 6, 6, - 2, 51, 51, 51, 52, 0, 18, 0, 0, 0, 16, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 16, 0, 10, 0, 0, - 0, 15, 5, 2, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 7, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 18, 6, 0, 0, 0, 0, 18, 6, 17, 6, 7, - 0, 10, 8, 1, 6, 24, 2, 8, 53, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 1, 17, 54, - 41, 40, 55, 3, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 2, 0, - 0, 0, 0, 0, 0, 15, 2, 0, 2, 1, 56, 57, 58, 46, 35, 1, - 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 7, 9, - 0, 0, 15, 0, 0, 0, 0, 0, 0, 15, 20, 8, 40, 23, 5, 0, - 59, 6, 10, 52, 0, 0, 6, 7, 0, 0, 0, 0, 17, 3, 0, 0, - 20, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 6, 6, - 6, 1, 1, 16, 0, 0, 0, 0, 4, 5, 7, 2, 5, 3, 0, 0, - 1, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 1, 6, 41, 38, - 17, 3, 16, 0, 0, 0, 0, 0, 0, 18, 0, 0, 0, 0, 0, 0, - 0, 15, 9, 6, 6, 6, 1, 19, 23, 0, 0, 0, 0, 10, 3, 0, - 0, 0, 0, 0, 0, 0, 8, 5, 1, 30, 2, 1, 0, 0, 0, 16, - 0, 0, 0, 0, 0, 0, 0, 10, 4, 5, 7, 1, 17, 3, 0, 0, - 2, 8, 23, 11, 12, 13, 33, 0, 0, 8, 0, 1, 1, 1, 16, 0, - 1, 1, 16, 0, 0, 0, 0, 0, 0, 0, 15, 9, 6, 6, 6, 1, - 8, 7, 2, 3, 0, 0, 0, 0, 4, 5, 6, 6, 39, 60, 33, 26, - 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, - 9, 
6, 6, 0, 49, 32, 1, 5, 3, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 6, 0, 8, 5, 6, 6, 7, 2, 20, 5, - 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 20, 9, - 6, 1, 1, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 10, - 8, 1, 6, 41, 7, 1, 0, 0, 1, 6, 6, 3, 1, 1, 1, 5, - 0, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 15, 6, 6, 6, - 39, 7, 20, 16, 0, 0, 0, 0, -}; - -static RE_UINT8 re_indic_positional_category_stage_5[] = { - 0, 0, 5, 5, 5, 1, 6, 0, 1, 2, 1, 6, 6, 6, 6, 5, - 1, 1, 2, 1, 0, 5, 0, 2, 2, 0, 0, 4, 4, 6, 0, 1, - 5, 0, 5, 6, 0, 6, 5, 8, 1, 5, 9, 0, 10, 6, 1, 0, - 2, 2, 4, 4, 4, 5, 7, 0, 8, 1, 8, 0, 8, 8, 9, 2, - 4, 10, 4, 1, 3, 3, 3, 1, 3, 0, 5, 7, 7, 7, 6, 2, - 6, 1, 2, 5, 9, 10, 4, 2, 1, 8, 8, 5, 1, 3, 6, 11, - 7, 12, 2, 9, 13, 6, 13, 13, 13, 0, 11, 0, 5, 2, 2, 6, - 6, 3, 3, 5, 5, 3, 0, 13, 5, 9, -}; - -/* Indic_Positional_Category: 1930 bytes. */ - -RE_UINT32 re_get_indic_positional_category(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_indic_positional_category_stage_1[f] << 5; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_indic_positional_category_stage_2[pos + f] << 4; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_indic_positional_category_stage_3[pos + f] << 3; - f = code >> 1; - code ^= f << 1; - pos = (RE_UINT32)re_indic_positional_category_stage_4[pos + f] << 1; - value = re_indic_positional_category_stage_5[pos + code]; - - return value; -} - -/* Indic_Syllabic_Category. 
*/ - -static RE_UINT8 re_indic_syllabic_category_stage_1[] = { - 0, 1, 2, 2, 2, 3, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_indic_syllabic_category_stage_2[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 7, 8, - 9, 1, 1, 1, 1, 1, 1, 10, 1, 11, 12, 13, 14, 15, 1, 1, - 16, 1, 1, 1, 1, 17, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 18, 19, 20, 21, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 22, 1, 1, 1, 1, 1, - 23, 24, 25, 26, 27, 28, 29, 30, 1, 1, 1, 1, 31, 1, 1, 1, -}; - -static RE_UINT8 re_indic_syllabic_category_stage_3[] = { - 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 3, 4, 0, 5, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 12, 20, - 21, 15, 16, 22, 23, 24, 25, 26, 27, 28, 16, 29, 30, 0, 12, 31, - 14, 15, 16, 29, 32, 33, 12, 34, 35, 36, 37, 38, 39, 40, 25, 0, - 41, 42, 16, 43, 44, 45, 12, 0, 46, 42, 16, 47, 44, 48, 12, 49, - 46, 42, 8, 50, 51, 52, 12, 53, 54, 55, 8, 56, 57, 58, 25, 59, - 60, 8, 61, 62, 63, 2, 0, 0, 64, 65, 66, 67, 68, 69, 0, 0, - 0, 0, 70, 71, 72, 8, 73, 74, 75, 76, 77, 78, 79, 0, 0, 0, - 8, 8, 80, 81, 82, 83, 84, 85, 86, 87, 0, 0, 0, 0, 0, 0, - 88, 89, 90, 89, 90, 91, 88, 92, 8, 8, 93, 94, 95, 96, 2, 0, - 97, 61, 98, 99, 25, 8, 100, 101, 8, 8, 102, 103, 104, 2, 0, 0, - 8, 105, 8, 8, 106, 107, 108, 109, 2, 2, 0, 0, 0, 0, 0, 0, - 110, 90, 8, 111, 112, 2, 0, 0, 113, 8, 114, 115, 8, 8, 116, 117, - 8, 8, 
118, 119, 120, 0, 0, 0, 0, 0, 0, 0, 0, 121, 122, 123, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 124, - 125, 126, 0, 0, 0, 0, 0, 127, 128, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 129, 0, 0, 0, - 130, 8, 131, 0, 8, 132, 133, 134, 135, 136, 8, 137, 138, 2, 139, 122, - 140, 8, 141, 8, 142, 143, 0, 0, 144, 8, 8, 145, 146, 2, 147, 148, - 149, 8, 150, 151, 152, 2, 8, 153, 8, 8, 8, 154, 155, 0, 156, 157, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 158, 159, 160, 2, - 161, 162, 8, 163, 164, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 165, 90, 8, 166, 167, 168, 169, 170, 171, 8, 8, 172, 0, 0, 0, 0, - 173, 8, 174, 175, 0, 176, 8, 177, 178, 179, 8, 180, 181, 2, 182, 183, - 184, 185, 186, 187, 0, 0, 0, 0, 188, 189, 190, 191, 8, 192, 193, 2, - 194, 15, 16, 29, 32, 40, 195, 196, 0, 0, 0, 0, 0, 0, 0, 0, - 197, 8, 8, 198, 199, 2, 0, 0, 200, 8, 8, 201, 202, 2, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 197, 8, 203, 204, 205, 206, 0, 0, - 197, 8, 8, 207, 208, 2, 0, 0, 191, 8, 209, 210, 2, 0, 0, 0, - 8, 211, 212, 213, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 214, 8, 203, 215, 216, 70, 217, 218, 8, 219, 76, 220, 0, 0, 0, 0, -}; - -static RE_UINT8 re_indic_syllabic_category_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 3, 0, 4, 0, 0, 0, - 5, 0, 0, 0, 0, 6, 0, 0, 7, 8, 8, 8, 8, 9, 10, 10, - 10, 10, 10, 10, 10, 10, 11, 12, 13, 13, 13, 14, 15, 16, 10, 10, - 17, 18, 2, 2, 19, 8, 10, 10, 20, 21, 8, 22, 22, 9, 10, 10, - 10, 10, 23, 10, 24, 25, 26, 12, 13, 27, 27, 28, 0, 29, 0, 30, - 26, 0, 0, 0, 20, 21, 31, 32, 23, 33, 26, 34, 35, 29, 27, 36, - 0, 0, 37, 24, 0, 18, 2, 2, 38, 39, 0, 0, 20, 21, 8, 40, - 40, 9, 10, 10, 23, 37, 26, 12, 13, 41, 41, 36, 0, 0, 42, 0, - 13, 27, 27, 36, 0, 43, 0, 30, 42, 0, 0, 0, 44, 21, 31, 19, - 45, 46, 33, 23, 47, 48, 49, 25, 10, 10, 26, 43, 35, 43, 50, 36, - 0, 29, 0, 0, 7, 21, 8, 45, 45, 9, 10, 10, 10, 10, 26, 51, - 13, 50, 50, 36, 0, 52, 49, 0, 20, 21, 8, 45, 10, 37, 26, 12, - 0, 52, 0, 53, 54, 0, 0, 0, 10, 10, 49, 51, 13, 50, 50, 55, - 0, 56, 0, 32, 0, 0, 57, 58, 
59, 21, 8, 8, 8, 31, 25, 10, - 30, 10, 10, 42, 10, 49, 60, 29, 13, 61, 13, 13, 43, 0, 0, 0, - 37, 10, 10, 10, 10, 10, 10, 49, 13, 13, 62, 0, 13, 41, 63, 64, - 33, 65, 24, 42, 0, 10, 37, 10, 37, 66, 25, 33, 13, 13, 41, 67, - 13, 68, 63, 69, 2, 2, 3, 10, 2, 2, 2, 2, 2, 70, 71, 0, - 10, 10, 37, 10, 10, 10, 10, 48, 16, 13, 13, 72, 73, 74, 75, 76, - 77, 77, 78, 77, 77, 77, 77, 77, 77, 77, 77, 79, 0, 80, 0, 0, - 81, 8, 82, 13, 13, 83, 84, 85, 2, 2, 3, 86, 87, 17, 88, 89, - 90, 91, 92, 93, 94, 95, 10, 10, 96, 97, 63, 98, 2, 2, 99, 100, - 101, 10, 10, 23, 11, 102, 0, 0, 101, 10, 10, 10, 11, 0, 0, 0, - 103, 0, 0, 0, 104, 8, 8, 8, 8, 43, 13, 13, 13, 72, 105, 106, - 107, 0, 0, 108, 109, 10, 10, 10, 13, 13, 110, 0, 111, 112, 113, 0, - 114, 115, 115, 116, 117, 118, 0, 0, 10, 10, 10, 0, 13, 13, 13, 13, - 119, 112, 120, 0, 10, 121, 13, 0, 10, 10, 10, 81, 101, 122, 112, 123, - 124, 13, 13, 13, 13, 92, 125, 126, 127, 128, 8, 8, 10, 129, 13, 13, - 13, 130, 10, 0, 131, 8, 132, 10, 133, 13, 134, 135, 2, 2, 136, 137, - 10, 138, 13, 13, 139, 0, 0, 0, 10, 140, 13, 119, 112, 141, 0, 0, - 2, 2, 3, 37, 142, 143, 143, 143, 144, 0, 0, 0, 145, 146, 144, 0, - 0, 0, 147, 0, 0, 0, 0, 148, 149, 4, 0, 0, 0, 150, 0, 0, - 5, 150, 0, 0, 0, 0, 0, 4, 40, 151, 152, 10, 121, 13, 0, 0, - 10, 10, 10, 153, 154, 155, 156, 10, 157, 0, 0, 0, 158, 8, 8, 8, - 132, 10, 10, 10, 10, 159, 13, 13, 13, 160, 0, 0, 143, 143, 143, 143, - 2, 2, 161, 10, 153, 115, 162, 120, 10, 121, 13, 163, 164, 0, 0, 0, - 165, 8, 9, 101, 166, 13, 13, 167, 168, 0, 0, 0, 10, 169, 10, 10, - 2, 2, 161, 49, 8, 132, 10, 10, 10, 10, 94, 13, 170, 171, 0, 0, - 112, 112, 112, 172, 37, 173, 174, 93, 13, 13, 13, 97, 175, 0, 0, 0, - 132, 10, 121, 13, 0, 176, 0, 0, 10, 10, 10, 87, 177, 10, 178, 112, - 179, 13, 35, 180, 94, 52, 0, 72, 10, 37, 37, 10, 10, 0, 181, 182, - 2, 2, 0, 0, 183, 184, 8, 8, 10, 10, 13, 13, 13, 185, 0, 0, - 186, 187, 187, 187, 187, 188, 2, 2, 0, 0, 0, 189, 190, 8, 8, 9, - 13, 13, 191, 0, 190, 101, 10, 10, 10, 121, 13, 13, 192, 193, 
2, 2, - 115, 194, 10, 10, 166, 0, 0, 0, 190, 8, 8, 8, 9, 10, 10, 10, - 121, 13, 13, 13, 195, 0, 196, 68, 197, 2, 2, 2, 2, 198, 0, 0, - 8, 8, 10, 10, 30, 10, 10, 10, 10, 10, 10, 13, 13, 199, 0, 200, - 8, 49, 23, 30, 10, 10, 10, 30, 10, 10, 48, 0, 8, 8, 132, 10, - 10, 10, 10, 152, 13, 13, 201, 0, 7, 21, 8, 22, 17, 202, 143, 146, - 143, 146, 0, 0, 8, 8, 8, 132, 10, 94, 13, 13, 203, 204, 0, 0, - 21, 8, 8, 101, 13, 13, 13, 205, 206, 207, 0, 0, 10, 10, 10, 121, - 13, 100, 13, 208, 209, 0, 0, 0, 0, 0, 8, 100, 13, 13, 13, 210, - 68, 0, 0, 0, 10, 10, 152, 211, 13, 212, 0, 0, 10, 10, 26, 213, - 13, 13, 214, 0, 2, 2, 2, 0, 8, 8, 45, 132, 13, 35, 13, 208, - 207, 0, 0, 0, 2, 2, 2, 198, 25, 10, 10, 10, 215, 77, 77, 77, - 13, 216, 0, 0, -}; - -static RE_UINT8 re_indic_syllabic_category_stage_5[] = { - 0, 0, 0, 0, 0, 11, 0, 0, 33, 33, 33, 33, 33, 33, 0, 0, - 11, 0, 0, 0, 0, 0, 28, 28, 0, 0, 0, 11, 1, 1, 1, 2, - 8, 8, 8, 8, 8, 12, 12, 12, 12, 12, 12, 12, 12, 12, 9, 9, - 4, 3, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 0, 26, 26, 0, - 0, 9, 9, 9, 8, 8, 9, 9, 0, 0, 33, 33, 0, 0, 8, 8, - 0, 1, 1, 2, 0, 8, 8, 8, 8, 0, 0, 8, 12, 0, 12, 12, - 12, 0, 12, 0, 0, 0, 12, 12, 12, 12, 0, 0, 9, 0, 0, 9, - 9, 5, 13, 0, 0, 0, 0, 9, 12, 12, 0, 12, 8, 8, 8, 0, - 0, 0, 0, 8, 0, 12, 12, 0, 4, 0, 9, 9, 9, 9, 9, 0, - 9, 5, 0, 0, 0, 12, 12, 12, 1, 25, 11, 11, 0, 19, 0, 0, - 8, 8, 0, 8, 9, 9, 0, 9, 0, 12, 0, 0, 0, 0, 9, 9, - 0, 0, 1, 22, 8, 0, 8, 8, 8, 12, 0, 0, 0, 0, 0, 12, - 12, 0, 0, 0, 12, 12, 12, 0, 9, 0, 9, 9, 0, 3, 9, 9, - 0, 9, 9, 0, 0, 0, 12, 0, 0, 14, 14, 0, 9, 5, 16, 0, - 13, 13, 13, 9, 0, 0, 13, 13, 13, 13, 13, 13, 0, 0, 1, 2, - 0, 0, 5, 0, 9, 0, 9, 0, 9, 9, 6, 0, 24, 24, 24, 24, - 29, 1, 6, 0, 12, 0, 0, 12, 0, 12, 0, 12, 19, 19, 0, 0, - 9, 0, 0, 0, 0, 1, 0, 0, 0, 28, 0, 28, 0, 4, 0, 0, - 9, 9, 1, 2, 9, 9, 1, 1, 6, 3, 0, 0, 21, 21, 21, 21, - 21, 18, 18, 18, 18, 18, 18, 18, 0, 18, 18, 18, 18, 0, 0, 0, - 0, 0, 28, 0, 12, 8, 8, 8, 8, 8, 8, 9, 9, 9, 1, 24, - 2, 7, 6, 19, 19, 19, 19, 12, 0, 0, 11, 0, 12, 12, 8, 
8, - 9, 9, 12, 12, 12, 12, 19, 19, 19, 12, 9, 24, 24, 12, 12, 9, - 9, 24, 24, 24, 24, 24, 12, 12, 12, 9, 9, 9, 9, 12, 12, 12, - 12, 12, 19, 9, 9, 9, 9, 24, 24, 24, 12, 24, 33, 33, 24, 24, - 9, 9, 0, 0, 8, 8, 8, 12, 6, 0, 0, 0, 12, 0, 9, 9, - 12, 12, 12, 8, 9, 27, 27, 28, 17, 29, 28, 28, 28, 6, 7, 28, - 3, 28, 0, 0, 11, 12, 12, 12, 9, 18, 18, 18, 20, 20, 1, 20, - 20, 20, 20, 20, 20, 20, 9, 28, 12, 12, 12, 10, 10, 10, 10, 10, - 10, 10, 0, 0, 23, 23, 23, 23, 23, 0, 0, 0, 9, 20, 20, 20, - 24, 24, 0, 0, 12, 12, 12, 9, 12, 19, 19, 20, 20, 20, 20, 0, - 7, 9, 9, 9, 24, 24, 28, 28, 28, 0, 0, 28, 1, 1, 1, 17, - 2, 8, 8, 8, 4, 9, 9, 9, 5, 12, 12, 12, 1, 17, 2, 8, - 8, 8, 12, 12, 12, 18, 18, 18, 9, 9, 6, 7, 18, 18, 12, 12, - 33, 33, 3, 12, 12, 12, 20, 20, 8, 8, 4, 9, 20, 20, 6, 6, - 18, 18, 9, 9, 1, 1, 28, 4, 26, 26, 26, 0, 26, 26, 26, 26, - 26, 26, 0, 0, 0, 0, 2, 2, 26, 0, 0, 0, 0, 0, 0, 28, - 30, 31, 0, 0, 11, 11, 11, 11, 28, 0, 0, 0, 8, 8, 6, 12, - 12, 12, 12, 1, 12, 12, 10, 10, 10, 10, 12, 12, 12, 12, 10, 18, - 18, 12, 12, 12, 12, 18, 12, 1, 1, 2, 8, 8, 20, 9, 9, 9, - 5, 1, 0, 0, 33, 33, 12, 12, 10, 10, 10, 24, 9, 9, 9, 20, - 20, 20, 20, 6, 1, 1, 17, 2, 12, 12, 12, 4, 9, 18, 19, 19, - 5, 0, 0, 0, 12, 9, 0, 12, 9, 9, 9, 19, 19, 19, 19, 0, - 20, 20, 0, 0, 11, 11, 11, 0, 0, 0, 12, 24, 23, 24, 23, 0, - 0, 2, 7, 0, 12, 8, 12, 12, 12, 12, 12, 20, 20, 20, 20, 9, - 24, 6, 0, 0, 4, 4, 4, 0, 0, 0, 0, 7, 1, 1, 2, 14, - 14, 8, 8, 8, 9, 9, 5, 0, 0, 0, 34, 34, 34, 34, 34, 34, - 34, 34, 33, 33, 0, 0, 0, 32, 1, 1, 2, 8, 9, 5, 4, 0, - 9, 9, 9, 7, 6, 0, 33, 33, 10, 12, 12, 12, 5, 3, 15, 15, - 0, 0, 4, 9, 0, 33, 33, 33, 33, 0, 0, 0, 1, 5, 4, 25, - 0, 0, 26, 0, 9, 4, 6, 0, 0, 0, 26, 26, 9, 9, 5, 1, - 1, 2, 4, 3, 9, 9, 9, 1, 1, 2, 5, 4, 3, 0, 0, 0, - 1, 1, 2, 5, 4, 0, 0, 0, 9, 1, 2, 5, 2, 9, 9, 9, - 9, 9, 5, 4, 0, 19, 19, 19, 9, 9, 9, 6, 0, 0, 18, 18, - 9, 1, 1, 0, -}; - -/* Indic_Syllabic_Category: 2560 bytes. 
*/ - -RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_indic_syllabic_category_stage_1[f] << 5; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_indic_syllabic_category_stage_2[pos + f] << 4; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_indic_syllabic_category_stage_3[pos + f] << 2; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_indic_syllabic_category_stage_4[pos + f] << 2; - value = re_indic_syllabic_category_stage_5[pos + code]; - - return value; -} - -/* Alphanumeric. */ - -static RE_UINT8 re_alphanumeric_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, -}; - -static RE_UINT8 re_alphanumeric_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 18, 19, 13, 20, 13, 21, 13, 13, 13, 13, 22, 7, 7, - 23, 24, 13, 13, 13, 13, 25, 26, 13, 13, 27, 13, 28, 29, 30, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 31, 7, 32, 33, 7, 34, 13, 13, 13, 13, 13, 35, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, -}; - -static RE_UINT8 re_alphanumeric_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, - 29, 30, 31, 31, 32, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 31, - 36, 37, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39, - 1, 1, 1, 1, 40, 1, 41, 42, 43, 44, 45, 46, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 47, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 48, 49, 1, 50, 51, 52, 53, 54, 55, 56, 57, 58, 1, 59, - 60, 61, 62, 63, 64, 31, 31, 31, 65, 66, 67, 68, 69, 70, 71, 72, - 73, 31, 74, 31, 75, 31, 31, 31, 1, 1, 1, 76, 77, 78, 31, 31, - 1, 1, 1, 1, 79, 31, 31, 31, 31, 31, 31, 31, 1, 1, 80, 
31, - 1, 1, 81, 82, 31, 31, 31, 83, 1, 1, 1, 1, 1, 1, 1, 84, - 1, 1, 85, 31, 31, 31, 31, 31, 86, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 87, 31, 31, 31, 31, 31, 31, 31, 88, 89, 90, 91, - 92, 31, 31, 31, 31, 31, 31, 31, 93, 94, 31, 31, 31, 31, 95, 31, - 31, 96, 31, 31, 31, 31, 31, 31, 1, 1, 1, 1, 1, 1, 97, 1, - 1, 1, 1, 1, 1, 1, 1, 98, 99, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 100, 31, 1, 1, 101, 31, 31, 31, 31, 31, -}; - -static RE_UINT8 re_alphanumeric_stage_4[] = { - 0, 1, 2, 2, 0, 3, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 6, 7, 0, 0, 8, 9, 10, 11, 5, 12, - 5, 5, 5, 5, 13, 5, 5, 5, 5, 14, 15, 16, 17, 18, 19, 20, - 21, 5, 22, 23, 5, 5, 24, 25, 26, 5, 27, 5, 5, 28, 5, 29, - 30, 31, 32, 0, 0, 33, 34, 35, 5, 36, 37, 38, 39, 40, 41, 42, - 43, 44, 45, 46, 47, 48, 49, 50, 51, 48, 52, 53, 54, 55, 56, 57, - 58, 59, 60, 61, 58, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, - 16, 73, 74, 0, 75, 76, 77, 0, 78, 79, 80, 81, 82, 83, 0, 0, - 5, 84, 85, 86, 87, 5, 88, 89, 5, 5, 90, 5, 91, 92, 93, 5, - 94, 5, 95, 0, 96, 5, 5, 97, 16, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 98, 2, 5, 5, 99, 100, 101, 101, 102, 5, 103, 104, 79, - 1, 5, 5, 105, 5, 106, 5, 107, 108, 109, 110, 111, 5, 112, 113, 0, - 114, 5, 108, 115, 113, 116, 0, 0, 5, 117, 118, 0, 5, 119, 5, 120, - 5, 107, 121, 122, 123, 0, 0, 124, 5, 5, 5, 5, 5, 5, 0, 125, - 97, 5, 126, 122, 5, 127, 128, 129, 0, 0, 0, 130, 131, 0, 0, 0, - 132, 133, 134, 5, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 135, 5, 79, 5, 136, 108, 5, 5, 5, 5, 137, - 5, 88, 5, 138, 139, 140, 140, 5, 0, 141, 0, 0, 0, 0, 0, 0, - 142, 143, 16, 5, 144, 16, 5, 89, 145, 146, 5, 5, 147, 73, 0, 26, - 5, 5, 5, 5, 5, 107, 0, 0, 5, 5, 5, 5, 5, 5, 107, 0, - 5, 5, 5, 5, 31, 0, 26, 122, 148, 149, 5, 150, 5, 5, 5, 96, - 151, 152, 5, 5, 153, 154, 0, 151, 155, 17, 5, 101, 5, 5, 156, 157, - 5, 106, 158, 83, 5, 159, 160, 161, 5, 139, 162, 163, 5, 108, 164, 165, - 166, 167, 89, 168, 5, 5, 5, 169, 5, 5, 5, 5, 5, 170, 171, 114, - 5, 5, 5, 172, 5, 5, 173, 0, 
174, 175, 176, 5, 5, 28, 177, 5, - 5, 122, 26, 5, 178, 5, 17, 179, 0, 0, 0, 180, 5, 5, 5, 83, - 1, 2, 2, 110, 5, 108, 181, 0, 182, 183, 184, 0, 5, 5, 5, 73, - 0, 0, 5, 185, 0, 0, 0, 0, 0, 0, 0, 0, 83, 5, 186, 0, - 5, 26, 106, 73, 122, 5, 187, 0, 5, 5, 5, 5, 122, 85, 188, 114, - 5, 189, 5, 190, 0, 0, 0, 0, 5, 139, 107, 17, 0, 0, 0, 0, - 191, 192, 107, 139, 108, 0, 0, 193, 107, 173, 0, 0, 5, 194, 0, 0, - 195, 101, 0, 83, 83, 0, 80, 196, 5, 107, 107, 158, 28, 0, 0, 0, - 5, 5, 123, 0, 5, 158, 5, 158, 5, 5, 197, 57, 152, 32, 26, 198, - 5, 199, 26, 200, 5, 5, 201, 0, 202, 203, 0, 0, 204, 205, 5, 198, - 39, 48, 206, 190, 0, 0, 0, 0, 5, 5, 207, 0, 5, 5, 208, 0, - 0, 0, 0, 0, 5, 209, 210, 0, 5, 108, 211, 0, 5, 107, 79, 0, - 212, 169, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 213, - 0, 0, 0, 0, 0, 0, 5, 32, 214, 215, 216, 217, 178, 218, 0, 0, - 5, 5, 5, 5, 173, 0, 0, 0, 5, 5, 5, 147, 5, 5, 5, 5, - 5, 5, 190, 0, 0, 0, 0, 0, 5, 147, 0, 0, 0, 0, 0, 0, - 5, 5, 219, 0, 0, 0, 0, 0, 5, 32, 108, 79, 0, 0, 26, 220, - 5, 139, 221, 222, 96, 0, 0, 0, 5, 5, 223, 108, 177, 0, 0, 78, - 5, 5, 5, 5, 5, 5, 5, 31, 5, 5, 5, 5, 5, 5, 5, 158, - 224, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 225, 226, 0, 0, 0, - 5, 5, 227, 5, 228, 229, 230, 5, 231, 232, 233, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 234, 235, 89, 227, 227, 136, 136, 214, 214, 236, 5, - 237, 238, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 196, 0, - 5, 5, 239, 0, 0, 0, 0, 0, 230, 240, 241, 242, 243, 244, 0, 0, - 0, 26, 85, 85, 79, 0, 0, 0, 5, 5, 5, 5, 5, 5, 139, 0, - 5, 185, 5, 5, 5, 5, 5, 5, 122, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 224, 0, 0, 122, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_alphanumeric_stage_5[] = { - 0, 0, 0, 0, 0, 0, 255, 3, 254, 255, 255, 7, 0, 4, 32, 4, - 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, - 32, 0, 0, 0, 0, 0, 223, 188, 64, 215, 255, 255, 251, 255, 255, 255, - 255, 255, 191, 255, 3, 252, 255, 255, 255, 255, 254, 255, 255, 255, 127, 2, - 254, 255, 255, 255, 255, 0, 0, 0, 0, 0, 255, 191, 182, 0, 255, 
255, - 255, 7, 7, 0, 0, 0, 255, 7, 255, 255, 255, 254, 255, 195, 255, 255, - 255, 255, 239, 31, 254, 225, 255, 159, 0, 0, 255, 255, 0, 224, 255, 255, - 255, 255, 3, 0, 255, 7, 48, 4, 255, 255, 255, 252, 255, 31, 0, 0, - 255, 255, 255, 1, 255, 255, 223, 63, 0, 0, 240, 255, 248, 3, 255, 255, - 255, 255, 255, 239, 255, 223, 225, 255, 207, 255, 254, 255, 239, 159, 249, 255, - 255, 253, 197, 227, 159, 89, 128, 176, 207, 255, 3, 0, 238, 135, 249, 255, - 255, 253, 109, 195, 135, 25, 2, 94, 192, 255, 63, 0, 238, 191, 251, 255, - 255, 253, 237, 227, 191, 27, 1, 0, 207, 255, 0, 2, 238, 159, 249, 255, - 159, 25, 192, 176, 207, 255, 2, 0, 236, 199, 61, 214, 24, 199, 255, 195, - 199, 29, 129, 0, 192, 255, 0, 0, 239, 223, 253, 255, 255, 253, 255, 227, - 223, 29, 96, 7, 207, 255, 0, 0, 255, 253, 239, 227, 223, 29, 96, 64, - 207, 255, 6, 0, 238, 223, 253, 255, 255, 255, 255, 231, 223, 93, 240, 128, - 207, 255, 0, 252, 236, 255, 127, 252, 255, 255, 251, 47, 127, 128, 95, 255, - 192, 255, 12, 0, 255, 255, 255, 7, 127, 32, 255, 3, 150, 37, 240, 254, - 174, 236, 255, 59, 95, 32, 255, 243, 1, 0, 0, 0, 255, 3, 0, 0, - 255, 254, 255, 255, 255, 31, 254, 255, 3, 255, 255, 254, 255, 255, 255, 31, - 255, 255, 127, 249, 255, 3, 255, 255, 231, 193, 255, 255, 127, 64, 255, 51, - 191, 32, 255, 255, 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, - 255, 255, 61, 127, 61, 255, 127, 255, 255, 255, 61, 255, 255, 255, 255, 135, - 255, 255, 0, 0, 255, 255, 63, 63, 255, 159, 255, 255, 255, 199, 255, 1, - 255, 223, 15, 0, 255, 255, 15, 0, 255, 223, 13, 0, 255, 255, 207, 255, - 255, 1, 128, 16, 255, 255, 255, 0, 255, 7, 255, 255, 255, 255, 63, 0, - 255, 255, 255, 127, 255, 15, 255, 1, 192, 255, 255, 255, 255, 63, 31, 0, - 255, 15, 255, 255, 255, 3, 255, 3, 255, 255, 255, 15, 254, 255, 31, 0, - 128, 0, 0, 0, 255, 255, 239, 255, 239, 15, 255, 3, 255, 243, 255, 255, - 191, 255, 3, 0, 255, 227, 255, 255, 255, 255, 255, 63, 255, 1, 0, 0, - 0, 222, 111, 0, 128, 255, 31, 0, 63, 63, 255, 170, 255, 255, 
223, 95, - 220, 31, 207, 15, 255, 31, 220, 31, 0, 0, 2, 128, 0, 0, 255, 31, - 132, 252, 47, 62, 80, 189, 255, 243, 224, 67, 0, 0, 0, 0, 192, 255, - 255, 127, 255, 255, 31, 120, 12, 0, 255, 128, 0, 0, 255, 255, 127, 0, - 127, 127, 127, 127, 0, 128, 0, 0, 224, 0, 0, 0, 254, 3, 62, 31, - 255, 255, 127, 224, 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, - 255, 31, 255, 255, 255, 15, 0, 0, 255, 127, 240, 143, 0, 0, 128, 255, - 252, 255, 255, 255, 255, 249, 255, 255, 255, 127, 255, 0, 187, 247, 255, 255, - 47, 0, 255, 3, 0, 0, 252, 40, 255, 255, 7, 0, 255, 255, 247, 255, - 0, 128, 255, 3, 223, 255, 255, 127, 255, 63, 255, 3, 255, 255, 127, 196, - 5, 0, 0, 56, 255, 255, 60, 0, 126, 126, 126, 0, 127, 127, 255, 255, - 63, 0, 255, 255, 255, 7, 255, 3, 15, 0, 255, 255, 127, 248, 255, 255, - 255, 63, 255, 255, 255, 255, 255, 3, 127, 0, 248, 224, 255, 253, 127, 95, - 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, 0, 0, 255, 15, - 0, 0, 223, 255, 252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, - 255, 63, 255, 63, 255, 255, 31, 0, 255, 255, 1, 0, 15, 255, 62, 0, - 255, 255, 15, 255, 255, 0, 255, 255, 15, 0, 0, 0, 63, 253, 255, 255, - 255, 255, 191, 145, 255, 255, 55, 0, 255, 255, 255, 192, 111, 240, 239, 254, - 31, 0, 0, 0, 63, 0, 0, 0, 255, 1, 255, 3, 255, 255, 199, 255, - 255, 255, 71, 0, 30, 0, 255, 23, 255, 255, 251, 255, 255, 255, 159, 64, - 127, 189, 255, 191, 255, 1, 255, 255, 159, 25, 129, 224, 187, 7, 255, 3, - 179, 0, 255, 3, 255, 255, 63, 127, 0, 0, 0, 63, 17, 0, 255, 3, - 255, 255, 255, 227, 255, 3, 0, 128, 255, 253, 255, 255, 255, 255, 127, 127, - 1, 0, 255, 3, 0, 0, 252, 255, 255, 254, 127, 0, 127, 0, 0, 0, - 255, 63, 0, 0, 15, 0, 255, 3, 248, 255, 255, 224, 31, 0, 255, 255, - 3, 0, 0, 0, 255, 7, 255, 31, 255, 1, 255, 67, 255, 255, 223, 255, - 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, - 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, - 247, 207, 255, 255, 127, 255, 255, 
249, 219, 7, 0, 0, 143, 0, 255, 3, - 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, - 238, 251, 255, 15, -}; - -/* Alphanumeric: 2229 bytes. */ - -RE_UINT32 re_get_alphanumeric(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_alphanumeric_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_alphanumeric_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_alphanumeric_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_alphanumeric_stage_4[pos + f] << 5; - pos += code; - value = (re_alphanumeric_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Any. */ - -RE_UINT32 re_get_any(RE_UINT32 ch) { - return 1; -} - -/* Blank. */ - -static RE_UINT8 re_blank_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_blank_stage_2[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_blank_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, - 3, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_blank_stage_4[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 3, 1, 1, 1, 1, 1, 4, 5, 1, 1, 1, 1, 1, 1, - 3, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_blank_stage_5[] = { - 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 255, 7, 0, 0, 0, 128, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, -}; - -/* Blank: 169 bytes. 
*/ - -RE_UINT32 re_get_blank(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_blank_stage_1[f] << 3; - f = code >> 13; - code ^= f << 13; - pos = (RE_UINT32)re_blank_stage_2[pos + f] << 4; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_blank_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_blank_stage_4[pos + f] << 6; - pos += code; - value = (re_blank_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Graph. */ - -static RE_UINT8 re_graph_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 3, 3, 3, 3, 3, 16, 17, 18, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 20, 19, 19, 19, 19, 19, 19, 19, 3, 3, 3, 3, 3, 3, 3, 21, - 3, 3, 3, 3, 3, 3, 3, 21, -}; - -static RE_UINT8 re_graph_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 10, 10, 19, 20, 21, 22, 23, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 24, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 25, - 10, 10, 26, 27, 28, 29, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 30, 31, 31, 31, 31, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 32, 33, 34, - 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 31, - 10, 50, 51, 31, 31, 31, 31, 31, 10, 10, 52, 31, 31, 31, 31, 31, - 31, 31, 10, 53, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 10, 54, 31, 55, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 56, 10, 
57, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 58, 31, 31, 31, 31, 31, 59, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 60, 61, 62, 63, 10, 64, 31, 31, - 65, 31, 31, 31, 66, 31, 31, 67, 68, 69, 10, 70, 71, 31, 31, 31, - 10, 10, 10, 72, 10, 10, 10, 10, 10, 10, 10, 73, 74, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 75, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 10, 76, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 77, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 78, -}; - -static RE_UINT8 re_graph_stage_3[] = { - 0, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4, 2, - 2, 2, 2, 2, 5, 6, 7, 8, 9, 2, 2, 2, 10, 11, 12, 13, - 14, 15, 16, 17, 2, 2, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 2, 40, 41, 42, - 2, 2, 2, 43, 2, 2, 2, 2, 2, 44, 45, 46, 47, 48, 49, 50, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 51, 52, 53, 54, 2, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 2, 68, 2, 69, - 70, 71, 72, 73, 2, 2, 2, 74, 2, 2, 2, 2, 75, 76, 77, 78, - 79, 80, 81, 82, 2, 2, 83, 2, 2, 2, 2, 2, 2, 2, 2, 1, - 84, 85, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 86, 87, 88, - 89, 90, 2, 91, 92, 93, 94, 95, 2, 96, 97, 98, 2, 2, 2, 99, - 100, 100, 101, 2, 102, 2, 103, 104, 90, 2, 2, 1, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 59, 2, 2, 2, 2, 2, 2, 2, 2, 105, - 2, 2, 106, 107, 2, 2, 2, 2, 108, 2, 2, 57, 2, 2, 109, 110, - 111, 57, 2, 112, 2, 113, 2, 114, 115, 116, 2, 117, 118, 119, 2, 120, - 2, 2, 2, 2, 2, 2, 104, 121, 67, 67, 67, 67, 67, 67, 67, 67, - 2, 122, 2, 123, 124, 125, 2, 126, 2, 2, 2, 2, 2, 127, 128, 129, - 49, 130, 2, 131, 100, 2, 1, 132, 133, 134, 2, 13, 135, 2, 136, 137, - 67, 67, 138, 139, 104, 140, 141, 142, 2, 2, 143, 144, 145, 146, 67, 67, - 2, 2, 2, 2, 115, 147, 67, 67, 148, 149, 150, 151, 152, 67, 153, 128, - 154, 155, 156, 157, 158, 159, 160, 67, 2, 72, 161, 162, 67, 67, 67, 67, - 67, 163, 
67, 67, 67, 67, 67, 67, 2, 164, 2, 165, 77, 166, 2, 167, - 168, 67, 169, 170, 171, 172, 67, 67, 2, 173, 2, 174, 67, 67, 175, 176, - 2, 177, 57, 178, 179, 67, 67, 67, 67, 67, 180, 181, 67, 67, 67, 67, - 67, 67, 67, 52, 67, 67, 67, 67, 182, 183, 184, 67, 67, 67, 67, 67, - 2, 2, 2, 2, 2, 2, 123, 67, 2, 185, 2, 2, 2, 186, 67, 67, - 187, 67, 67, 67, 67, 67, 67, 67, 2, 188, 67, 67, 67, 67, 67, 67, - 52, 189, 67, 190, 2, 191, 192, 67, 67, 67, 67, 67, 2, 193, 194, 195, - 2, 2, 2, 2, 2, 2, 2, 196, 2, 2, 2, 161, 67, 67, 67, 67, - 197, 67, 67, 67, 67, 67, 67, 67, 2, 198, 199, 67, 67, 67, 67, 67, - 2, 2, 2, 59, 200, 2, 2, 201, 2, 202, 67, 67, 2, 203, 67, 67, - 2, 204, 205, 206, 207, 208, 2, 2, 2, 2, 209, 2, 2, 2, 2, 210, - 2, 2, 211, 67, 67, 67, 67, 67, 212, 67, 67, 67, 67, 67, 67, 67, - 2, 2, 2, 213, 2, 214, 67, 67, 215, 216, 217, 218, 67, 67, 67, 67, - 62, 2, 219, 220, 221, 62, 196, 222, 223, 224, 67, 67, 2, 2, 2, 2, - 2, 2, 2, 225, 2, 98, 2, 226, 83, 227, 228, 67, 229, 230, 231, 232, - 2, 2, 2, 233, 2, 2, 2, 2, 2, 2, 2, 2, 234, 2, 2, 2, - 235, 2, 2, 2, 2, 2, 2, 2, 2, 2, 236, 67, 67, 67, 67, 67, - 176, 67, 67, 67, 67, 67, 67, 67, 237, 2, 67, 67, 2, 2, 2, 238, - 2, 2, 2, 2, 2, 2, 2, 239, -}; - -static RE_UINT8 re_graph_stage_4[] = { - 0, 0, 1, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 4, - 5, 2, 6, 2, 2, 2, 2, 1, 2, 7, 1, 2, 8, 1, 2, 2, - 9, 2, 10, 11, 2, 12, 2, 2, 13, 2, 2, 2, 14, 2, 2, 2, - 2, 2, 2, 15, 2, 2, 2, 10, 2, 2, 16, 3, 2, 17, 0, 0, - 0, 0, 2, 18, 0, 19, 2, 2, 20, 21, 22, 23, 24, 25, 26, 27, - 28, 21, 22, 29, 30, 31, 32, 33, 34, 6, 22, 35, 36, 37, 26, 38, - 39, 21, 22, 35, 40, 41, 26, 9, 42, 43, 44, 45, 46, 47, 32, 10, - 48, 49, 22, 50, 51, 52, 26, 53, 48, 49, 22, 54, 51, 55, 26, 56, - 57, 49, 2, 14, 58, 19, 26, 2, 59, 60, 2, 61, 62, 63, 32, 64, - 1, 2, 2, 65, 2, 27, 0, 0, 66, 67, 68, 69, 70, 71, 0, 0, - 72, 2, 73, 1, 2, 72, 2, 12, 12, 10, 0, 0, 74, 2, 2, 2, - 75, 76, 2, 2, 75, 2, 2, 77, 78, 79, 2, 2, 2, 78, 2, 2, - 2, 14, 2, 73, 2, 80, 2, 2, 2, 2, 2, 81, 1, 73, 2, 2, - 
2, 2, 2, 82, 12, 11, 2, 83, 2, 84, 12, 85, 2, 16, 80, 80, - 3, 80, 2, 2, 2, 2, 2, 9, 2, 2, 10, 2, 2, 2, 2, 33, - 2, 3, 27, 27, 86, 2, 16, 11, 2, 2, 27, 2, 80, 87, 2, 2, - 2, 88, 2, 2, 2, 3, 2, 89, 80, 80, 16, 3, 0, 0, 0, 0, - 27, 2, 2, 73, 2, 2, 2, 90, 2, 2, 2, 91, 50, 2, 2, 2, - 82, 0, 0, 0, 9, 2, 2, 92, 2, 2, 2, 93, 2, 81, 2, 2, - 81, 94, 2, 16, 2, 2, 2, 95, 95, 96, 2, 97, 98, 2, 99, 2, - 2, 3, 95, 100, 3, 73, 2, 3, 0, 2, 2, 37, 27, 2, 2, 2, - 2, 2, 83, 0, 10, 0, 2, 2, 2, 2, 2, 26, 2, 101, 2, 50, - 22, 15, 102, 0, 2, 2, 3, 2, 2, 3, 2, 2, 2, 2, 2, 103, - 2, 2, 74, 2, 2, 2, 104, 105, 2, 83, 106, 106, 106, 106, 2, 2, - 11, 0, 0, 0, 2, 107, 2, 2, 2, 2, 2, 84, 2, 33, 0, 27, - 1, 2, 2, 2, 2, 7, 2, 2, 108, 2, 16, 1, 3, 2, 2, 10, - 2, 2, 84, 2, 2, 33, 0, 0, 73, 2, 2, 2, 83, 2, 2, 2, - 2, 2, 27, 0, 2, 2, 3, 9, 0, 0, 0, 109, 2, 2, 27, 80, - 110, 80, 2, 16, 2, 111, 2, 73, 13, 45, 2, 3, 2, 2, 2, 83, - 16, 71, 2, 2, 112, 98, 2, 83, 113, 114, 106, 2, 2, 2, 33, 2, - 2, 2, 16, 80, 115, 2, 2, 27, 2, 2, 16, 2, 2, 80, 0, 0, - 83, 116, 2, 117, 118, 2, 2, 2, 15, 119, 2, 2, 0, 2, 2, 2, - 2, 120, 2, 2, 9, 0, 0, 16, 2, 121, 122, 95, 2, 2, 2, 89, - 123, 124, 106, 125, 126, 2, 79, 127, 16, 16, 0, 0, 128, 2, 2, 129, - 3, 27, 37, 0, 0, 2, 2, 16, 2, 73, 2, 2, 2, 37, 2, 27, - 10, 2, 2, 10, 2, 13, 2, 2, 130, 33, 0, 0, 2, 16, 80, 2, - 2, 130, 2, 27, 2, 2, 9, 2, 2, 2, 111, 0, 2, 33, 9, 0, - 131, 2, 2, 132, 2, 133, 2, 2, 2, 3, 109, 0, 0, 0, 2, 134, - 2, 135, 2, 136, 2, 2, 2, 137, 138, 139, 2, 140, 9, 82, 2, 2, - 2, 2, 0, 0, 2, 2, 115, 83, 2, 2, 2, 141, 2, 101, 2, 142, - 2, 143, 144, 0, 2, 2, 2, 112, 2, 2, 2, 145, 0, 0, 2, 3, - 16, 120, 2, 146, 15, 2, 82, 80, 84, 2, 2, 83, 16, 2, 1, 11, - 2, 6, 2, 3, 147, 13, 80, 2, 2, 2, 10, 80, 20, 21, 22, 35, - 40, 148, 149, 11, 2, 150, 0, 0, 9, 80, 0, 0, 2, 2, 2, 101, - 2, 16, 0, 0, 11, 80, 73, 0, 80, 0, 0, 0, 2, 50, 27, 2, - 0, 0, 2, 2, 2, 2, 2, 151, 22, 2, 2, 79, 33, 2, 73, 2, - 2, 120, 72, 83, 2, 2, 3, 11, 84, 0, 0, 0, 2, 2, 3, 0, - 83, 0, 0, 0, 2, 3, 45, 
0, 0, 2, 16, 33, 33, 107, 6, 152, - 2, 0, 0, 0, 11, 2, 2, 3, 146, 2, 0, 0, 0, 0, 37, 0, - 2, 2, 73, 0, 15, 0, 0, 0, 2, 2, 10, 73, 82, 71, 84, 0, - 2, 2, 7, 2, 2, 2, 82, 0, 33, 0, 0, 0, 2, 83, 2, 15, - 2, 95, 2, 2, 2, 12, 153, 154, 155, 2, 2, 2, 156, 157, 2, 158, - 159, 49, 2, 2, 2, 2, 101, 2, 88, 2, 2, 2, 27, 98, 1, 0, - 79, 160, 161, 0, 162, 83, 0, 0, 10, 45, 0, 0, 155, 2, 163, 164, - 165, 166, 167, 168, 107, 27, 169, 27, 0, 0, 0, 15, 2, 84, 3, 1, - 1, 1, 2, 33, 73, 2, 3, 2, 0, 0, 32, 2, 112, 2, 2, 27, - 82, 15, 0, 0, 2, 112, 73, 83, 2, 11, 0, 0, 9, 80, 2, 2, - 9, 2, 16, 0, 0, 3, 9, 170, 27, 3, 0, 0, 2, 15, 0, 0, - 37, 0, 0, 0, 2, 83, 0, 0, 2, 2, 2, 11, 2, 16, 2, 2, - 2, 2, 15, 0, 171, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 16, -}; - -static RE_UINT8 re_graph_stage_5[] = { - 0, 0, 254, 255, 255, 255, 255, 127, 255, 252, 240, 215, 251, 255, 127, 254, - 255, 230, 255, 0, 255, 7, 31, 0, 255, 223, 255, 191, 255, 231, 3, 0, - 255, 63, 255, 79, 223, 63, 240, 255, 239, 159, 249, 255, 255, 253, 197, 243, - 159, 121, 128, 176, 207, 255, 255, 15, 238, 135, 109, 211, 135, 57, 2, 94, - 192, 255, 63, 0, 238, 191, 237, 243, 191, 59, 1, 0, 3, 2, 238, 159, - 159, 57, 192, 176, 236, 199, 61, 214, 24, 199, 255, 195, 199, 61, 129, 0, - 239, 223, 253, 255, 255, 227, 223, 61, 96, 7, 0, 255, 239, 243, 96, 64, - 6, 0, 238, 223, 223, 253, 236, 255, 127, 252, 251, 47, 127, 132, 95, 255, - 28, 0, 255, 135, 150, 37, 240, 254, 174, 236, 255, 59, 95, 63, 255, 243, - 255, 254, 255, 31, 191, 32, 255, 61, 127, 61, 61, 127, 61, 255, 127, 255, - 255, 3, 63, 63, 255, 1, 127, 0, 15, 0, 13, 0, 241, 255, 255, 199, - 255, 207, 255, 159, 15, 240, 255, 248, 127, 3, 63, 248, 255, 170, 223, 255, - 207, 239, 220, 127, 0, 248, 255, 124, 243, 255, 63, 255, 0, 240, 15, 254, - 255, 128, 1, 128, 127, 127, 255, 251, 224, 255, 128, 255, 63, 192, 15, 128, - 7, 0, 126, 126, 126, 0, 127, 248, 248, 224, 127, 95, 219, 255, 248, 255, - 252, 255, 247, 255, 127, 15, 252, 252, 252, 28, 0, 62, 255, 239, 255, 183, - 135, 255, 143, 
255, 15, 255, 63, 253, 191, 145, 191, 255, 55, 248, 255, 143, - 255, 131, 255, 240, 111, 240, 239, 254, 15, 135, 63, 254, 7, 255, 3, 30, - 0, 254, 7, 252, 0, 128, 127, 189, 129, 224, 207, 31, 255, 43, 7, 128, - 255, 224, 100, 222, 255, 235, 239, 255, 191, 231, 223, 223, 255, 123, 95, 252, - 255, 249, 219, 7, 159, 255, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, - 247, 94, 238, 251, 249, 127, 2, 0, -}; - -/* Graph: 2424 bytes. */ - -RE_UINT32 re_get_graph(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_graph_stage_1[f] << 4; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_graph_stage_2[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_graph_stage_3[pos + f] << 2; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_graph_stage_4[pos + f] << 4; - pos += code; - value = (re_graph_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Print. 
*/ - -static RE_UINT8 re_print_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 3, 3, 3, 3, 3, 16, 17, 18, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 20, 19, 19, 19, 19, 19, 19, 19, 3, 3, 3, 3, 3, 3, 3, 21, - 3, 3, 3, 3, 3, 3, 3, 21, -}; - -static RE_UINT8 re_print_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 10, 10, 19, 20, 21, 22, 23, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 24, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 25, - 10, 10, 26, 27, 28, 29, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 30, 31, 31, 31, 31, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 32, 33, 34, - 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 31, - 10, 50, 51, 31, 31, 31, 31, 31, 10, 10, 52, 31, 31, 31, 31, 31, - 31, 31, 10, 53, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 10, 54, 31, 55, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 56, 10, 57, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 58, 31, 31, 31, 31, 31, 59, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 60, 61, 62, 63, 10, 64, 31, 31, - 65, 31, 31, 31, 66, 31, 31, 67, 68, 69, 10, 70, 71, 31, 31, 31, - 10, 10, 10, 72, 10, 10, 10, 10, 10, 10, 10, 73, 74, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 75, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 10, 76, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 77, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 78, -}; - -static RE_UINT8 re_print_stage_3[] = { - 0, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4, 2, - 2, 2, 2, 2, 5, 6, 7, 8, 9, 2, 2, 2, 10, 11, 12, 13, - 14, 15, 16, 17, 2, 2, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 2, 40, 41, 42, - 2, 2, 2, 43, 2, 2, 2, 2, 2, 44, 45, 46, 47, 48, 49, 50, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 51, 52, 53, 54, 2, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 2, 68, 2, 69, - 70, 71, 72, 73, 2, 2, 2, 74, 2, 2, 2, 2, 75, 76, 77, 78, - 79, 80, 81, 82, 2, 2, 83, 2, 2, 2, 2, 2, 2, 2, 2, 1, - 84, 85, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 86, 87, 88, - 89, 90, 2, 91, 92, 93, 94, 95, 2, 96, 97, 98, 2, 2, 2, 99, - 2, 100, 101, 2, 102, 2, 103, 104, 90, 2, 2, 1, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 59, 2, 2, 2, 2, 2, 2, 2, 2, 105, - 2, 2, 106, 107, 2, 2, 2, 2, 108, 2, 2, 57, 2, 2, 109, 110, - 111, 57, 2, 112, 2, 113, 2, 114, 115, 116, 2, 117, 118, 119, 2, 120, - 2, 2, 2, 2, 2, 2, 104, 121, 67, 67, 67, 67, 67, 67, 67, 67, - 2, 122, 2, 123, 124, 125, 2, 126, 2, 2, 2, 2, 2, 127, 128, 129, - 49, 130, 2, 131, 100, 2, 1, 132, 133, 134, 2, 13, 135, 2, 136, 137, - 67, 67, 51, 138, 104, 139, 140, 141, 2, 2, 142, 143, 144, 145, 67, 67, - 2, 2, 2, 2, 115, 146, 67, 67, 147, 148, 149, 150, 151, 67, 152, 128, - 153, 154, 155, 156, 157, 158, 159, 67, 2, 72, 160, 161, 67, 67, 67, 67, - 67, 162, 67, 67, 67, 67, 67, 67, 2, 163, 2, 164, 77, 165, 2, 166, - 167, 67, 168, 169, 170, 171, 67, 67, 2, 172, 2, 173, 67, 67, 174, 175, - 2, 176, 57, 177, 178, 67, 67, 67, 67, 67, 0, 179, 67, 67, 67, 67, - 67, 67, 67, 52, 67, 67, 67, 67, 180, 181, 182, 67, 67, 67, 67, 67, - 2, 2, 2, 2, 2, 2, 123, 67, 2, 183, 2, 2, 2, 184, 67, 67, - 185, 67, 67, 67, 67, 67, 67, 67, 2, 186, 67, 67, 67, 67, 67, 67, - 52, 187, 67, 188, 2, 189, 190, 67, 67, 67, 67, 67, 2, 191, 192, 193, - 2, 2, 2, 2, 2, 2, 2, 194, 2, 2, 2, 160, 67, 67, 67, 67, - 195, 67, 67, 67, 67, 67, 67, 67, 2, 196, 197, 67, 
67, 67, 67, 67, - 2, 2, 2, 59, 198, 2, 2, 199, 2, 200, 67, 67, 2, 201, 67, 67, - 2, 202, 203, 204, 205, 206, 2, 2, 2, 2, 207, 2, 2, 2, 2, 208, - 2, 2, 209, 67, 67, 67, 67, 67, 210, 67, 67, 67, 67, 67, 67, 67, - 2, 2, 2, 211, 2, 212, 67, 67, 213, 214, 215, 216, 67, 67, 67, 67, - 62, 2, 217, 218, 219, 62, 194, 220, 221, 222, 67, 67, 2, 2, 2, 2, - 2, 2, 2, 223, 2, 98, 2, 224, 83, 225, 226, 67, 227, 228, 229, 230, - 2, 2, 2, 231, 2, 2, 2, 2, 2, 2, 2, 2, 232, 2, 2, 2, - 233, 2, 2, 2, 2, 2, 2, 2, 2, 2, 234, 67, 67, 67, 67, 67, - 175, 67, 67, 67, 67, 67, 67, 67, 235, 2, 67, 67, 2, 2, 2, 236, - 2, 2, 2, 2, 2, 2, 2, 237, -}; - -static RE_UINT8 re_print_stage_4[] = { - 0, 0, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 3, - 4, 1, 5, 1, 1, 1, 1, 6, 1, 7, 6, 1, 8, 6, 1, 1, - 9, 1, 10, 11, 1, 12, 1, 1, 13, 1, 1, 1, 14, 1, 1, 1, - 1, 1, 1, 15, 1, 1, 1, 10, 1, 1, 16, 2, 1, 17, 0, 0, - 0, 0, 1, 18, 0, 19, 1, 1, 20, 21, 22, 23, 24, 25, 26, 27, - 28, 21, 22, 29, 30, 31, 32, 33, 34, 5, 22, 35, 36, 37, 26, 38, - 39, 21, 22, 35, 40, 41, 26, 9, 42, 43, 44, 45, 46, 47, 32, 10, - 48, 49, 22, 50, 51, 52, 26, 53, 48, 49, 22, 54, 51, 55, 26, 56, - 57, 49, 1, 14, 58, 19, 26, 1, 59, 60, 1, 61, 62, 63, 32, 64, - 6, 1, 1, 65, 1, 27, 0, 0, 66, 67, 68, 69, 70, 71, 0, 0, - 72, 1, 73, 6, 1, 72, 1, 12, 12, 10, 0, 0, 74, 1, 1, 1, - 75, 76, 1, 1, 75, 1, 1, 77, 78, 79, 1, 1, 1, 78, 1, 1, - 1, 14, 1, 73, 1, 80, 1, 1, 1, 1, 1, 81, 1, 73, 1, 1, - 1, 1, 1, 82, 12, 11, 1, 83, 1, 84, 12, 85, 1, 16, 80, 80, - 2, 80, 1, 1, 1, 1, 1, 9, 1, 1, 10, 1, 1, 1, 1, 33, - 1, 2, 27, 27, 86, 1, 16, 11, 1, 1, 27, 1, 80, 87, 1, 1, - 1, 88, 1, 1, 1, 2, 1, 89, 80, 80, 16, 2, 0, 0, 0, 0, - 27, 1, 1, 73, 1, 1, 1, 90, 1, 1, 1, 91, 50, 1, 1, 1, - 82, 0, 0, 0, 9, 1, 1, 92, 1, 1, 1, 93, 1, 81, 1, 1, - 81, 94, 1, 16, 1, 1, 1, 95, 95, 96, 1, 97, 1, 1, 3, 1, - 1, 1, 95, 98, 2, 73, 1, 2, 0, 1, 1, 37, 27, 1, 1, 1, - 1, 1, 83, 0, 10, 0, 1, 1, 1, 1, 1, 26, 1, 99, 1, 50, - 22, 15, 100, 0, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 101, - 1, 1, 74, 1, 1, 1, 
102, 103, 1, 83, 104, 104, 104, 104, 1, 1, - 11, 0, 0, 0, 1, 105, 1, 1, 1, 1, 1, 84, 1, 33, 0, 27, - 6, 1, 1, 1, 1, 7, 1, 1, 106, 1, 16, 6, 2, 1, 1, 10, - 1, 1, 84, 1, 1, 33, 0, 0, 73, 1, 1, 1, 83, 1, 1, 1, - 1, 1, 27, 0, 1, 1, 2, 9, 0, 0, 0, 107, 1, 1, 27, 80, - 108, 80, 1, 16, 1, 109, 1, 73, 13, 45, 1, 2, 1, 1, 1, 83, - 16, 71, 1, 1, 110, 111, 1, 83, 112, 113, 104, 1, 1, 1, 33, 1, - 1, 1, 16, 80, 114, 1, 1, 27, 1, 1, 16, 1, 1, 80, 0, 0, - 83, 115, 1, 116, 117, 1, 1, 1, 15, 118, 1, 1, 0, 1, 1, 1, - 1, 119, 1, 1, 9, 0, 0, 16, 1, 120, 121, 95, 1, 1, 1, 89, - 122, 123, 104, 124, 125, 1, 79, 126, 16, 16, 0, 0, 127, 1, 1, 128, - 2, 27, 37, 0, 0, 1, 1, 16, 1, 37, 1, 27, 10, 1, 1, 10, - 1, 13, 1, 1, 129, 33, 0, 0, 1, 16, 80, 1, 1, 129, 1, 27, - 1, 1, 9, 1, 1, 1, 109, 0, 1, 33, 9, 0, 130, 1, 1, 131, - 1, 132, 1, 1, 1, 2, 107, 0, 0, 0, 1, 133, 1, 134, 1, 135, - 1, 1, 1, 136, 137, 138, 1, 139, 9, 82, 1, 1, 1, 1, 0, 0, - 1, 1, 114, 83, 1, 1, 1, 140, 1, 99, 1, 141, 1, 142, 143, 0, - 1, 1, 1, 110, 1, 1, 1, 144, 0, 0, 1, 2, 16, 119, 1, 145, - 15, 1, 82, 80, 84, 1, 1, 83, 16, 1, 6, 11, 1, 5, 1, 2, - 146, 13, 80, 1, 1, 1, 10, 80, 20, 21, 22, 35, 40, 147, 148, 11, - 1, 149, 0, 0, 9, 80, 0, 0, 1, 1, 1, 99, 1, 16, 0, 0, - 11, 80, 73, 0, 80, 0, 0, 0, 1, 50, 27, 1, 1, 1, 1, 150, - 22, 1, 1, 79, 33, 1, 73, 1, 1, 119, 72, 83, 1, 1, 2, 11, - 84, 0, 0, 0, 1, 1, 2, 0, 83, 0, 0, 0, 1, 2, 45, 0, - 0, 1, 16, 33, 33, 105, 5, 151, 1, 0, 0, 0, 11, 1, 1, 2, - 145, 1, 0, 0, 0, 0, 37, 0, 1, 1, 73, 0, 15, 0, 0, 0, - 1, 1, 10, 73, 82, 71, 84, 0, 1, 1, 7, 1, 1, 1, 82, 0, - 33, 0, 0, 0, 1, 83, 1, 15, 1, 95, 1, 1, 1, 12, 152, 153, - 154, 1, 1, 1, 155, 156, 1, 157, 158, 49, 1, 1, 1, 1, 99, 1, - 88, 1, 1, 1, 27, 111, 6, 0, 79, 159, 160, 0, 161, 83, 0, 0, - 10, 45, 0, 0, 154, 1, 162, 163, 164, 165, 166, 167, 105, 27, 168, 27, - 0, 0, 0, 15, 1, 84, 2, 6, 6, 6, 1, 33, 73, 1, 2, 1, - 0, 0, 32, 1, 110, 1, 1, 27, 82, 15, 0, 0, 1, 110, 73, 83, - 1, 11, 0, 0, 9, 80, 1, 1, 9, 1, 16, 0, 0, 2, 9, 169, - 27, 2, 0, 0, 
1, 15, 0, 0, 37, 0, 0, 0, 1, 83, 0, 0, - 1, 1, 1, 11, 1, 16, 1, 1, 1, 1, 15, 0, 170, 0, 1, 1, - 1, 1, 1, 0, 1, 1, 1, 16, -}; - -static RE_UINT8 re_print_stage_5[] = { - 0, 0, 255, 255, 255, 127, 255, 252, 240, 215, 251, 255, 254, 255, 127, 254, - 255, 230, 255, 0, 255, 7, 31, 0, 255, 223, 255, 191, 255, 231, 3, 0, - 255, 63, 255, 79, 223, 63, 240, 255, 239, 159, 249, 255, 255, 253, 197, 243, - 159, 121, 128, 176, 207, 255, 255, 15, 238, 135, 109, 211, 135, 57, 2, 94, - 192, 255, 63, 0, 238, 191, 237, 243, 191, 59, 1, 0, 3, 2, 238, 159, - 159, 57, 192, 176, 236, 199, 61, 214, 24, 199, 255, 195, 199, 61, 129, 0, - 239, 223, 253, 255, 255, 227, 223, 61, 96, 7, 0, 255, 239, 243, 96, 64, - 6, 0, 238, 223, 223, 253, 236, 255, 127, 252, 251, 47, 127, 132, 95, 255, - 28, 0, 255, 135, 150, 37, 240, 254, 174, 236, 255, 59, 95, 63, 255, 243, - 255, 254, 255, 31, 191, 32, 255, 61, 127, 61, 61, 127, 61, 255, 127, 255, - 255, 3, 63, 63, 255, 1, 127, 0, 15, 0, 13, 0, 241, 255, 255, 199, - 255, 207, 255, 159, 15, 240, 255, 248, 127, 3, 63, 248, 255, 170, 223, 255, - 207, 239, 220, 127, 243, 255, 63, 255, 0, 240, 15, 254, 255, 128, 1, 128, - 127, 127, 255, 251, 224, 255, 128, 255, 63, 192, 15, 128, 7, 0, 0, 248, - 126, 126, 126, 0, 127, 248, 248, 224, 127, 95, 219, 255, 248, 255, 252, 255, - 247, 255, 127, 15, 252, 252, 252, 28, 0, 62, 255, 239, 255, 183, 135, 255, - 143, 255, 15, 255, 63, 253, 191, 145, 191, 255, 55, 248, 255, 143, 255, 131, - 255, 240, 111, 240, 239, 254, 15, 135, 63, 254, 7, 255, 3, 30, 0, 254, - 7, 252, 0, 128, 127, 189, 129, 224, 207, 31, 255, 43, 7, 128, 255, 224, - 100, 222, 255, 235, 239, 255, 191, 231, 223, 223, 255, 123, 95, 252, 255, 249, - 219, 7, 159, 255, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, - 238, 251, 249, 127, 2, 0, -}; - -/* Print: 2414 bytes. 
*/ - -RE_UINT32 re_get_print(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_print_stage_1[f] << 4; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_print_stage_2[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_print_stage_3[pos + f] << 2; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_print_stage_4[pos + f] << 4; - pos += code; - value = (re_print_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Word. */ - -static RE_UINT8 re_word_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, - 6, 6, -}; - -static RE_UINT8 re_word_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 18, 19, 13, 20, 13, 21, 13, 13, 13, 13, 22, 7, 7, - 23, 24, 13, 13, 13, 13, 25, 26, 13, 13, 27, 28, 29, 30, 31, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 32, 7, 33, 34, 7, 35, 13, 13, 13, 13, 13, 36, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 37, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, -}; - -static RE_UINT8 re_word_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, - 29, 30, 31, 31, 32, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 31, - 36, 37, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39, - 1, 1, 1, 1, 40, 1, 41, 42, 43, 44, 45, 46, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 47, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 48, 49, 1, 50, 51, 52, 53, 54, 55, 56, 57, 58, 1, 59, - 60, 61, 62, 63, 64, 31, 31, 31, 65, 66, 67, 68, 69, 70, 71, 72, - 73, 31, 74, 31, 75, 31, 31, 31, 1, 1, 1, 76, 77, 78, 31, 31, - 1, 1, 1, 1, 79, 31, 31, 31, 31, 31, 31, 31, 1, 1, 80, 31, - 1, 1, 81, 82, 31, 31, 31, 83, 1, 1, 1, 1, 1, 1, 
1, 84, - 1, 1, 85, 31, 31, 31, 31, 31, 86, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 87, 31, 31, 31, 31, 88, 89, 31, 90, 91, 92, 93, - 31, 31, 94, 31, 31, 31, 31, 31, 95, 31, 31, 31, 31, 31, 31, 31, - 96, 97, 31, 31, 31, 31, 98, 31, 31, 99, 31, 31, 31, 31, 31, 31, - 1, 1, 1, 1, 1, 1, 100, 1, 1, 1, 1, 1, 1, 1, 1, 101, - 102, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 103, 31, - 1, 1, 104, 31, 31, 31, 31, 31, 31, 105, 31, 31, 31, 31, 31, 31, -}; - -static RE_UINT8 re_word_stage_4[] = { - 0, 1, 2, 3, 0, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 7, 8, 6, 6, 6, 9, 10, 11, 6, 12, - 6, 6, 6, 6, 11, 6, 6, 6, 6, 13, 14, 15, 16, 17, 18, 19, - 20, 6, 6, 21, 6, 6, 22, 23, 24, 6, 25, 6, 6, 26, 6, 27, - 6, 28, 29, 0, 0, 30, 31, 11, 6, 6, 6, 32, 33, 34, 35, 36, - 37, 38, 39, 40, 41, 42, 43, 44, 45, 42, 46, 47, 48, 49, 50, 51, - 52, 53, 54, 55, 52, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, - 15, 67, 68, 0, 69, 70, 71, 0, 72, 73, 74, 75, 76, 77, 78, 0, - 6, 6, 79, 6, 80, 6, 81, 82, 6, 6, 83, 6, 84, 85, 86, 6, - 87, 6, 60, 0, 88, 6, 6, 89, 15, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 90, 3, 6, 6, 91, 92, 93, 94, 95, 6, 6, 96, 97, - 98, 6, 6, 99, 6, 100, 6, 101, 102, 103, 104, 105, 6, 106, 107, 0, - 29, 6, 102, 108, 107, 109, 0, 0, 6, 6, 110, 111, 6, 6, 6, 94, - 6, 99, 112, 80, 113, 0, 114, 115, 6, 6, 6, 6, 6, 6, 6, 116, - 89, 6, 117, 80, 6, 118, 119, 120, 121, 122, 123, 124, 125, 0, 24, 126, - 127, 128, 129, 6, 113, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 130, 6, 97, 6, 131, 102, 6, 6, 6, 6, 132, - 6, 81, 6, 133, 134, 135, 135, 6, 0, 136, 0, 0, 0, 0, 0, 0, - 137, 138, 15, 6, 139, 15, 6, 82, 140, 141, 6, 6, 142, 67, 0, 24, - 6, 6, 6, 6, 6, 101, 0, 0, 6, 6, 6, 6, 6, 6, 101, 0, - 6, 6, 6, 6, 143, 0, 24, 80, 144, 145, 6, 146, 6, 6, 6, 26, - 147, 148, 6, 6, 149, 150, 0, 147, 6, 151, 6, 94, 6, 6, 152, 153, - 6, 154, 94, 77, 6, 6, 155, 102, 6, 134, 156, 157, 6, 6, 158, 159, - 160, 161, 82, 162, 6, 6, 6, 163, 6, 6, 6, 6, 6, 164, 165, 29, - 6, 6, 6, 154, 6, 6, 166, 0, 167, 168, 
169, 6, 6, 26, 170, 6, - 6, 80, 24, 6, 171, 6, 151, 172, 88, 173, 174, 175, 6, 6, 6, 77, - 1, 2, 3, 104, 6, 102, 176, 0, 177, 178, 179, 0, 6, 6, 6, 67, - 0, 0, 6, 93, 0, 0, 0, 180, 0, 0, 0, 0, 77, 6, 126, 181, - 6, 24, 100, 67, 80, 6, 182, 0, 6, 6, 6, 6, 80, 79, 183, 29, - 6, 184, 6, 185, 0, 0, 0, 0, 6, 134, 101, 151, 0, 0, 0, 0, - 186, 187, 101, 134, 102, 0, 0, 188, 101, 166, 0, 0, 6, 189, 0, 0, - 190, 191, 0, 77, 77, 0, 74, 192, 6, 101, 101, 193, 26, 0, 0, 0, - 6, 6, 113, 0, 6, 193, 6, 193, 6, 6, 192, 194, 6, 67, 24, 195, - 6, 196, 24, 197, 6, 6, 198, 0, 199, 200, 0, 0, 201, 202, 6, 203, - 33, 42, 204, 205, 0, 0, 0, 0, 6, 6, 203, 0, 6, 6, 206, 0, - 0, 0, 0, 0, 6, 207, 208, 0, 6, 6, 209, 0, 6, 99, 97, 0, - 210, 110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 211, - 0, 0, 0, 0, 0, 0, 6, 212, 213, 5, 214, 215, 171, 216, 0, 0, - 6, 6, 6, 6, 166, 0, 0, 0, 6, 6, 6, 142, 6, 6, 6, 6, - 6, 6, 185, 0, 0, 0, 0, 0, 6, 142, 0, 0, 0, 0, 0, 0, - 6, 6, 192, 0, 0, 0, 0, 0, 6, 212, 102, 97, 0, 0, 24, 105, - 6, 134, 217, 218, 88, 0, 0, 0, 6, 6, 219, 102, 220, 0, 0, 181, - 6, 6, 6, 6, 6, 6, 6, 143, 6, 6, 6, 6, 6, 6, 6, 193, - 221, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 222, 223, 0, 0, 0, - 0, 0, 0, 224, 225, 226, 0, 0, 0, 0, 227, 0, 0, 0, 0, 0, - 6, 6, 196, 6, 228, 229, 230, 6, 231, 232, 233, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 234, 235, 82, 196, 196, 131, 131, 213, 213, 236, 6, - 6, 237, 6, 238, 239, 240, 0, 0, 241, 242, 0, 0, 0, 0, 0, 0, - 6, 6, 6, 6, 6, 6, 243, 0, 6, 6, 203, 0, 0, 0, 0, 0, - 230, 244, 245, 246, 247, 248, 0, 0, 0, 24, 79, 79, 97, 0, 0, 0, - 6, 6, 6, 6, 6, 6, 134, 0, 6, 93, 6, 6, 6, 6, 6, 6, - 80, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 221, 0, 0, - 80, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 88, -}; - -static RE_UINT8 re_word_stage_5[] = { - 0, 0, 0, 0, 0, 0, 255, 3, 254, 255, 255, 135, 254, 255, 255, 7, - 0, 4, 32, 4, 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, - 31, 80, 0, 0, 255, 255, 223, 188, 64, 215, 255, 255, 251, 255, 255, 255, - 255, 255, 191, 255, 255, 255, 
254, 255, 255, 255, 127, 2, 254, 255, 255, 255, - 255, 0, 254, 255, 255, 255, 255, 191, 182, 0, 255, 255, 255, 7, 7, 0, - 0, 0, 255, 7, 255, 195, 255, 255, 255, 255, 239, 159, 255, 253, 255, 159, - 0, 0, 255, 255, 255, 231, 255, 255, 255, 255, 3, 0, 255, 255, 63, 4, - 255, 63, 0, 0, 255, 255, 255, 15, 255, 255, 223, 63, 0, 0, 240, 255, - 207, 255, 254, 255, 239, 159, 249, 255, 255, 253, 197, 243, 159, 121, 128, 176, - 207, 255, 3, 0, 238, 135, 249, 255, 255, 253, 109, 211, 135, 57, 2, 94, - 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 243, 191, 59, 1, 0, - 207, 255, 0, 2, 238, 159, 249, 255, 159, 57, 192, 176, 207, 255, 2, 0, - 236, 199, 61, 214, 24, 199, 255, 195, 199, 61, 129, 0, 192, 255, 0, 0, - 239, 223, 253, 255, 255, 253, 255, 227, 223, 61, 96, 7, 207, 255, 0, 0, - 255, 253, 239, 243, 223, 61, 96, 64, 207, 255, 6, 0, 238, 223, 253, 255, - 255, 255, 255, 231, 223, 125, 240, 128, 207, 255, 0, 252, 236, 255, 127, 252, - 255, 255, 251, 47, 127, 132, 95, 255, 192, 255, 12, 0, 255, 255, 255, 7, - 255, 127, 255, 3, 150, 37, 240, 254, 174, 236, 255, 59, 95, 63, 255, 243, - 1, 0, 0, 3, 255, 3, 160, 194, 255, 254, 255, 255, 255, 31, 254, 255, - 223, 255, 255, 254, 255, 255, 255, 31, 64, 0, 0, 0, 255, 3, 255, 255, - 255, 255, 255, 63, 191, 32, 255, 255, 255, 255, 255, 247, 255, 61, 127, 61, - 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, 255, 255, 61, 255, - 255, 255, 0, 0, 255, 255, 63, 63, 255, 159, 255, 255, 255, 199, 255, 1, - 255, 223, 31, 0, 255, 255, 31, 0, 255, 255, 15, 0, 255, 223, 13, 0, - 255, 255, 143, 48, 255, 3, 0, 0, 0, 56, 255, 3, 255, 255, 255, 0, - 255, 7, 255, 255, 255, 255, 63, 0, 255, 255, 255, 127, 255, 15, 255, 15, - 192, 255, 255, 255, 255, 63, 31, 0, 255, 15, 255, 255, 255, 3, 255, 3, - 255, 255, 255, 159, 128, 0, 255, 127, 255, 15, 255, 3, 0, 248, 15, 0, - 255, 227, 255, 255, 255, 1, 0, 0, 0, 0, 247, 255, 255, 255, 127, 3, - 255, 255, 63, 248, 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, - 255, 31, 220, 31, 0, 48, 
0, 0, 0, 0, 0, 128, 1, 0, 16, 0, - 0, 0, 2, 128, 0, 0, 255, 31, 255, 255, 1, 0, 132, 252, 47, 62, - 80, 189, 255, 243, 224, 67, 0, 0, 0, 0, 192, 255, 255, 127, 255, 255, - 31, 248, 15, 0, 255, 128, 0, 128, 255, 255, 127, 0, 127, 127, 127, 127, - 0, 128, 0, 0, 224, 0, 0, 0, 254, 255, 62, 31, 255, 255, 127, 230, - 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 0, 0, - 255, 31, 255, 255, 255, 15, 0, 0, 255, 255, 247, 191, 0, 0, 128, 255, - 252, 255, 255, 255, 255, 249, 255, 255, 255, 127, 255, 0, 255, 0, 0, 0, - 63, 0, 255, 3, 255, 255, 255, 40, 255, 63, 255, 255, 1, 128, 255, 3, - 255, 63, 255, 3, 255, 255, 127, 252, 7, 0, 0, 56, 255, 255, 124, 0, - 126, 126, 126, 0, 127, 127, 255, 255, 63, 0, 255, 255, 255, 55, 255, 3, - 15, 0, 255, 255, 127, 248, 255, 255, 255, 255, 255, 3, 127, 0, 248, 224, - 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, - 0, 0, 255, 15, 255, 255, 24, 0, 0, 224, 0, 0, 0, 0, 223, 255, - 252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, - 0, 0, 0, 32, 1, 0, 0, 0, 15, 255, 62, 0, 255, 255, 15, 255, - 255, 0, 255, 255, 15, 0, 0, 0, 63, 253, 255, 255, 255, 255, 191, 145, - 255, 255, 55, 0, 255, 255, 255, 192, 111, 240, 239, 254, 255, 255, 15, 135, - 127, 0, 0, 0, 255, 255, 7, 0, 192, 255, 0, 128, 255, 1, 255, 3, - 255, 255, 223, 255, 255, 255, 79, 0, 31, 28, 255, 23, 255, 255, 251, 255, - 255, 255, 255, 64, 127, 189, 255, 191, 255, 1, 255, 255, 255, 7, 255, 3, - 159, 57, 129, 224, 207, 31, 31, 0, 191, 0, 255, 3, 255, 255, 63, 255, - 1, 0, 0, 63, 17, 0, 255, 3, 255, 255, 255, 227, 255, 3, 0, 128, - 255, 255, 255, 1, 255, 253, 255, 255, 1, 0, 255, 3, 0, 0, 252, 255, - 255, 254, 127, 0, 15, 0, 255, 3, 248, 255, 255, 224, 31, 0, 255, 255, - 0, 128, 255, 255, 3, 0, 0, 0, 255, 7, 255, 31, 255, 1, 255, 99, - 224, 227, 7, 248, 231, 15, 0, 0, 0, 60, 0, 0, 28, 0, 0, 0, - 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, - 255, 255, 255, 123, 95, 252, 253, 255, 
63, 255, 255, 255, 253, 255, 255, 247, - 247, 207, 255, 255, 255, 255, 127, 248, 255, 31, 32, 0, 16, 0, 0, 248, - 254, 255, 0, 0, 127, 255, 255, 249, 219, 7, 0, 0, 31, 0, 127, 0, - 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, - 238, 251, 255, 15, -}; - -/* Word: 2310 bytes. */ - -RE_UINT32 re_get_word(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_word_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_word_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_word_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_word_stage_4[pos + f] << 5; - pos += code; - value = (re_word_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* XDigit. */ - -static RE_UINT8 re_xdigit_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_xdigit_stage_2[] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 5, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 7, - 8, 4, 9, 10, 4, 4, 4, 4, 4, 4, 4, 4, 4, 11, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 12, 4, 4, 13, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -}; - -static RE_UINT8 re_xdigit_stage_3[] = { - 0, 1, 1, 1, 1, 1, 2, 3, 1, 4, 4, 4, 4, 4, 5, 6, - 7, 1, 1, 1, 1, 1, 1, 8, 9, 10, 11, 12, 13, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, - 14, 15, 16, 17, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 18, - 1, 1, 1, 1, 19, 1, 1, 1, 20, 21, 17, 1, 5, 1, 22, 23, - 8, 1, 1, 1, 16, 1, 1, 1, 1, 1, 24, 16, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 25, 1, 16, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_xdigit_stage_4[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 3, 2, 0, 2, 2, 2, 4, - 2, 5, 2, 5, 2, 6, 2, 6, 3, 2, 2, 2, 2, 4, 6, 2, - 2, 2, 2, 3, 6, 2, 2, 2, 2, 7, 2, 6, 2, 2, 8, 2, - 2, 6, 0, 2, 2, 8, 2, 2, 2, 2, 2, 6, 4, 2, 2, 9, - 2, 6, 
2, 2, 2, 2, 2, 0, 10, 11, 2, 2, 2, 2, 3, 2, - 2, 5, 2, 0, 12, 2, 2, 6, 2, 6, 2, 4, 0, 2, 2, 2, - 2, 3, 2, 2, 2, 2, 2, 13, -}; - -static RE_UINT8 re_xdigit_stage_5[] = { - 0, 0, 0, 0, 0, 0, 255, 3, 126, 0, 0, 0, 126, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 3, 0, 0, - 255, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 192, 255, 0, 0, - 0, 0, 255, 3, 0, 0, 0, 0, 192, 255, 0, 0, 0, 0, 0, 0, - 255, 3, 255, 3, 0, 0, 0, 0, 0, 0, 255, 3, 0, 0, 255, 3, - 0, 0, 255, 3, 126, 0, 0, 0, 126, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 192, 255, 0, 192, 255, 255, 255, 255, 255, 255, -}; - -/* XDigit: 441 bytes. */ - -RE_UINT32 re_get_xdigit(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_xdigit_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_xdigit_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_xdigit_stage_3[pos + f] << 2; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_xdigit_stage_4[pos + f] << 6; - pos += code; - value = (re_xdigit_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Posix_Digit. */ - -static RE_UINT8 re_posix_digit_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_posix_digit_stage_2[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_posix_digit_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_posix_digit_stage_4[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_posix_digit_stage_5[] = { - 0, 0, 0, 0, 0, 0, 255, 3, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -/* Posix_Digit: 97 bytes. 
*/ - -RE_UINT32 re_get_posix_digit(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_posix_digit_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_posix_digit_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_posix_digit_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_posix_digit_stage_4[pos + f] << 6; - pos += code; - value = (re_posix_digit_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Posix_AlNum. */ - -static RE_UINT8 re_posix_alnum_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, -}; - -static RE_UINT8 re_posix_alnum_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 18, 19, 13, 20, 13, 21, 13, 13, 13, 13, 22, 7, 7, - 23, 24, 13, 13, 13, 13, 25, 26, 13, 13, 27, 13, 28, 29, 30, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 31, 7, 32, 33, 7, 34, 13, 13, 13, 13, 13, 35, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, -}; - -static RE_UINT8 re_posix_alnum_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, - 29, 30, 31, 31, 32, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 31, - 36, 37, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39, - 1, 1, 1, 1, 40, 1, 41, 42, 43, 44, 45, 46, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 47, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 48, 49, 1, 50, 51, 52, 53, 54, 55, 56, 57, 58, 1, 59, - 60, 61, 62, 63, 64, 31, 31, 31, 65, 66, 67, 68, 69, 70, 71, 72, - 73, 31, 74, 31, 75, 31, 31, 31, 1, 1, 1, 76, 77, 78, 31, 31, - 1, 1, 1, 1, 79, 31, 31, 31, 31, 31, 31, 31, 1, 1, 80, 31, - 1, 1, 81, 82, 31, 31, 31, 83, 1, 
1, 1, 1, 1, 1, 1, 84, - 1, 1, 85, 31, 31, 31, 31, 31, 86, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 87, 31, 31, 31, 31, 31, 31, 31, 88, 89, 90, 91, - 92, 31, 31, 31, 31, 31, 31, 31, 93, 94, 31, 31, 31, 31, 95, 31, - 31, 96, 31, 31, 31, 31, 31, 31, 1, 1, 1, 1, 1, 1, 97, 1, - 1, 1, 1, 1, 1, 1, 1, 98, 99, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 100, 31, 1, 1, 101, 31, 31, 31, 31, 31, -}; - -static RE_UINT8 re_posix_alnum_stage_4[] = { - 0, 1, 2, 2, 0, 3, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 6, 7, 0, 0, 8, 9, 10, 11, 5, 12, - 5, 5, 5, 5, 13, 5, 5, 5, 5, 14, 15, 16, 17, 18, 19, 20, - 21, 5, 22, 23, 5, 5, 24, 25, 26, 5, 27, 5, 5, 28, 29, 30, - 31, 32, 33, 0, 0, 34, 35, 36, 5, 37, 38, 39, 40, 41, 42, 43, - 44, 45, 46, 47, 48, 49, 50, 51, 52, 49, 53, 54, 55, 56, 57, 0, - 58, 59, 60, 61, 58, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, - 16, 73, 74, 0, 75, 76, 77, 0, 78, 0, 79, 80, 81, 82, 0, 0, - 5, 83, 26, 84, 85, 5, 86, 87, 5, 5, 88, 5, 89, 90, 91, 5, - 92, 5, 93, 0, 94, 5, 5, 95, 16, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 96, 2, 5, 5, 97, 98, 99, 99, 100, 5, 101, 102, 0, - 0, 5, 5, 103, 5, 104, 5, 105, 106, 107, 26, 108, 5, 109, 110, 0, - 111, 5, 106, 112, 0, 113, 0, 0, 5, 114, 115, 0, 5, 116, 5, 117, - 5, 105, 118, 119, 120, 0, 0, 121, 5, 5, 5, 5, 5, 5, 0, 122, - 95, 5, 123, 119, 5, 124, 125, 126, 0, 0, 0, 127, 128, 0, 0, 0, - 129, 130, 131, 5, 120, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 132, 5, 110, 5, 133, 106, 5, 5, 5, 5, 134, - 5, 86, 5, 135, 136, 137, 137, 5, 0, 138, 0, 0, 0, 0, 0, 0, - 139, 140, 16, 5, 141, 16, 5, 87, 142, 143, 5, 5, 144, 73, 0, 26, - 5, 5, 5, 5, 5, 105, 0, 0, 5, 5, 5, 5, 5, 5, 105, 0, - 5, 5, 5, 5, 32, 0, 26, 119, 145, 146, 5, 147, 5, 5, 5, 94, - 148, 149, 5, 5, 150, 151, 0, 148, 152, 17, 5, 99, 5, 5, 153, 154, - 29, 104, 155, 82, 5, 156, 138, 157, 5, 136, 158, 159, 5, 106, 160, 161, - 162, 163, 87, 164, 5, 5, 5, 165, 5, 5, 5, 5, 5, 166, 167, 111, - 5, 5, 5, 168, 5, 5, 169, 0, 170, 171, 172, 5, 5, 28, 173, 5, - 5, 119, 26, 
5, 174, 5, 17, 175, 0, 0, 0, 176, 5, 5, 5, 82, - 0, 2, 2, 177, 5, 106, 178, 0, 179, 180, 181, 0, 5, 5, 5, 73, - 0, 0, 5, 182, 0, 0, 0, 0, 0, 0, 0, 0, 82, 5, 183, 0, - 5, 26, 104, 73, 119, 5, 184, 0, 5, 5, 5, 5, 119, 26, 185, 111, - 5, 186, 5, 61, 0, 0, 0, 0, 5, 136, 105, 17, 0, 0, 0, 0, - 187, 188, 105, 136, 106, 0, 0, 189, 105, 169, 0, 0, 5, 190, 0, 0, - 191, 99, 0, 82, 82, 0, 79, 192, 5, 105, 105, 155, 28, 0, 0, 0, - 5, 5, 120, 0, 5, 155, 5, 155, 5, 5, 193, 0, 149, 33, 26, 120, - 5, 155, 26, 194, 5, 5, 195, 0, 196, 197, 0, 0, 198, 199, 5, 120, - 40, 49, 200, 61, 0, 0, 0, 0, 5, 5, 201, 0, 5, 5, 202, 0, - 0, 0, 0, 0, 5, 203, 204, 0, 5, 106, 205, 0, 5, 105, 0, 0, - 206, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 207, - 0, 0, 0, 0, 0, 0, 5, 33, 208, 209, 78, 210, 174, 211, 0, 0, - 5, 5, 5, 5, 169, 0, 0, 0, 5, 5, 5, 144, 5, 5, 5, 5, - 5, 5, 61, 0, 0, 0, 0, 0, 5, 144, 0, 0, 0, 0, 0, 0, - 5, 5, 212, 0, 0, 0, 0, 0, 5, 33, 106, 0, 0, 0, 26, 158, - 5, 136, 61, 213, 94, 0, 0, 0, 5, 5, 214, 106, 173, 0, 0, 78, - 5, 5, 5, 5, 5, 5, 5, 32, 5, 5, 5, 5, 5, 5, 5, 155, - 215, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 216, 217, 0, 0, 0, - 5, 5, 218, 5, 219, 220, 221, 5, 222, 223, 224, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 225, 226, 87, 218, 218, 133, 133, 208, 208, 227, 0, - 228, 229, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 192, 0, - 5, 5, 230, 0, 0, 0, 0, 0, 221, 231, 232, 233, 234, 235, 0, 0, - 0, 26, 236, 236, 110, 0, 0, 0, 5, 5, 5, 5, 5, 5, 136, 0, - 5, 182, 5, 5, 5, 5, 5, 5, 119, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 215, 0, 0, 119, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_posix_alnum_stage_5[] = { - 0, 0, 0, 0, 0, 0, 255, 3, 254, 255, 255, 7, 0, 4, 32, 4, - 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, - 32, 0, 0, 0, 0, 0, 223, 188, 64, 215, 255, 255, 251, 255, 255, 255, - 255, 255, 191, 255, 3, 252, 255, 255, 255, 255, 254, 255, 255, 255, 127, 2, - 254, 255, 255, 255, 255, 0, 0, 0, 0, 0, 255, 191, 182, 0, 255, 255, - 255, 7, 7, 0, 0, 0, 255, 7, 255, 255, 255, 254, 0, 
192, 255, 255, - 255, 255, 239, 31, 254, 225, 0, 156, 0, 0, 255, 255, 0, 224, 255, 255, - 255, 255, 3, 0, 0, 252, 255, 255, 255, 7, 48, 4, 255, 255, 255, 252, - 255, 31, 0, 0, 255, 255, 255, 1, 255, 255, 223, 63, 0, 0, 240, 255, - 248, 3, 255, 255, 255, 255, 255, 239, 255, 223, 225, 255, 15, 0, 254, 255, - 239, 159, 249, 255, 255, 253, 197, 227, 159, 89, 128, 176, 15, 0, 3, 0, - 238, 135, 249, 255, 255, 253, 109, 195, 135, 25, 2, 94, 0, 0, 63, 0, - 238, 191, 251, 255, 255, 253, 237, 227, 191, 27, 1, 0, 15, 0, 0, 2, - 238, 159, 249, 255, 159, 25, 192, 176, 15, 0, 2, 0, 236, 199, 61, 214, - 24, 199, 255, 195, 199, 29, 129, 0, 239, 223, 253, 255, 255, 253, 255, 227, - 223, 29, 96, 7, 15, 0, 0, 0, 255, 253, 239, 227, 223, 29, 96, 64, - 15, 0, 6, 0, 238, 223, 253, 255, 255, 255, 255, 231, 223, 93, 240, 128, - 15, 0, 0, 252, 236, 255, 127, 252, 255, 255, 251, 47, 127, 128, 95, 255, - 0, 0, 12, 0, 255, 255, 255, 7, 127, 32, 0, 0, 150, 37, 240, 254, - 174, 236, 255, 59, 95, 32, 0, 240, 1, 0, 0, 0, 255, 254, 255, 255, - 255, 31, 254, 255, 3, 255, 255, 254, 255, 255, 255, 31, 255, 255, 127, 249, - 231, 193, 255, 255, 127, 64, 0, 48, 191, 32, 255, 255, 255, 255, 255, 247, - 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, - 255, 255, 61, 255, 255, 255, 255, 135, 255, 255, 0, 0, 255, 255, 63, 63, - 255, 159, 255, 255, 255, 199, 255, 1, 255, 223, 15, 0, 255, 255, 15, 0, - 255, 223, 13, 0, 255, 255, 207, 255, 255, 1, 128, 16, 255, 255, 255, 0, - 255, 7, 255, 255, 255, 255, 63, 0, 255, 255, 255, 127, 255, 15, 255, 1, - 255, 63, 31, 0, 255, 15, 255, 255, 255, 3, 0, 0, 255, 255, 255, 15, - 254, 255, 31, 0, 128, 0, 0, 0, 255, 255, 239, 255, 239, 15, 0, 0, - 255, 243, 0, 252, 191, 255, 3, 0, 0, 224, 0, 252, 255, 255, 255, 63, - 255, 1, 0, 0, 0, 222, 111, 0, 128, 255, 31, 0, 63, 63, 255, 170, - 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, 0, 0, 2, 128, - 0, 0, 255, 31, 132, 252, 47, 62, 80, 189, 255, 243, 224, 67, 0, 0, - 0, 0, 192, 255, 255, 127, 
255, 255, 31, 120, 12, 0, 255, 128, 0, 0, - 255, 255, 127, 0, 127, 127, 127, 127, 0, 128, 0, 0, 224, 0, 0, 0, - 254, 3, 62, 31, 255, 255, 127, 224, 224, 255, 255, 255, 255, 63, 254, 255, - 255, 127, 0, 0, 255, 31, 255, 255, 0, 12, 0, 0, 255, 127, 240, 143, - 0, 0, 128, 255, 252, 255, 255, 255, 255, 249, 255, 255, 255, 127, 255, 0, - 187, 247, 255, 255, 47, 0, 0, 0, 0, 0, 252, 40, 255, 255, 7, 0, - 255, 255, 247, 255, 223, 255, 0, 124, 255, 63, 0, 0, 255, 255, 127, 196, - 5, 0, 0, 56, 255, 255, 60, 0, 126, 126, 126, 0, 127, 127, 255, 255, - 63, 0, 255, 255, 255, 7, 0, 0, 15, 0, 255, 255, 127, 248, 255, 255, - 255, 63, 255, 255, 255, 255, 255, 3, 127, 0, 248, 224, 255, 253, 127, 95, - 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, 0, 0, 255, 15, - 0, 0, 223, 255, 192, 255, 255, 255, 252, 252, 252, 28, 255, 239, 255, 255, - 127, 255, 255, 183, 255, 63, 255, 63, 255, 255, 31, 0, 255, 255, 1, 0, - 15, 255, 62, 0, 255, 255, 15, 255, 255, 0, 255, 255, 63, 253, 255, 255, - 255, 255, 191, 145, 255, 255, 55, 0, 255, 255, 255, 192, 111, 240, 239, 254, - 31, 0, 0, 0, 63, 0, 0, 0, 255, 255, 71, 0, 30, 0, 0, 20, - 255, 255, 251, 255, 255, 255, 159, 64, 127, 189, 255, 191, 255, 1, 255, 255, - 159, 25, 129, 224, 187, 7, 0, 0, 179, 0, 0, 0, 255, 255, 63, 127, - 0, 0, 0, 63, 17, 0, 0, 0, 255, 255, 255, 227, 0, 0, 0, 128, - 255, 253, 255, 255, 255, 255, 127, 127, 0, 0, 252, 255, 255, 254, 127, 0, - 127, 0, 0, 0, 248, 255, 255, 224, 31, 0, 255, 255, 3, 0, 0, 0, - 255, 7, 255, 31, 255, 1, 255, 67, 255, 255, 223, 255, 255, 255, 255, 223, - 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, - 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, 247, 15, 0, 0, - 127, 255, 255, 249, 219, 7, 0, 0, 143, 0, 0, 0, 150, 254, 247, 10, - 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, 238, 251, 255, 15, - 255, 3, 255, 255, -}; - -/* Posix_AlNum: 2197 bytes. 
*/ - -RE_UINT32 re_get_posix_alnum(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_posix_alnum_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_posix_alnum_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_posix_alnum_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_posix_alnum_stage_4[pos + f] << 5; - pos += code; - value = (re_posix_alnum_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Posix_Punct. */ - -static RE_UINT8 re_posix_punct_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_posix_punct_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 9, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 11, - 12, 13, 14, 15, 16, 7, 7, 7, 7, 7, 7, 7, 7, 17, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 18, 7, 7, 19, 20, 7, 21, 22, 23, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -}; - -static RE_UINT8 re_posix_punct_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 1, 17, 18, 1, 19, 20, 21, 22, 23, 24, 25, 1, 1, 26, - 27, 28, 29, 30, 31, 29, 29, 32, 29, 29, 29, 33, 34, 35, 36, 37, - 38, 39, 40, 29, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 41, 1, 1, 1, 1, 1, 1, 42, 1, 43, 44, - 45, 46, 47, 48, 1, 1, 1, 1, 1, 1, 1, 49, 1, 50, 51, 52, - 1, 53, 1, 54, 1, 55, 1, 1, 56, 57, 58, 59, 1, 1, 1, 1, - 60, 61, 62, 1, 63, 64, 65, 66, 1, 1, 1, 1, 67, 1, 1, 1, - 1, 1, 1, 1, 68, 1, 1, 1, 1, 1, 69, 70, 1, 1, 1, 1, - 1, 1, 1, 1, 71, 1, 1, 1, 72, 73, 74, 75, 1, 1, 76, 77, - 29, 29, 78, 1, 1, 1, 1, 1, 1, 79, 1, 1, 1, 1, 10, 1, - 80, 81, 82, 29, 29, 29, 83, 84, 85, 86, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_posix_punct_stage_4[] = { - 0, 1, 2, 3, 0, 4, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 6, 7, 0, 0, 0, 8, 9, 0, 0, 10, - 0, 0, 0, 0, 11, 
0, 0, 0, 0, 0, 12, 0, 13, 14, 15, 16, - 17, 0, 0, 18, 0, 0, 19, 20, 21, 0, 0, 0, 0, 0, 0, 22, - 0, 23, 14, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 25, - 0, 0, 0, 0, 0, 0, 0, 26, 0, 0, 0, 27, 0, 0, 0, 28, - 0, 0, 0, 29, 0, 0, 0, 0, 0, 0, 30, 31, 0, 0, 0, 32, - 0, 29, 33, 0, 0, 0, 0, 0, 34, 35, 0, 0, 36, 37, 38, 0, - 0, 0, 39, 0, 37, 0, 0, 40, 0, 0, 0, 41, 42, 0, 0, 0, - 43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 45, 0, 0, 46, - 0, 47, 0, 0, 0, 0, 48, 0, 49, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 50, 0, 0, 0, 37, 51, 37, 0, 0, 0, 0, 52, 0, 0, - 0, 0, 12, 53, 0, 0, 0, 54, 0, 55, 0, 37, 0, 0, 56, 0, - 0, 0, 0, 0, 0, 57, 58, 59, 60, 61, 62, 63, 64, 62, 0, 0, - 65, 66, 67, 0, 68, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, - 51, 51, 51, 51, 51, 51, 51, 62, 51, 69, 49, 0, 54, 70, 0, 0, - 51, 51, 51, 70, 71, 51, 51, 51, 51, 51, 51, 72, 73, 74, 75, 76, - 0, 0, 0, 0, 0, 0, 0, 77, 0, 0, 0, 27, 0, 0, 0, 0, - 51, 78, 79, 0, 80, 51, 51, 81, 51, 51, 51, 51, 51, 51, 70, 82, - 83, 84, 0, 0, 45, 43, 0, 40, 0, 0, 0, 0, 85, 0, 51, 86, - 62, 87, 88, 51, 87, 89, 51, 62, 0, 0, 0, 0, 0, 0, 51, 51, - 0, 0, 0, 0, 60, 51, 69, 37, 90, 0, 0, 91, 0, 0, 0, 92, - 93, 94, 0, 0, 95, 0, 0, 0, 0, 96, 0, 97, 0, 0, 98, 99, - 0, 98, 29, 0, 0, 0, 100, 0, 0, 0, 54, 101, 0, 0, 37, 26, - 0, 0, 40, 0, 0, 0, 0, 102, 0, 103, 0, 0, 0, 104, 94, 0, - 0, 37, 0, 0, 0, 0, 0, 105, 42, 60, 106, 107, 0, 0, 0, 0, - 1, 2, 2, 108, 0, 0, 0, 109, 110, 111, 0, 112, 113, 43, 60, 114, - 0, 0, 0, 0, 29, 0, 27, 0, 0, 0, 0, 30, 0, 0, 0, 0, - 0, 0, 5, 115, 0, 0, 0, 0, 29, 29, 0, 0, 0, 0, 0, 0, - 0, 0, 116, 29, 0, 0, 117, 118, 0, 112, 0, 0, 119, 0, 0, 0, - 0, 0, 120, 0, 0, 121, 94, 0, 0, 0, 86, 122, 0, 0, 123, 0, - 0, 124, 0, 0, 0, 103, 0, 0, 0, 0, 125, 0, 0, 0, 126, 0, - 0, 0, 0, 0, 0, 0, 127, 0, 0, 0, 128, 129, 0, 0, 0, 0, - 0, 54, 0, 0, 0, 0, 0, 0, 0, 0, 130, 26, 0, 0, 0, 0, - 0, 0, 0, 131, 0, 0, 0, 0, 0, 0, 0, 98, 0, 0, 0, 132, - 0, 111, 133, 0, 0, 0, 0, 0, 0, 0, 0, 0, 134, 0, 0, 0, - 51, 51, 51, 51, 51, 51, 51, 70, 51, 135, 51, 136, 137, 138, 
51, 41, - 51, 51, 139, 0, 0, 0, 0, 0, 51, 51, 93, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 140, 40, 132, 132, 30, 30, 103, 103, 141, 0, - 0, 142, 0, 143, 144, 0, 0, 0, 0, 0, 37, 0, 0, 0, 0, 0, - 51, 145, 51, 51, 81, 146, 147, 70, 60, 148, 39, 149, 87, 129, 0, 150, - 151, 152, 153, 0, 0, 0, 0, 0, 51, 51, 51, 51, 51, 51, 154, 155, - 51, 51, 51, 81, 51, 51, 156, 0, 145, 51, 157, 51, 61, 21, 0, 0, - 23, 158, 159, 0, 160, 0, 43, 0, -}; - -static RE_UINT8 re_posix_punct_stage_5[] = { - 0, 0, 0, 0, 254, 255, 0, 252, 1, 0, 0, 248, 1, 0, 0, 120, - 254, 219, 211, 137, 0, 0, 128, 0, 60, 0, 252, 255, 224, 175, 255, 255, - 0, 0, 32, 64, 176, 0, 0, 0, 0, 0, 64, 0, 4, 0, 0, 0, - 0, 0, 0, 252, 0, 230, 0, 0, 0, 0, 0, 64, 73, 0, 0, 0, - 0, 0, 24, 0, 192, 255, 0, 200, 0, 60, 0, 0, 0, 0, 16, 64, - 0, 2, 0, 96, 255, 63, 0, 0, 0, 0, 192, 3, 0, 0, 255, 127, - 48, 0, 1, 0, 0, 0, 12, 12, 0, 0, 3, 0, 0, 0, 1, 0, - 0, 0, 248, 7, 0, 0, 0, 128, 0, 128, 0, 0, 0, 0, 0, 2, - 0, 0, 16, 0, 0, 128, 0, 12, 254, 255, 255, 252, 0, 0, 80, 61, - 32, 0, 0, 0, 0, 0, 0, 192, 191, 223, 255, 7, 0, 252, 0, 0, - 0, 0, 0, 8, 255, 1, 0, 0, 0, 0, 255, 3, 1, 0, 0, 0, - 0, 96, 0, 0, 0, 0, 0, 24, 0, 56, 0, 0, 0, 0, 96, 0, - 0, 0, 112, 15, 255, 7, 0, 0, 49, 0, 0, 0, 255, 255, 255, 255, - 127, 63, 0, 0, 255, 7, 240, 31, 0, 0, 0, 240, 0, 0, 0, 248, - 255, 0, 8, 0, 0, 0, 0, 160, 3, 224, 0, 224, 0, 224, 0, 96, - 0, 0, 255, 255, 255, 0, 255, 255, 255, 255, 255, 127, 0, 0, 0, 124, - 0, 124, 0, 0, 123, 3, 208, 193, 175, 66, 0, 12, 31, 188, 0, 0, - 0, 12, 255, 255, 127, 0, 0, 0, 255, 255, 63, 0, 0, 0, 240, 255, - 255, 255, 207, 255, 255, 255, 63, 255, 255, 255, 255, 227, 255, 253, 3, 0, - 0, 240, 0, 0, 224, 7, 0, 222, 255, 127, 255, 255, 31, 0, 0, 0, - 255, 255, 255, 251, 255, 255, 15, 0, 0, 0, 255, 15, 30, 255, 255, 255, - 1, 0, 193, 224, 0, 0, 195, 255, 15, 0, 0, 0, 0, 252, 255, 255, - 255, 0, 1, 0, 255, 255, 1, 0, 0, 224, 0, 0, 0, 0, 8, 64, - 0, 0, 252, 0, 255, 255, 127, 0, 3, 0, 0, 0, 0, 6, 0, 0, - 0, 15, 192, 3, 0, 0, 240, 0, 
0, 192, 0, 0, 0, 0, 0, 23, - 254, 63, 0, 192, 0, 0, 128, 3, 0, 8, 0, 0, 0, 2, 0, 0, - 0, 0, 252, 255, 0, 0, 0, 48, 255, 255, 247, 255, 127, 15, 0, 0, - 63, 0, 0, 0, 127, 127, 0, 48, 7, 0, 0, 0, 0, 0, 128, 255, - 0, 0, 0, 254, 255, 115, 255, 15, 255, 255, 255, 31, 0, 0, 128, 1, - 0, 0, 255, 1, 0, 1, 0, 0, 0, 0, 127, 0, 0, 0, 0, 30, - 128, 63, 0, 0, 0, 0, 0, 216, 0, 0, 48, 0, 224, 35, 0, 232, - 0, 0, 0, 63, 0, 248, 0, 40, 64, 0, 0, 0, 254, 255, 255, 0, - 14, 0, 0, 0, 255, 31, 0, 0, 62, 0, 0, 0, 0, 0, 31, 0, - 0, 0, 32, 0, 48, 0, 0, 0, 0, 0, 0, 144, 127, 254, 255, 255, - 31, 28, 0, 0, 24, 240, 255, 255, 255, 195, 255, 255, 35, 0, 0, 0, - 2, 0, 0, 8, 8, 0, 0, 0, 0, 0, 128, 7, 0, 224, 223, 255, - 239, 15, 0, 0, 255, 15, 255, 255, 255, 127, 254, 255, 254, 255, 254, 255, - 255, 127, 0, 0, 0, 12, 0, 0, 192, 255, 255, 255, 7, 0, 255, 255, - 255, 255, 255, 15, 255, 1, 3, 0, 255, 255, 7, 0, 255, 31, 127, 0, - 255, 255, 31, 0, 255, 0, 255, 3, 255, 0, 249, 127, 255, 15, 255, 127, - 255, 255, 3, 0, -}; - -/* Posix_Punct: 1645 bytes. */ - -RE_UINT32 re_get_posix_punct(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_posix_punct_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_posix_punct_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_posix_punct_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_posix_punct_stage_4[pos + f] << 5; - pos += code; - value = (re_posix_punct_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Posix_XDigit. 
*/ - -static RE_UINT8 re_posix_xdigit_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_posix_xdigit_stage_2[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_posix_xdigit_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_posix_xdigit_stage_4[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_posix_xdigit_stage_5[] = { - 0, 0, 0, 0, 0, 0, 255, 3, 126, 0, 0, 0, 126, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -/* Posix_XDigit: 97 bytes. */ - -RE_UINT32 re_get_posix_xdigit(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_posix_xdigit_stage_1[f] << 3; - f = code >> 13; - code ^= f << 13; - pos = (RE_UINT32)re_posix_xdigit_stage_2[pos + f] << 3; - f = code >> 10; - code ^= f << 10; - pos = (RE_UINT32)re_posix_xdigit_stage_3[pos + f] << 3; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_posix_xdigit_stage_4[pos + f] << 7; - pos += code; - value = (re_posix_xdigit_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* All_Cases. 
*/ - -static RE_UINT8 re_all_cases_stage_1[] = { - 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 2, 2, 2, 2, 6, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_all_cases_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 6, 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 9, 10, 11, 12, - 6, 13, 6, 6, 14, 6, 6, 6, 6, 6, 6, 6, 15, 16, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 17, 18, 6, 6, 6, 19, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 20, 6, 6, 6, 21, - 6, 6, 6, 6, 22, 6, 6, 6, 6, 6, 6, 6, 23, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 24, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 25, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, -}; - -static RE_UINT8 re_all_cases_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, - 0, 0, 0, 0, 0, 0, 9, 0, 10, 11, 12, 13, 14, 15, 16, 17, - 18, 18, 18, 18, 18, 18, 19, 20, 21, 22, 18, 18, 18, 18, 18, 23, - 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 21, 34, 18, 18, 35, 18, - 18, 18, 18, 18, 36, 18, 37, 38, 39, 18, 40, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 0, 0, 0, 51, 52, - 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 18, 18, 18, 64, 65, - 66, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 18, 18, 18, - 77, 78, 18, 18, 18, 18, 18, 18, 79, 80, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 81, 82, 82, 82, 83, 0, 84, 85, 85, 85, - 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 87, 87, 87, 87, 88, 89, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 91, 92, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 93, 94, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 96, 18, 18, 18, - 18, 18, 97, 98, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 99, 100, 91, 92, 99, 100, 99, 100, 91, 92, 101, 102, 99, 100, 103, 104, - 99, 100, 99, 100, 99, 100, 105, 106, 107, 108, 109, 110, 111, 112, 107, 113, - 0, 0, 0, 0, 114, 115, 116, 0, 0, 117, 0, 0, 118, 118, 119, 119, - 120, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 121, 122, 122, 122, 123, 123, 123, 124, 0, 0, - 82, 82, 82, 82, 82, 83, 85, 85, 85, 85, 85, 86, 125, 126, 127, 128, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 37, 129, 130, 0, - 131, 131, 131, 131, 132, 133, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 18, 134, 18, 18, 18, 97, 0, 0, - 18, 18, 18, 37, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 78, 18, 78, 18, 18, 18, 18, 18, 18, 18, 0, 135, - 18, 136, 51, 18, 18, 137, 138, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 139, 0, 0, 0, 140, 140, - 140, 140, 140, 140, 140, 140, 140, 140, 0, 0, 0, 0, 0, 0, 0, 0, - 141, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 11, 11, 4, 5, 15, 15, 8, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 142, 142, 142, 142, 142, 143, 143, 143, 143, 143, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 142, 142, 142, 142, 144, 143, 143, 143, 
143, 145, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 146, 146, 146, 146, 146, 146, 147, 0, 148, 148, 148, 148, 148, 148, 149, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 11, 11, 11, 11, 15, 15, 15, 15, 0, 0, 0, 0, - 150, 150, 150, 150, 151, 152, 152, 152, 153, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_all_cases_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, - 1, 1, 1, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, - 5, 6, 5, 7, 5, 5, 5, 5, 5, 5, 5, 8, 5, 5, 5, 5, - 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, - 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 11, - 5, 5, 5, 5, 5, 12, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 0, 5, 5, 5, 5, 5, 5, 5, 13, - 14, 15, 14, 15, 14, 15, 14, 15, 16, 17, 14, 15, 14, 15, 14, 15, - 0, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 14, - 15, 0, 14, 15, 14, 15, 14, 15, 18, 14, 15, 14, 15, 14, 15, 19, - 20, 21, 14, 15, 14, 15, 22, 14, 15, 23, 23, 14, 15, 0, 24, 25, - 26, 14, 15, 23, 27, 28, 29, 30, 14, 15, 31, 0, 29, 32, 33, 34, - 14, 15, 14, 15, 14, 15, 35, 14, 15, 35, 0, 0, 14, 15, 35, 14, - 15, 36, 36, 14, 15, 14, 15, 37, 14, 15, 0, 0, 14, 15, 0, 38, - 0, 0, 0, 0, 39, 40, 41, 39, 40, 41, 39, 40, 41, 14, 15, 14, - 15, 14, 15, 14, 15, 42, 14, 15, 0, 39, 40, 41, 14, 15, 43, 44, - 45, 0, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 0, 0, 0, 0, - 0, 0, 46, 14, 15, 47, 48, 49, 49, 14, 15, 50, 51, 52, 14, 15, - 53, 54, 55, 56, 57, 0, 58, 58, 0, 59, 0, 60, 61, 0, 0, 0, - 58, 62, 0, 63, 0, 64, 65, 0, 66, 67, 65, 68, 69, 0, 0, 67, - 0, 70, 71, 0, 0, 72, 0, 0, 0, 0, 0, 0, 0, 73, 0, 0, - 74, 0, 0, 74, 0, 0, 0, 75, 74, 76, 77, 77, 78, 0, 0, 0, - 0, 0, 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, 81, 0, - 0, 0, 0, 0, 0, 82, 0, 0, 14, 15, 14, 15, 0, 0, 14, 15, - 0, 0, 0, 33, 33, 33, 0, 83, 0, 0, 0, 0, 0, 0, 84, 0, - 85, 85, 85, 0, 86, 0, 87, 
87, 88, 1, 89, 1, 1, 90, 1, 1, - 91, 92, 93, 1, 94, 1, 1, 1, 95, 96, 0, 97, 1, 1, 98, 1, - 1, 99, 1, 1, 100, 101, 101, 101, 102, 5, 103, 5, 5, 104, 5, 5, - 105, 106, 107, 5, 108, 5, 5, 5, 109, 110, 111, 112, 5, 5, 113, 5, - 5, 114, 5, 5, 115, 116, 116, 117, 118, 119, 0, 0, 0, 120, 121, 122, - 123, 124, 125, 126, 127, 128, 0, 14, 15, 129, 14, 15, 0, 45, 45, 45, - 130, 130, 130, 130, 130, 130, 130, 130, 1, 1, 131, 1, 132, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 133, 1, 1, 134, 135, 1, 1, 1, 1, 1, - 1, 1, 136, 1, 1, 1, 1, 1, 5, 5, 137, 5, 138, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 139, 5, 5, 140, 141, 5, 5, 5, 5, 5, - 5, 5, 142, 5, 5, 5, 5, 5, 143, 143, 143, 143, 143, 143, 143, 143, - 14, 15, 144, 145, 14, 15, 14, 15, 14, 15, 0, 0, 0, 0, 0, 0, - 0, 0, 14, 15, 14, 15, 14, 15, 146, 14, 15, 14, 15, 14, 15, 14, - 15, 14, 15, 14, 15, 14, 15, 147, 0, 148, 148, 148, 148, 148, 148, 148, - 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 0, - 0, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, - 149, 149, 149, 149, 149, 149, 149, 0, 150, 150, 150, 150, 150, 150, 150, 150, - 150, 150, 150, 150, 150, 150, 0, 150, 0, 0, 0, 0, 0, 150, 0, 0, - 151, 151, 151, 151, 151, 151, 151, 151, 117, 117, 117, 117, 117, 117, 0, 0, - 122, 122, 122, 122, 122, 122, 0, 0, 152, 153, 154, 155, 156, 157, 158, 159, - 160, 0, 0, 0, 0, 0, 0, 0, 0, 161, 0, 0, 0, 162, 0, 0, - 163, 164, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 0, 0, - 0, 0, 0, 165, 0, 0, 166, 0, 117, 117, 117, 117, 117, 117, 117, 117, - 122, 122, 122, 122, 122, 122, 122, 122, 0, 117, 0, 117, 0, 117, 0, 117, - 0, 122, 0, 122, 0, 122, 0, 122, 167, 167, 168, 168, 168, 168, 169, 169, - 170, 170, 171, 171, 172, 172, 0, 0, 117, 117, 0, 173, 0, 0, 0, 0, - 122, 122, 174, 174, 175, 0, 176, 0, 0, 0, 0, 173, 0, 0, 0, 0, - 177, 177, 177, 177, 175, 0, 0, 0, 117, 117, 0, 178, 0, 0, 0, 0, - 122, 122, 179, 179, 0, 0, 0, 0, 117, 117, 0, 180, 0, 125, 0, 0, - 122, 122, 181, 181, 129, 0, 0, 0, 182, 182, 183, 183, 175, 0, 
0, 0, - 0, 0, 0, 0, 0, 0, 184, 0, 0, 0, 185, 186, 0, 0, 0, 0, - 0, 0, 187, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 188, 0, - 189, 189, 189, 189, 189, 189, 189, 189, 190, 190, 190, 190, 190, 190, 190, 190, - 0, 0, 0, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 191, 191, - 191, 191, 191, 191, 191, 191, 191, 191, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 0, 0, 0, 0, 0, 0, 14, 15, 193, 194, 195, 196, 197, 14, - 15, 14, 15, 14, 15, 198, 199, 200, 201, 0, 14, 15, 0, 14, 15, 0, - 0, 0, 0, 0, 0, 0, 202, 202, 0, 0, 0, 14, 15, 14, 15, 0, - 0, 0, 14, 15, 0, 0, 0, 0, 203, 203, 203, 203, 203, 203, 203, 203, - 203, 203, 203, 203, 203, 203, 0, 203, 0, 0, 0, 0, 0, 203, 0, 0, - 14, 15, 204, 205, 14, 15, 14, 15, 0, 14, 15, 14, 15, 206, 14, 15, - 0, 0, 0, 14, 15, 207, 0, 0, 14, 15, 208, 209, 210, 211, 208, 0, - 212, 213, 214, 215, 14, 15, 14, 15, 0, 0, 0, 216, 0, 0, 0, 0, - 217, 217, 217, 217, 217, 217, 217, 217, 0, 0, 0, 0, 0, 14, 15, 0, - 218, 218, 218, 218, 218, 218, 218, 218, 219, 219, 219, 219, 219, 219, 219, 219, - 218, 218, 218, 218, 0, 0, 0, 0, 219, 219, 219, 219, 0, 0, 0, 0, - 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 0, 0, 0, 0, 0, - 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 0, 0, 0, 0, 0, - 220, 220, 220, 220, 220, 220, 220, 220, 220, 220, 221, 221, 221, 221, 221, 221, - 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 0, 0, 0, 0, -}; - -/* All_Cases: 2424 bytes. 
*/ - -static RE_AllCases re_all_cases_table[] = { - {{ 0, 0, 0}}, - {{ 32, 0, 0}}, - {{ 32, 232, 0}}, - {{ 32, 8415, 0}}, - {{ 32, 300, 0}}, - {{ -32, 0, 0}}, - {{ -32, 199, 0}}, - {{ -32, 8383, 0}}, - {{ -32, 268, 0}}, - {{ 743, 775, 0}}, - {{ 32, 8294, 0}}, - {{ 7615, 0, 0}}, - {{ -32, 8262, 0}}, - {{ 121, 0, 0}}, - {{ 1, 0, 0}}, - {{ -1, 0, 0}}, - {{ -199, 0, 0}}, - {{ -232, 0, 0}}, - {{ -121, 0, 0}}, - {{ -300, -268, 0}}, - {{ 195, 0, 0}}, - {{ 210, 0, 0}}, - {{ 206, 0, 0}}, - {{ 205, 0, 0}}, - {{ 79, 0, 0}}, - {{ 202, 0, 0}}, - {{ 203, 0, 0}}, - {{ 207, 0, 0}}, - {{ 97, 0, 0}}, - {{ 211, 0, 0}}, - {{ 209, 0, 0}}, - {{ 163, 0, 0}}, - {{ 213, 0, 0}}, - {{ 130, 0, 0}}, - {{ 214, 0, 0}}, - {{ 218, 0, 0}}, - {{ 217, 0, 0}}, - {{ 219, 0, 0}}, - {{ 56, 0, 0}}, - {{ 1, 2, 0}}, - {{ -1, 1, 0}}, - {{ -2, -1, 0}}, - {{ -79, 0, 0}}, - {{ -97, 0, 0}}, - {{ -56, 0, 0}}, - {{ -130, 0, 0}}, - {{ 10795, 0, 0}}, - {{ -163, 0, 0}}, - {{ 10792, 0, 0}}, - {{ 10815, 0, 0}}, - {{ -195, 0, 0}}, - {{ 69, 0, 0}}, - {{ 71, 0, 0}}, - {{ 10783, 0, 0}}, - {{ 10780, 0, 0}}, - {{ 10782, 0, 0}}, - {{ -210, 0, 0}}, - {{ -206, 0, 0}}, - {{ -205, 0, 0}}, - {{ -202, 0, 0}}, - {{ -203, 0, 0}}, - {{ 42319, 0, 0}}, - {{ 42315, 0, 0}}, - {{ -207, 0, 0}}, - {{ 42280, 0, 0}}, - {{ 42308, 0, 0}}, - {{ -209, 0, 0}}, - {{ -211, 0, 0}}, - {{ 10743, 0, 0}}, - {{ 42305, 0, 0}}, - {{ 10749, 0, 0}}, - {{ -213, 0, 0}}, - {{ -214, 0, 0}}, - {{ 10727, 0, 0}}, - {{ -218, 0, 0}}, - {{ 42282, 0, 0}}, - {{ -69, 0, 0}}, - {{ -217, 0, 0}}, - {{ -71, 0, 0}}, - {{ -219, 0, 0}}, - {{ 42261, 0, 0}}, - {{ 42258, 0, 0}}, - {{ 84, 116, 7289}}, - {{ 116, 0, 0}}, - {{ 38, 0, 0}}, - {{ 37, 0, 0}}, - {{ 64, 0, 0}}, - {{ 63, 0, 0}}, - {{ 7235, 0, 0}}, - {{ 32, 62, 0}}, - {{ 32, 96, 0}}, - {{ 32, 57, 92}}, - {{ -84, 32, 7205}}, - {{ 32, 86, 0}}, - {{ -743, 32, 0}}, - {{ 32, 54, 0}}, - {{ 32, 80, 0}}, - {{ 31, 32, 0}}, - {{ 32, 47, 0}}, - {{ 32, 7549, 0}}, - {{ -38, 0, 0}}, - {{ -37, 0, 0}}, - {{ 7219, 0, 0}}, - {{ -32, 30, 0}}, - 
{{ -32, 64, 0}}, - {{ -32, 25, 60}}, - {{ -116, -32, 7173}}, - {{ -32, 54, 0}}, - {{ -775, -32, 0}}, - {{ -32, 22, 0}}, - {{ -32, 48, 0}}, - {{ -31, 1, 0}}, - {{ -32, -1, 0}}, - {{ -32, 15, 0}}, - {{ -32, 7517, 0}}, - {{ -64, 0, 0}}, - {{ -63, 0, 0}}, - {{ 8, 0, 0}}, - {{ -62, -30, 0}}, - {{ -57, -25, 35}}, - {{ -47, -15, 0}}, - {{ -54, -22, 0}}, - {{ -8, 0, 0}}, - {{ -86, -54, 0}}, - {{ -80, -48, 0}}, - {{ 7, 0, 0}}, - {{ -116, 0, 0}}, - {{ -92, -60, -35}}, - {{ -96, -64, 0}}, - {{ -7, 0, 0}}, - {{ 80, 0, 0}}, - {{ 32, 6254, 0}}, - {{ 32, 6253, 0}}, - {{ 32, 6244, 0}}, - {{ 32, 6242, 0}}, - {{ 32, 6242, 6243}}, - {{ 32, 6236, 0}}, - {{ -32, 6222, 0}}, - {{ -32, 6221, 0}}, - {{ -32, 6212, 0}}, - {{ -32, 6210, 0}}, - {{ -32, 6210, 6211}}, - {{ -32, 6204, 0}}, - {{ -80, 0, 0}}, - {{ 1, 6181, 0}}, - {{ -1, 6180, 0}}, - {{ 15, 0, 0}}, - {{ -15, 0, 0}}, - {{ 48, 0, 0}}, - {{ -48, 0, 0}}, - {{ 7264, 0, 0}}, - {{ 38864, 0, 0}}, - {{ -6254, -6222, 0}}, - {{ -6253, -6221, 0}}, - {{ -6244, -6212, 0}}, - {{ -6242, -6210, 0}}, - {{ -6242, -6210, 1}}, - {{ -6243, -6211, -1}}, - {{ -6236, -6204, 0}}, - {{ -6181, -6180, 0}}, - {{ 35266, 35267, 0}}, - {{ 35332, 0, 0}}, - {{ 3814, 0, 0}}, - {{ 1, 59, 0}}, - {{ -1, 58, 0}}, - {{ -59, -58, 0}}, - {{ -7615, 0, 0}}, - {{ 74, 0, 0}}, - {{ 86, 0, 0}}, - {{ 100, 0, 0}}, - {{ 128, 0, 0}}, - {{ 112, 0, 0}}, - {{ 126, 0, 0}}, - {{ 9, 0, 0}}, - {{ -74, 0, 0}}, - {{ -9, 0, 0}}, - {{ -7289, -7205, -7173}}, - {{ -86, 0, 0}}, - {{ -7235, 0, 0}}, - {{ -100, 0, 0}}, - {{ -7219, 0, 0}}, - {{ -112, 0, 0}}, - {{ -128, 0, 0}}, - {{ -126, 0, 0}}, - {{ -7549, -7517, 0}}, - {{ -8415, -8383, 0}}, - {{ -8294, -8262, 0}}, - {{ 28, 0, 0}}, - {{ -28, 0, 0}}, - {{ 16, 0, 0}}, - {{ -16, 0, 0}}, - {{ 26, 0, 0}}, - {{ -26, 0, 0}}, - {{-10743, 0, 0}}, - {{ -3814, 0, 0}}, - {{-10727, 0, 0}}, - {{-10795, 0, 0}}, - {{-10792, 0, 0}}, - {{-10780, 0, 0}}, - {{-10749, 0, 0}}, - {{-10783, 0, 0}}, - {{-10782, 0, 0}}, - {{-10815, 0, 0}}, - {{ -7264, 0, 0}}, - {{-35266, 1, 
0}}, - {{-35267, -1, 0}}, - {{-35332, 0, 0}}, - {{-42280, 0, 0}}, - {{-42308, 0, 0}}, - {{-42319, 0, 0}}, - {{-42315, 0, 0}}, - {{-42305, 0, 0}}, - {{-42258, 0, 0}}, - {{-42282, 0, 0}}, - {{-42261, 0, 0}}, - {{ 928, 0, 0}}, - {{ -928, 0, 0}}, - {{-38864, 0, 0}}, - {{ 40, 0, 0}}, - {{ -40, 0, 0}}, - {{ 34, 0, 0}}, - {{ -34, 0, 0}}, -}; - -/* All_Cases: 2664 bytes. */ - -int re_get_all_cases(RE_UINT32 ch, RE_UINT32* codepoints) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - RE_AllCases* all_cases; - int count; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_all_cases_stage_1[f] << 5; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_all_cases_stage_2[pos + f] << 5; - f = code >> 3; - code ^= f << 3; - pos = (RE_UINT32)re_all_cases_stage_3[pos + f] << 3; - value = re_all_cases_stage_4[pos + code]; - - all_cases = &re_all_cases_table[value]; - - codepoints[0] = ch; - count = 1; - - while (count < RE_MAX_CASES && all_cases->diffs[count - 1] != 0) { - codepoints[count] = (RE_UINT32)((RE_INT32)ch + all_cases->diffs[count - - 1]); - ++count; - } - - return count; -} - -/* Simple_Case_Folding. 
*/ - -static RE_UINT8 re_simple_case_folding_stage_1[] = { - 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 2, 2, 2, 2, 6, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_simple_case_folding_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 6, 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 9, 6, 10, 11, - 6, 12, 6, 6, 13, 6, 6, 6, 6, 6, 6, 6, 14, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 15, 16, 6, 6, 6, 17, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 18, - 6, 6, 6, 6, 19, 6, 6, 6, 6, 6, 6, 6, 20, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 21, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 22, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, -}; - -static RE_UINT8 re_simple_case_folding_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 3, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 4, 0, 2, 2, 5, 5, 0, 0, 0, 0, - 6, 6, 6, 6, 6, 6, 7, 8, 8, 7, 6, 6, 6, 6, 6, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 8, 20, 6, 6, 21, 6, - 6, 6, 6, 6, 22, 6, 23, 24, 25, 6, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 26, 0, 0, 0, 0, 0, 27, 28, - 29, 30, 1, 2, 31, 32, 0, 0, 33, 34, 35, 6, 6, 6, 36, 37, - 38, 38, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, - 39, 7, 6, 6, 6, 6, 6, 6, 40, 41, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 42, 43, 43, 43, 44, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 45, 45, 45, 45, 46, 47, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 49, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 51, 52, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 0, 53, 0, 48, 0, 53, 0, 53, 0, 48, 0, 54, 0, 53, 0, 0, - 0, 53, 0, 53, 0, 53, 0, 55, 0, 56, 0, 57, 0, 58, 0, 59, - 0, 0, 0, 0, 60, 61, 62, 0, 0, 0, 0, 0, 63, 63, 0, 0, - 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 65, 66, 66, 66, 0, 0, 0, 0, 0, 0, - 43, 43, 43, 43, 43, 44, 0, 0, 0, 0, 0, 0, 67, 68, 69, 70, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 23, 71, 33, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 51, 0, 0, - 6, 6, 6, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 7, 6, 7, 6, 6, 6, 6, 6, 6, 6, 0, 72, - 6, 73, 27, 6, 6, 74, 75, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76, 76, - 76, 76, 76, 76, 76, 76, 76, 76, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 2, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 77, 77, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 77, 77, 77, 77, 78, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 79, 79, 79, 79, 79, 79, 80, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 81, 81, 81, 81, 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_simple_case_folding_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, - 3, 0, 3, 0, 3, 0, 3, 0, 0, 0, 3, 0, 3, 0, 
3, 0, - 0, 3, 0, 3, 0, 3, 0, 3, 4, 3, 0, 3, 0, 3, 0, 5, - 0, 6, 3, 0, 3, 0, 7, 3, 0, 8, 8, 3, 0, 0, 9, 10, - 11, 3, 0, 8, 12, 0, 13, 14, 3, 0, 0, 0, 13, 15, 0, 16, - 3, 0, 3, 0, 3, 0, 17, 3, 0, 17, 0, 0, 3, 0, 17, 3, - 0, 18, 18, 3, 0, 3, 0, 19, 3, 0, 0, 0, 3, 0, 0, 0, - 0, 0, 0, 0, 20, 3, 0, 20, 3, 0, 20, 3, 0, 3, 0, 3, - 0, 3, 0, 3, 0, 0, 3, 0, 0, 20, 3, 0, 3, 0, 21, 22, - 23, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 0, 0, 0, 0, - 0, 0, 24, 3, 0, 25, 26, 0, 0, 3, 0, 27, 28, 29, 3, 0, - 0, 0, 0, 0, 0, 30, 0, 0, 3, 0, 3, 0, 0, 0, 3, 0, - 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 31, 0, - 32, 32, 32, 0, 33, 0, 34, 34, 1, 1, 0, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 35, 36, 37, 0, 0, 0, 38, 39, 0, - 40, 41, 0, 0, 42, 43, 0, 3, 0, 44, 3, 0, 0, 23, 23, 23, - 45, 45, 45, 45, 45, 45, 45, 45, 3, 0, 0, 0, 0, 0, 0, 0, - 46, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 0, - 0, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 0, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 0, 48, 0, 0, 0, 0, 0, 48, 0, 0, - 49, 49, 49, 49, 49, 49, 0, 0, 50, 51, 52, 53, 53, 54, 55, 56, - 57, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 3, 0, 0, 0, - 0, 0, 0, 58, 0, 0, 59, 0, 49, 49, 49, 49, 49, 49, 49, 49, - 0, 49, 0, 49, 0, 49, 0, 49, 49, 49, 60, 60, 61, 0, 62, 0, - 63, 63, 63, 63, 61, 0, 0, 0, 49, 49, 64, 64, 0, 0, 0, 0, - 49, 49, 65, 65, 44, 0, 0, 0, 66, 66, 67, 67, 61, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 68, 0, 0, 0, 69, 70, 0, 0, 0, 0, - 0, 0, 71, 0, 0, 0, 0, 0, 72, 72, 72, 72, 72, 72, 72, 72, - 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 73, 73, - 73, 73, 73, 73, 73, 73, 73, 73, 3, 0, 74, 75, 76, 0, 0, 3, - 0, 3, 0, 3, 0, 77, 78, 79, 80, 0, 3, 0, 0, 3, 0, 0, - 0, 0, 0, 0, 0, 0, 81, 81, 0, 0, 0, 3, 0, 3, 0, 0, - 0, 3, 0, 3, 0, 82, 3, 0, 0, 0, 0, 3, 0, 83, 0, 0, - 3, 0, 84, 85, 86, 87, 84, 0, 88, 89, 90, 91, 3, 0, 3, 0, - 92, 92, 92, 92, 92, 92, 92, 92, 93, 93, 93, 93, 93, 93, 93, 93, - 93, 93, 93, 93, 0, 0, 0, 0, 33, 
33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 0, 0, 0, 0, 0, 94, 94, 94, 94, 94, 94, 94, 94, - 94, 94, 0, 0, 0, 0, 0, 0, -}; - -/* Simple_Case_Folding: 1760 bytes. */ - -static RE_INT32 re_simple_case_folding_table[] = { - 0, - 32, - 775, - 1, - -121, - -268, - 210, - 206, - 205, - 79, - 202, - 203, - 207, - 211, - 209, - 213, - 214, - 218, - 217, - 219, - 2, - -97, - -56, - -130, - 10795, - -163, - 10792, - -195, - 69, - 71, - 116, - 38, - 37, - 64, - 63, - 8, - -30, - -25, - -15, - -22, - -54, - -48, - -60, - -64, - -7, - 80, - 15, - 48, - 7264, - -8, - -6222, - -6221, - -6212, - -6210, - -6211, - -6204, - -6180, - 35267, - -58, - -7615, - -74, - -9, - -7173, - -86, - -100, - -112, - -128, - -126, - -7517, - -8383, - -8262, - 28, - 16, - 26, - -10743, - -3814, - -10727, - -10780, - -10749, - -10783, - -10782, - -10815, - -35332, - -42280, - -42308, - -42319, - -42315, - -42305, - -42258, - -42282, - -42261, - 928, - -38864, - 40, - 34, -}; - -/* Simple_Case_Folding: 380 bytes. */ - -RE_UINT32 re_get_simple_case_folding(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - RE_INT32 diff; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_simple_case_folding_stage_1[f] << 5; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_simple_case_folding_stage_2[pos + f] << 5; - f = code >> 3; - code ^= f << 3; - pos = (RE_UINT32)re_simple_case_folding_stage_3[pos + f] << 3; - value = re_simple_case_folding_stage_4[pos + code]; - - diff = re_simple_case_folding_table[value]; - - return (RE_UINT32)((RE_INT32)ch + diff); -} - -/* Full_Case_Folding. 
*/ - -static RE_UINT8 re_full_case_folding_stage_1[] = { - 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 2, 2, 2, 2, 6, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_full_case_folding_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 6, 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 9, 6, 10, 11, - 6, 12, 6, 6, 13, 6, 6, 6, 6, 6, 6, 6, 14, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 15, 16, 6, 6, 6, 17, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 18, 6, 6, 6, 19, - 6, 6, 6, 6, 20, 6, 6, 6, 6, 6, 6, 6, 21, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 22, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 23, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, -}; - -static RE_UINT8 re_full_case_folding_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 3, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 4, 0, 2, 2, 5, 6, 0, 0, 0, 0, - 7, 7, 7, 7, 7, 7, 8, 9, 9, 10, 7, 7, 7, 7, 7, 11, - 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 9, 22, 7, 7, 23, 7, - 7, 7, 7, 7, 24, 7, 25, 26, 27, 7, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 28, 0, 0, 0, 0, 0, 29, 30, - 31, 32, 33, 2, 34, 35, 36, 0, 37, 38, 39, 7, 7, 7, 40, 41, - 42, 42, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, - 43, 44, 7, 7, 7, 7, 7, 7, 45, 46, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 47, 48, 48, 48, 49, 0, 0, 0, 0, 0, - 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 51, 51, 51, 51, 52, 53, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 54, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 55, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 57, 58, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 0, 59, 0, 54, 0, 59, 0, 59, 0, 54, 60, 61, 0, 59, 0, 0, - 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, - 0, 0, 0, 0, 78, 79, 80, 0, 0, 0, 0, 0, 81, 81, 0, 0, - 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 83, 84, 84, 84, 0, 0, 0, 0, 0, 0, - 48, 48, 48, 48, 48, 49, 0, 0, 0, 0, 0, 0, 85, 86, 87, 88, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 25, 89, 37, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 90, 0, 0, - 7, 7, 7, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 44, 7, 44, 7, 7, 7, 7, 7, 7, 7, 0, 91, - 7, 92, 29, 7, 7, 93, 94, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 95, - 95, 95, 95, 95, 95, 95, 95, 95, 0, 0, 0, 0, 0, 0, 0, 0, - 96, 0, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 2, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 98, 98, 98, 98, 98, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 98, 98, 98, 98, 99, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 100, 100, 100, 100, 100, 100, 101, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 102, 102, 102, 102, 103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_full_case_folding_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, - 1, 1, 1, 1, 1, 1, 1, 3, 4, 0, 4, 0, 4, 0, 4, 0, - 5, 0, 4, 0, 4, 0, 4, 0, 0, 4, 0, 4, 0, 4, 0, 4, - 0, 6, 4, 0, 4, 0, 4, 0, 7, 4, 0, 4, 0, 4, 0, 8, - 0, 9, 4, 0, 4, 0, 10, 4, 0, 11, 11, 4, 0, 0, 12, 13, - 14, 4, 0, 11, 15, 0, 16, 17, 4, 0, 0, 0, 16, 18, 0, 19, - 4, 0, 4, 0, 4, 0, 20, 4, 0, 20, 0, 0, 4, 0, 20, 4, - 0, 21, 21, 4, 0, 4, 0, 22, 4, 0, 0, 0, 4, 0, 0, 0, - 0, 0, 0, 0, 23, 4, 0, 23, 4, 0, 23, 4, 0, 4, 0, 4, - 0, 4, 0, 4, 0, 0, 4, 0, 24, 23, 4, 0, 4, 0, 25, 26, - 27, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 0, 0, 0, 0, - 0, 0, 28, 4, 0, 29, 30, 0, 0, 4, 0, 31, 32, 33, 4, 0, - 0, 0, 0, 0, 0, 34, 0, 0, 4, 0, 4, 0, 0, 0, 4, 0, - 0, 0, 0, 0, 0, 0, 0, 34, 0, 0, 0, 0, 0, 0, 35, 0, - 36, 36, 36, 0, 37, 0, 38, 38, 39, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, - 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 0, 0, 0, 44, 45, 0, - 46, 47, 0, 0, 48, 49, 0, 4, 0, 50, 4, 0, 0, 27, 27, 27, - 51, 51, 51, 51, 51, 51, 51, 51, 4, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 4, 0, 4, 0, 4, 0, 52, 4, 0, 4, 0, 4, 0, 4, - 0, 4, 0, 4, 0, 4, 0, 0, 0, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 0, - 0, 0, 0, 0, 0, 0, 0, 54, 55, 55, 55, 55, 55, 55, 55, 55, - 55, 55, 55, 55, 55, 55, 0, 55, 0, 0, 0, 0, 0, 55, 0, 0, - 56, 56, 56, 56, 56, 56, 0, 0, 57, 58, 59, 60, 60, 61, 62, 63, - 64, 0, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 4, 0, 65, 66, - 67, 68, 69, 70, 0, 0, 71, 0, 56, 56, 56, 56, 56, 56, 56, 56, - 72, 0, 73, 0, 74, 0, 75, 0, 0, 56, 0, 56, 0, 56, 0, 56, - 76, 76, 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77, 77, 77, 77, - 78, 78, 78, 78, 78, 78, 78, 78, 79, 79, 79, 79, 79, 79, 79, 79, - 80, 80, 80, 80, 80, 80, 80, 80, 81, 81, 81, 81, 81, 81, 81, 81, - 0, 0, 82, 83, 84, 0, 85, 86, 56, 56, 87, 87, 88, 0, 89, 0, - 0, 0, 90, 91, 92, 0, 93, 94, 95, 95, 95, 95, 96, 0, 0, 0, - 0, 0, 97, 98, 0, 0, 99, 100, 56, 56, 101, 101, 0, 0, 0, 
0, - 0, 0, 102, 103, 104, 0, 105, 106, 56, 56, 107, 107, 50, 0, 0, 0, - 0, 0, 108, 109, 110, 0, 111, 112, 113, 113, 114, 114, 115, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 116, 0, 0, 0, 117, 118, 0, 0, 0, 0, - 0, 0, 119, 0, 0, 0, 0, 0, 120, 120, 120, 120, 120, 120, 120, 120, - 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 121, - 121, 121, 121, 121, 121, 121, 121, 121, 4, 0, 122, 123, 124, 0, 0, 4, - 0, 4, 0, 4, 0, 125, 126, 127, 128, 0, 4, 0, 0, 4, 0, 0, - 0, 0, 0, 0, 0, 0, 129, 129, 0, 0, 0, 4, 0, 4, 0, 0, - 4, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 4, 0, 130, 4, 0, - 0, 0, 0, 4, 0, 131, 0, 0, 4, 0, 132, 133, 134, 135, 132, 0, - 136, 137, 138, 139, 4, 0, 4, 0, 140, 140, 140, 140, 140, 140, 140, 140, - 141, 142, 143, 144, 145, 146, 147, 0, 0, 0, 0, 148, 149, 150, 151, 152, - 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 0, 0, 0, 0, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 0, 0, 0, 0, 0, - 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 0, 0, 0, 0, 0, 0, -}; - -/* Full_Case_Folding: 1960 bytes. 
*/ - -static RE_FullCaseFolding re_full_case_folding_table[] = { - { 0, { 0, 0}}, - { 32, { 0, 0}}, - { 775, { 0, 0}}, - { -108, { 115, 0}}, - { 1, { 0, 0}}, - { -199, { 775, 0}}, - { 371, { 110, 0}}, - { -121, { 0, 0}}, - { -268, { 0, 0}}, - { 210, { 0, 0}}, - { 206, { 0, 0}}, - { 205, { 0, 0}}, - { 79, { 0, 0}}, - { 202, { 0, 0}}, - { 203, { 0, 0}}, - { 207, { 0, 0}}, - { 211, { 0, 0}}, - { 209, { 0, 0}}, - { 213, { 0, 0}}, - { 214, { 0, 0}}, - { 218, { 0, 0}}, - { 217, { 0, 0}}, - { 219, { 0, 0}}, - { 2, { 0, 0}}, - { -390, { 780, 0}}, - { -97, { 0, 0}}, - { -56, { 0, 0}}, - { -130, { 0, 0}}, - { 10795, { 0, 0}}, - { -163, { 0, 0}}, - { 10792, { 0, 0}}, - { -195, { 0, 0}}, - { 69, { 0, 0}}, - { 71, { 0, 0}}, - { 116, { 0, 0}}, - { 38, { 0, 0}}, - { 37, { 0, 0}}, - { 64, { 0, 0}}, - { 63, { 0, 0}}, - { 41, { 776, 769}}, - { 21, { 776, 769}}, - { 8, { 0, 0}}, - { -30, { 0, 0}}, - { -25, { 0, 0}}, - { -15, { 0, 0}}, - { -22, { 0, 0}}, - { -54, { 0, 0}}, - { -48, { 0, 0}}, - { -60, { 0, 0}}, - { -64, { 0, 0}}, - { -7, { 0, 0}}, - { 80, { 0, 0}}, - { 15, { 0, 0}}, - { 48, { 0, 0}}, - { -34, {1410, 0}}, - { 7264, { 0, 0}}, - { -8, { 0, 0}}, - { -6222, { 0, 0}}, - { -6221, { 0, 0}}, - { -6212, { 0, 0}}, - { -6210, { 0, 0}}, - { -6211, { 0, 0}}, - { -6204, { 0, 0}}, - { -6180, { 0, 0}}, - { 35267, { 0, 0}}, - { -7726, { 817, 0}}, - { -7715, { 776, 0}}, - { -7713, { 778, 0}}, - { -7712, { 778, 0}}, - { -7737, { 702, 0}}, - { -58, { 0, 0}}, - { -7723, { 115, 0}}, - { -7051, { 787, 0}}, - { -7053, { 787, 768}}, - { -7055, { 787, 769}}, - { -7057, { 787, 834}}, - { -128, { 953, 0}}, - { -136, { 953, 0}}, - { -112, { 953, 0}}, - { -120, { 953, 0}}, - { -64, { 953, 0}}, - { -72, { 953, 0}}, - { -66, { 953, 0}}, - { -7170, { 953, 0}}, - { -7176, { 953, 0}}, - { -7173, { 834, 0}}, - { -7174, { 834, 953}}, - { -74, { 0, 0}}, - { -7179, { 953, 0}}, - { -7173, { 0, 0}}, - { -78, { 953, 0}}, - { -7180, { 953, 0}}, - { -7190, { 953, 0}}, - { -7183, { 834, 0}}, - { -7184, { 834, 
953}}, - { -86, { 0, 0}}, - { -7189, { 953, 0}}, - { -7193, { 776, 768}}, - { -7194, { 776, 769}}, - { -7197, { 834, 0}}, - { -7198, { 776, 834}}, - { -100, { 0, 0}}, - { -7197, { 776, 768}}, - { -7198, { 776, 769}}, - { -7203, { 787, 0}}, - { -7201, { 834, 0}}, - { -7202, { 776, 834}}, - { -112, { 0, 0}}, - { -118, { 953, 0}}, - { -7210, { 953, 0}}, - { -7206, { 953, 0}}, - { -7213, { 834, 0}}, - { -7214, { 834, 953}}, - { -128, { 0, 0}}, - { -126, { 0, 0}}, - { -7219, { 953, 0}}, - { -7517, { 0, 0}}, - { -8383, { 0, 0}}, - { -8262, { 0, 0}}, - { 28, { 0, 0}}, - { 16, { 0, 0}}, - { 26, { 0, 0}}, - {-10743, { 0, 0}}, - { -3814, { 0, 0}}, - {-10727, { 0, 0}}, - {-10780, { 0, 0}}, - {-10749, { 0, 0}}, - {-10783, { 0, 0}}, - {-10782, { 0, 0}}, - {-10815, { 0, 0}}, - {-35332, { 0, 0}}, - {-42280, { 0, 0}}, - {-42308, { 0, 0}}, - {-42319, { 0, 0}}, - {-42315, { 0, 0}}, - {-42305, { 0, 0}}, - {-42258, { 0, 0}}, - {-42282, { 0, 0}}, - {-42261, { 0, 0}}, - { 928, { 0, 0}}, - {-38864, { 0, 0}}, - {-64154, { 102, 0}}, - {-64155, { 105, 0}}, - {-64156, { 108, 0}}, - {-64157, { 102, 105}}, - {-64158, { 102, 108}}, - {-64146, { 116, 0}}, - {-64147, { 116, 0}}, - {-62879, {1398, 0}}, - {-62880, {1381, 0}}, - {-62881, {1387, 0}}, - {-62872, {1398, 0}}, - {-62883, {1389, 0}}, - { 40, { 0, 0}}, - { 34, { 0, 0}}, -}; - -/* Full_Case_Folding: 1240 bytes. 
*/ - -int re_get_full_case_folding(RE_UINT32 ch, RE_UINT32* codepoints) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - RE_FullCaseFolding* case_folding; - int count; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_full_case_folding_stage_1[f] << 5; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_full_case_folding_stage_2[pos + f] << 5; - f = code >> 3; - code ^= f << 3; - pos = (RE_UINT32)re_full_case_folding_stage_3[pos + f] << 3; - value = re_full_case_folding_stage_4[pos + code]; - - case_folding = &re_full_case_folding_table[value]; - - codepoints[0] = (RE_UINT32)((RE_INT32)ch + case_folding->diff); - count = 1; - - while (count < RE_MAX_FOLDED && case_folding->codepoints[count - 1] != 0) { - codepoints[count] = case_folding->codepoints[count - 1]; - ++count; - } - - return count; -} - -/* Property function table. */ - -RE_GetPropertyFunc re_get_property[] = { - re_get_general_category, - re_get_block, - re_get_script, - re_get_word_break, - re_get_grapheme_cluster_break, - re_get_sentence_break, - re_get_math, - re_get_alphabetic, - re_get_lowercase, - re_get_uppercase, - re_get_cased, - re_get_case_ignorable, - re_get_changes_when_lowercased, - re_get_changes_when_uppercased, - re_get_changes_when_titlecased, - re_get_changes_when_casefolded, - re_get_changes_when_casemapped, - re_get_id_start, - re_get_id_continue, - re_get_xid_start, - re_get_xid_continue, - re_get_default_ignorable_code_point, - re_get_grapheme_extend, - re_get_grapheme_base, - re_get_grapheme_link, - re_get_white_space, - re_get_bidi_control, - re_get_join_control, - re_get_dash, - re_get_hyphen, - re_get_quotation_mark, - re_get_terminal_punctuation, - re_get_other_math, - re_get_hex_digit, - re_get_ascii_hex_digit, - re_get_other_alphabetic, - re_get_ideographic, - re_get_diacritic, - re_get_extender, - re_get_other_lowercase, - re_get_other_uppercase, - re_get_noncharacter_code_point, - re_get_other_grapheme_extend, - 
re_get_ids_binary_operator, - re_get_ids_trinary_operator, - re_get_radical, - re_get_unified_ideograph, - re_get_other_default_ignorable_code_point, - re_get_deprecated, - re_get_soft_dotted, - re_get_logical_order_exception, - re_get_other_id_start, - re_get_other_id_continue, - re_get_sentence_terminal, - re_get_variation_selector, - re_get_pattern_white_space, - re_get_pattern_syntax, - re_get_prepended_concatenation_mark, - re_get_hangul_syllable_type, - re_get_bidi_class, - re_get_canonical_combining_class, - re_get_decomposition_type, - re_get_east_asian_width, - re_get_joining_group, - re_get_joining_type, - re_get_line_break, - re_get_numeric_type, - re_get_numeric_value, - re_get_bidi_mirrored, - re_get_indic_positional_category, - re_get_indic_syllabic_category, - re_get_alphanumeric, - re_get_any, - re_get_blank, - re_get_graph, - re_get_print, - re_get_word, - re_get_xdigit, - re_get_posix_digit, - re_get_posix_alnum, - re_get_posix_punct, - re_get_posix_xdigit, -}; diff --git a/src/regex/_regex_unicode.h b/src/regex/_regex_unicode.h deleted file mode 100644 index 98d214f4b6..0000000000 --- a/src/regex/_regex_unicode.h +++ /dev/null @@ -1,237 +0,0 @@ -typedef unsigned char RE_UINT8; -typedef signed char RE_INT8; -typedef unsigned short RE_UINT16; -typedef signed short RE_INT16; -typedef unsigned int RE_UINT32; -typedef signed int RE_INT32; - -typedef unsigned char BOOL; -enum {FALSE, TRUE}; - -#define RE_ASCII_MAX 0x7F -#define RE_LOCALE_MAX 0xFF -#define RE_UNICODE_MAX 0x10FFFF - -#define RE_MAX_CASES 4 -#define RE_MAX_FOLDED 3 - -typedef struct RE_Property { - RE_UINT16 name; - RE_UINT8 id; - RE_UINT8 value_set; -} RE_Property; - -typedef struct RE_PropertyValue { - RE_UINT16 name; - RE_UINT8 value_set; - RE_UINT16 id; -} RE_PropertyValue; - -typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch); - -#define RE_PROP_GC 0x0 -#define RE_PROP_CASED 0xA -#define RE_PROP_UPPERCASE 0x9 -#define RE_PROP_LOWERCASE 0x8 - -#define RE_PROP_C 30 -#define 
RE_PROP_L 31 -#define RE_PROP_M 32 -#define RE_PROP_N 33 -#define RE_PROP_P 34 -#define RE_PROP_S 35 -#define RE_PROP_Z 36 -#define RE_PROP_ASSIGNED 38 -#define RE_PROP_CASEDLETTER 37 - -#define RE_PROP_CN 0 -#define RE_PROP_LU 1 -#define RE_PROP_LL 2 -#define RE_PROP_LT 3 -#define RE_PROP_LM 4 -#define RE_PROP_LO 5 -#define RE_PROP_MN 6 -#define RE_PROP_ME 7 -#define RE_PROP_MC 8 -#define RE_PROP_ND 9 -#define RE_PROP_NL 10 -#define RE_PROP_NO 11 -#define RE_PROP_ZS 12 -#define RE_PROP_ZL 13 -#define RE_PROP_ZP 14 -#define RE_PROP_CC 15 -#define RE_PROP_CF 16 -#define RE_PROP_CO 17 -#define RE_PROP_CS 18 -#define RE_PROP_PD 19 -#define RE_PROP_PS 20 -#define RE_PROP_PE 21 -#define RE_PROP_PC 22 -#define RE_PROP_PO 23 -#define RE_PROP_SM 24 -#define RE_PROP_SC 25 -#define RE_PROP_SK 26 -#define RE_PROP_SO 27 -#define RE_PROP_PI 28 -#define RE_PROP_PF 29 - -#define RE_PROP_C_MASK 0x00078001 -#define RE_PROP_L_MASK 0x0000003E -#define RE_PROP_M_MASK 0x000001C0 -#define RE_PROP_N_MASK 0x00000E00 -#define RE_PROP_P_MASK 0x30F80000 -#define RE_PROP_S_MASK 0x0F000000 -#define RE_PROP_Z_MASK 0x00007000 - -#define RE_PROP_ALNUM 0x470001 -#define RE_PROP_ALPHA 0x070001 -#define RE_PROP_ANY 0x480001 -#define RE_PROP_ASCII 0x010001 -#define RE_PROP_BLANK 0x490001 -#define RE_PROP_CNTRL 0x00000F -#define RE_PROP_DIGIT 0x000009 -#define RE_PROP_GRAPH 0x4A0001 -#define RE_PROP_LOWER 0x080001 -#define RE_PROP_PRINT 0x4B0001 -#define RE_PROP_SPACE 0x190001 -#define RE_PROP_UPPER 0x090001 -#define RE_PROP_WORD 0x4C0001 -#define RE_PROP_XDIGIT 0x4D0001 -#define RE_PROP_POSIX_ALNUM 0x4F0001 -#define RE_PROP_POSIX_DIGIT 0x4E0001 -#define RE_PROP_POSIX_PUNCT 0x500001 -#define RE_PROP_POSIX_XDIGIT 0x510001 - -#define RE_BREAK_OTHER 0 -#define RE_BREAK_DOUBLEQUOTE 1 -#define RE_BREAK_SINGLEQUOTE 2 -#define RE_BREAK_HEBREWLETTER 3 -#define RE_BREAK_CR 4 -#define RE_BREAK_LF 5 -#define RE_BREAK_NEWLINE 6 -#define RE_BREAK_EXTEND 7 -#define RE_BREAK_REGIONALINDICATOR 8 -#define 
RE_BREAK_FORMAT 9 -#define RE_BREAK_KATAKANA 10 -#define RE_BREAK_ALETTER 11 -#define RE_BREAK_MIDLETTER 12 -#define RE_BREAK_MIDNUM 13 -#define RE_BREAK_MIDNUMLET 14 -#define RE_BREAK_NUMERIC 15 -#define RE_BREAK_EXTENDNUMLET 16 -#define RE_BREAK_EBASE 17 -#define RE_BREAK_EMODIFIER 18 -#define RE_BREAK_ZWJ 19 -#define RE_BREAK_GLUEAFTERZWJ 20 -#define RE_BREAK_EBASEGAZ 21 - -#define RE_GBREAK_OTHER 0 -#define RE_GBREAK_PREPEND 1 -#define RE_GBREAK_CR 2 -#define RE_GBREAK_LF 3 -#define RE_GBREAK_CONTROL 4 -#define RE_GBREAK_EXTEND 5 -#define RE_GBREAK_REGIONALINDICATOR 6 -#define RE_GBREAK_SPACINGMARK 7 -#define RE_GBREAK_L 8 -#define RE_GBREAK_V 9 -#define RE_GBREAK_T 10 -#define RE_GBREAK_LV 11 -#define RE_GBREAK_LVT 12 -#define RE_GBREAK_EBASE 13 -#define RE_GBREAK_EMODIFIER 14 -#define RE_GBREAK_ZWJ 15 -#define RE_GBREAK_GLUEAFTERZWJ 16 -#define RE_GBREAK_EBASEGAZ 17 - -extern char* re_strings[1336]; -extern RE_Property re_properties[150]; -extern RE_PropertyValue re_property_values[1469]; -extern RE_UINT16 re_expand_on_folding[104]; -extern RE_GetPropertyFunc re_get_property[82]; - -RE_UINT32 re_get_general_category(RE_UINT32 ch); -RE_UINT32 re_get_block(RE_UINT32 ch); -RE_UINT32 re_get_script(RE_UINT32 ch); -RE_UINT32 re_get_word_break(RE_UINT32 ch); -RE_UINT32 re_get_grapheme_cluster_break(RE_UINT32 ch); -RE_UINT32 re_get_sentence_break(RE_UINT32 ch); -RE_UINT32 re_get_math(RE_UINT32 ch); -RE_UINT32 re_get_alphabetic(RE_UINT32 ch); -RE_UINT32 re_get_lowercase(RE_UINT32 ch); -RE_UINT32 re_get_uppercase(RE_UINT32 ch); -RE_UINT32 re_get_cased(RE_UINT32 ch); -RE_UINT32 re_get_case_ignorable(RE_UINT32 ch); -RE_UINT32 re_get_changes_when_lowercased(RE_UINT32 ch); -RE_UINT32 re_get_changes_when_uppercased(RE_UINT32 ch); -RE_UINT32 re_get_changes_when_titlecased(RE_UINT32 ch); -RE_UINT32 re_get_changes_when_casefolded(RE_UINT32 ch); -RE_UINT32 re_get_changes_when_casemapped(RE_UINT32 ch); -RE_UINT32 re_get_id_start(RE_UINT32 ch); -RE_UINT32 
re_get_id_continue(RE_UINT32 ch); -RE_UINT32 re_get_xid_start(RE_UINT32 ch); -RE_UINT32 re_get_xid_continue(RE_UINT32 ch); -RE_UINT32 re_get_default_ignorable_code_point(RE_UINT32 ch); -RE_UINT32 re_get_grapheme_extend(RE_UINT32 ch); -RE_UINT32 re_get_grapheme_base(RE_UINT32 ch); -RE_UINT32 re_get_grapheme_link(RE_UINT32 ch); -RE_UINT32 re_get_white_space(RE_UINT32 ch); -RE_UINT32 re_get_bidi_control(RE_UINT32 ch); -RE_UINT32 re_get_join_control(RE_UINT32 ch); -RE_UINT32 re_get_dash(RE_UINT32 ch); -RE_UINT32 re_get_hyphen(RE_UINT32 ch); -RE_UINT32 re_get_quotation_mark(RE_UINT32 ch); -RE_UINT32 re_get_terminal_punctuation(RE_UINT32 ch); -RE_UINT32 re_get_other_math(RE_UINT32 ch); -RE_UINT32 re_get_hex_digit(RE_UINT32 ch); -RE_UINT32 re_get_ascii_hex_digit(RE_UINT32 ch); -RE_UINT32 re_get_other_alphabetic(RE_UINT32 ch); -RE_UINT32 re_get_ideographic(RE_UINT32 ch); -RE_UINT32 re_get_diacritic(RE_UINT32 ch); -RE_UINT32 re_get_extender(RE_UINT32 ch); -RE_UINT32 re_get_other_lowercase(RE_UINT32 ch); -RE_UINT32 re_get_other_uppercase(RE_UINT32 ch); -RE_UINT32 re_get_noncharacter_code_point(RE_UINT32 ch); -RE_UINT32 re_get_other_grapheme_extend(RE_UINT32 ch); -RE_UINT32 re_get_ids_binary_operator(RE_UINT32 ch); -RE_UINT32 re_get_ids_trinary_operator(RE_UINT32 ch); -RE_UINT32 re_get_radical(RE_UINT32 ch); -RE_UINT32 re_get_unified_ideograph(RE_UINT32 ch); -RE_UINT32 re_get_other_default_ignorable_code_point(RE_UINT32 ch); -RE_UINT32 re_get_deprecated(RE_UINT32 ch); -RE_UINT32 re_get_soft_dotted(RE_UINT32 ch); -RE_UINT32 re_get_logical_order_exception(RE_UINT32 ch); -RE_UINT32 re_get_other_id_start(RE_UINT32 ch); -RE_UINT32 re_get_other_id_continue(RE_UINT32 ch); -RE_UINT32 re_get_sentence_terminal(RE_UINT32 ch); -RE_UINT32 re_get_variation_selector(RE_UINT32 ch); -RE_UINT32 re_get_pattern_white_space(RE_UINT32 ch); -RE_UINT32 re_get_pattern_syntax(RE_UINT32 ch); -RE_UINT32 re_get_prepended_concatenation_mark(RE_UINT32 ch); -RE_UINT32 re_get_hangul_syllable_type(RE_UINT32 
ch); -RE_UINT32 re_get_bidi_class(RE_UINT32 ch); -RE_UINT32 re_get_canonical_combining_class(RE_UINT32 ch); -RE_UINT32 re_get_decomposition_type(RE_UINT32 ch); -RE_UINT32 re_get_east_asian_width(RE_UINT32 ch); -RE_UINT32 re_get_joining_group(RE_UINT32 ch); -RE_UINT32 re_get_joining_type(RE_UINT32 ch); -RE_UINT32 re_get_line_break(RE_UINT32 ch); -RE_UINT32 re_get_numeric_type(RE_UINT32 ch); -RE_UINT32 re_get_numeric_value(RE_UINT32 ch); -RE_UINT32 re_get_bidi_mirrored(RE_UINT32 ch); -RE_UINT32 re_get_indic_positional_category(RE_UINT32 ch); -RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 ch); -RE_UINT32 re_get_alphanumeric(RE_UINT32 ch); -RE_UINT32 re_get_any(RE_UINT32 ch); -RE_UINT32 re_get_blank(RE_UINT32 ch); -RE_UINT32 re_get_graph(RE_UINT32 ch); -RE_UINT32 re_get_print(RE_UINT32 ch); -RE_UINT32 re_get_word(RE_UINT32 ch); -RE_UINT32 re_get_xdigit(RE_UINT32 ch); -RE_UINT32 re_get_posix_digit(RE_UINT32 ch); -RE_UINT32 re_get_posix_alnum(RE_UINT32 ch); -RE_UINT32 re_get_posix_punct(RE_UINT32 ch); -RE_UINT32 re_get_posix_xdigit(RE_UINT32 ch); -int re_get_all_cases(RE_UINT32 ch, RE_UINT32* codepoints); -RE_UINT32 re_get_simple_case_folding(RE_UINT32 ch); -int re_get_full_case_folding(RE_UINT32 ch, RE_UINT32* codepoints);