diff --git a/src/regex/__init__.py b/src/regex/__init__.py index 45d1920d6a..e8384c5511 100644 --- a/src/regex/__init__.py +++ b/src/regex/__init__.py @@ -225,31 +225,31 @@ __all__ = ["compile", "escape", "findall", "finditer", "fullmatch", "match", "V0", "VERSION0", "V1", "VERSION1", "X", "VERBOSE", "W", "WORD", "error", "Regex"] -__version__ = "2.4.39" +__version__ = "2.4.48" # -------------------------------------------------------------------- # Public interface. -def match(pattern, string, flags=0, pos=None, endpos=None, concurrent=None, - **kwargs): +def match(pattern, string, flags=0, pos=None, endpos=None, partial=False, + concurrent=None, **kwargs): """Try to apply the pattern at the start of the string, returning a match object, or None if no match was found.""" return _compile(pattern, flags, kwargs).match(string, pos, endpos, - concurrent) + concurrent, partial) -def fullmatch(pattern, string, flags=0, pos=None, endpos=None, concurrent=None, - **kwargs): +def fullmatch(pattern, string, flags=0, pos=None, endpos=None, partial=False, + concurrent=None, **kwargs): """Try to apply the pattern against all of the string, returning a match object, or None if no match was found.""" return _compile(pattern, flags, kwargs).fullmatch(string, pos, endpos, - concurrent) + concurrent, partial) -def search(pattern, string, flags=0, pos=None, endpos=None, concurrent=None, - **kwargs): +def search(pattern, string, flags=0, pos=None, endpos=None, partial=False, + concurrent=None, **kwargs): """Search through string looking for a match to the pattern, returning a match object, or None if no match was found.""" return _compile(pattern, flags, kwargs).search(string, pos, endpos, - concurrent) + concurrent, partial) def sub(pattern, repl, string, count=0, flags=0, pos=None, endpos=None, concurrent=None, **kwargs): @@ -319,12 +319,12 @@ def findall(pattern, string, flags=0, pos=None, endpos=None, overlapped=False, overlapped, concurrent) def finditer(pattern, string, flags=0, 
pos=None, endpos=None, overlapped=False, - concurrent=None, **kwargs): + partial=False, concurrent=None, **kwargs): """Return an iterator over all matches in the string. The matches may be overlapped if overlapped is True. For each match, the iterator returns a match object. Empty matches are included in the result.""" return _compile(pattern, flags, kwargs).finditer(string, pos, endpos, - overlapped, concurrent) + overlapped, concurrent, partial) def compile(pattern, flags=0, **kwargs): "Compile a regular expression pattern, returning a pattern object." @@ -392,6 +392,7 @@ from . import _regex_core from calibre.constants import plugins _regex = plugins['_regex'][0] from threading import RLock as _RLock +from locale import getlocale as _getlocale from ._regex_core import * from ._regex_core import (_ALL_VERSIONS, _ALL_ENCODINGS, _FirstSetError, _UnscopedFlagSet, _check_group_features, _compile_firstset, @@ -414,6 +415,7 @@ _cache = {} _cache_lock = _RLock() _named_args = {} _replacement_cache = {} +_locale_sensitive = {} # Maximum size of the cache. _MAXCACHE = 500 @@ -421,6 +423,15 @@ _MAXREPCACHE = 500 def _compile(pattern, flags=0, kwargs={}): "Compiles a regular expression to a PatternObject." + # What locale is this pattern using? + locale_key = (type(pattern), pattern) + if _locale_sensitive.get(locale_key, True) or (flags & LOCALE) != 0: + # This pattern is, or might be, locale-sensitive. + pattern_locale = _getlocale() + else: + # This pattern is definitely not locale-sensitive. + pattern_locale = None + try: # Do we know what keyword arguments are needed? args_key = pattern, type(pattern), flags @@ -433,13 +444,13 @@ def _compile(pattern, flags=0, kwargs={}): try: args_supplied.add((k, frozenset(kwargs[k]))) except KeyError: - raise error("missing named list") + raise error("missing named list: {!r}".format(k)) args_supplied = frozenset(args_supplied) # Have we already seen this regular expression and named list? 
pattern_key = (pattern, type(pattern), flags, args_supplied, - DEFAULT_VERSION) + DEFAULT_VERSION, pattern_locale) return _cache[pattern_key] except KeyError: # It's a new pattern, or new named list for a known pattern. @@ -462,18 +473,19 @@ def _compile(pattern, flags=0, kwargs={}): _regex_core.DEFAULT_VERSION = DEFAULT_VERSION caught_exception = None + global_flags = flags while True: try: source = _Source(pattern) - info = _Info(flags, source.char_type, kwargs) + info = _Info(global_flags, source.char_type, kwargs) info.guess_encoding = guess_encoding source.ignore_space = bool(info.flags & VERBOSE) parsed = _parse_pattern(source, info) break except _UnscopedFlagSet: # Remember the global flags for the next attempt. - flags = info.global_flags + global_flags = info.global_flags except error, e: caught_exception = e @@ -500,6 +512,9 @@ def _compile(pattern, flags=0, kwargs={}): reverse = bool(info.flags & REVERSE) fuzzy = isinstance(parsed, _Fuzzy) + # Remember whether this pattern has an inline locale flag. + _locale_sensitive[locale_key] = info.inline_locale + # Should we print the parsed pattern? if flags & DEBUG: parsed.dump(indent=0, reverse=reverse) @@ -583,7 +598,8 @@ def _compile(pattern, flags=0, kwargs={}): args_needed = frozenset(args_needed) # Store this regular expression and named list. - pattern_key = (pattern, type(pattern), flags, args_needed, DEFAULT_VERSION) + pattern_key = (pattern, type(pattern), flags, args_needed, DEFAULT_VERSION, + pattern_locale) _cache[pattern_key] = compiled_pattern # Store what keyword arguments are needed. diff --git a/src/regex/_regex.c b/src/regex/_regex.c index 23ec638006..d15b45093d 100644 --- a/src/regex/_regex.c +++ b/src/regex/_regex.c @@ -50,9 +50,6 @@ #include "pyport.h" #include "pythread.h" -#define RE_MIN(X, Y) ((X) <= (Y) ? (X) : (Y)) -#define RE_MAX(X, Y) ((X) >= (Y) ? 
(X) : (Y)) - #if PY_VERSION_HEX < 0x02060000 #if SIZEOF_SIZE_T == SIZEOF_LONG_LONG #define T_PYSSIZET T_LONGLONG @@ -68,10 +65,12 @@ typedef unsigned short Py_UCS2; typedef RE_UINT32 RE_CODE; -/* Case-sensitive letters in the General Category. */ +/* Properties in the General Category. */ +#define RE_PROP_GC_CN ((RE_PROP_GC << 16) | RE_PROP_CN) #define RE_PROP_GC_LU ((RE_PROP_GC << 16) | RE_PROP_LU) #define RE_PROP_GC_LL ((RE_PROP_GC << 16) | RE_PROP_LL) #define RE_PROP_GC_LT ((RE_PROP_GC << 16) | RE_PROP_LT) +#define RE_PROP_GC_P ((RE_PROP_GC << 16) | RE_PROP_P) /* Unlimited repeat count. */ #define RE_UNLIMITED (~(RE_CODE)0) @@ -84,8 +83,17 @@ typedef unsigned short RE_STATUS_T; #define RE_CONC_YES 1 #define RE_CONC_DEFAULT 2 +/* The side that could truncate in a partial match. + * + * The values RE_PARTIAL_LEFT and RE_PARTIAL_RIGHT are also used as array + * indexes, so they need to be 0 and 1. + */ +#define RE_PARTIAL_NONE -1 +#define RE_PARTIAL_LEFT 0 +#define RE_PARTIAL_RIGHT 1 + /* Flags for the kind of 'sub' call: 'sub', 'subn', 'subf', 'subfn'. */ -#define RE_SUB 0x0 +#define RE_SUB 0x0 #define RE_SUBN 0x1 #if PY_VERSION_HEX >= 0x02060000 #define RE_SUBF 0x2 @@ -93,7 +101,6 @@ typedef unsigned short RE_STATUS_T; /* The name of this module, minus the leading underscore. */ #define RE_MODULE "regex" -#define RE_MODULE_UPPER "REGEX" /* Error codes. */ #define RE_ERROR_SUCCESS 1 /* Successful match. */ @@ -101,16 +108,17 @@ typedef unsigned short RE_STATUS_T; #define RE_ERROR_ILLEGAL -1 /* Illegal code. */ #define RE_ERROR_INTERNAL -2 /* Internal error. */ #define RE_ERROR_CONCURRENT -3 /* "concurrent" invalid. */ -#define RE_ERROR_MEMORY -9 /* Out of memory. */ -#define RE_ERROR_INTERRUPTED -10 /* Signal handler raised exception. */ -#define RE_ERROR_REPLACEMENT -11 /* Invalid replacement string. */ -#define RE_ERROR_INVALID_GROUP_REF -12 /* Invalid group reference. */ -#define RE_ERROR_GROUP_INDEX_TYPE -13 /* Group index type error. 
*/ -#define RE_ERROR_NO_SUCH_GROUP -14 /* No such group. */ -#define RE_ERROR_INDEX -15 /* String index. */ -#define RE_ERROR_BACKTRACKING -16 /* Too much backtracking. */ -#define RE_ERROR_NOT_STRING -17 /* Not a string. */ -#define RE_ERROR_NOT_UNICODE -18 /* Not a Unicode string. */ +#define RE_ERROR_MEMORY -4 /* Out of memory. */ +#define RE_ERROR_INTERRUPTED -5 /* Signal handler raised exception. */ +#define RE_ERROR_REPLACEMENT -6 /* Invalid replacement string. */ +#define RE_ERROR_INVALID_GROUP_REF -7 /* Invalid group reference. */ +#define RE_ERROR_GROUP_INDEX_TYPE -8 /* Group index type error. */ +#define RE_ERROR_NO_SUCH_GROUP -9 /* No such group. */ +#define RE_ERROR_INDEX -10 /* String index. */ +#define RE_ERROR_BACKTRACKING -11 /* Too much backtracking. */ +#define RE_ERROR_NOT_STRING -12 /* Not a string. */ +#define RE_ERROR_NOT_UNICODE -13 /* Not a Unicode string. */ +#define RE_ERROR_PARTIAL -15 /* Partial match. */ /* The number of backtrack entries per allocated block. */ #define RE_BACKTRACK_BLOCK_SIZE 64 @@ -181,8 +189,6 @@ typedef unsigned short RE_STATUS_T; #define RE_FUZZY_ERR 3 #define RE_FUZZY_COUNT 3 -#define RE_FUZZY_THRESHOLD 10 - /* The various values in a FUZZY node. */ #define RE_FUZZY_VAL_MAX_SUB 1 #define RE_FUZZY_VAL_MAX_INS 2 @@ -196,7 +202,7 @@ typedef unsigned short RE_STATUS_T; #define RE_FUZZY_VAL_MAX_BASE 1 #define RE_FUZZY_VAL_COST_BASE 5 -/* The various values in a END_FUZZY node. */ +/* The various values in an END_FUZZY node. */ #define RE_FUZZY_VAL_MIN_SUB 1 #define RE_FUZZY_VAL_MIN_INS 2 #define RE_FUZZY_VAL_MIN_DEL 3 @@ -219,9 +225,34 @@ static PyObject* property_dict; typedef struct RE_State* RE_StatePtr; +/* Bit-flags for the common character properties supported by locale-sensitive + * matching. 
+ */ +#define RE_LOCALE_ALNUM 0x001 +#define RE_LOCALE_ALPHA 0x002 +#define RE_LOCALE_CNTRL 0x004 +#define RE_LOCALE_DIGIT 0x008 +#define RE_LOCALE_GRAPH 0x010 +#define RE_LOCALE_LOWER 0x020 +#define RE_LOCALE_PRINT 0x040 +#define RE_LOCALE_PUNCT 0x080 +#define RE_LOCALE_SPACE 0x100 +#define RE_LOCALE_UPPER 0x200 + +/* Info about the current locale. + * + * Used by patterns that are locale-sensitive. + */ +typedef struct RE_LocaleInfo { + unsigned short properties[0x100]; + unsigned char uppercase[0x100]; + unsigned char lowercase[0x100]; +} RE_LocaleInfo; + /* Handlers for ASCII, locale and Unicode. */ typedef struct RE_EncodingTable { - BOOL (*has_property)(RE_CODE property, Py_UCS4 ch); + BOOL (*has_property)(RE_LocaleInfo* locale_info, RE_CODE property, Py_UCS4 + ch); BOOL (*at_boundary)(RE_StatePtr state, Py_ssize_t text_pos); BOOL (*at_word_start)(RE_StatePtr state, Py_ssize_t text_pos); BOOL (*at_word_end)(RE_StatePtr state, Py_ssize_t text_pos); @@ -232,11 +263,14 @@ typedef struct RE_EncodingTable { BOOL (*is_line_sep)(Py_UCS4 ch); BOOL (*at_line_start)(RE_StatePtr state, Py_ssize_t text_pos); BOOL (*at_line_end)(RE_StatePtr state, Py_ssize_t text_pos); - BOOL (*possible_turkic)(Py_UCS4 ch); - int (*all_cases)(Py_UCS4 ch, Py_UCS4* codepoints); - Py_UCS4 (*simple_case_fold)(Py_UCS4 ch); - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - int (*all_turkic_i)(Py_UCS4 ch, Py_UCS4* cases); + BOOL (*possible_turkic)(RE_LocaleInfo* locale_info, Py_UCS4 ch); + int (*all_cases)(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* + codepoints); + Py_UCS4 (*simple_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch); + int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* + folded); + int (*all_turkic_i)(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* + cases); } RE_EncodingTable; /* Position within the regex and text. 
*/ @@ -265,11 +299,11 @@ typedef struct RE_BacktrackData { struct { RE_FuzzyInfo fuzzy_info; Py_ssize_t text_pos; - RE_UINT32 index; + RE_CODE index; } fuzzy; struct { RE_Position position; - Py_ssize_t count; + size_t count; struct RE_Node* fuzzy_node; BOOL too_few_errors; } fuzzy_insert; @@ -277,11 +311,10 @@ typedef struct RE_BacktrackData { RE_Position position; RE_INT8 fuzzy_type; RE_INT8 step; - } fuzzy_one; + } fuzzy_item; struct { RE_Position position; Py_ssize_t string_pos; - size_t string_len; RE_INT8 fuzzy_type; RE_INT8 folded_pos; RE_INT8 folded_len; @@ -289,15 +322,11 @@ typedef struct RE_BacktrackData { RE_INT8 gfolded_len; RE_INT8 step; } fuzzy_string; - struct { - RE_Position position; - RE_INT8 fuzzy_type; - } fuzzy_zero; struct { Py_ssize_t text_pos; Py_ssize_t current_capture; - RE_UINT32 private_index; - RE_UINT32 public_index; + RE_CODE private_index; + RE_CODE public_index; BOOL capture; } group; struct { @@ -314,11 +343,8 @@ typedef struct RE_BacktrackData { size_t count; Py_ssize_t start; size_t capture_change; - RE_UINT32 index; + RE_CODE index; } repeat; - struct { - size_t* capture_counts; - } saved; }; RE_UINT8 op; } RE_BacktrackData; @@ -345,7 +371,6 @@ typedef struct RE_Info { RE_BacktrackBlock* current_backtrack_block; size_t backtrack_count; RE_SavedGroups* current_saved_groups; - size_t captures_count; struct RE_GroupCallFrame* current_group_call_frame; BOOL must_advance; } RE_Info; @@ -438,7 +463,6 @@ typedef struct RE_GroupInfo { RE_Node* node; BOOL referenced; BOOL has_name; - BOOL called; } RE_GroupInfo; /* Info about a call_ref. */ @@ -479,6 +503,7 @@ typedef struct RE_StringInfo { */ #define MAX_SEARCH_POSITIONS 7 +/* Info about a search position. */ typedef struct { Py_ssize_t start_pos; Py_ssize_t match_pos; @@ -512,20 +537,22 @@ typedef struct RE_State { /* Storage for backtrack info. 
*/ RE_BacktrackBlock backtrack_block; RE_BacktrackBlock* current_backtrack_block; - size_t backtrack_allocated; + Py_ssize_t backtrack_allocated; RE_BacktrackData* backtrack; /* Storage for saved capture groups. */ RE_SavedGroups* first_saved_groups; RE_SavedGroups* current_saved_groups; RE_SavedRepeats* first_saved_repeats; RE_SavedRepeats* current_saved_repeats; - size_t min_width; /* The minimum width of the string to match (assuming it's not a fuzzy pattern). */ + Py_ssize_t min_width; /* The minimum width of the string to match (assuming it's not a fuzzy pattern). */ RE_EncodingTable* encoding; /* The 'encoding' of the string being searched. */ + RE_LocaleInfo* locale_info; /* Info about the locale, if needed. */ Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); void* (*point_to)(void* text, Py_ssize_t pos); PyThread_type_lock lock; /* A lock for accessing the state across threads. */ RE_FuzzyInfo fuzzy_info; /* Info about fuzzy matching. */ + size_t total_fuzzy_counts[RE_FUZZY_COUNT]; /* Totals for fuzzy matching. */ RE_FuzzyGuards* fuzzy_guards; /* The guards for a fuzzy match. */ size_t total_errors; /* The total number of errors of a fuzzy match. */ size_t total_cost; /* The total cost of a fuzzy match. */ @@ -538,13 +565,14 @@ typedef struct RE_State { size_t capture_change; /* Incremented every time a captive group changes. */ Py_ssize_t req_pos; /* The position where the required string matched. */ Py_ssize_t req_end; /* The end position where the required string matched. */ + int partial_side; /* The side that could truncate in a partial match. */ RE_UINT16 iterations; /* The number of iterations the matching engine has performed since checking for KeyboardInterrupt. */ BOOL is_unicode; /* Whether the string to be matched is Unicode. */ BOOL should_release; /* Whether the buffer should be released. */ BOOL overlapped; /* Whether the matches can be overlapped. 
*/ BOOL reverse; /* Whether it's a reverse pattern. */ BOOL visible_captures; /* Whether the 'captures' method will be visible. */ - BOOL zero_width; /* Whether to enable the correct handling of zero-width matches. */ + BOOL version_0; /* Whether to perform version_0 behaviour (same as re module). */ BOOL must_advance; /* Whether the end of the match must advance past its start. */ BOOL is_multithreaded; /* Whether to release the GIL while matching. */ BOOL too_few_errors; /* Whether there were too few fuzzy errors. */ @@ -568,7 +596,7 @@ typedef struct RE_SafeState { typedef struct PatternObject { PyObject_HEAD PyObject* pattern; /* Pattern source (or None). */ - RE_CODE flags; /* Flags used when compiling pattern source. */ + Py_ssize_t flags; /* Flags used when compiling pattern source. */ PyObject* weakreflist; /* List of weak references */ /* Nodes into which the regular expression is compiled. */ RE_Node* start_node; @@ -580,6 +608,8 @@ typedef struct PatternObject { PyObject* groupindex; PyObject* indexgroup; PyObject* named_lists; + size_t named_lists_count; + PyObject** partial_named_lists[2]; PyObject* named_list_indexes; /* Storage for the pattern nodes. */ size_t node_capacity; @@ -596,8 +626,9 @@ typedef struct PatternObject { /* Info about the repeats. */ size_t repeat_info_capacity; RE_RepeatInfo* repeat_info; - size_t min_width; /* The minimum width of the string to match (assuming it isn't a fuzzy pattern). */ + Py_ssize_t min_width; /* The minimum width of the string to match (assuming it isn't a fuzzy pattern). */ RE_EncodingTable* encoding; /* Encoding handlers. */ + RE_LocaleInfo* locale_info; /* Info about the locale, if needed. */ RE_GroupData* groups_storage; RE_RepeatData* repeats_storage; size_t fuzzy_count; /* The number of fuzzy sections. */ @@ -624,6 +655,8 @@ typedef struct MatchObject { size_t group_count; /* The number of groups. */ RE_GroupData* groups; /* The capture groups. 
*/ PyObject* regs; + size_t fuzzy_counts[RE_FUZZY_COUNT]; + BOOL partial; /* Whether it's a partial match. */ } MatchObject; /* The ScannerObject. */ @@ -651,7 +684,7 @@ typedef struct RE_CompileArgs { RE_CODE* code; /* The start of the compiled pattern. */ RE_CODE* end_code; /* The end of the compiled pattern. */ PatternObject* pattern; /* The pattern object. */ - size_t min_width; /* The minimum width of the string to match (assuming it isn't a fuzzy pattern). */ + Py_ssize_t min_width; /* The minimum width of the string to match (assuming it isn't a fuzzy pattern). */ RE_Node* start; /* The start node. */ RE_Node* end; /* The end node. */ size_t repeat_depth; /* The nesting depth of the repeat. */ @@ -676,6 +709,7 @@ typedef struct JoinInfo { BOOL is_unicode; /* Whether the string is Unicode. */ } JoinInfo; +/* Info about fuzzy matching. */ typedef struct { RE_Node* new_node; Py_ssize_t new_text_pos; @@ -693,72 +727,109 @@ typedef struct { /* Function types for getting info from a MatchObject. */ typedef PyObject* (*RE_GetByIndexFunc)(MatchObject* self, Py_ssize_t index); +/* Returns the magnitude of a 'Py_ssize_t' value. */ Py_LOCAL_INLINE(Py_ssize_t) abs_ssize_t(Py_ssize_t x) { return x >= 0 ? x : -x; } -/* Gets a character at the given position assuming 1 byte per character. */ +/* Returns the minimum of 2 'Py_ssize_t' values. */ +Py_LOCAL_INLINE(Py_ssize_t) min_ssize_t(Py_ssize_t x, Py_ssize_t y) { + return x <= y ? x : y; +} + +/* Returns the maximum of 2 'Py_ssize_t' values. */ +Py_LOCAL_INLINE(Py_ssize_t) max_ssize_t(Py_ssize_t x, Py_ssize_t y) { + return x >= y ? x : y; +} + +/* Returns the minimum of 2 'size_t' values. */ +Py_LOCAL_INLINE(size_t) min_size_t(size_t x, size_t y) { + return x <= y ? x : y; +} + +/* Returns the maximum of 2 'size_t' values. */ +Py_LOCAL_INLINE(size_t) max_size_t(size_t x, size_t y) { + return x >= y ? x : y; +} + +/* Returns the 'maximum' of 2 RE_STATUS_T values. 
*/ +Py_LOCAL_INLINE(RE_STATUS_T) max_status_2(RE_STATUS_T x, RE_STATUS_T y) { + return x >= y ? x : y; +} + +/* Returns the 'maximum' of 3 RE_STATUS_T values. */ +Py_LOCAL_INLINE(RE_STATUS_T) max_status_3(RE_STATUS_T x, RE_STATUS_T y, + RE_STATUS_T z) { + return max_status_2(x, max_status_2(y, z)); +} + +/* Returns the 'maximum' of 4 RE_STATUS_T values. */ +Py_LOCAL_INLINE(RE_STATUS_T) max_status_4(RE_STATUS_T w, RE_STATUS_T x, + RE_STATUS_T y, RE_STATUS_T z) { + return max_status_2(max_status_2(w, x), max_status_2(y, z)); +} + +/* Gets a character at a position assuming 1 byte per character. */ static Py_UCS4 bytes1_char_at(void* text, Py_ssize_t pos) { return *((Py_UCS1*)text + pos); } -/* Sets a character at the given position assuming 1 byte per character. */ +/* Sets a character at a position assuming 1 byte per character. */ static void bytes1_set_char_at(void* text, Py_ssize_t pos, Py_UCS4 ch) { *((Py_UCS1*)text + pos) = (Py_UCS1)ch; } -/* Gets a pointer to the character at the given position assuming 1 byte per - * character. - */ +/* Gets a pointer to a position assuming 1 byte per character. */ static void* bytes1_point_to(void* text, Py_ssize_t pos) { return (Py_UCS1*)text + pos; } -/* Gets a character at the given position assuming 2 bytes per character. */ +/* Gets a character at a position assuming 2 bytes per character. */ static Py_UCS4 bytes2_char_at(void* text, Py_ssize_t pos) { return *((Py_UCS2*)text + pos); } -/* Sets a character at the given position assuming 2 bytes per character. */ +/* Sets a character at a position assuming 2 bytes per character. */ static void bytes2_set_char_at(void* text, Py_ssize_t pos, Py_UCS4 ch) { *((Py_UCS2*)text + pos) = (Py_UCS2)ch; } -/* Gets a pointer to the character at the given position assuming 2 bytes per - * character. - */ +/* Gets a pointer to a position assuming 2 bytes per character. 
*/ static void* bytes2_point_to(void* text, Py_ssize_t pos) { return (Py_UCS2*)text + pos; } -/* Gets a character at the given position assuming 4 bytes per character. */ +/* Gets a character at a position assuming 4 bytes per character. */ static Py_UCS4 bytes4_char_at(void* text, Py_ssize_t pos) { return *((Py_UCS4*)text + pos); } -/* Sets a character at the given position assuming 4 bytes per character. */ +/* Sets a character at a position assuming 4 bytes per character. */ static void bytes4_set_char_at(void* text, Py_ssize_t pos, Py_UCS4 ch) { *((Py_UCS4*)text + pos) = (Py_UCS4)ch; } -/* Gets a pointer to the character at the given position assuming 4 bytes per - * character. - */ +/* Gets a pointer to a position assuming 4 bytes per character. */ static void* bytes4_point_to(void* text, Py_ssize_t pos) { return (Py_UCS4*)text + pos; } -/* Default for whether the current text position is on a boundary. */ +/* Default for whether a position is on a word boundary. */ static BOOL at_boundary_always(RE_State* state, Py_ssize_t text_pos) { return TRUE; } +/* Converts a BOOL to success/failure. */ +Py_LOCAL_INLINE(int) bool_as_status(BOOL value) { + return value ? RE_ERROR_SUCCESS : RE_ERROR_FAILURE; +} + /* ASCII-specific. */ -static BOOL unicode_has_property(RE_CODE property, Py_UCS4 ch); +Py_LOCAL_INLINE(BOOL) unicode_has_property(RE_CODE property, Py_UCS4 ch); -/* Checks whether a character has the given property. */ -static BOOL ascii_has_property(RE_CODE property, Py_UCS4 ch) { +/* Checks whether a character has a property. */ +Py_LOCAL_INLINE(BOOL) ascii_has_property(RE_CODE property, Py_UCS4 ch) { if (ch > RE_ASCII_MAX) { /* Outside the ASCII range. */ RE_UINT32 value; @@ -771,43 +842,55 @@ static BOOL ascii_has_property(RE_CODE property, Py_UCS4 ch) { return unicode_has_property(property, ch); } -/* Checks whether the current text position is on a word boundary. */ +/* Wrapper for calling 'ascii_has_property' via a pointer. 
*/ +static BOOL ascii_has_property_wrapper(RE_LocaleInfo* locale_info, RE_CODE + property, Py_UCS4 ch) { + return ascii_has_property(property, ch); +} + +/* Checks whether there's a word character to the left. */ +Py_LOCAL_INLINE(BOOL) ascii_word_left(RE_State* state, Py_ssize_t text_pos) { + return text_pos > 0 && ascii_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos - 1)); +} + +/* Checks whether there's a word character to the right. */ +Py_LOCAL_INLINE(BOOL) ascii_word_right(RE_State* state, Py_ssize_t text_pos) { + return text_pos < state->text_length && ascii_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos)); +} + +/* Checks whether a position is on a word boundary. */ static BOOL ascii_at_boundary(RE_State* state, Py_ssize_t text_pos) { - BOOL before; - BOOL after; + BOOL left; + BOOL right; - before = text_pos > 0 && ascii_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos - 1)); - after = text_pos < state->text_length && ascii_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos)); + left = ascii_word_left(state, text_pos); + right = ascii_word_right(state, text_pos); - return before != after; + return left != right; } -/* Checks whether the current text position is at the start of a word. */ +/* Checks whether a position is at the start of a word. */ static BOOL ascii_at_word_start(RE_State* state, Py_ssize_t text_pos) { - BOOL before; - BOOL after; + BOOL left; + BOOL right; - before = text_pos > 0 && ascii_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos - 1)); - after = text_pos < state->text_length && ascii_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos)); + left = ascii_word_left(state, text_pos); + right = ascii_word_right(state, text_pos); - return !before && after; + return !left && right; } -/* Checks whether the current text position is at the end of a word. */ +/* Checks whether a position is at the end of a word. 
*/ static BOOL ascii_at_word_end(RE_State* state, Py_ssize_t text_pos) { - BOOL before; - BOOL after; + BOOL left; + BOOL right; - before = text_pos > 0 && ascii_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos - 1)); - after = text_pos < state->text_length && ascii_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos)); + left = ascii_word_left(state, text_pos); + right = ascii_word_right(state, text_pos); - return before && !after; + return left && !right; } /* Checks whether a character is a line separator. */ @@ -815,24 +898,27 @@ static BOOL ascii_is_line_sep(Py_UCS4 ch) { return 0x0A <= ch && ch <= 0x0D; } -/* Checks whether the current text position is at the start of a line. */ +/* Checks whether a position is at the start of a line. */ static BOOL ascii_at_line_start(RE_State* state, Py_ssize_t text_pos) { Py_UCS4 ch; - if (text_pos == 0) + if (text_pos <= 0) return TRUE; ch = state->char_at(state->text, text_pos - 1); - if (ch == 0x0D) + if (ch == 0x0D) { + if (text_pos >= state->text_length) + return TRUE; + /* No line break inside CRLF. */ - return text_pos >= state->text_length || state->char_at(state->text, - text_pos) != 0x0A; + return state->char_at(state->text, text_pos) != 0x0A; + } return 0x0A <= ch && ch <= 0x0D; } -/* Checks whether the current text position is at the end of a line. */ +/* Checks whether a position is at the end of a line. */ static BOOL ascii_at_line_end(RE_State* state, Py_ssize_t text_pos) { Py_UCS4 ch; @@ -841,10 +927,13 @@ static BOOL ascii_at_line_end(RE_State* state, Py_ssize_t text_pos) { ch = state->char_at(state->text, text_pos); - if (ch == 0x0A) + if (ch == 0x0A) { + if (text_pos <= 0) + return TRUE; + /* No line break inside CRLF. 
*/ - return text_pos >= 1 || state->char_at(state->text, text_pos - 1) != - 0x0D; + return state->char_at(state->text, text_pos - 1) != 0x0D; + } return 0x0A <= ch && ch <= 0x0D; } @@ -852,12 +941,13 @@ static BOOL ascii_at_line_end(RE_State* state, Py_ssize_t text_pos) { /* Checks whether a character could be Turkic (variants of I/i). For ASCII, it * won't be. */ -static BOOL ascii_possible_turkic(Py_UCS4 ch) { +static BOOL ascii_possible_turkic(RE_LocaleInfo* locale_info, Py_UCS4 ch) { return FALSE; } /* Gets all the cases of a character. */ -static int ascii_all_cases(Py_UCS4 ch, Py_UCS4* codepoints) { +static int ascii_all_cases(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* + codepoints) { int count; count = 0; @@ -872,7 +962,7 @@ static int ascii_all_cases(Py_UCS4 ch, Py_UCS4* codepoints) { } /* Returns a character with its case folded. */ -static Py_UCS4 ascii_simple_case_fold(Py_UCS4 ch) { +static Py_UCS4 ascii_simple_case_fold(RE_LocaleInfo* locale_info, Py_UCS4 ch) { if ('A' <= ch && ch <= 'Z') /* Uppercase folds to lowercase. */ return ch ^ 0x20; @@ -881,7 +971,8 @@ static Py_UCS4 ascii_simple_case_fold(Py_UCS4 ch) { } /* Returns a character with its case folded. */ -static int ascii_full_case_fold(Py_UCS4 ch, Py_UCS4* folded) { +static int ascii_full_case_fold(RE_LocaleInfo* locale_info, Py_UCS4 ch, + Py_UCS4* folded) { if ('A' <= ch && ch <= 'Z') /* Uppercase folds to lowercase. */ folded[0] = ch ^ 0x20; @@ -894,7 +985,8 @@ static int ascii_full_case_fold(Py_UCS4 ch, Py_UCS4* folded) { /* Gets all the case variants of Turkic 'I'. The given character will be listed * first. */ -static int ascii_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) { +static int ascii_all_turkic_i(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* + cases) { int count; count = 0; @@ -912,7 +1004,7 @@ static int ascii_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) { /* The handlers for ASCII characters. 
*/ static RE_EncodingTable ascii_encoding = { - ascii_has_property, + ascii_has_property_wrapper, ascii_at_boundary, ascii_at_word_start, ascii_at_word_end, @@ -932,8 +1024,81 @@ static RE_EncodingTable ascii_encoding = { /* Locale-specific. */ -/* Checks whether a character has the given property. */ -static BOOL locale_has_property(RE_CODE property, Py_UCS4 ch) { +/* Checks whether a character has the 'alnum' property in the given locale. */ +Py_LOCAL_INLINE(BOOL) locale_isalnum(RE_LocaleInfo* locale_info, Py_UCS4 ch) { + return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & + RE_LOCALE_ALNUM) != 0; +} + +/* Checks whether a character has the 'alpha' property in the given locale. */ +Py_LOCAL_INLINE(BOOL) locale_isalpha(RE_LocaleInfo* locale_info, Py_UCS4 ch) { + return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & + RE_LOCALE_ALPHA) != 0; +} + +/* Checks whether a character has the 'cntrl' property in the given locale. */ +Py_LOCAL_INLINE(BOOL) locale_iscntrl(RE_LocaleInfo* locale_info, Py_UCS4 ch) { + return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & + RE_LOCALE_CNTRL) != 0; +} + +/* Checks whether a character has the 'digit' property in the given locale. */ +Py_LOCAL_INLINE(BOOL) locale_isdigit(RE_LocaleInfo* locale_info, Py_UCS4 ch) { + return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & + RE_LOCALE_DIGIT) != 0; +} + +/* Checks whether a character has the 'graph' property in the given locale. */ +Py_LOCAL_INLINE(BOOL) locale_isgraph(RE_LocaleInfo* locale_info, Py_UCS4 ch) { + return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & + RE_LOCALE_GRAPH) != 0; +} + +/* Checks whether a character has the 'lower' property in the given locale. */ +Py_LOCAL_INLINE(BOOL) locale_islower(RE_LocaleInfo* locale_info, Py_UCS4 ch) { + return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & + RE_LOCALE_LOWER) != 0; +} + +/* Checks whether a character has the 'print' property in the given locale. 
*/ +Py_LOCAL_INLINE(BOOL) locale_isprint(RE_LocaleInfo* locale_info, Py_UCS4 ch) { + return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & + RE_LOCALE_PRINT) != 0; +} + +/* Checks whether a character has the 'punct' property in the given locale. */ +Py_LOCAL_INLINE(BOOL) locale_ispunct(RE_LocaleInfo* locale_info, Py_UCS4 ch) { + return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & + RE_LOCALE_PUNCT) != 0; +} + +/* Checks whether a character has the 'space' property in the given locale. */ +Py_LOCAL_INLINE(BOOL) locale_isspace(RE_LocaleInfo* locale_info, Py_UCS4 ch) { + return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & + RE_LOCALE_SPACE) != 0; +} + +/* Checks whether a character has the 'upper' property in the given locale. */ +Py_LOCAL_INLINE(BOOL) locale_isupper(RE_LocaleInfo* locale_info, Py_UCS4 ch) { + return ch <= RE_LOCALE_MAX && (locale_info->properties[ch] & + RE_LOCALE_UPPER) != 0; +} + +/* Converts a character to lowercase in the given locale. */ +Py_LOCAL_INLINE(Py_UCS4) locale_tolower(RE_LocaleInfo* locale_info, Py_UCS4 ch) + { + return ch <= RE_LOCALE_MAX ? locale_info->lowercase[ch] : ch; +} + +/* Converts a character to uppercase in the given locale. */ +Py_LOCAL_INLINE(Py_UCS4) locale_toupper(RE_LocaleInfo* locale_info, Py_UCS4 ch) + { + return ch <= RE_LOCALE_MAX ? locale_info->uppercase[ch] : ch; +} + +/* Checks whether a character has a property. 
*/ +Py_LOCAL_INLINE(BOOL) locale_has_property(RE_LocaleInfo* locale_info, RE_CODE + property, Py_UCS4 ch) { RE_UINT32 value; RE_UINT32 v; @@ -945,10 +1110,10 @@ static BOOL locale_has_property(RE_CODE property, Py_UCS4 ch) { switch (property >> 16) { case RE_PROP_ALNUM >> 16: - v = isalnum(ch) != 0; + v = locale_isalnum(locale_info, ch); break; case RE_PROP_ALPHA >> 16: - v = isalpha(ch) != 0; + v = locale_isalpha(locale_info, ch); break; case RE_PROP_ANY >> 16: v = 1; @@ -956,28 +1121,34 @@ static BOOL locale_has_property(RE_CODE property, Py_UCS4 ch) { case RE_PROP_ASCII >> 16: v = ch <= RE_ASCII_MAX; break; - case RE_PROP_ASSIGNED >> 16: - v = ch <= RE_LOCALE_MAX; - break; case RE_PROP_BLANK >> 16: v = ch == '\t' || ch == ' '; break; case RE_PROP_GC: switch (property) { + case RE_PROP_ASSIGNED: + v = ch <= RE_LOCALE_MAX; + break; + case RE_PROP_CASEDLETTER: + v = locale_isalpha(locale_info, ch) ? value : 0xFFFF; + break; case RE_PROP_CNTRL: - v = iscntrl(ch) ? value : 0xFFFF; + v = locale_iscntrl(locale_info, ch) ? value : 0xFFFF; break; case RE_PROP_DIGIT: - v = isdigit(ch) ? value : 0xFFFF; + v = locale_isdigit(locale_info, ch) ? value : 0xFFFF; + break; + case RE_PROP_GC_CN: + v = ch > RE_LOCALE_MAX; break; case RE_PROP_GC_LL: - v = islower(ch) ? value : 0xFFFF; + v = locale_islower(locale_info, ch) ? value : 0xFFFF; break; case RE_PROP_GC_LU: - v = isupper(ch) ? value : 0xFFFF; + v = locale_isupper(locale_info, ch) ? value : 0xFFFF; break; - case RE_PROP_PUNCT: - v = ispunct(ch) ? value : 0xFFFF; + case RE_PROP_GC_P: + v = locale_ispunct(locale_info, ch) ? 
value : 0xFFFF; break; default: v = 0xFFFF; @@ -985,22 +1156,22 @@ static BOOL locale_has_property(RE_CODE property, Py_UCS4 ch) { } break; case RE_PROP_GRAPH >> 16: - v = isgraph(ch) != 0; + v = locale_isgraph(locale_info, ch); break; case RE_PROP_LOWER >> 16: - v = islower(ch) != 0; + v = locale_islower(locale_info, ch); break; case RE_PROP_PRINT >> 16: - v = isprint(ch) != 0; + v = locale_isprint(locale_info, ch); break; case RE_PROP_SPACE >> 16: - v = isspace(ch) != 0; + v = locale_isspace(locale_info, ch); break; case RE_PROP_UPPER >> 16: - v = isupper(ch) != 0; + v = locale_isupper(locale_info, ch); break; case RE_PROP_WORD >> 16: - v = ch == '_' || isalnum(ch) != 0; + v = ch == '_' || locale_isalnum(locale_info, ch); break; case RE_PROP_XDIGIT >> 16: v = re_get_hex_digit(ch) != 0; @@ -1013,52 +1184,67 @@ static BOOL locale_has_property(RE_CODE property, Py_UCS4 ch) { return v == value; } -/* Checks whether the current text position is on a word boundary. */ +/* Wrapper for calling 'locale_has_property' via a pointer. */ +static BOOL locale_has_property_wrapper(RE_LocaleInfo* locale_info, RE_CODE + property, Py_UCS4 ch) { + return locale_has_property(locale_info, property, ch); +} + +/* Checks whether there's a word character to the left. */ +Py_LOCAL_INLINE(BOOL) locale_word_left(RE_State* state, Py_ssize_t text_pos) { + return text_pos > 0 && locale_has_property(state->locale_info, + RE_PROP_WORD, state->char_at(state->text, text_pos - 1)); +} + +/* Checks whether there's a word character to the right. */ +Py_LOCAL_INLINE(BOOL) locale_word_right(RE_State* state, Py_ssize_t text_pos) { + return text_pos < state->text_length && + locale_has_property(state->locale_info, RE_PROP_WORD, + state->char_at(state->text, text_pos)); +} + +/* Checks whether a position is on a word boundary. 
*/ static BOOL locale_at_boundary(RE_State* state, Py_ssize_t text_pos) { - BOOL before; - BOOL after; + BOOL left; + BOOL right; - before = text_pos > 0 && locale_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos - 1)); - after = text_pos < state->text_length && locale_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos)); + left = locale_word_left(state, text_pos); + right = locale_word_right(state, text_pos); - return before != after; + return left != right; } -/* Checks whether the current text position is at the start of a word. */ +/* Checks whether a position is at the start of a word. */ static BOOL locale_at_word_start(RE_State* state, Py_ssize_t text_pos) { - BOOL before; - BOOL after; + BOOL left; + BOOL right; - before = text_pos > 0 && locale_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos - 1)); - after = text_pos < state->text_length && locale_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos)); + left = locale_word_left(state, text_pos); + right = locale_word_right(state, text_pos); - return !before && after; + return !left && right; } -/* Checks whether the current text position is at the end of a word. */ +/* Checks whether a position is at the end of a word. */ static BOOL locale_at_word_end(RE_State* state, Py_ssize_t text_pos) { - BOOL before; - BOOL after; + BOOL left; + BOOL right; - before = text_pos > 0 && locale_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos - 1)); - after = text_pos < state->text_length && locale_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos)); + left = locale_word_left(state, text_pos); + right = locale_word_right(state, text_pos); - return before && !after; + return left && !right; } /* Checks whether a character could be Turkic (variants of I/i). 
*/ -static BOOL locale_possible_turkic(Py_UCS4 ch) { - return toupper(ch) == 'I' || tolower(ch) == 'i'; +static BOOL locale_possible_turkic(RE_LocaleInfo* locale_info, Py_UCS4 ch) { + return locale_toupper(locale_info, ch) == 'I' || + locale_tolower(locale_info, ch) == 'i'; } /* Gets all the cases of a character. */ -static int locale_all_cases(Py_UCS4 ch, Py_UCS4* codepoints) { +static int locale_all_cases(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* + codepoints) { int count; Py_UCS4 other; @@ -1066,11 +1252,11 @@ static int locale_all_cases(Py_UCS4 ch, Py_UCS4* codepoints) { codepoints[count++] = ch; - other = toupper(ch); + other = locale_toupper(locale_info, ch); if (other != ch) codepoints[count++] = other; - other = tolower(ch); + other = locale_tolower(locale_info, ch); if (other != ch) codepoints[count++] = other; @@ -1078,19 +1264,15 @@ static int locale_all_cases(Py_UCS4 ch, Py_UCS4* codepoints) { } /* Returns a character with its case folded. */ -static Py_UCS4 locale_simple_case_fold(Py_UCS4 ch) { - if (ch <= RE_LOCALE_MAX) - return tolower(ch); - - return ch; +static Py_UCS4 locale_simple_case_fold(RE_LocaleInfo* locale_info, Py_UCS4 ch) + { + return locale_tolower(locale_info, ch); } /* Returns a character with its case folded. */ -static int locale_full_case_fold(Py_UCS4 ch, Py_UCS4* folded) { - if (ch <= RE_LOCALE_MAX) - folded[0] = tolower(ch); - else - folded[0] = ch; +static int locale_full_case_fold(RE_LocaleInfo* locale_info, Py_UCS4 ch, + Py_UCS4* folded) { + folded[0] = locale_tolower(locale_info, ch); return 1; } @@ -1098,7 +1280,8 @@ static int locale_full_case_fold(Py_UCS4 ch, Py_UCS4* folded) { /* Gets all the case variants of Turkic 'I'. The given character will be listed * first. 
*/ -static int locale_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) { +static int locale_all_turkic_i(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* + cases) { int count; Py_UCS4 other; @@ -1113,12 +1296,12 @@ static int locale_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) { cases[count++] = 'i'; /* Uppercase 'i' will be either dotted (Turkic) or dotless (non-Turkic). */ - other = toupper('i'); + other = locale_toupper(locale_info, 'i'); if (other != ch && other != 'I') cases[count++] = other; /* Lowercase 'I' will be either dotless (Turkic) or dotted (non-Turkic). */ - other = tolower('I'); + other = locale_tolower(locale_info, 'I'); if (other != ch && other != 'i') cases[count++] = other; @@ -1127,7 +1310,7 @@ static int locale_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) { /* The handlers for locale characters. */ static RE_EncodingTable locale_encoding = { - locale_has_property, + locale_has_property_wrapper, locale_at_boundary, locale_at_word_start, locale_at_word_end, @@ -1147,8 +1330,8 @@ static RE_EncodingTable locale_encoding = { /* Unicode-specific. */ -/* Checks whether a Unicode character has the given property. */ -static BOOL unicode_has_property(RE_CODE property, Py_UCS4 ch) { +/* Checks whether a Unicode character has a property. */ +Py_LOCAL_INLINE(BOOL) unicode_has_property(RE_CODE property, Py_UCS4 ch) { RE_UINT32 prop; RE_UINT32 value; RE_UINT32 v; @@ -1165,8 +1348,12 @@ static BOOL unicode_has_property(RE_CODE property, Py_UCS4 ch) { if (prop == RE_PROP_GC) { switch (value) { + case RE_PROP_ASSIGNED: + return v != RE_PROP_CN; case RE_PROP_C: return (RE_PROP_C_MASK & (1 << v)) != 0; + case RE_PROP_CASEDLETTER: + return v == RE_PROP_LU || v == RE_PROP_LL || v == RE_PROP_LT; case RE_PROP_L: return (RE_PROP_L_MASK & (1 << v)) != 0; case RE_PROP_M: @@ -1185,43 +1372,56 @@ static BOOL unicode_has_property(RE_CODE property, Py_UCS4 ch) { return FALSE; } -/* Checks whether the current text position is on a word boundary. 
*/ +/* Wrapper for calling 'unicode_has_property' via a pointer. */ +static BOOL unicode_has_property_wrapper(RE_LocaleInfo* locale_info, RE_CODE + property, Py_UCS4 ch) { + return unicode_has_property(property, ch); +} + +/* Checks whether there's a word character to the left. */ +Py_LOCAL_INLINE(BOOL) unicode_word_left(RE_State* state, Py_ssize_t text_pos) { + return text_pos > 0 && unicode_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos - 1)); +} + +/* Checks whether there's a word character to the right. */ +Py_LOCAL_INLINE(BOOL) unicode_word_right(RE_State* state, Py_ssize_t text_pos) + { + return text_pos < state->text_length && unicode_has_property(RE_PROP_WORD, + state->char_at(state->text, text_pos)); +} + +/* Checks whether a position is on a word boundary. */ static BOOL unicode_at_boundary(RE_State* state, Py_ssize_t text_pos) { - BOOL before; - BOOL after; + BOOL left; + BOOL right; - before = text_pos > 0 && unicode_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos - 1)); - after = text_pos < state->text_length && unicode_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos)); + left = unicode_word_left(state, text_pos); + right = unicode_word_right(state, text_pos); - return before != after; + return left != right; } -/* Checks whether the current text position is at the start of a word. */ +/* Checks whether a position is at the start of a word. 
*/ static BOOL unicode_at_word_start(RE_State* state, Py_ssize_t text_pos) { - BOOL before; - BOOL after; + BOOL left; + BOOL right; - before = text_pos > 0 && unicode_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos - 1)); - after = text_pos < state->text_length && unicode_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos)); + left = unicode_word_left(state, text_pos); + right = unicode_word_right(state, text_pos); - return !before && after; + return !left && right; } -/* Checks whether the current text position is at the end of a word. */ +/* Checks whether a position is at the end of a word. */ static BOOL unicode_at_word_end(RE_State* state, Py_ssize_t text_pos) { - BOOL before; - BOOL after; + BOOL left; + BOOL right; - before = text_pos > 0 && unicode_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos - 1)); - after = text_pos < state->text_length && unicode_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos)); + left = unicode_word_left(state, text_pos); + right = unicode_word_right(state, text_pos); - return before && !after; + return left && !right; } /* Checks whether a character is a Unicode vowel. @@ -1229,7 +1429,7 @@ static BOOL unicode_at_word_end(RE_State* state, Py_ssize_t text_pos) { * Only a limited number are treated as vowels. */ Py_LOCAL_INLINE(BOOL) is_unicode_vowel(Py_UCS4 ch) { - switch (Py_UNICODE_TOLOWER(ch)) { + switch (Py_UNICODE_TOLOWER((Py_UNICODE)ch)) { case 'a': case 0xE0: case 0xE1: case 0xE2: case 'e': case 0xE8: case 0xE9: case 0xEA: case 'i': case 0xEC: case 0xED: case 0xEE: @@ -1241,14 +1441,13 @@ Py_LOCAL_INLINE(BOOL) is_unicode_vowel(Py_UCS4 ch) { } } -/* Checks whether the current text position is on a default word boundary. +/* Checks whether a position is on a default word boundary. 
* * The rules are defined here: * http://www.unicode.org/reports/tr29/#Default_Word_Boundaries */ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) { Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - void* text; int prop; int prop_m1; Py_ssize_t pos_m1; @@ -1260,63 +1459,78 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) { int prop_p1; /* Break at the start and end of the text. */ - if (text_pos <= 0 || text_pos >= state->text_length) + /* WB1 */ + if (text_pos <= 0) + return TRUE; + + /* WB2 */ + if (text_pos >= state->text_length) return TRUE; char_at = state->char_at; - text = state->text; - prop = (int)re_get_word_break(char_at(text, text_pos)); - prop_m1 = (int)re_get_word_break(char_at(text, text_pos - 1)); + prop = (int)re_get_word_break(char_at(state->text, text_pos)); + prop_m1 = (int)re_get_word_break(char_at(state->text, text_pos - 1)); /* Don't break within CRLF. */ + /* WB3 */ if (prop_m1 == RE_BREAK_CR && prop == RE_BREAK_LF) return FALSE; /* Otherwise break before and after Newlines (including CR and LF). */ + /* WB3a and WB3b */ if (prop_m1 == RE_BREAK_NEWLINE || prop_m1 == RE_BREAK_CR || prop_m1 == RE_BREAK_LF || prop == RE_BREAK_NEWLINE || prop == RE_BREAK_CR || prop == RE_BREAK_LF) return TRUE; - /* Get the property of the previous character. */ + /* WB4 */ + /* Get the property of the previous character, ignoring Format and Extend + * characters. + */ pos_m1 = text_pos - 1; prop_m1 = RE_BREAK_OTHER; while (pos_m1 >= 0) { - prop_m1 = (int)re_get_word_break(char_at(text, pos_m1)); + prop_m1 = (int)re_get_word_break(char_at(state->text, pos_m1)); if (prop_m1 != RE_BREAK_EXTEND && prop_m1 != RE_BREAK_FORMAT) break; --pos_m1; } - /* Get the property of the preceding character. */ + /* Get the property of the preceding character, ignoring Format and Extend + * characters. 
+ */ pos_m2 = pos_m1 - 1; prop_m2 = RE_BREAK_OTHER; while (pos_m2 >= 0) { - prop_m2 = (int)re_get_word_break(char_at(text, pos_m2)); + prop_m2 = (int)re_get_word_break(char_at(state->text, pos_m2)); if (prop_m2 != RE_BREAK_EXTEND && prop_m2 != RE_BREAK_FORMAT) break; --pos_m2; } - /* Get the property of the next character. */ + /* Get the property of the next character, ignoring Format and Extend + * characters. + */ pos_p0 = text_pos; prop_p0 = prop; while (pos_p0 < state->text_length) { - prop_p0 = (int)re_get_word_break(char_at(text, pos_p0)); + prop_p0 = (int)re_get_word_break(char_at(state->text, pos_p0)); if (prop_p0 != RE_BREAK_EXTEND && prop_p0 != RE_BREAK_FORMAT) break; ++pos_p0; } - /* Get the property of the following character. */ + /* Get the property of the following character, ignoring Format and Extend + * characters. + */ pos_p1 = pos_p0 + 1; prop_p1 = RE_BREAK_OTHER; while (pos_p1 < state->text_length) { - prop_p1 = (int)re_get_word_break(char_at(text, pos_p1)); + prop_p1 = (int)re_get_word_break(char_at(state->text, pos_p1)); if (prop_p1 != RE_BREAK_EXTEND && prop_p1 != RE_BREAK_FORMAT) break; @@ -1324,31 +1538,38 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) { } /* Don't break between most letters. */ + /* WB5 */ if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER) && (prop_p0 == RE_BREAK_ALETTER || prop_p0 == RE_BREAK_HEBREWLETTER)) return FALSE; /* Break between apostrophe and vowels (French, Italian). */ - if (pos_m1 >= 0 && char_at(text, pos_m1) == '\'' && - is_unicode_vowel(char_at(text, text_pos))) + /* WB5a */ + if (pos_m1 >= 0 && char_at(state->text, pos_m1) == '\'' && + is_unicode_vowel(char_at(state->text, text_pos))) return TRUE; /* Don't break letters across certain punctuation. 
*/ + /* WB6 */ if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER) && (prop_p0 == RE_BREAK_MIDLETTER || prop_p0 == RE_BREAK_MIDNUMLET || prop_p0 == RE_BREAK_SINGLEQUOTE) && (prop_p1 == RE_BREAK_ALETTER || prop_p1 == RE_BREAK_HEBREWLETTER)) return FALSE; + /* WB7 */ if ((prop_m2 == RE_BREAK_ALETTER || prop_m2 == RE_BREAK_HEBREWLETTER) && (prop_m1 == RE_BREAK_MIDLETTER || prop_m1 == RE_BREAK_MIDNUMLET || prop_m1 == RE_BREAK_SINGLEQUOTE) && (prop_p0 == RE_BREAK_ALETTER || prop_p0 == RE_BREAK_HEBREWLETTER)) return FALSE; + /* WB7a */ if (prop_m1 == RE_BREAK_HEBREWLETTER && prop_p0 == RE_BREAK_SINGLEQUOTE) return FALSE; + /* WB7b */ if (prop_m1 == RE_BREAK_HEBREWLETTER && prop_p0 == RE_BREAK_DOUBLEQUOTE && prop_p1 == RE_BREAK_HEBREWLETTER) return FALSE; + /* WB7c */ if (prop_m2 == RE_BREAK_HEBREWLETTER && prop_m1 == RE_BREAK_DOUBLEQUOTE && prop_p0 == RE_BREAK_HEBREWLETTER) return FALSE; @@ -1356,53 +1577,62 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) { /* Don't break within sequences of digits, or digits adjacent to letters * ("3a", or "A3"). */ + /* WB8 */ if (prop_m1 == RE_BREAK_NUMERIC && prop_p0 == RE_BREAK_NUMERIC) return FALSE; + /* WB9 */ if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER) && prop_p0 == RE_BREAK_NUMERIC) return FALSE; + /* WB10 */ if (prop_m1 == RE_BREAK_NUMERIC && (prop_p0 == RE_BREAK_ALETTER || prop_p0 == RE_BREAK_HEBREWLETTER)) return FALSE; /* Don't break within sequences, such as "3.2" or "3,456.789". */ + /* WB11 */ if (prop_m2 == RE_BREAK_NUMERIC && (prop_m1 == RE_BREAK_MIDNUM || prop_m1 == RE_BREAK_MIDNUMLET || prop_m1 == RE_BREAK_SINGLEQUOTE) && prop_p0 == RE_BREAK_NUMERIC) return FALSE; + /* WB12 */ if (prop_m1 == RE_BREAK_NUMERIC && (prop_p0 == RE_BREAK_MIDNUM || prop_p0 == RE_BREAK_MIDNUMLET || prop_p0 == RE_BREAK_SINGLEQUOTE) && prop_p1 == RE_BREAK_NUMERIC) return FALSE; /* Don't break between Katakana. 
*/ + /* WB13 */ if (prop_m1 == RE_BREAK_KATAKANA && prop_p0 == RE_BREAK_KATAKANA) return FALSE; /* Don't break from extenders. */ + /* WB13a */ if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER || prop_m1 == RE_BREAK_NUMERIC || prop_m1 == RE_BREAK_KATAKANA || prop_m1 == RE_BREAK_EXTENDNUMLET) && prop_p0 == RE_BREAK_EXTENDNUMLET) return FALSE; + /* WB13b */ if (prop_m1 == RE_BREAK_EXTENDNUMLET && (prop_p0 == RE_BREAK_ALETTER || prop_p0 == RE_BREAK_HEBREWLETTER || prop_p0 == RE_BREAK_NUMERIC || prop_p0 == RE_BREAK_KATAKANA)) return FALSE; /* Don't break between regional indicator symbols. */ + /* WB13c */ if (prop_m1 == RE_BREAK_REGIONALINDICATOR && prop_p0 == RE_BREAK_REGIONALINDICATOR) return FALSE; /* Otherwise, break everywhere (including around ideographs). */ + /* WB14 */ return TRUE; } -/* Checks whether the current text position is at the start/end of a word. */ +/* Checks whether a position is at the start/end of a word. */ Py_LOCAL_INLINE(BOOL) unicode_at_default_word_start_or_end(RE_State* state, Py_ssize_t text_pos, BOOL at_start) { Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - void* text; BOOL before; BOOL after; Py_UCS4 char_0; @@ -1418,14 +1648,11 @@ Py_LOCAL_INLINE(BOOL) unicode_at_default_word_start_or_end(RE_State* state, int prop_m2; char_at = state->char_at; - text = state->text; /* At the start or end of the text. 
*/ if (text_pos <= 0 || text_pos >= state->text_length) { - before = text_pos > 0 && unicode_has_property(RE_PROP_WORD, - char_at(state->text, text_pos - 1)); - after = text_pos < state->text_length && - unicode_has_property(RE_PROP_WORD, char_at(state->text, text_pos)); + before = unicode_word_left(state, text_pos); + after = unicode_word_right(state, text_pos); return before != at_start && after == at_start; } @@ -1457,10 +1684,11 @@ Py_LOCAL_INLINE(BOOL) unicode_at_default_word_start_or_end(RE_State* state, pos_m1 = text_pos - 1; prop_m1 = RE_BREAK_OTHER; while (pos_m1 >= 0) { - char_m1 = char_at(text, pos_m1); + char_m1 = char_at(state->text, pos_m1); prop_m1 = (int)re_get_word_break(char_m1); if (prop_m1 != RE_BREAK_EXTEND && prop_m1 != RE_BREAK_FORMAT) break; + --pos_m1; } @@ -1474,11 +1702,12 @@ Py_LOCAL_INLINE(BOOL) unicode_at_default_word_start_or_end(RE_State* state, pos_p1 = text_pos + 1; prop_p1 = RE_BREAK_OTHER; while (pos_p1 < state->text_length) { - char_p1 = char_at(text, pos_p1); + char_p1 = char_at(state->text, pos_p1); prop_p1 = (int)re_get_word_break(char_p1); if (prop_p1 != RE_BREAK_EXTEND && prop_p1 != RE_BREAK_FORMAT) break; - --pos_p1; + + ++pos_p1; } /* No break letters across certain punctuation. */ @@ -1489,10 +1718,11 @@ Py_LOCAL_INLINE(BOOL) unicode_at_default_word_start_or_end(RE_State* state, pos_m2 = pos_m1 - 1; prop_m2 = RE_BREAK_OTHER; while (pos_m2 >= 0) { - char_m2 = char_at(text, pos_m2); + char_m2 = char_at(state->text, pos_m2); prop_m2 = (int)re_get_word_break(char_m2); if (prop_m2 != RE_BREAK_EXTEND && prop_m1 != RE_BREAK_FORMAT) break; + --pos_m2; } @@ -1540,18 +1770,18 @@ Py_LOCAL_INLINE(BOOL) unicode_at_default_word_start_or_end(RE_State* state, return before != at_start && after == at_start; } -/* Checks whether the current text position is at the start of a word. */ +/* Checks whether a position is at the start of a word. 
*/ static BOOL unicode_at_default_word_start(RE_State* state, Py_ssize_t text_pos) { return unicode_at_default_word_start_or_end(state, text_pos, TRUE); } -/* Checks whether the current text position is at the end of a word. */ +/* Checks whether a position is at the end of a word. */ static BOOL unicode_at_default_word_end(RE_State* state, Py_ssize_t text_pos) { return unicode_at_default_word_start_or_end(state, text_pos, FALSE); } -/* Checks whether the current text position is on a grapheme boundary. +/* Checks whether a position is on a grapheme boundary. * * The rules are defined here: * http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries @@ -1559,58 +1789,72 @@ static BOOL unicode_at_default_word_end(RE_State* state, Py_ssize_t text_pos) { static BOOL unicode_at_grapheme_boundary(RE_State* state, Py_ssize_t text_pos) { Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - void* text; int prop; int prop_m1; /* Break at the start and end of the text. */ - if (text_pos <= 0 || text_pos >= state->text_length) + /* GB1 */ + if (text_pos <= 0) + return TRUE; + + /* GB2 */ + if (text_pos >= state->text_length) return TRUE; char_at = state->char_at; - text = state->text; - prop = (int)re_get_grapheme_cluster_break(char_at(text, text_pos)); - prop_m1 = (int)re_get_grapheme_cluster_break(char_at(text, text_pos - 1)); + prop = (int)re_get_grapheme_cluster_break(char_at(state->text, text_pos)); + prop_m1 = (int)re_get_grapheme_cluster_break(char_at(state->text, text_pos + - 1)); /* Don't break within CRLF. */ + /* GB3 */ if (prop_m1 == RE_GBREAK_CR && prop == RE_GBREAK_LF) return FALSE; /* Otherwise break before and after controls (including CR and LF). */ + /* GB4 and GB5 */ if (prop_m1 == RE_GBREAK_CONTROL || prop_m1 == RE_GBREAK_CR || prop_m1 == RE_GBREAK_LF || prop == RE_GBREAK_CONTROL || prop == RE_GBREAK_CR || prop == RE_GBREAK_LF) return TRUE; /* Don't break Hangul syllable sequences. 
*/ + /* GB6 */ if (prop_m1 == RE_GBREAK_L && (prop == RE_GBREAK_L || prop == RE_GBREAK_V || prop == RE_GBREAK_LV || prop == RE_GBREAK_LVT)) return FALSE; + /* GB7 */ if ((prop_m1 == RE_GBREAK_LV || prop_m1 == RE_GBREAK_V) && (prop == RE_GBREAK_V || prop == RE_GBREAK_T)) return FALSE; + /* GB8 */ if ((prop_m1 == RE_GBREAK_LVT || prop_m1 == RE_GBREAK_T) && (prop == RE_GBREAK_T)) return FALSE; /* Don't break between regional indicator symbols. */ + /* GB8a */ if (prop_m1 == RE_GBREAK_REGIONALINDICATOR && prop == RE_GBREAK_REGIONALINDICATOR) return FALSE; /* Don't break just before Extend characters. */ + /* GB9 */ if (prop == RE_GBREAK_EXTEND) return FALSE; /* Don't break before SpacingMarks, or after Prepend characters. */ + /* GB9a */ if (prop == RE_GBREAK_SPACINGMARK) return FALSE; + /* GB9b */ if (prop_m1 == RE_GBREAK_PREPEND) return FALSE; /* Otherwise, break everywhere. */ + /* GB10 */ return TRUE; } @@ -1620,25 +1864,28 @@ static BOOL unicode_is_line_sep(Py_UCS4 ch) { 0x2029; } -/* Checks whether the current text position is at the start of a line. */ +/* Checks whether a position is at the start of a line. */ static BOOL unicode_at_line_start(RE_State* state, Py_ssize_t text_pos) { Py_UCS4 ch; - if (text_pos == 0) + if (text_pos <= 0) return TRUE; ch = state->char_at(state->text, text_pos - 1); - if (ch == 0x0D) + if (ch == 0x0D) { + if (text_pos >= state->text_length) + return TRUE; + /* No line break inside CRLF. */ - return text_pos >= state->text_length || state->char_at(state->text, - text_pos) != 0x0A; + return state->char_at(state->text, text_pos) != 0x0A; + } return (0x0A <= ch && ch <= 0x0D) || ch == 0x85 || ch == 0x2028 || ch == 0x2029; } -/* Checks whether the current text position is at the end of a line. */ +/* Checks whether a position is at the end of a line. 
*/ static BOOL unicode_at_line_end(RE_State* state, Py_ssize_t text_pos) { Py_UCS4 ch; @@ -1647,29 +1894,34 @@ static BOOL unicode_at_line_end(RE_State* state, Py_ssize_t text_pos) { ch = state->char_at(state->text, text_pos); - if (ch == 0x0A) + if (ch == 0x0A) { + if (text_pos <= 0) + return TRUE; + /* No line break inside CRLF. */ - return text_pos >= 1 || state->char_at(state->text, text_pos - 1) != - 0x0D; + return state->char_at(state->text, text_pos - 1) != 0x0D; + } return (0x0A <= ch && ch <= 0x0D) || ch == 0x85 || ch == 0x2028 || ch == 0x2029; } /* Checks whether a character could be Turkic (variants of I/i). */ -static BOOL unicode_possible_turkic(Py_UCS4 ch) { +static BOOL unicode_possible_turkic(RE_LocaleInfo* locale_info, Py_UCS4 ch) { return ch == 'I' || ch == 'i' || ch == 0x0130 || ch == 0x0131; } /* Gets all the cases of a character. */ -static int unicode_all_cases(Py_UCS4 ch, Py_UCS4* codepoints) { +static int unicode_all_cases(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* + codepoints) { return re_get_all_cases(ch, codepoints); } /* Returns a character with its case folded, unless it could be Turkic * (variants of I/i). */ -static Py_UCS4 unicode_simple_case_fold(Py_UCS4 ch) { +static Py_UCS4 unicode_simple_case_fold(RE_LocaleInfo* locale_info, Py_UCS4 ch) + { /* Is it a possible Turkic character? If so, pass it through unchanged. */ if (ch == 'I' || ch == 'i' || ch == 0x0130 || ch == 0x0131) return ch; @@ -1680,7 +1932,8 @@ static Py_UCS4 unicode_simple_case_fold(Py_UCS4 ch) { /* Returns a character with its case folded, unless it could be Turkic * (variants of I/i). */ -static int unicode_full_case_fold(Py_UCS4 ch, Py_UCS4* folded) { +static int unicode_full_case_fold(RE_LocaleInfo* locale_info, Py_UCS4 ch, + Py_UCS4* folded) { /* Is it a possible Turkic character? If so, pass it through unchanged. 
*/ if (ch == 'I' || ch == 'i' || ch == 0x0130 || ch == 0x0131) { folded[0] = ch; @@ -1691,7 +1944,8 @@ static int unicode_full_case_fold(Py_UCS4 ch, Py_UCS4* folded) { } /* Gets all the case variants of Turkic 'I'. */ -static int unicode_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) { +static int unicode_all_turkic_i(RE_LocaleInfo* locale_info, Py_UCS4 ch, + Py_UCS4* cases) { int count; count = 0; @@ -1716,7 +1970,7 @@ static int unicode_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) { /* The handlers for Unicode characters. */ static RE_EncodingTable unicode_encoding = { - unicode_has_property, + unicode_has_property_wrapper, unicode_at_boundary, unicode_at_word_start, unicode_at_word_end, @@ -1901,9 +2155,37 @@ Py_LOCAL_INLINE(BOOL) in_range(Py_UCS4 lower, Py_UCS4 upper, Py_UCS4 ch) { return lower <= ch && ch <= upper; } +/* Checks whether a character is in a range, ignoring case. */ +Py_LOCAL_INLINE(BOOL) in_range_ign(RE_EncodingTable* encoding, RE_LocaleInfo* + locale_info, Py_UCS4 lower, Py_UCS4 upper, Py_UCS4 ch) { + Py_UCS4 cases[RE_MAX_CASES]; + int count; + int i; + + count = encoding->all_cases(locale_info, ch, cases); + + for (i = 0; i < count; i++) { + if (in_range(lower, upper, cases[i])) + return TRUE; + } + + return FALSE; +} + +/* Checks whether 2 characters are the same. */ +Py_LOCAL_INLINE(BOOL) same_char(Py_UCS4 ch1, Py_UCS4 ch2) { + return ch1 == ch2; +} + +/* Wrapper for calling 'same_char' via a pointer. */ +static BOOL same_char_wrapper(RE_EncodingTable* encoding, RE_LocaleInfo* + locale_info, Py_UCS4 ch1, Py_UCS4 ch2) { + return same_char(ch1, ch2); +} + /* Checks whether 2 characters are the same, ignoring case. 
*/ -static BOOL same_char_ign(RE_EncodingTable* encoding, Py_UCS4 ch1, Py_UCS4 ch2) - { +Py_LOCAL_INLINE(BOOL) same_char_ign(RE_EncodingTable* encoding, RE_LocaleInfo* + locale_info, Py_UCS4 ch1, Py_UCS4 ch2) { Py_UCS4 cases[RE_MAX_CASES]; int count; int i; @@ -1911,7 +2193,7 @@ static BOOL same_char_ign(RE_EncodingTable* encoding, Py_UCS4 ch1, Py_UCS4 ch2) if (ch1 == ch2) return TRUE; - count = encoding->all_cases(ch1, cases); + count = encoding->all_cases(locale_info, ch1, cases); for (i = 1; i < count; i++) { if (cases[i] == ch2) @@ -1921,12 +2203,49 @@ static BOOL same_char_ign(RE_EncodingTable* encoding, Py_UCS4 ch1, Py_UCS4 ch2) return FALSE; } -/* Checks whether a character has or hasn't the given property, ignoring case. - */ -Py_LOCAL_INLINE(BOOL) has_property_ign(RE_EncodingTable* encoding, RE_CODE - property, Py_UCS4 ch) { +/* Wrapper for calling 'same_char' via a pointer. */ +static BOOL same_char_ign_wrapper(RE_EncodingTable* encoding, RE_LocaleInfo* + locale_info, Py_UCS4 ch1, Py_UCS4 ch2) { + return same_char_ign(encoding, locale_info, ch1, ch2); +} + +/* Checks whether a character is anything except a newline. */ +Py_LOCAL_INLINE(BOOL) matches_ANY(RE_EncodingTable* encoding, RE_Node* node, + Py_UCS4 ch) { + return ch != '\n'; +} + +/* Checks whether a character is anything except a line separator. */ +Py_LOCAL_INLINE(BOOL) matches_ANY_U(RE_EncodingTable* encoding, RE_Node* node, + Py_UCS4 ch) { + return !encoding->is_line_sep(ch); +} + +/* Checks whether 2 characters are the same. */ +Py_LOCAL_INLINE(BOOL) matches_CHARACTER(RE_EncodingTable* encoding, + RE_LocaleInfo* locale_info, RE_Node* node, Py_UCS4 ch) { + return same_char(node->values[0], ch); +} + +/* Checks whether 2 characters are the same, ignoring case. 
*/ +Py_LOCAL_INLINE(BOOL) matches_CHARACTER_IGN(RE_EncodingTable* encoding, + RE_LocaleInfo* locale_info, RE_Node* node, Py_UCS4 ch) { + return same_char_ign(encoding, locale_info, node->values[0], ch); +} + +/* Checks whether a character has a property. */ +Py_LOCAL_INLINE(BOOL) matches_PROPERTY(RE_EncodingTable* encoding, + RE_LocaleInfo* locale_info, RE_Node* node, Py_UCS4 ch) { + return encoding->has_property(locale_info, node->values[0], ch); +} + +/* Checks whether a character has a property, ignoring case. */ +Py_LOCAL_INLINE(BOOL) matches_PROPERTY_IGN(RE_EncodingTable* encoding, + RE_LocaleInfo* locale_info, RE_Node* node, Py_UCS4 ch) { + RE_UINT32 property; RE_UINT32 prop; + property = node->values[0]; prop = property >> 16; /* We need to do special handling of case-sensitive properties according to @@ -1966,44 +2285,42 @@ Py_LOCAL_INLINE(BOOL) has_property_ign(RE_EncodingTable* encoding, RE_CODE /* We are working with Locale. */ if (property == RE_PROP_GC_LU || property == RE_PROP_GC_LL || property == RE_PROP_GC_LT) - return (isupper(ch) || islower(ch)) != 0; + return locale_isupper(locale_info, ch) || + locale_islower(locale_info, ch); else if (prop == RE_PROP_UPPERCASE || prop == RE_PROP_LOWERCASE) - return (isupper(ch) || islower(ch)) != 0; + return locale_isupper(locale_info, ch) || + locale_islower(locale_info, ch); /* The property is case-insensitive. */ - return locale_has_property(property, ch); + return locale_has_property(locale_info, property, ch); } } +/* Checks whether a character is in a range. */ +Py_LOCAL_INLINE(BOOL) matches_RANGE(RE_EncodingTable* encoding, RE_LocaleInfo* + locale_info, RE_Node* node, Py_UCS4 ch) { + return in_range(node->values[0], node->values[1], ch); +} + /* Checks whether a character is in a range, ignoring case. 
*/ -Py_LOCAL_INLINE(BOOL) in_range_ign(RE_EncodingTable* encoding, Py_UCS4 lower, - Py_UCS4 upper, Py_UCS4 ch) { - Py_UCS4 cases[RE_MAX_CASES]; - int count; - int i; - - count = encoding->all_cases(ch, cases); - - for (i = 0; i < count; i++) { - if (in_range(lower, upper, cases[i])) - return TRUE; - } - - return FALSE; +Py_LOCAL_INLINE(BOOL) matches_RANGE_IGN(RE_EncodingTable* encoding, + RE_LocaleInfo* locale_info, RE_Node* node, Py_UCS4 ch) { + return in_range_ign(encoding, locale_info, node->values[0], + node->values[1], ch); } -Py_LOCAL_INLINE(BOOL) in_set_diff(RE_EncodingTable* encoding, RE_Node* node, - Py_UCS4 ch); -Py_LOCAL_INLINE(BOOL) in_set_inter(RE_EncodingTable* encoding, RE_Node* node, - Py_UCS4 ch); -Py_LOCAL_INLINE(BOOL) in_set_sym_diff(RE_EncodingTable* encoding, RE_Node* - node, Py_UCS4 ch); -Py_LOCAL_INLINE(BOOL) in_set_union(RE_EncodingTable* encoding, RE_Node* node, - Py_UCS4 ch); +Py_LOCAL_INLINE(BOOL) in_set_diff(RE_EncodingTable* encoding, RE_LocaleInfo* + locale_info, RE_Node* node, Py_UCS4 ch); +Py_LOCAL_INLINE(BOOL) in_set_inter(RE_EncodingTable* encoding, RE_LocaleInfo* + locale_info, RE_Node* node, Py_UCS4 ch); +Py_LOCAL_INLINE(BOOL) in_set_sym_diff(RE_EncodingTable* encoding, + RE_LocaleInfo* locale_info, RE_Node* node, Py_UCS4 ch); +Py_LOCAL_INLINE(BOOL) in_set_union(RE_EncodingTable* encoding, RE_LocaleInfo* + locale_info, RE_Node* node, Py_UCS4 ch); /* Checks whether a character matches a set member. 
*/ -Py_LOCAL_INLINE(BOOL) matches_member(RE_EncodingTable* encoding, RE_Node* - member, Py_UCS4 ch) { +Py_LOCAL_INLINE(BOOL) matches_member(RE_EncodingTable* encoding, RE_LocaleInfo* + locale_info, RE_Node* member, Py_UCS4 ch) { switch (member->op) { case RE_OP_CHARACTER: /* values are: char_code */ @@ -2014,7 +2331,7 @@ Py_LOCAL_INLINE(BOOL) matches_member(RE_EncodingTable* encoding, RE_Node* /* values are: property */ TRACE(("%s %d %d\n", re_op_text[member->op], member->match, member->values[0])) - return encoding->has_property(member->values[0], ch); + return encoding->has_property(locale_info, member->values[0], ch); case RE_OP_RANGE: /* values are: lower, upper */ TRACE(("%s %d %d %d\n", re_op_text[member->op], member->match, @@ -2022,16 +2339,16 @@ Py_LOCAL_INLINE(BOOL) matches_member(RE_EncodingTable* encoding, RE_Node* return in_range(member->values[0], member->values[1], ch); case RE_OP_SET_DIFF: TRACE(("%s\n", re_op_text[member->op])) - return in_set_diff(encoding, member, ch); + return in_set_diff(encoding, locale_info, member, ch); case RE_OP_SET_INTER: TRACE(("%s\n", re_op_text[member->op])) - return in_set_inter(encoding, member, ch); + return in_set_inter(encoding, locale_info, member, ch); case RE_OP_SET_SYM_DIFF: TRACE(("%s\n", re_op_text[member->op])) - return in_set_sym_diff(encoding, member, ch); + return in_set_sym_diff(encoding, locale_info, member, ch); case RE_OP_SET_UNION: TRACE(("%s\n", re_op_text[member->op])) - return in_set_union(encoding, member, ch); + return in_set_union(encoding, locale_info, member, ch); case RE_OP_STRING: { /* values are: char_code, char_code, ... */ @@ -2051,8 +2368,9 @@ Py_LOCAL_INLINE(BOOL) matches_member(RE_EncodingTable* encoding, RE_Node* } /* Checks whether a character matches a set member, ignoring case. 
*/ -Py_LOCAL_INLINE(BOOL) matches_member_ign(RE_EncodingTable* encoding, RE_Node* - member, int case_count, Py_UCS4* cases) { +Py_LOCAL_INLINE(BOOL) matches_member_ign(RE_EncodingTable* encoding, + RE_LocaleInfo* locale_info, RE_Node* member, int case_count, Py_UCS4* cases) + { int i; for (i = 0; i < case_count; i++) { @@ -2068,7 +2386,8 @@ Py_LOCAL_INLINE(BOOL) matches_member_ign(RE_EncodingTable* encoding, RE_Node* /* values are: property */ TRACE(("%s %d %d\n", re_op_text[member->op], member->match, member->values[0])) - if (encoding->has_property(member->values[0], cases[i])) + if (encoding->has_property(locale_info, member->values[0], + cases[i])) return TRUE; break; case RE_OP_RANGE: @@ -2080,22 +2399,22 @@ Py_LOCAL_INLINE(BOOL) matches_member_ign(RE_EncodingTable* encoding, RE_Node* break; case RE_OP_SET_DIFF: TRACE(("%s\n", re_op_text[member->op])) - if (in_set_diff(encoding, member, cases[i])) + if (in_set_diff(encoding, locale_info, member, cases[i])) return TRUE; break; case RE_OP_SET_INTER: TRACE(("%s\n", re_op_text[member->op])) - if (in_set_inter(encoding, member, cases[i])) + if (in_set_inter(encoding, locale_info, member, cases[i])) return TRUE; break; case RE_OP_SET_SYM_DIFF: TRACE(("%s\n", re_op_text[member->op])) - if (in_set_sym_diff(encoding, member, cases[i])) + if (in_set_sym_diff(encoding, locale_info, member, cases[i])) return TRUE; break; case RE_OP_SET_UNION: TRACE(("%s\n", re_op_text[member->op])) - if (in_set_union(encoding, member, cases[i])) + if (in_set_union(encoding, locale_info, member, cases[i])) return TRUE; break; case RE_OP_STRING: @@ -2119,19 +2438,19 @@ Py_LOCAL_INLINE(BOOL) matches_member_ign(RE_EncodingTable* encoding, RE_Node* } /* Checks whether a character is in a set difference. 
*/ -Py_LOCAL_INLINE(BOOL) in_set_diff(RE_EncodingTable* encoding, RE_Node* node, - Py_UCS4 ch) { +Py_LOCAL_INLINE(BOOL) in_set_diff(RE_EncodingTable* encoding, RE_LocaleInfo* + locale_info, RE_Node* node, Py_UCS4 ch) { RE_Node* member; member = node->nonstring.next_2.node; - if (matches_member(encoding, member, ch) != member->match) + if (matches_member(encoding, locale_info, member, ch) != member->match) return FALSE; member = member->next_1.node; while (member) { - if (matches_member(encoding, member, ch) == member->match) + if (matches_member(encoding, locale_info, member, ch) == member->match) return FALSE; member = member->next_1.node; @@ -2141,21 +2460,21 @@ Py_LOCAL_INLINE(BOOL) in_set_diff(RE_EncodingTable* encoding, RE_Node* node, } /* Checks whether a character is in a set difference, ignoring case. */ -Py_LOCAL_INLINE(BOOL) in_set_diff_ign(RE_EncodingTable* encoding, RE_Node* - node, int case_count, Py_UCS4* cases) { +Py_LOCAL_INLINE(BOOL) in_set_diff_ign(RE_EncodingTable* encoding, + RE_LocaleInfo* locale_info, RE_Node* node, int case_count, Py_UCS4* cases) { RE_Node* member; member = node->nonstring.next_2.node; - if (matches_member_ign(encoding, member, case_count, cases) != + if (matches_member_ign(encoding, locale_info, member, case_count, cases) != member->match) return FALSE; member = member->next_1.node; while (member) { - if (matches_member_ign(encoding, member, case_count, cases) == - member->match) + if (matches_member_ign(encoding, locale_info, member, case_count, + cases) == member->match) return FALSE; member = member->next_1.node; @@ -2165,14 +2484,14 @@ Py_LOCAL_INLINE(BOOL) in_set_diff_ign(RE_EncodingTable* encoding, RE_Node* } /* Checks whether a character is in a set intersection. 
*/ -Py_LOCAL_INLINE(BOOL) in_set_inter(RE_EncodingTable* encoding, RE_Node* node, - Py_UCS4 ch) { +Py_LOCAL_INLINE(BOOL) in_set_inter(RE_EncodingTable* encoding, RE_LocaleInfo* + locale_info, RE_Node* node, Py_UCS4 ch) { RE_Node* member; member = node->nonstring.next_2.node; while (member) { - if (matches_member(encoding, member, ch) != member->match) + if (matches_member(encoding, locale_info, member, ch) != member->match) return FALSE; member = member->next_1.node; @@ -2182,15 +2501,15 @@ Py_LOCAL_INLINE(BOOL) in_set_inter(RE_EncodingTable* encoding, RE_Node* node, } /* Checks whether a character is in a set intersection, ignoring case. */ -Py_LOCAL_INLINE(BOOL) in_set_inter_ign(RE_EncodingTable* encoding, RE_Node* - node, int case_count, Py_UCS4* cases) { +Py_LOCAL_INLINE(BOOL) in_set_inter_ign(RE_EncodingTable* encoding, + RE_LocaleInfo* locale_info, RE_Node* node, int case_count, Py_UCS4* cases) { RE_Node* member; member = node->nonstring.next_2.node; while (member) { - if (matches_member_ign(encoding, member, case_count, cases) != - member->match) + if (matches_member_ign(encoding, locale_info, member, case_count, + cases) != member->match) return FALSE; member = member->next_1.node; @@ -2200,8 +2519,8 @@ Py_LOCAL_INLINE(BOOL) in_set_inter_ign(RE_EncodingTable* encoding, RE_Node* } /* Checks whether a character is in a set symmetric difference. 
*/ -Py_LOCAL_INLINE(BOOL) in_set_sym_diff(RE_EncodingTable* encoding, RE_Node* - node, Py_UCS4 ch) { +Py_LOCAL_INLINE(BOOL) in_set_sym_diff(RE_EncodingTable* encoding, + RE_LocaleInfo* locale_info, RE_Node* node, Py_UCS4 ch) { RE_Node* member; BOOL result; @@ -2210,7 +2529,7 @@ Py_LOCAL_INLINE(BOOL) in_set_sym_diff(RE_EncodingTable* encoding, RE_Node* result = FALSE; while (member) { - if (matches_member(encoding, member, ch) == member->match) + if (matches_member(encoding, locale_info, member, ch) == member->match) result = !result; member = member->next_1.node; @@ -2221,8 +2540,8 @@ Py_LOCAL_INLINE(BOOL) in_set_sym_diff(RE_EncodingTable* encoding, RE_Node* /* Checks whether a character is in a set symmetric difference, ignoring case. */ -Py_LOCAL_INLINE(BOOL) in_set_sym_diff_ign(RE_EncodingTable* encoding, RE_Node* - node, int case_count, Py_UCS4* cases) { +Py_LOCAL_INLINE(BOOL) in_set_sym_diff_ign(RE_EncodingTable* encoding, + RE_LocaleInfo* locale_info, RE_Node* node, int case_count, Py_UCS4* cases) { RE_Node* member; BOOL result; @@ -2231,8 +2550,8 @@ Py_LOCAL_INLINE(BOOL) in_set_sym_diff_ign(RE_EncodingTable* encoding, RE_Node* result = FALSE; while (member) { - if (matches_member_ign(encoding, member, case_count, cases) == - member->match) + if (matches_member_ign(encoding, locale_info, member, case_count, + cases) == member->match) result = !result; member = member->next_1.node; @@ -2242,14 +2561,14 @@ Py_LOCAL_INLINE(BOOL) in_set_sym_diff_ign(RE_EncodingTable* encoding, RE_Node* } /* Checks whether a character is in a set union. 
*/ -Py_LOCAL_INLINE(BOOL) in_set_union(RE_EncodingTable* encoding, RE_Node* node, - Py_UCS4 ch) { +Py_LOCAL_INLINE(BOOL) in_set_union(RE_EncodingTable* encoding, RE_LocaleInfo* + locale_info, RE_Node* node, Py_UCS4 ch) { RE_Node* member; member = node->nonstring.next_2.node; while (member) { - if (matches_member(encoding, member, ch) == member->match) + if (matches_member(encoding, locale_info, member, ch) == member->match) return TRUE; member = member->next_1.node; @@ -2259,15 +2578,15 @@ Py_LOCAL_INLINE(BOOL) in_set_union(RE_EncodingTable* encoding, RE_Node* node, } /* Checks whether a character is in a set union, ignoring case. */ -Py_LOCAL_INLINE(BOOL) in_set_union_ign(RE_EncodingTable* encoding, RE_Node* - node, int case_count, Py_UCS4* cases) { +Py_LOCAL_INLINE(BOOL) in_set_union_ign(RE_EncodingTable* encoding, + RE_LocaleInfo* locale_info, RE_Node* node, int case_count, Py_UCS4* cases) { RE_Node* member; member = node->nonstring.next_2.node; while (member) { - if (matches_member_ign(encoding, member, case_count, cases) == - member->match) + if (matches_member_ign(encoding, locale_info, member, case_count, + cases) == member->match) return TRUE; member = member->next_1.node; @@ -2277,47 +2596,50 @@ Py_LOCAL_INLINE(BOOL) in_set_union_ign(RE_EncodingTable* encoding, RE_Node* } /* Checks whether a character is in a set. 
*/ -Py_LOCAL_INLINE(BOOL) in_set(RE_EncodingTable* encoding, RE_Node* node, Py_UCS4 - ch) { +Py_LOCAL_INLINE(BOOL) matches_SET(RE_EncodingTable* encoding, + RE_LocaleInfo* locale_info, RE_Node* node, Py_UCS4 ch) { switch (node->op) { case RE_OP_SET_DIFF: case RE_OP_SET_DIFF_REV: - return in_set_diff(encoding, node, ch); + return in_set_diff(encoding, locale_info, node, ch); case RE_OP_SET_INTER: case RE_OP_SET_INTER_REV: - return in_set_inter(encoding, node, ch); + return in_set_inter(encoding, locale_info, node, ch); case RE_OP_SET_SYM_DIFF: case RE_OP_SET_SYM_DIFF_REV: - return in_set_sym_diff(encoding, node, ch); + return in_set_sym_diff(encoding, locale_info, node, ch); case RE_OP_SET_UNION: case RE_OP_SET_UNION_REV: - return in_set_union(encoding, node, ch); + return in_set_union(encoding, locale_info, node, ch); } return FALSE; } /* Checks whether a character is in a set, ignoring case. */ -Py_LOCAL_INLINE(BOOL) in_set_ign(RE_EncodingTable* encoding, RE_Node* node, - Py_UCS4 ch) { +Py_LOCAL_INLINE(BOOL) matches_SET_IGN(RE_EncodingTable* encoding, + RE_LocaleInfo* locale_info, RE_Node* node, Py_UCS4 ch) { Py_UCS4 cases[RE_MAX_CASES]; int case_count; - case_count = encoding->all_cases(ch, cases); + case_count = encoding->all_cases(locale_info, ch, cases); switch (node->op) { case RE_OP_SET_DIFF_IGN: case RE_OP_SET_DIFF_IGN_REV: - return in_set_diff_ign(encoding, node, case_count, cases); + return in_set_diff_ign(encoding, locale_info, node, case_count, cases); case RE_OP_SET_INTER_IGN: case RE_OP_SET_INTER_IGN_REV: - return in_set_inter_ign(encoding, node, case_count, cases); + return in_set_inter_ign(encoding, locale_info, node, case_count, + cases); case RE_OP_SET_SYM_DIFF_IGN: case RE_OP_SET_SYM_DIFF_IGN_REV: - return in_set_sym_diff_ign(encoding, node, case_count, cases); + return in_set_sym_diff_ign(encoding, locale_info, node, case_count, + cases); case RE_OP_SET_UNION_IGN: case RE_OP_SET_UNION_IGN_REV: - return in_set_union_ign(encoding, node, 
case_count, cases); + return in_set_union_ign(encoding, locale_info, node, case_count, + cases); } return FALSE; @@ -2370,6 +2692,7 @@ Py_LOCAL_INLINE(void) init_match(RE_State* state) { /* Clear the counts and cost for matching. */ memset(state->fuzzy_info.counts, 0, sizeof(state->fuzzy_info.counts)); state->fuzzy_info.total_cost = 0; + memset(state->total_fuzzy_counts, 0, sizeof(state->total_fuzzy_counts)); state->total_errors = 0; state->total_cost = 0; state->too_few_errors = FALSE; @@ -2602,11 +2925,8 @@ Py_LOCAL_INLINE(RE_Node*) top_group_return(RE_State* state) { return frame->node; } -/* Checks whether the node is a firstset. */ -Py_LOCAL_INLINE(BOOL) is_firstset(RE_Node* node) { - if (node->step != 0) - return FALSE; - +/* Checks whether a node matches only 1 character. */ +Py_LOCAL_INLINE(BOOL) node_matches_one_character(RE_Node* node) { switch (node->op) { case RE_OP_ANY: case RE_OP_ANY_ALL: @@ -2648,6 +2968,14 @@ Py_LOCAL_INLINE(BOOL) is_firstset(RE_Node* node) { } } +/* Checks whether the node is a firstset. */ +Py_LOCAL_INLINE(BOOL) is_firstset(RE_Node* node) { + if (node->step != 0) + return FALSE; + + return node_matches_one_character(node); +} + /* Locates the start node for testing ahead. */ Py_LOCAL_INLINE(RE_Node*) locate_test_start(RE_Node* node) { for (;;) { @@ -2709,12 +3037,14 @@ Py_LOCAL_INLINE(BOOL) any_case(Py_UCS4 ch, int case_count, Py_UCS4* cases) { return FALSE; } -/* Matches many ANYs. */ -Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY(RE_State* state, Py_ssize_t - text_pos, Py_ssize_t limit, BOOL match) { +/* Matches many ANYs, up to a limit. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY(RE_State* state, RE_Node* node, + Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; + RE_EncodingTable* encoding; text = state->text; + encoding = state->encoding; switch (state->charsize) { case 1: @@ -2725,7 +3055,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY(RE_State* state, Py_ssize_t text_ptr = (Py_UCS1*)text + text_pos; limit_ptr = (Py_UCS1*)text + limit; - while (text_ptr < limit_ptr && (text_ptr[0] != '\n') == match) + while (text_ptr < limit_ptr && matches_ANY(encoding, node, text_ptr[0]) + == match) ++text_ptr; text_pos = text_ptr - (Py_UCS1*)text; @@ -2739,7 +3070,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY(RE_State* state, Py_ssize_t text_ptr = (Py_UCS2*)text + text_pos; limit_ptr = (Py_UCS2*)text + limit; - while (text_ptr < limit_ptr && (text_ptr[0] != '\n') == match) + while (text_ptr < limit_ptr && matches_ANY(encoding, node, text_ptr[0]) + == match) ++text_ptr; text_pos = text_ptr - (Py_UCS2*)text; @@ -2753,7 +3085,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY(RE_State* state, Py_ssize_t text_ptr = (Py_UCS4*)text + text_pos; limit_ptr = (Py_UCS4*)text + limit; - while (text_ptr < limit_ptr && (text_ptr[0] != '\n') == match) + while (text_ptr < limit_ptr && matches_ANY(encoding, node, text_ptr[0]) + == match) ++text_ptr; text_pos = text_ptr - (Py_UCS4*)text; @@ -2764,12 +3097,14 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY(RE_State* state, Py_ssize_t return text_pos; } -/* Matches many ANYs backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_REV(RE_State* state, Py_ssize_t - text_pos, Py_ssize_t limit, BOOL match) { +/* Matches many ANYs, up to a limit, backwards. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_REV(RE_State* state, RE_Node* node, + Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; + RE_EncodingTable* encoding; text = state->text; + encoding = state->encoding; switch (state->charsize) { case 1: @@ -2780,7 +3115,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_REV(RE_State* state, Py_ssize_t text_ptr = (Py_UCS1*)text + text_pos; limit_ptr = (Py_UCS1*)text + limit; - while (text_ptr > limit_ptr && (text_ptr[-1] != '\n') == match) + while (text_ptr > limit_ptr && matches_ANY(encoding, node, + text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS1*)text; @@ -2794,7 +3130,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_REV(RE_State* state, Py_ssize_t text_ptr = (Py_UCS2*)text + text_pos; limit_ptr = (Py_UCS2*)text + limit; - while (text_ptr > limit_ptr && (text_ptr[-1] != '\n') == match) + while (text_ptr > limit_ptr && matches_ANY(encoding, node, + text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS2*)text; @@ -2808,7 +3145,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_REV(RE_State* state, Py_ssize_t text_ptr = (Py_UCS4*)text + text_pos; limit_ptr = (Py_UCS4*)text + limit; - while (text_ptr > limit_ptr && (text_ptr[-1] != '\n') == match) + while (text_ptr > limit_ptr && matches_ANY(encoding, node, + text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS4*)text; @@ -2819,14 +3157,14 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_REV(RE_State* state, Py_ssize_t return text_pos; } -/* Matches many ANY_Us. */ -Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U(RE_State* state, Py_ssize_t - text_pos, Py_ssize_t limit, BOOL match) { - BOOL (*is_line_sep)(Py_UCS4 ch); +/* Matches many ANY_Us, up to a limit. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U(RE_State* state, RE_Node* node, + Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; + RE_EncodingTable* encoding; - is_line_sep = state->encoding->is_line_sep; text = state->text; + encoding = state->encoding; switch (state->charsize) { case 1: @@ -2837,7 +3175,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U(RE_State* state, Py_ssize_t text_ptr = (Py_UCS1*)text + text_pos; limit_ptr = (Py_UCS1*)text + limit; - while (text_ptr < limit_ptr && is_line_sep(text_ptr[0]) != match) + while (text_ptr < limit_ptr && matches_ANY_U(encoding, node, + text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS1*)text; @@ -2851,7 +3190,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U(RE_State* state, Py_ssize_t text_ptr = (Py_UCS2*)text + text_pos; limit_ptr = (Py_UCS2*)text + limit; - while (text_ptr < limit_ptr && is_line_sep(text_ptr[0]) != match) + while (text_ptr < limit_ptr && matches_ANY_U(encoding, node, + text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS2*)text; @@ -2865,7 +3205,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U(RE_State* state, Py_ssize_t text_ptr = (Py_UCS4*)text + text_pos; limit_ptr = (Py_UCS4*)text + limit; - while (text_ptr < limit_ptr && is_line_sep(text_ptr[0]) != match) + while (text_ptr < limit_ptr && matches_ANY_U(encoding, node, + text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS4*)text; @@ -2876,14 +3217,14 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U(RE_State* state, Py_ssize_t return text_pos; } -/* Matches many ANY_Us backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U_REV(RE_State* state, Py_ssize_t - text_pos, Py_ssize_t limit, BOOL match) { - BOOL (*is_line_sep)(Py_UCS4 ch); +/* Matches many ANY_Us, up to a limit, backwards. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U_REV(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; + RE_EncodingTable* encoding; - is_line_sep = state->encoding->is_line_sep; text = state->text; + encoding = state->encoding; switch (state->charsize) { case 1: @@ -2894,7 +3235,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U_REV(RE_State* state, Py_ssize_t text_ptr = (Py_UCS1*)text + text_pos; limit_ptr = (Py_UCS1*)text + limit; - while (text_ptr > limit_ptr && is_line_sep(text_ptr[-1]) != match) + while (text_ptr > limit_ptr && matches_ANY_U(encoding, node, + text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS1*)text; @@ -2908,7 +3250,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U_REV(RE_State* state, Py_ssize_t text_ptr = (Py_UCS2*)text + text_pos; limit_ptr = (Py_UCS2*)text + limit; - while (text_ptr > limit_ptr && is_line_sep(text_ptr[-1]) != match) + while (text_ptr > limit_ptr && matches_ANY_U(encoding, node, + text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS2*)text; @@ -2922,7 +3265,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U_REV(RE_State* state, Py_ssize_t text_ptr = (Py_UCS4*)text + text_pos; limit_ptr = (Py_UCS4*)text + limit; - while (text_ptr > limit_ptr && is_line_sep(text_ptr[-1]) != match) + while (text_ptr > limit_ptr && matches_ANY_U(encoding, node, + text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS4*)text; @@ -2933,7 +3277,7 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U_REV(RE_State* state, Py_ssize_t return text_pos; } -/* Matches many CHARACTERs. */ +/* Matches many CHARACTERs, up to a limit. */ Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER(RE_State* state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; @@ -2991,7 +3335,7 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER(RE_State* state, RE_Node* return text_pos; } -/* Matches many CHARACTERs, ignoring case. 
*/ +/* Matches many CHARACTERs, up to a limit, ignoring case. */ Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_IGN(RE_State* state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; @@ -3000,7 +3344,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_IGN(RE_State* state, RE_Node* text = state->text; match = node->match == match; - case_count = state->encoding->all_cases(node->values[0], cases); + case_count = state->encoding->all_cases(state->locale_info, + node->values[0], cases); switch (state->charsize) { case 1: @@ -3053,7 +3398,7 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_IGN(RE_State* state, RE_Node* return text_pos; } -/* Matches many CHARACTERs backwards, ignoring case. */ +/* Matches many CHARACTERs, up to a limit, backwards, ignoring case. */ Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_IGN_REV(RE_State* state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; @@ -3062,7 +3407,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_IGN_REV(RE_State* state, text = state->text; match = node->match == match; - case_count = state->encoding->all_cases(node->values[0], cases); + case_count = state->encoding->all_cases(state->locale_info, + node->values[0], cases); switch (state->charsize) { case 1: @@ -3115,7 +3461,7 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_IGN_REV(RE_State* state, return text_pos; } -/* Matches many CHARACTERs backwards. */ +/* Matches many CHARACTERs, up to a limit, backwards. */ Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_REV(RE_State* state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; @@ -3173,17 +3519,17 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_REV(RE_State* state, RE_Node* return text_pos; } -/* Matches many PROPERTYs. */ +/* Matches many PROPERTYs, up to a limit. 
*/ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY(RE_State* state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; - BOOL (*has_property)(RE_CODE property, Py_UCS4 ch); - RE_CODE property; + RE_EncodingTable* encoding; + RE_LocaleInfo* locale_info; text = state->text; match = node->match == match; - has_property = state->encoding->has_property; - property = node->values[0]; + encoding = state->encoding; + locale_info = state->locale_info; switch (state->charsize) { case 1: @@ -3194,8 +3540,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY(RE_State* state, RE_Node* node, text_ptr = (Py_UCS1*)text + text_pos; limit_ptr = (Py_UCS1*)text + limit; - while (text_ptr < limit_ptr && has_property(property, text_ptr[0]) == - match) + while (text_ptr < limit_ptr && matches_PROPERTY(encoding, locale_info, + node, text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS1*)text; @@ -3209,8 +3555,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY(RE_State* state, RE_Node* node, text_ptr = (Py_UCS2*)text + text_pos; limit_ptr = (Py_UCS2*)text + limit; - while (text_ptr < limit_ptr && has_property(property, text_ptr[0]) == - match) + while (text_ptr < limit_ptr && matches_PROPERTY(encoding, locale_info, + node, text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS2*)text; @@ -3224,8 +3570,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY(RE_State* state, RE_Node* node, text_ptr = (Py_UCS4*)text + text_pos; limit_ptr = (Py_UCS4*)text + limit; - while (text_ptr < limit_ptr && has_property(property, text_ptr[0]) == - match) + while (text_ptr < limit_ptr && matches_PROPERTY(encoding, locale_info, + node, text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS4*)text; @@ -3236,17 +3582,17 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY(RE_State* state, RE_Node* node, return text_pos; } -/* Matches many PROPERTYs, ignoring case. */ +/* Matches many PROPERTYs, up to a limit, ignoring case. 
*/ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_IGN(RE_State* state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; RE_EncodingTable* encoding; - RE_CODE property; + RE_LocaleInfo* locale_info; text = state->text; match = node->match == match; encoding = state->encoding; - property = node->values[0]; + locale_info = state->locale_info; switch (state->charsize) { case 1: @@ -3257,8 +3603,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_IGN(RE_State* state, RE_Node* text_ptr = (Py_UCS1*)text + text_pos; limit_ptr = (Py_UCS1*)text + limit; - while (text_ptr < limit_ptr && has_property_ign(encoding, property, - text_ptr[0]) == match) + while (text_ptr < limit_ptr && matches_PROPERTY_IGN(encoding, + locale_info, node, text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS1*)text; @@ -3272,8 +3618,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_IGN(RE_State* state, RE_Node* text_ptr = (Py_UCS2*)text + text_pos; limit_ptr = (Py_UCS2*)text + limit; - while (text_ptr < limit_ptr && has_property_ign(encoding, property, - text_ptr[0]) == match) + while (text_ptr < limit_ptr && matches_PROPERTY_IGN(encoding, + locale_info, node, text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS2*)text; @@ -3287,8 +3633,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_IGN(RE_State* state, RE_Node* text_ptr = (Py_UCS4*)text + text_pos; limit_ptr = (Py_UCS4*)text + limit; - while (text_ptr < limit_ptr && has_property_ign(encoding, property, - text_ptr[0]) == match) + while (text_ptr < limit_ptr && matches_PROPERTY_IGN(encoding, + locale_info, node, text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS4*)text; @@ -3299,17 +3645,17 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_IGN(RE_State* state, RE_Node* return text_pos; } -/* Matches many PROPERTYs backwards, ignoring case. */ +/* Matches many PROPERTYs, up to a limit, backwards, ignoring case. 
*/ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_IGN_REV(RE_State* state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; RE_EncodingTable* encoding; - RE_CODE property; + RE_LocaleInfo* locale_info; text = state->text; match = node->match == match; encoding = state->encoding; - property = node->values[0]; + locale_info = state->locale_info; switch (state->charsize) { case 1: @@ -3320,8 +3666,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_IGN_REV(RE_State* state, text_ptr = (Py_UCS1*)text + text_pos; limit_ptr = (Py_UCS1*)text + limit; - while (text_ptr > limit_ptr && has_property_ign(encoding, property, - text_ptr[-1]) == match) + while (text_ptr > limit_ptr && matches_PROPERTY_IGN(encoding, + locale_info, node, text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS1*)text; @@ -3335,8 +3681,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_IGN_REV(RE_State* state, text_ptr = (Py_UCS2*)text + text_pos; limit_ptr = (Py_UCS2*)text + limit; - while (text_ptr > limit_ptr && has_property_ign(encoding, property, - text_ptr[-1]) == match) + while (text_ptr > limit_ptr && matches_PROPERTY_IGN(encoding, + locale_info, node, text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS2*)text; @@ -3350,8 +3696,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_IGN_REV(RE_State* state, text_ptr = (Py_UCS4*)text + text_pos; limit_ptr = (Py_UCS4*)text + limit; - while (text_ptr > limit_ptr && has_property_ign(encoding, property, - text_ptr[-1]) == match) + while (text_ptr > limit_ptr && matches_PROPERTY_IGN(encoding, + locale_info, node, text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS4*)text; @@ -3362,17 +3708,17 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_IGN_REV(RE_State* state, return text_pos; } -/* Matches many PROPERTYs backwards. */ +/* Matches many PROPERTYs, up to a limit, backwards. 
*/ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_REV(RE_State* state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; - BOOL (*has_property)(RE_CODE property, Py_UCS4 ch); - RE_CODE property; + RE_EncodingTable* encoding; + RE_LocaleInfo* locale_info; text = state->text; match = node->match == match; - has_property = state->encoding->has_property; - property = node->values[0]; + encoding = state->encoding; + locale_info = state->locale_info; switch (state->charsize) { case 1: @@ -3383,8 +3729,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_REV(RE_State* state, RE_Node* text_ptr = (Py_UCS1*)text + text_pos; limit_ptr = (Py_UCS1*)text + limit; - while (text_ptr > limit_ptr && has_property(property, text_ptr[-1]) == - match) + while (text_ptr > limit_ptr && matches_PROPERTY(encoding, locale_info, + node, text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS1*)text; @@ -3398,8 +3744,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_REV(RE_State* state, RE_Node* text_ptr = (Py_UCS2*)text + text_pos; limit_ptr = (Py_UCS2*)text + limit; - while (text_ptr > limit_ptr && has_property(property, text_ptr[-1]) == - match) + while (text_ptr > limit_ptr && matches_PROPERTY(encoding, locale_info, + node, text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS2*)text; @@ -3413,8 +3759,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_REV(RE_State* state, RE_Node* text_ptr = (Py_UCS4*)text + text_pos; limit_ptr = (Py_UCS4*)text + limit; - while (text_ptr > limit_ptr && has_property(property, text_ptr[-1]) == - match) + while (text_ptr > limit_ptr && matches_PROPERTY(encoding, locale_info, + node, text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS4*)text; @@ -3425,17 +3771,17 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_REV(RE_State* state, RE_Node* return text_pos; } -/* Matches many RANGEs. */ +/* Matches many RANGEs, up to a limit. 
*/ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE(RE_State* state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; - Py_UCS4 lower; - Py_UCS4 upper; + RE_EncodingTable* encoding; + RE_LocaleInfo* locale_info; text = state->text; match = node->match == match; - lower = node->values[0]; - upper = node->values[1]; + encoding = state->encoding; + locale_info = state->locale_info; switch (state->charsize) { case 1: @@ -3446,8 +3792,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE(RE_State* state, RE_Node* node, text_ptr = (Py_UCS1*)text + text_pos; limit_ptr = (Py_UCS1*)text + limit; - while (text_ptr < limit_ptr && in_range(lower, upper, text_ptr[0]) == - match) + while (text_ptr < limit_ptr && matches_RANGE(encoding, locale_info, + node, text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS1*)text; @@ -3461,8 +3807,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE(RE_State* state, RE_Node* node, text_ptr = (Py_UCS2*)text + text_pos; limit_ptr = (Py_UCS2*)text + limit; - while (text_ptr < limit_ptr && in_range(lower, upper, text_ptr[0]) == - match) + while (text_ptr < limit_ptr && matches_RANGE(encoding, locale_info, + node, text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS2*)text; @@ -3476,8 +3822,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE(RE_State* state, RE_Node* node, text_ptr = (Py_UCS4*)text + text_pos; limit_ptr = (Py_UCS4*)text + limit; - while (text_ptr < limit_ptr && in_range(lower, upper, text_ptr[0]) == - match) + while (text_ptr < limit_ptr && matches_RANGE(encoding, locale_info, + node, text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS4*)text; @@ -3488,19 +3834,17 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE(RE_State* state, RE_Node* node, return text_pos; } -/* Matches many RANGEs, ignoring case. */ +/* Matches many RANGEs, up to a limit, ignoring case. 
*/ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_IGN(RE_State* state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; RE_EncodingTable* encoding; - Py_UCS4 lower; - Py_UCS4 upper; + RE_LocaleInfo* locale_info; text = state->text; - encoding = state->encoding; match = node->match == match; - lower = node->values[0]; - upper = node->values[1]; + encoding = state->encoding; + locale_info = state->locale_info; switch (state->charsize) { case 1: @@ -3511,8 +3855,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_IGN(RE_State* state, RE_Node* text_ptr = (Py_UCS1*)text + text_pos; limit_ptr = (Py_UCS1*)text + limit; - while (text_ptr < limit_ptr && in_range_ign(encoding, lower, upper, - text_ptr[0]) == match) + while (text_ptr < limit_ptr && matches_RANGE_IGN(encoding, locale_info, + node, text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS1*)text; @@ -3526,8 +3870,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_IGN(RE_State* state, RE_Node* text_ptr = (Py_UCS2*)text + text_pos; limit_ptr = (Py_UCS2*)text + limit; - while (text_ptr < limit_ptr && in_range_ign(encoding, lower, upper, - text_ptr[0]) == match) + while (text_ptr < limit_ptr && matches_RANGE_IGN(encoding, locale_info, + node, text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS2*)text; @@ -3541,8 +3885,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_IGN(RE_State* state, RE_Node* text_ptr = (Py_UCS4*)text + text_pos; limit_ptr = (Py_UCS4*)text + limit; - while (text_ptr < limit_ptr && in_range_ign(encoding, lower, upper, - text_ptr[0]) == match) + while (text_ptr < limit_ptr && matches_RANGE_IGN(encoding, locale_info, + node, text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS4*)text; @@ -3553,19 +3897,17 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_IGN(RE_State* state, RE_Node* return text_pos; } -/* Matches many RANGEs backwards, ignoring case. */ +/* Matches many RANGEs, up to a limit, backwards, ignoring case. 
*/ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_IGN_REV(RE_State* state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; RE_EncodingTable* encoding; - Py_UCS4 lower; - Py_UCS4 upper; + RE_LocaleInfo* locale_info; text = state->text; - encoding = state->encoding; match = node->match == match; - lower = node->values[0]; - upper = node->values[1]; + encoding = state->encoding; + locale_info = state->locale_info; switch (state->charsize) { case 1: @@ -3576,8 +3918,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_IGN_REV(RE_State* state, RE_Node* text_ptr = (Py_UCS1*)text + text_pos; limit_ptr = (Py_UCS1*)text + limit; - while (text_ptr > limit_ptr && in_range_ign(encoding, lower, upper, - text_ptr[-1]) == match) + while (text_ptr > limit_ptr && matches_RANGE_IGN(encoding, locale_info, + node, text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS1*)text; @@ -3591,8 +3933,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_IGN_REV(RE_State* state, RE_Node* text_ptr = (Py_UCS2*)text + text_pos; limit_ptr = (Py_UCS2*)text + limit; - while (text_ptr > limit_ptr && in_range_ign(encoding, lower, upper, - text_ptr[-1]) == match) + while (text_ptr > limit_ptr && matches_RANGE_IGN(encoding, locale_info, + node, text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS2*)text; @@ -3606,8 +3948,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_IGN_REV(RE_State* state, RE_Node* text_ptr = (Py_UCS4*)text + text_pos; limit_ptr = (Py_UCS4*)text + limit; - while (text_ptr > limit_ptr && in_range_ign(encoding, lower, upper, - text_ptr[-1]) == match) + while (text_ptr > limit_ptr && matches_RANGE_IGN(encoding, locale_info, + node, text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS4*)text; @@ -3618,17 +3960,17 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_IGN_REV(RE_State* state, RE_Node* return text_pos; } -/* Matches many RANGEs backwards. */ +/* Matches many RANGEs, up to a limit, backwards. 
*/ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_REV(RE_State* state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; - Py_UCS4 lower; - Py_UCS4 upper; + RE_EncodingTable* encoding; + RE_LocaleInfo* locale_info; text = state->text; match = node->match == match; - lower = node->values[0]; - upper = node->values[1]; + encoding = state->encoding; + locale_info = state->locale_info; switch (state->charsize) { case 1: @@ -3639,8 +3981,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_REV(RE_State* state, RE_Node* text_ptr = (Py_UCS1*)text + text_pos; limit_ptr = (Py_UCS1*)text + limit; - while (text_ptr > limit_ptr && in_range(lower, upper, text_ptr[-1]) == - match) + while (text_ptr > limit_ptr && matches_RANGE(encoding, locale_info, + node, text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS1*)text; @@ -3654,8 +3996,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_REV(RE_State* state, RE_Node* text_ptr = (Py_UCS2*)text + text_pos; limit_ptr = (Py_UCS2*)text + limit; - while (text_ptr > limit_ptr && in_range(lower, upper, text_ptr[-1]) == - match) + while (text_ptr > limit_ptr && matches_RANGE(encoding, locale_info, + node, text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS2*)text; @@ -3669,8 +4011,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_REV(RE_State* state, RE_Node* text_ptr = (Py_UCS4*)text + text_pos; limit_ptr = (Py_UCS4*)text + limit; - while (text_ptr > limit_ptr && in_range(lower, upper, text_ptr[-1]) == - match) + while (text_ptr > limit_ptr && matches_RANGE(encoding, locale_info, + node, text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS4*)text; @@ -3681,15 +4023,17 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_REV(RE_State* state, RE_Node* return text_pos; } -/* Matches many SETs. */ +/* Matches many SETs, up to a limit. 
*/ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET(RE_State* state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; RE_EncodingTable* encoding; + RE_LocaleInfo* locale_info; text = state->text; match = node->match == match; encoding = state->encoding; + locale_info = state->locale_info; switch (state->charsize) { case 1: @@ -3700,8 +4044,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET(RE_State* state, RE_Node* node, text_ptr = (Py_UCS1*)text + text_pos; limit_ptr = (Py_UCS1*)text + limit; - while (text_ptr < limit_ptr && in_set(encoding, node, text_ptr[0]) == - match) + while (text_ptr < limit_ptr && matches_SET(encoding, locale_info, node, + text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS1*)text; @@ -3715,8 +4059,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET(RE_State* state, RE_Node* node, text_ptr = (Py_UCS2*)text + text_pos; limit_ptr = (Py_UCS2*)text + limit; - while (text_ptr < limit_ptr && in_set(encoding, node, text_ptr[0]) == - match) + while (text_ptr < limit_ptr && matches_SET(encoding, locale_info, node, + text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS2*)text; @@ -3730,8 +4074,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET(RE_State* state, RE_Node* node, text_ptr = (Py_UCS4*)text + text_pos; limit_ptr = (Py_UCS4*)text + limit; - while (text_ptr < limit_ptr && in_set(encoding, node, text_ptr[0]) == - match) + while (text_ptr < limit_ptr && matches_SET(encoding, locale_info, node, + text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS4*)text; @@ -3742,15 +4086,17 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET(RE_State* state, RE_Node* node, return text_pos; } -/* Matches many SETs, ignoring case. */ +/* Matches many SETs, up to a limit, ignoring case. 
*/ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_IGN(RE_State* state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; RE_EncodingTable* encoding; + RE_LocaleInfo* locale_info; text = state->text; match = node->match == match; encoding = state->encoding; + locale_info = state->locale_info; switch (state->charsize) { case 1: @@ -3761,8 +4107,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_IGN(RE_State* state, RE_Node* node, text_ptr = (Py_UCS1*)text + text_pos; limit_ptr = (Py_UCS1*)text + limit; - while (text_ptr < limit_ptr && in_set_ign(encoding, node, text_ptr[0]) - == match) + while (text_ptr < limit_ptr && matches_SET_IGN(encoding, locale_info, + node, text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS1*)text; @@ -3776,8 +4122,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_IGN(RE_State* state, RE_Node* node, text_ptr = (Py_UCS2*)text + text_pos; limit_ptr = (Py_UCS2*)text + limit; - while (text_ptr < limit_ptr && in_set_ign(encoding, node, text_ptr[0]) - == match) + while (text_ptr < limit_ptr && matches_SET_IGN(encoding, locale_info, + node, text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS2*)text; @@ -3791,8 +4137,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_IGN(RE_State* state, RE_Node* node, text_ptr = (Py_UCS4*)text + text_pos; limit_ptr = (Py_UCS4*)text + limit; - while (text_ptr < limit_ptr && in_set_ign(encoding, node, text_ptr[0]) - == match) + while (text_ptr < limit_ptr && matches_SET_IGN(encoding, locale_info, + node, text_ptr[0]) == match) ++text_ptr; text_pos = text_ptr - (Py_UCS4*)text; @@ -3803,15 +4149,17 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_IGN(RE_State* state, RE_Node* node, return text_pos; } -/* Matches many SETs backwards, ignoring case. */ +/* Matches many SETs, up to a limit, backwards, ignoring case. 
*/ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_IGN_REV(RE_State* state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; RE_EncodingTable* encoding; + RE_LocaleInfo* locale_info; text = state->text; match = node->match == match; encoding = state->encoding; + locale_info = state->locale_info; switch (state->charsize) { case 1: @@ -3822,8 +4170,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_IGN_REV(RE_State* state, RE_Node* text_ptr = (Py_UCS1*)text + text_pos; limit_ptr = (Py_UCS1*)text + limit; - while (text_ptr > limit_ptr && in_set_ign(encoding, node, text_ptr[-1]) - == match) + while (text_ptr > limit_ptr && matches_SET_IGN(encoding, locale_info, + node, text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS1*)text; @@ -3837,8 +4185,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_IGN_REV(RE_State* state, RE_Node* text_ptr = (Py_UCS2*)text + text_pos; limit_ptr = (Py_UCS2*)text + limit; - while (text_ptr > limit_ptr && in_set_ign(encoding, node, text_ptr[-1]) - == match) + while (text_ptr > limit_ptr && matches_SET_IGN(encoding, locale_info, + node, text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS2*)text; @@ -3852,8 +4200,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_IGN_REV(RE_State* state, RE_Node* text_ptr = (Py_UCS4*)text + text_pos; limit_ptr = (Py_UCS4*)text + limit; - while (text_ptr > limit_ptr && in_set_ign(encoding, node, text_ptr[-1]) - == match) + while (text_ptr > limit_ptr && matches_SET_IGN(encoding, locale_info, + node, text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS4*)text; @@ -3864,15 +4212,17 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_IGN_REV(RE_State* state, RE_Node* return text_pos; } -/* Matches many SETs backwards. */ +/* Matches many SETs, up to a limit, backwards. 
*/ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_REV(RE_State* state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { void* text; RE_EncodingTable* encoding; + RE_LocaleInfo* locale_info; text = state->text; match = node->match == match; encoding = state->encoding; + locale_info = state->locale_info; switch (state->charsize) { case 1: @@ -3883,8 +4233,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_REV(RE_State* state, RE_Node* node, text_ptr = (Py_UCS1*)text + text_pos; limit_ptr = (Py_UCS1*)text + limit; - while (text_ptr > limit_ptr && in_set(encoding, node, text_ptr[-1]) == - match) + while (text_ptr > limit_ptr && matches_SET(encoding, locale_info, node, + text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS1*)text; @@ -3898,8 +4248,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_REV(RE_State* state, RE_Node* node, text_ptr = (Py_UCS2*)text + text_pos; limit_ptr = (Py_UCS2*)text + limit; - while (text_ptr > limit_ptr && in_set(encoding, node, text_ptr[-1]) == - match) + while (text_ptr > limit_ptr && matches_SET(encoding, locale_info, node, + text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS2*)text; @@ -3913,8 +4263,8 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_REV(RE_State* state, RE_Node* node, text_ptr = (Py_UCS4*)text + text_pos; limit_ptr = (Py_UCS4*)text + limit; - while (text_ptr > limit_ptr && in_set(encoding, node, text_ptr[-1]) == - match) + while (text_ptr > limit_ptr && matches_SET(encoding, locale_info, node, + text_ptr[-1]) == match) --text_ptr; text_pos = text_ptr - (Py_UCS4*)text; @@ -3927,608 +4277,858 @@ Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_REV(RE_State* state, RE_Node* node, /* Counts a repeated character pattern. 
*/ Py_LOCAL_INLINE(size_t) count_one(RE_State* state, RE_Node* node, Py_ssize_t - text_pos, size_t max_count) { + text_pos, size_t max_count, BOOL* is_partial) { + size_t count; + + *is_partial = FALSE; + if (max_count < 1) return 0; switch (node->op) { case RE_OP_ANY: - max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - return match_many_ANY(state, text_pos, text_pos + max_count, TRUE) - - text_pos; + count = (size_t)(match_many_ANY(state, node, text_pos, text_pos + + (Py_ssize_t)count, TRUE) - text_pos); + + *is_partial = count == (size_t)(state->text_length - text_pos) && count + < max_count && state->partial_side == RE_PARTIAL_RIGHT; + + return count; case RE_OP_ANY_ALL: - max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - return max_count; + *is_partial = count == (size_t)(state->text_length - text_pos) && count + < max_count && state->partial_side == RE_PARTIAL_RIGHT; + + return count; case RE_OP_ANY_ALL_REV: - max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - return max_count; + *is_partial = count == (size_t)(text_pos) && count < max_count && + state->partial_side == RE_PARTIAL_LEFT; + + return count; case RE_OP_ANY_REV: - max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - return text_pos - match_many_ANY_REV(state, text_pos, text_pos - - max_count, TRUE); + count = (size_t)(text_pos - match_many_ANY_REV(state, node, text_pos, + text_pos - (Py_ssize_t)count, TRUE)); + + *is_partial = count == (size_t)(text_pos) && count < max_count && + state->partial_side == RE_PARTIAL_LEFT; + + return count; case RE_OP_ANY_U: - max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + 
count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - return match_many_ANY_U(state, text_pos, text_pos + max_count, TRUE) - - text_pos; + count = (size_t)(match_many_ANY_U(state, node, text_pos, text_pos + + (Py_ssize_t)count, TRUE) - text_pos); + + *is_partial = count == (size_t)(state->text_length - text_pos) && count + < max_count && state->partial_side == RE_PARTIAL_RIGHT; + + return count; case RE_OP_ANY_U_REV: - max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - return text_pos - match_many_ANY_U_REV(state, text_pos, text_pos - - max_count, TRUE); + count = (size_t)(text_pos - match_many_ANY_U_REV(state, node, text_pos, + text_pos - (Py_ssize_t)count, TRUE)); + + *is_partial = count == (size_t)(text_pos) && count < max_count && + state->partial_side == RE_PARTIAL_LEFT; + + return count; case RE_OP_CHARACTER: - max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - return match_many_CHARACTER(state, node, text_pos, text_pos + - max_count, TRUE) - text_pos; + count = (size_t)(match_many_CHARACTER(state, node, text_pos, text_pos + + (Py_ssize_t)count, TRUE) - text_pos); + + *is_partial = count == (size_t)(state->text_length - text_pos) && count + < max_count && state->partial_side == RE_PARTIAL_RIGHT; + + return count; case RE_OP_CHARACTER_IGN: - max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - return match_many_CHARACTER_IGN(state, node, text_pos, text_pos + - max_count, TRUE) - text_pos; + count = (size_t)(match_many_CHARACTER_IGN(state, node, text_pos, + text_pos + (Py_ssize_t)count, TRUE) - text_pos); + + *is_partial = count == (size_t)(state->text_length - text_pos) && count + < max_count && state->partial_side == RE_PARTIAL_RIGHT; + + return count; case 
RE_OP_CHARACTER_IGN_REV: - max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - return text_pos - match_many_CHARACTER_IGN_REV(state, node, text_pos, - text_pos - max_count, TRUE); + count = (size_t)(text_pos - match_many_CHARACTER_IGN_REV(state, node, + text_pos, text_pos - (Py_ssize_t)count, TRUE)); + + *is_partial = count == (size_t)(text_pos) && count < max_count && + state->partial_side == RE_PARTIAL_LEFT; + + return count; case RE_OP_CHARACTER_REV: - max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - return text_pos - match_many_CHARACTER_REV(state, node, text_pos, - text_pos - max_count, TRUE); + count = (size_t)(text_pos - match_many_CHARACTER_REV(state, node, + text_pos, text_pos - (Py_ssize_t)count, TRUE)); + + *is_partial = count == (size_t)(text_pos) && count < max_count && + state->partial_side == RE_PARTIAL_LEFT; + + return count; case RE_OP_PROPERTY: - max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - return match_many_PROPERTY(state, node, text_pos, text_pos + max_count, - TRUE) - text_pos; + count = (size_t)(match_many_PROPERTY(state, node, text_pos, text_pos + + (Py_ssize_t)count, TRUE) - text_pos); + + *is_partial = count == (size_t)(state->text_length - text_pos) && count + < max_count && state->partial_side == RE_PARTIAL_RIGHT; + + return count; case RE_OP_PROPERTY_IGN: - max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - return match_many_PROPERTY_IGN(state, node, text_pos, text_pos + - max_count, TRUE) - text_pos; + count = (size_t)(match_many_PROPERTY_IGN(state, node, text_pos, + text_pos + (Py_ssize_t)count, TRUE) - text_pos); + + *is_partial = count == 
(size_t)(state->text_length - text_pos) && count + < max_count && state->partial_side == RE_PARTIAL_RIGHT; + + return count; case RE_OP_PROPERTY_IGN_REV: - max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - return text_pos - match_many_PROPERTY_IGN_REV(state, node, text_pos, - text_pos - max_count, TRUE); + count = (size_t)(text_pos - match_many_PROPERTY_IGN_REV(state, node, + text_pos, text_pos - (Py_ssize_t)count, TRUE)); + + *is_partial = count == (size_t)(text_pos) && count < max_count && + state->partial_side == RE_PARTIAL_LEFT; + + return count; case RE_OP_PROPERTY_REV: - max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - return text_pos - match_many_PROPERTY_REV(state, node, text_pos, - text_pos - max_count, TRUE); + count = (size_t)(text_pos - match_many_PROPERTY_REV(state, node, + text_pos, text_pos - (Py_ssize_t)count, TRUE)); + + *is_partial = count == (size_t)(text_pos) && count < max_count && + state->partial_side == RE_PARTIAL_LEFT; + + return count; case RE_OP_RANGE: - max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - return match_many_RANGE(state, node, text_pos, text_pos + max_count, - TRUE) - text_pos; + count = (size_t)(match_many_RANGE(state, node, text_pos, text_pos + + (Py_ssize_t)count, TRUE) - text_pos); + + *is_partial = count == (size_t)(state->text_length - text_pos) && count + < max_count && state->partial_side == RE_PARTIAL_RIGHT; + + return count; case RE_OP_RANGE_IGN: - max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - return match_many_RANGE_IGN(state, node, text_pos, text_pos + - max_count, TRUE) - text_pos; + count = (size_t)(match_many_RANGE_IGN(state, 
node, text_pos, text_pos + + (Py_ssize_t)count, TRUE) - text_pos); + + *is_partial = count == (size_t)(state->text_length - text_pos) && count + < max_count && state->partial_side == RE_PARTIAL_RIGHT; + + return count; case RE_OP_RANGE_IGN_REV: - max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - return text_pos - match_many_RANGE_IGN_REV(state, node, text_pos, - text_pos - max_count, TRUE); + count = (size_t)(text_pos - match_many_RANGE_IGN_REV(state, node, + text_pos, text_pos - (Py_ssize_t)count, TRUE)); + + *is_partial = count == (size_t)(text_pos) && count < max_count && + state->partial_side == RE_PARTIAL_LEFT; + + return count; case RE_OP_RANGE_REV: - max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - return text_pos - match_many_RANGE_REV(state, node, text_pos, text_pos - - max_count, TRUE); + count = (size_t)(text_pos - match_many_RANGE_REV(state, node, text_pos, + text_pos - (Py_ssize_t)count, TRUE)); + + *is_partial = count == (size_t)(text_pos) && count < max_count && + state->partial_side == RE_PARTIAL_LEFT; + + return count; case RE_OP_SET_DIFF: case RE_OP_SET_INTER: case RE_OP_SET_SYM_DIFF: case RE_OP_SET_UNION: - max_count = RE_MIN(max_count, (size_t)(state->slice_end - text_pos)); + count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - return match_many_SET(state, node, text_pos, text_pos + max_count, - TRUE) - text_pos; + count = (size_t)(match_many_SET(state, node, text_pos, text_pos + + (Py_ssize_t)count, TRUE) - text_pos); + + *is_partial = count == (size_t)(state->text_length - text_pos) && count + < max_count && state->partial_side == RE_PARTIAL_RIGHT; + + return count; case RE_OP_SET_DIFF_IGN: case RE_OP_SET_INTER_IGN: case RE_OP_SET_SYM_DIFF_IGN: case RE_OP_SET_UNION_IGN: - max_count = RE_MIN(max_count, (size_t)(state->slice_end - 
text_pos)); + count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - return match_many_SET_IGN(state, node, text_pos, text_pos + max_count, - TRUE) - text_pos; + count = (size_t)(match_many_SET_IGN(state, node, text_pos, text_pos + + (Py_ssize_t)count, TRUE) - text_pos); + + *is_partial = count == (size_t)(state->text_length - text_pos) && count + < max_count && state->partial_side == RE_PARTIAL_RIGHT; + + return count; case RE_OP_SET_DIFF_IGN_REV: case RE_OP_SET_INTER_IGN_REV: case RE_OP_SET_SYM_DIFF_IGN_REV: case RE_OP_SET_UNION_IGN_REV: - max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - return text_pos - match_many_SET_IGN_REV(state, node, text_pos, - text_pos - max_count, TRUE); + count = (size_t)(text_pos - match_many_SET_IGN_REV(state, node, + text_pos, text_pos - (Py_ssize_t)count, TRUE)); + + *is_partial = count == (size_t)(text_pos) && count < max_count && + state->partial_side == RE_PARTIAL_LEFT; + + return count; case RE_OP_SET_DIFF_REV: case RE_OP_SET_INTER_REV: case RE_OP_SET_SYM_DIFF_REV: case RE_OP_SET_UNION_REV: - max_count = RE_MIN(max_count, (size_t)(text_pos - state->slice_start)); + count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - return text_pos - match_many_SET_REV(state, node, text_pos, text_pos - - max_count, TRUE); + count = (size_t)(text_pos - match_many_SET_REV(state, node, text_pos, + text_pos - (Py_ssize_t)count, TRUE)); + + *is_partial = count == (size_t)(text_pos) && count < max_count && + state->partial_side == RE_PARTIAL_LEFT; + + return count; } return 0; } -/* Tries to match a character pattern. 
*/ -Py_LOCAL_INLINE(BOOL) match_one(RE_State* state, RE_Node* node, Py_ssize_t - text_pos) { - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - void* text; - - char_at = state->char_at; - text = state->text; - - switch (node->op) { - case RE_OP_ANY: - return text_pos < state->slice_end && char_at(text, text_pos) != '\n'; - case RE_OP_ANY_ALL: - return text_pos < state->slice_end; - case RE_OP_ANY_ALL_REV: - return text_pos > state->slice_start; - case RE_OP_ANY_REV: - return text_pos > state->slice_start && char_at(text, text_pos - 1) != - '\n'; - case RE_OP_ANY_U: - return text_pos < state->slice_end && - !state->encoding->is_line_sep(char_at(text, text_pos)); - case RE_OP_ANY_U_REV: - return text_pos > state->slice_start && - !state->encoding->is_line_sep(char_at(text, text_pos - 1)); - case RE_OP_CHARACTER: - return text_pos < state->slice_end && (char_at(text, text_pos) == - node->values[0]) == node->match; - case RE_OP_CHARACTER_IGN: - return text_pos < state->slice_end && same_char_ign(state->encoding, - char_at(text, text_pos), node->values[0]) == node->match; - case RE_OP_CHARACTER_IGN_REV: - return text_pos > state->slice_start && same_char_ign(state->encoding, - char_at(text, text_pos - 1), node->values[0]) == node->match; - case RE_OP_CHARACTER_REV: - return text_pos > state->slice_start && (char_at(text, text_pos - 1) == - node->values[0]) == node->match; - case RE_OP_PROPERTY: - return text_pos < state->slice_end && - state->encoding->has_property(node->values[0], char_at(text, - text_pos)) == node->match; - case RE_OP_PROPERTY_IGN: - return text_pos < state->slice_end && has_property_ign(state->encoding, - node->values[0], char_at(text, text_pos)) == node->match; - case RE_OP_PROPERTY_IGN_REV: - return text_pos > state->slice_start && - has_property_ign(state->encoding, node->values[0], char_at(text, - text_pos - 1)) == node->match; - case RE_OP_PROPERTY_REV: - return text_pos > state->slice_start && - state->encoding->has_property(node->values[0], 
char_at(text, text_pos - - 1)) == node->match; - case RE_OP_RANGE: - return text_pos < state->slice_end && in_range(node->values[0], - node->values[1], char_at(text, text_pos)) == node->match; - case RE_OP_RANGE_IGN: - return text_pos < state->slice_end && in_range_ign(state->encoding, - node->values[0], node->values[1], char_at(text, text_pos)) == - node->match; - case RE_OP_RANGE_IGN_REV: - return text_pos > state->slice_start && in_range_ign(state->encoding, - node->values[0], node->values[1], char_at(text, text_pos - 1)) == - node->match; - case RE_OP_RANGE_REV: - return text_pos > state->slice_start && in_range(node->values[0], - node->values[1], char_at(text, text_pos - 1)) == node->match; - case RE_OP_SET_DIFF: - case RE_OP_SET_INTER: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_UNION: - return text_pos < state->slice_end && in_set(state->encoding, node, - char_at(text, text_pos)) == node->match; - case RE_OP_SET_DIFF_IGN: - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_UNION_IGN: - return text_pos < state->slice_end && in_set_ign(state->encoding, node, - char_at(text, text_pos)) == node->match; - case RE_OP_SET_DIFF_IGN_REV: - case RE_OP_SET_INTER_IGN_REV: - case RE_OP_SET_SYM_DIFF_IGN_REV: - case RE_OP_SET_UNION_IGN_REV: - return text_pos > state->slice_start && in_set_ign(state->encoding, - node, char_at(text, text_pos - 1)) == node->match; - case RE_OP_SET_DIFF_REV: - case RE_OP_SET_INTER_REV: - case RE_OP_SET_SYM_DIFF_REV: - case RE_OP_SET_UNION_REV: - return text_pos > state->slice_start && in_set(state->encoding, node, - char_at(text, text_pos - 1)) == node->match; - } - - return FALSE; -} - /* Performs a simple string search. 
*/ Py_LOCAL_INLINE(Py_ssize_t) simple_string_search(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit) { - void* text; - size_t length; + node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { + Py_ssize_t length; RE_CODE* values; - Py_UCS4 first_char; + Py_UCS4 check_char; - text = state->text; - length = node->value_count; + length = (Py_ssize_t)node->value_count; values = node->values; - first_char = values[0]; - limit -= length; + check_char = values[0]; + + *is_partial = FALSE; switch (state->charsize) { case 1: { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; + Py_UCS1* text = (Py_UCS1*)state->text; + Py_UCS1* text_ptr = text + text_pos; + Py_UCS1* limit_ptr = text + limit; - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; + while (text_ptr < limit_ptr) { + if (text_ptr[0] == check_char) { + Py_ssize_t s_pos; - while (text_ptr <= limit_ptr) { - if (text_ptr[0] == first_char) { - size_t pos; + s_pos = 1; - pos = 1; - while (pos < length && text_ptr[pos] == values[pos]) - ++pos; + for (;;) { + if (s_pos >= length) + /* End of search string. */ + return text_ptr - text; - if (pos >= length) - return text_ptr - (Py_UCS1*)text; + if (text_ptr + s_pos >= limit_ptr) { + /* Off the end of the text. */ + if (state->partial_side == RE_PARTIAL_RIGHT) { + /* Partial match. 
*/ + *is_partial = TRUE; + return text_ptr - text; + } + + return -1; + + } + + if (!same_char(text_ptr[s_pos], values[s_pos])) + break; + + ++s_pos; + } } ++text_ptr; } + text_pos = text_ptr - text; break; } case 2: { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; + Py_UCS2* text = (Py_UCS2*)state->text; + Py_UCS2* text_ptr = text + text_pos; + Py_UCS2* limit_ptr = text + limit; - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; + while (text_ptr < limit_ptr) { + if (text_ptr[0] == check_char) { + Py_ssize_t s_pos; - while (text_ptr <= limit_ptr) { - if (text_ptr[0] == first_char) { - size_t pos; + s_pos = 1; - pos = 1; - while (pos < length && text_ptr[pos] == values[pos]) - ++pos; + for (;;) { + if (s_pos >= length) + /* End of search string. */ + return text_ptr - text; - if (pos >= length) - return text_ptr - (Py_UCS2*)text; + if (text_ptr + s_pos >= limit_ptr) { + /* Off the end of the text. */ + if (state->partial_side == RE_PARTIAL_RIGHT) { + /* Partial match. */ + *is_partial = TRUE; + return text_ptr - text; + } + + return -1; + + } + + if (!same_char(text_ptr[s_pos], values[s_pos])) + break; + + ++s_pos; + } } ++text_ptr; } + text_pos = text_ptr - text; break; } case 4: { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; + Py_UCS4* text = (Py_UCS4*)state->text; + Py_UCS4* text_ptr = text + text_pos; + Py_UCS4* limit_ptr = text + limit; - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; + while (text_ptr < limit_ptr) { + if (text_ptr[0] == check_char) { + Py_ssize_t s_pos; - while (text_ptr <= limit_ptr) { - if (text_ptr[0] == first_char) { - size_t pos; + s_pos = 1; - pos = 1; - while (pos < length && text_ptr[pos] == values[pos]) - ++pos; + for (;;) { + if (s_pos >= length) + /* End of search string. */ + return text_ptr - text; - if (pos >= length) - return text_ptr - (Py_UCS4*)text; + if (text_ptr + s_pos >= limit_ptr) { + /* Off the end of the text. 
*/ + if (state->partial_side == RE_PARTIAL_RIGHT) { + /* Partial match. */ + *is_partial = TRUE; + return text_ptr - text; + } + + return -1; + + } + + if (!same_char(text_ptr[s_pos], values[s_pos])) + break; + + ++s_pos; + } } ++text_ptr; } + text_pos = text_ptr - text; break; } } + /* Off the end of the text. */ + if (state->partial_side == RE_PARTIAL_RIGHT) { + /* Partial match. */ + *is_partial = TRUE; + return text_pos; + } + return -1; } /* Performs a simple string search, ignoring case. */ Py_LOCAL_INLINE(Py_ssize_t) simple_string_search_ign(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit) { - void* text; - size_t length; + node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { + Py_ssize_t length; RE_CODE* values; RE_EncodingTable* encoding; + RE_LocaleInfo* locale_info; Py_UCS4 cases[RE_MAX_CASES]; int case_count; - text = state->text; - length = node->value_count; + length = (Py_ssize_t)node->value_count; values = node->values; encoding = state->encoding; - case_count = encoding->all_cases(values[0], cases); - limit -= length; + locale_info = state->locale_info; + case_count = encoding->all_cases(locale_info, values[0], cases); + + *is_partial = FALSE; switch (state->charsize) { case 1: { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; + Py_UCS1* text = (Py_UCS1*)state->text; + Py_UCS1* text_ptr = text + text_pos; + Py_UCS1* limit_ptr = text + limit; - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr <= limit_ptr) { + while (text_ptr < limit_ptr) { if (any_case(text_ptr[0], case_count, cases)) { - size_t pos; + Py_ssize_t s_pos; - pos = 1; - while (pos < length && same_char_ign(encoding, text_ptr[pos], - values[pos])) - ++pos; + s_pos = 1; - if (pos >= length) - return text_ptr - (Py_UCS1*)text; + for (;;) { + if (s_pos >= length) + /* End of search string. */ + return text_ptr - text; + + if (text_ptr + s_pos >= limit_ptr) { + /* Off the end of the text. 
*/ + if (state->partial_side == RE_PARTIAL_RIGHT) { + /* Partial match. */ + *is_partial = TRUE; + return text_ptr - text; + } + + return -1; + + } + + if (!same_char_ign(encoding, locale_info, text_ptr[s_pos], + values[s_pos])) + break; + + ++s_pos; + } } ++text_ptr; } + text_pos = text_ptr - text; break; } case 2: { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; + Py_UCS2* text = (Py_UCS2*)state->text; + Py_UCS2* text_ptr = text + text_pos; + Py_UCS2* limit_ptr = text + limit; - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr <= limit_ptr) { + while (text_ptr < limit_ptr) { if (any_case(text_ptr[0], case_count, cases)) { - size_t pos; + Py_ssize_t s_pos; - pos = 1; - while (pos < length && same_char_ign(encoding, text_ptr[pos], - values[pos])) - ++pos; + s_pos = 1; - if (pos >= length) - return text_ptr - (Py_UCS2*)text; + for (;;) { + if (s_pos >= length) + /* End of search string. */ + return text_ptr - text; + + if (text_ptr + s_pos >= limit_ptr) { + /* Off the end of the text. */ + if (state->partial_side == RE_PARTIAL_RIGHT) { + /* Partial match. */ + *is_partial = TRUE; + return text_ptr - text; + } + + return -1; + + } + + if (!same_char_ign(encoding, locale_info, text_ptr[s_pos], + values[s_pos])) + break; + + ++s_pos; + } } ++text_ptr; } + text_pos = text_ptr - text; break; } case 4: { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; + Py_UCS4* text = (Py_UCS4*)state->text; + Py_UCS4* text_ptr = text + text_pos; + Py_UCS4* limit_ptr = text + limit; - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr <= limit_ptr) { + while (text_ptr < limit_ptr) { if (any_case(text_ptr[0], case_count, cases)) { - size_t pos; + Py_ssize_t s_pos; - pos = 1; - while (pos < length && same_char_ign(encoding, text_ptr[pos], - values[pos])) - ++pos; + s_pos = 1; - if (pos >= length) - return text_ptr - (Py_UCS4*)text; + for (;;) { + if (s_pos >= length) + /* End of search string. 
*/ + return text_ptr - text; + + if (text_ptr + s_pos >= limit_ptr) { + /* Off the end of the text. */ + if (state->partial_side == RE_PARTIAL_RIGHT) { + /* Partial match. */ + *is_partial = TRUE; + return text_ptr - text; + } + + return -1; + + } + + if (!same_char_ign(encoding, locale_info, text_ptr[s_pos], + values[s_pos])) + break; + + ++s_pos; + } } ++text_ptr; } + text_pos = text_ptr - text; break; } } + /* Off the end of the text. */ + if (state->partial_side == RE_PARTIAL_RIGHT) { + /* Partial match. */ + *is_partial = TRUE; + return text_pos; + } + return -1; } -/* Performs a simple string search backwards, ignoring case. */ +/* Performs a simple string search, backwards, ignoring case. */ Py_LOCAL_INLINE(Py_ssize_t) simple_string_search_ign_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit) { - void* text; - size_t length; + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { + Py_ssize_t length; RE_CODE* values; RE_EncodingTable* encoding; + RE_LocaleInfo* locale_info; Py_UCS4 cases[RE_MAX_CASES]; int case_count; - text = state->text; - length = node->value_count; + length = (Py_ssize_t)node->value_count; values = node->values; encoding = state->encoding; - case_count = encoding->all_cases(values[0], cases); - text_pos -= length; + locale_info = state->locale_info; + case_count = encoding->all_cases(locale_info, values[length - 1], cases); + + *is_partial = FALSE; switch (state->charsize) { case 1: { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; + Py_UCS1* text = (Py_UCS1*)state->text; + Py_UCS1* text_ptr = text + text_pos; + Py_UCS1* limit_ptr = text + limit; - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; + while (text_ptr > limit_ptr) { + if (any_case(text_ptr[-1], case_count, cases)) { + Py_ssize_t s_pos; - while (text_ptr >= limit_ptr) { - if (any_case(text_ptr[0], case_count, cases)) { - size_t pos; + s_pos = 1; - pos = 1; - while (pos < length && same_char_ign(encoding, 
text_ptr[pos], - values[pos])) - ++pos; + for (;;) { + if (s_pos >= length) + /* End of search string. */ + return text_ptr - text; - if (pos >= length) - return text_ptr - (Py_UCS1*)text + length; + if (text_ptr - s_pos <= limit_ptr) { + /* Off the end of the text. */ + if (state->partial_side == RE_PARTIAL_LEFT) { + /* Partial match. */ + *is_partial = TRUE; + return text_ptr - text; + } + + return -1; + + } + + if (!same_char_ign(encoding, locale_info, text_ptr[- s_pos + - 1], values[length - s_pos - 1])) + break; + + ++s_pos; + } } --text_ptr; } + text_pos = text_ptr - text; break; } case 2: { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; + Py_UCS2* text = (Py_UCS2*)state->text; + Py_UCS2* text_ptr = text + text_pos; + Py_UCS2* limit_ptr = text + limit; - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; + while (text_ptr > limit_ptr) { + if (any_case(text_ptr[-1], case_count, cases)) { + Py_ssize_t s_pos; - while (text_ptr >= limit_ptr) { - if (any_case(text_ptr[0], case_count, cases)) { - size_t pos; + s_pos = 1; - pos = 1; - while (pos < length && same_char_ign(encoding, text_ptr[pos], - values[pos])) - ++pos; + for (;;) { + if (s_pos >= length) + /* End of search string. */ + return text_ptr - text; - if (pos >= length) - return text_ptr - (Py_UCS2*)text + length; + if (text_ptr - s_pos <= limit_ptr) { + /* Off the end of the text. */ + if (state->partial_side == RE_PARTIAL_LEFT) { + /* Partial match. 
*/ + *is_partial = TRUE; + return text_ptr - text; + } + + return -1; + + } + + if (!same_char_ign(encoding, locale_info, text_ptr[- s_pos + - 1], values[length - s_pos - 1])) + break; + + ++s_pos; + } } --text_ptr; } + text_pos = text_ptr - text; break; } case 4: { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; + Py_UCS4* text = (Py_UCS4*)state->text; + Py_UCS4* text_ptr = text + text_pos; + Py_UCS4* limit_ptr = text + limit; - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; + while (text_ptr > limit_ptr) { + if (any_case(text_ptr[-1], case_count, cases)) { + Py_ssize_t s_pos; - while (text_ptr >= limit_ptr) { - if (any_case(text_ptr[0], case_count, cases)) { - size_t pos; + s_pos = 1; - pos = 1; - while (pos < length && same_char_ign(encoding, text_ptr[pos], - values[pos])) - ++pos; + for (;;) { + if (s_pos >= length) + /* End of search string. */ + return text_ptr - text; - if (pos >= length) - return text_ptr - (Py_UCS4*)text + length; + if (text_ptr - s_pos <= limit_ptr) { + /* Off the end of the text. */ + if (state->partial_side == RE_PARTIAL_LEFT) { + /* Partial match. */ + *is_partial = TRUE; + return text_ptr - text; + } + + return -1; + + } + + if (!same_char_ign(encoding, locale_info, text_ptr[- s_pos + - 1], values[length - s_pos - 1])) + break; + + ++s_pos; + } } --text_ptr; } + text_pos = text_ptr - text; break; } } + /* Off the end of the text. */ + if (state->partial_side == RE_PARTIAL_LEFT) { + /* Partial match. */ + *is_partial = TRUE; + return text_pos; + } + return -1; } -/* Performs a simple string search backwards. */ +/* Performs a simple string search, backwards. 
*/ Py_LOCAL_INLINE(Py_ssize_t) simple_string_search_rev(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit) { - void* text; - size_t length; + node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { + Py_ssize_t length; RE_CODE* values; - Py_UCS4 first_char; + Py_UCS4 check_char; - text = state->text; - length = node->value_count; + length = (Py_ssize_t)node->value_count; values = node->values; - first_char = values[0]; - text_pos -= length; + check_char = values[length - 1]; + + *is_partial = FALSE; switch (state->charsize) { case 1: { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; + Py_UCS1* text = (Py_UCS1*)state->text; + Py_UCS1* text_ptr = text + text_pos; + Py_UCS1* limit_ptr = text + limit; - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; + while (text_ptr > limit_ptr) { + if (text_ptr[-1] == check_char) { + Py_ssize_t s_pos; - while (text_ptr >= limit_ptr) { - if (text_ptr[0] == first_char) { - size_t pos; + s_pos = 1; - pos = 1; - while (pos < length && text_ptr[pos] == values[pos]) - ++pos; + for (;;) { + if (s_pos >= length) + /* End of search string. */ + return text_ptr - text; - if (pos >= length) - return text_ptr - (Py_UCS1*)text + length; + if (text_ptr - s_pos <= limit_ptr) { + /* Off the end of the text. */ + if (state->partial_side == RE_PARTIAL_LEFT) { + /* Partial match. 
*/ + *is_partial = TRUE; + return text_ptr - text; + } + + return -1; + + } + + if (!same_char(text_ptr[- s_pos - 1], values[length - s_pos + - 1])) + break; + + ++s_pos; + } } --text_ptr; } + text_pos = text_ptr - text; break; } case 2: { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; + Py_UCS2* text = (Py_UCS2*)state->text; + Py_UCS2* text_ptr = text + text_pos; + Py_UCS2* limit_ptr = text + limit; - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; + while (text_ptr > limit_ptr) { + if (text_ptr[-1] == check_char) { + Py_ssize_t s_pos; - while (text_ptr >= limit_ptr) { - if (text_ptr[0] == first_char) { - size_t pos; + s_pos = 1; - pos = 1; - while (pos < length && text_ptr[pos] == values[pos]) - ++pos; + for (;;) { + if (s_pos >= length) + /* End of search string. */ + return text_ptr - text; - if (pos >= length) - return text_ptr - (Py_UCS2*)text + length; + if (text_ptr - s_pos <= limit_ptr) { + /* Off the end of the text. */ + if (state->partial_side == RE_PARTIAL_LEFT) { + /* Partial match. */ + *is_partial = TRUE; + return text_ptr - text; + } + + return -1; + + } + + if (!same_char(text_ptr[- s_pos - 1], values[length - s_pos + - 1])) + break; + + ++s_pos; + } } --text_ptr; } + text_pos = text_ptr - text; break; } case 4: { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; + Py_UCS4* text = (Py_UCS4*)state->text; + Py_UCS4* text_ptr = text + text_pos; + Py_UCS4* limit_ptr = text + limit; - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; + while (text_ptr > limit_ptr) { + if (text_ptr[-1] == check_char) { + Py_ssize_t s_pos; - while (text_ptr >= limit_ptr) { - if (text_ptr[0] == first_char) { - size_t pos; + s_pos = 1; - pos = 1; - while (pos < length && text_ptr[pos] == values[pos]) - ++pos; + for (;;) { + if (s_pos >= length) + /* End of search string. 
*/ + return text_ptr - text; - if (pos >= length) - return text_ptr - (Py_UCS4*)text + length; + if (text_ptr - s_pos <= limit_ptr) { + /* Off the end of the text. */ + if (state->partial_side == RE_PARTIAL_LEFT) { + /* Partial match. */ + *is_partial = TRUE; + return text_ptr - text; + } + + return -1; + + } + + if (!same_char(text_ptr[- s_pos - 1], values[length - s_pos + - 1])) + break; + + ++s_pos; + } } --text_ptr; } + text_pos = text_ptr - text; break; } } + /* Off the end of the text. */ + if (state->partial_side == RE_PARTIAL_LEFT) { + /* Partial match. */ + *is_partial = TRUE; + return text_pos; + } + return -1; } @@ -4569,7 +5169,7 @@ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search(RE_State* state, RE_Node* node, Py_ssize_t pos; pos = last_pos - 1; - while (pos >= 0 && text_ptr[pos] == values[pos]) + while (pos >= 0 && same_char(text_ptr[pos], values[pos])) --pos; if (pos < 0) @@ -4597,7 +5197,7 @@ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search(RE_State* state, RE_Node* node, Py_ssize_t pos; pos = last_pos - 1; - while (pos >= 0 && text_ptr[pos] == values[pos]) + while (pos >= 0 && same_char(text_ptr[pos], values[pos])) --pos; if (pos < 0) @@ -4625,7 +5225,7 @@ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search(RE_State* state, RE_Node* node, Py_ssize_t pos; pos = last_pos - 1; - while (pos >= 0 && text_ptr[pos] == values[pos]) + while (pos >= 0 && same_char(text_ptr[pos], values[pos])) --pos; if (pos < 0) @@ -4646,6 +5246,7 @@ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search(RE_State* state, RE_Node* node, Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign(RE_State* state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit) { RE_EncodingTable* encoding; + RE_LocaleInfo* locale_info; void* text; Py_ssize_t length; RE_CODE* values; @@ -4656,13 +5257,14 @@ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign(RE_State* state, RE_Node* int case_count; encoding = state->encoding; + locale_info = state->locale_info; text = state->text; length = 
(Py_ssize_t)node->value_count; values = node->values; good_suffix_offset = node->string.good_suffix_offset; bad_character_offset = node->string.bad_character_offset; last_pos = length - 1; - case_count = encoding->all_cases(values[last_pos], cases); + case_count = encoding->all_cases(locale_info, values[last_pos], cases); limit -= length; switch (state->charsize) { @@ -4682,8 +5284,8 @@ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign(RE_State* state, RE_Node* Py_ssize_t pos; pos = last_pos - 1; - while (pos >= 0 && same_char_ign(encoding, - text_ptr[pos], values[pos])) + while (pos >= 0 && same_char_ign(encoding, locale_info, + text_ptr[pos], values[pos])) --pos; if (pos < 0) @@ -4711,8 +5313,8 @@ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign(RE_State* state, RE_Node* Py_ssize_t pos; pos = last_pos - 1; - while (pos >= 0 && same_char_ign(encoding, - text_ptr[pos], values[pos])) + while (pos >= 0 && same_char_ign(encoding, locale_info, + text_ptr[pos], values[pos])) --pos; if (pos < 0) @@ -4740,8 +5342,8 @@ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign(RE_State* state, RE_Node* Py_ssize_t pos; pos = last_pos - 1; - while (pos >= 0 && same_char_ign(encoding, - text_ptr[pos], values[pos])) + while (pos >= 0 && same_char_ign(encoding, locale_info, + text_ptr[pos], values[pos])) --pos; if (pos < 0) @@ -4758,10 +5360,11 @@ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign(RE_State* state, RE_Node* return -1; } -/* Performs a Boyer-Moore fast string search backwards, ignoring case. */ +/* Performs a Boyer-Moore fast string search, backwards, ignoring case. 
*/ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign_rev(RE_State* state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit) { RE_EncodingTable* encoding; + RE_LocaleInfo* locale_info; void* text; Py_ssize_t length; RE_CODE* values; @@ -4771,12 +5374,13 @@ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign_rev(RE_State* state, int case_count; encoding = state->encoding; + locale_info = state->locale_info; text = state->text; length = (Py_ssize_t)node->value_count; values = node->values; good_suffix_offset = node->string.good_suffix_offset; bad_character_offset = node->string.bad_character_offset; - case_count = encoding->all_cases(values[0], cases); + case_count = encoding->all_cases(locale_info, values[0], cases); text_pos -= length; switch (state->charsize) { @@ -4796,8 +5400,8 @@ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign_rev(RE_State* state, Py_ssize_t pos; pos = 1; - while (pos < length && same_char_ign(encoding, - text_ptr[pos], values[pos])) + while (pos < length && same_char_ign(encoding, locale_info, + text_ptr[pos], values[pos])) ++pos; if (pos >= length) @@ -4825,8 +5429,8 @@ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign_rev(RE_State* state, Py_ssize_t pos; pos = 1; - while (pos < length && same_char_ign(encoding, - text_ptr[pos], values[pos])) + while (pos < length && same_char_ign(encoding, locale_info, + text_ptr[pos], values[pos])) ++pos; if (pos >= length) @@ -4854,8 +5458,8 @@ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign_rev(RE_State* state, Py_ssize_t pos; pos = 1; - while (pos < length && same_char_ign(encoding, - text_ptr[pos], values[pos])) + while (pos < length && same_char_ign(encoding, locale_info, + text_ptr[pos], values[pos])) ++pos; if (pos >= length) @@ -4872,7 +5476,7 @@ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign_rev(RE_State* state, return -1; } -/* Performs a Boyer-Moore fast string search backwards. */ +/* Performs a Boyer-Moore fast string search, backwards. 
*/ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_rev(RE_State* state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit) { void* text; @@ -4907,7 +5511,7 @@ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_rev(RE_State* state, RE_Node* Py_ssize_t pos; pos = 1; - while (pos < length && text_ptr[pos] == values[pos]) + while (pos < length && same_char(text_ptr[pos], values[pos])) ++pos; if (pos >= length) @@ -4935,7 +5539,7 @@ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_rev(RE_State* state, RE_Node* Py_ssize_t pos; pos = 1; - while (pos < length && text_ptr[pos] == values[pos]) + while (pos < length && same_char(text_ptr[pos], values[pos])) ++pos; if (pos >= length) @@ -4963,7 +5567,7 @@ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_rev(RE_State* state, RE_Node* Py_ssize_t pos; pos = 1; - while (pos < length && text_ptr[pos] == values[pos]) + while (pos < length && same_char(text_ptr[pos], values[pos])) ++pos; if (pos >= length) @@ -4980,14 +5584,9 @@ Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_rev(RE_State* state, RE_Node* return -1; } -/* Check whether 2 characters are the same. */ -static BOOL same_char(RE_EncodingTable* encoding, Py_UCS4 ch1, Py_UCS4 ch2) { - return ch1 == ch2; -} - -/* Build the tables for a Boyer-Moore fast string search. */ -Py_LOCAL_INLINE(BOOL) build_fast_tables(RE_EncodingTable* encoding, RE_Node* - node, BOOL ignore) { +/* Builds the tables for a Boyer-Moore fast string search. 
*/ +Py_LOCAL_INLINE(BOOL) build_fast_tables(RE_State* state, RE_Node* node, BOOL + ignore) { Py_ssize_t length; RE_CODE* values; Py_ssize_t* bad; @@ -4995,7 +5594,8 @@ Py_LOCAL_INLINE(BOOL) build_fast_tables(RE_EncodingTable* encoding, RE_Node* Py_UCS4 ch; Py_ssize_t last_pos; Py_ssize_t pos; - BOOL (*is_same_char)(RE_EncodingTable* encoding, Py_UCS4 ch1, Py_UCS4 ch2); + BOOL (*is_same_char)(RE_EncodingTable* encoding, RE_LocaleInfo* + locale_info, Py_UCS4 ch1, Py_UCS4 ch2); Py_ssize_t suffix_len; BOOL saved_start; Py_ssize_t s; @@ -5010,7 +5610,7 @@ Py_LOCAL_INLINE(BOOL) build_fast_tables(RE_EncodingTable* encoding, RE_Node* values = node->values; bad = (Py_ssize_t*)re_alloc(256 * sizeof(bad[0])); - good = (Py_ssize_t*)re_alloc(length * sizeof(good[0])); + good = (Py_ssize_t*)re_alloc((size_t)length * sizeof(good[0])); if (!bad || !good) { re_dealloc(bad); @@ -5033,7 +5633,8 @@ Py_LOCAL_INLINE(BOOL) build_fast_tables(RE_EncodingTable* encoding, RE_Node* int count; int i; - count = encoding->all_cases(ch, codepoints); + count = state->encoding->all_cases(state->locale_info, ch, + codepoints); for (i = 0; i < count; i++) bad[codepoints[i] & 0xFF] = offset; @@ -5041,7 +5642,7 @@ Py_LOCAL_INLINE(BOOL) build_fast_tables(RE_EncodingTable* encoding, RE_Node* bad[ch & 0xFF] = offset; } - is_same_char = ignore ? same_char_ign : same_char; + is_same_char = ignore ? same_char_ign_wrapper : same_char_wrapper; suffix_len = 2; pos = length - suffix_len; @@ -5049,6 +5650,7 @@ Py_LOCAL_INLINE(BOOL) build_fast_tables(RE_EncodingTable* encoding, RE_Node* s = pos - 1; i = suffix_len - 1; s_start = s; + while (pos >= 0) { /* Look for another occurrence of the suffix. */ while (i > 0) { @@ -5056,7 +5658,8 @@ Py_LOCAL_INLINE(BOOL) build_fast_tables(RE_EncodingTable* encoding, RE_Node* if (s + i < 0) break; - if (is_same_char(encoding, values[s + i], values[pos + i])) + if (is_same_char(state->encoding, state->locale_info, values[s + + i], values[pos + i])) /* It still matches. 
*/ --i; else { @@ -5066,7 +5669,8 @@ Py_LOCAL_INLINE(BOOL) build_fast_tables(RE_EncodingTable* encoding, RE_Node* } } - if (s >= 0 && is_same_char(encoding, values[s], values[pos])) { + if (s >= 0 && is_same_char(state->encoding, state->locale_info, + values[s], values[pos])) { /* We haven't dropped off the end of the string, and the suffix has * matched this far, so this is a good starting point for the next * iteration. @@ -5112,9 +5716,9 @@ Py_LOCAL_INLINE(BOOL) build_fast_tables(RE_EncodingTable* encoding, RE_Node* return TRUE; } -/* Build the tables for a Boyer-Moore fast string search backwards. */ -Py_LOCAL_INLINE(BOOL) build_fast_tables_rev(RE_EncodingTable* encoding, - RE_Node* node, BOOL ignore) { +/* Builds the tables for a Boyer-Moore fast string search, backwards. */ +Py_LOCAL_INLINE(BOOL) build_fast_tables_rev(RE_State* state, RE_Node* node, + BOOL ignore) { Py_ssize_t length; RE_CODE* values; Py_ssize_t* bad; @@ -5122,7 +5726,8 @@ Py_LOCAL_INLINE(BOOL) build_fast_tables_rev(RE_EncodingTable* encoding, Py_UCS4 ch; Py_ssize_t last_pos; Py_ssize_t pos; - BOOL (*is_same_char)(RE_EncodingTable* encoding, Py_UCS4 ch1, Py_UCS4 ch2); + BOOL (*is_same_char)(RE_EncodingTable* encoding, RE_LocaleInfo* + locale_info, Py_UCS4 ch1, Py_UCS4 ch2); Py_ssize_t suffix_len; BOOL saved_start; Py_ssize_t s; @@ -5137,7 +5742,7 @@ Py_LOCAL_INLINE(BOOL) build_fast_tables_rev(RE_EncodingTable* encoding, values = node->values; bad = (Py_ssize_t*)re_alloc(256 * sizeof(bad[0])); - good = (Py_ssize_t*)re_alloc(length * sizeof(good[0])); + good = (Py_ssize_t*)re_alloc((size_t)length * sizeof(good[0])); if (!bad || !good) { re_dealloc(bad); @@ -5160,7 +5765,8 @@ Py_LOCAL_INLINE(BOOL) build_fast_tables_rev(RE_EncodingTable* encoding, int count; int i; - count = encoding->all_cases(ch, codepoints); + count = state->encoding->all_cases(state->locale_info, ch, + codepoints); for (i = 0; i < count; i++) bad[codepoints[i] & 0xFF] = offset; @@ -5168,7 +5774,7 @@ Py_LOCAL_INLINE(BOOL) 
build_fast_tables_rev(RE_EncodingTable* encoding, bad[ch & 0xFF] = offset; } - is_same_char = ignore ? same_char_ign : same_char; + is_same_char = ignore ? same_char_ign_wrapper : same_char_wrapper; suffix_len = 2; pos = suffix_len - 1; @@ -5176,6 +5782,7 @@ Py_LOCAL_INLINE(BOOL) build_fast_tables_rev(RE_EncodingTable* encoding, s = pos + 1; i = suffix_len - 1; s_start = s; + while (pos < length) { /* Look for another occurrence of the suffix. */ while (i > 0) { @@ -5183,7 +5790,8 @@ Py_LOCAL_INLINE(BOOL) build_fast_tables_rev(RE_EncodingTable* encoding, if (s - i >= length) break; - if (is_same_char(encoding, values[s - i], values[pos - i])) + if (is_same_char(state->encoding, state->locale_info, values[s - + i], values[pos - i])) /* It still matches. */ --i; else { @@ -5193,7 +5801,8 @@ Py_LOCAL_INLINE(BOOL) build_fast_tables_rev(RE_EncodingTable* encoding, } } - if (s < length && is_same_char(encoding, values[s], values[pos])) { + if (s < length && is_same_char(state->encoding, state->locale_info, + values[s], values[pos])) { /* We haven't dropped off the end of the string, and the suffix has * matched this far, so this is a good starting point for the next * iteration. @@ -5241,15 +5850,13 @@ Py_LOCAL_INLINE(BOOL) build_fast_tables_rev(RE_EncodingTable* encoding, /* Performs a string search. */ Py_LOCAL_INLINE(Py_ssize_t) string_search(RE_SafeState* safe_state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit) { + node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { RE_State* state; Py_ssize_t found_pos; state = safe_state->re_state; - /* Can the string fit the available space? */ - if (text_pos + (Py_ssize_t)node->value_count > limit) - return -1; + *is_partial = FALSE; /* Has the node been initialised for fast searching, if necessary? */ if (!(node->status & RE_STATUS_FAST_INIT)) { @@ -5260,171 +5867,176 @@ Py_LOCAL_INLINE(Py_ssize_t) string_search(RE_SafeState* safe_state, RE_Node* /* Double-check because of multithreading. 
*/ if (!(node->status & RE_STATUS_FAST_INIT)) { - build_fast_tables(state->encoding, node, FALSE); + build_fast_tables(state, node, FALSE); node->status |= RE_STATUS_FAST_INIT; } release_GIL(safe_state); } - if (node->string.bad_character_offset) + if (node->string.bad_character_offset) { + /* Start with a fast search. This will find the string if it's complete + * (i.e. not truncated). + */ found_pos = fast_string_search(state, node, text_pos, limit); - else - found_pos = simple_string_search(state, node, text_pos, limit); + if (found_pos < 0 && state->partial_side == RE_PARTIAL_RIGHT) + /* We didn't find the string, but it could've been truncated, so + * try again, starting close to the end. + */ + found_pos = simple_string_search(state, node, limit - + (Py_ssize_t)(node->value_count - 1), limit, is_partial); + } else + found_pos = simple_string_search(state, node, text_pos, limit, + is_partial); return found_pos; } /* Performs a string search, ignoring case. */ Py_LOCAL_INLINE(Py_ssize_t) string_search_fld(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, Py_ssize_t* new_pos) { + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, Py_ssize_t* new_pos, + BOOL* is_partial) { RE_State* state; RE_EncodingTable* encoding; - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + RE_LocaleInfo* locale_info; + int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* + folded); Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); void* text; RE_CODE* values; Py_ssize_t start_pos; - int folded_pos; + int f_pos; int folded_len; Py_ssize_t length; - Py_ssize_t string_pos; + Py_ssize_t s_pos; Py_UCS4 folded[RE_MAX_FOLDED]; - int case_count; - Py_UCS4 cases[RE_MAX_CASES]; state = safe_state->re_state; encoding = state->encoding; + locale_info = state->locale_info; full_case_fold = encoding->full_case_fold; char_at = state->char_at; text = state->text; values = node->values; start_pos = text_pos; - folded_pos = 0; + f_pos = 0; 
folded_len = 0; - length = node->value_count; - string_pos = 0; + length = (Py_ssize_t)node->value_count; + s_pos = 0; - /* We'll special-case the first character of the string. */ - case_count = encoding->all_cases(values[0], cases); + *is_partial = FALSE; + + while (s_pos < length || f_pos < folded_len) { + if (f_pos >= folded_len) { + /* Fetch and casefold another character. */ + if (text_pos >= limit) { + if (text_pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) { + *is_partial = TRUE; + return start_pos; + } - while (string_pos < length || folded_pos < folded_len) { - if (folded_pos >= folded_len) { -fetch: - if (text_pos >= limit) return -1; - - folded_len = full_case_fold(char_at(text, text_pos), folded); - folded_pos = 0; - } - - if (string_pos == 0) { - int i; - - for (i = 0; i < case_count; i++) { - if (folded[0] == cases[i]) - goto match; } - ++start_pos; - text_pos = start_pos; - goto fetch; - } else if (same_char_ign(encoding, values[string_pos], - folded[folded_pos])) { -match: - ++string_pos; - ++folded_pos; + folded_len = full_case_fold(locale_info, char_at(text, text_pos), + folded); + f_pos = 0; + } - if (folded_pos >= folded_len) + if (same_char_ign(encoding, locale_info, values[s_pos], folded[f_pos])) + { + ++s_pos; + ++f_pos; + + if (f_pos >= folded_len) ++text_pos; } else { ++start_pos; text_pos = start_pos; - folded_pos = 0; + f_pos = 0; folded_len = 0; - string_pos = 0; + s_pos = 0; } } + /* We found the string. */ if (new_pos) *new_pos = text_pos; return start_pos; } -/* Performs a string search backwards, ignoring case. */ +/* Performs a string search, backwards, ignoring case. 
*/ Py_LOCAL_INLINE(Py_ssize_t) string_search_fld_rev(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, Py_ssize_t* new_pos) { + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, Py_ssize_t* new_pos, + BOOL* is_partial) { RE_State* state; RE_EncodingTable* encoding; - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + RE_LocaleInfo* locale_info; + int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* + folded); Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); void* text; RE_CODE* values; Py_ssize_t start_pos; - int folded_pos; + int f_pos; int folded_len; Py_ssize_t length; - Py_ssize_t string_pos; + Py_ssize_t s_pos; Py_UCS4 folded[RE_MAX_FOLDED]; - int case_count; - Py_UCS4 cases[RE_MAX_CASES]; state = safe_state->re_state; encoding = state->encoding; + locale_info = state->locale_info; full_case_fold = encoding->full_case_fold; char_at = state->char_at; text = state->text; values = node->values; start_pos = text_pos; - folded_pos = 0; + f_pos = 0; folded_len = 0; - length = node->value_count; - string_pos = length; + length = (Py_ssize_t)node->value_count; + s_pos = 0; - /* We'll special-case the last character of the string. */ - case_count = encoding->all_cases(values[length - 1], cases); + *is_partial = FALSE; + + while (s_pos < length || f_pos < folded_len) { + if (f_pos >= folded_len) { + /* Fetch and casefold another character. 
*/ + if (text_pos <= limit) { + if (text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) { + *is_partial = TRUE; + return start_pos; + } - while (string_pos > 0 || folded_pos > 0) { - if (folded_pos <= 0) { -fetch: - if (text_pos <= limit) return -1; - - folded_len = full_case_fold(char_at(text, text_pos - 1), folded); - folded_pos = folded_len; - } - - if (string_pos == length) { - int i; - - for (i = 0; i < case_count; i++) { - if (folded[folded_len - 1] == cases[i]) - goto match; } - --start_pos; - text_pos = start_pos; - goto fetch; - } else if (same_char_ign(encoding, values[string_pos - 1], - folded[folded_pos - 1])) { -match: - --string_pos; - --folded_pos; + folded_len = full_case_fold(locale_info, char_at(text, text_pos - + 1), folded); + f_pos = 0; + } - if (folded_pos <= 0) + if (same_char_ign(encoding, locale_info, values[length - s_pos - 1], + folded[folded_len - f_pos - 1])) { + ++s_pos; + ++f_pos; + + if (f_pos >= folded_len) --text_pos; } else { --start_pos; text_pos = start_pos; - folded_pos = 0; + f_pos = 0; folded_len = 0; - string_pos = length; + s_pos = 0; } } + /* We found the string. */ if (new_pos) *new_pos = text_pos; @@ -5433,15 +6045,13 @@ match: /* Performs a string search, ignoring case. */ Py_LOCAL_INLINE(Py_ssize_t) string_search_ign(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit) { + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { RE_State* state; Py_ssize_t found_pos; state = safe_state->re_state; - /* Can the string fit the available space? */ - if (text_pos + (Py_ssize_t)node->value_count > limit) - return -1; + *is_partial = FALSE; /* Has the node been initialised for fast searching, if necessary? */ if (!(node->status & RE_STATUS_FAST_INIT)) { @@ -5452,32 +6062,40 @@ Py_LOCAL_INLINE(Py_ssize_t) string_search_ign(RE_SafeState* safe_state, /* Double-check because of multithreading. 
*/ if (!(node->status & RE_STATUS_FAST_INIT)) { - build_fast_tables(state->encoding, node, TRUE); + build_fast_tables(state, node, TRUE); node->status |= RE_STATUS_FAST_INIT; } release_GIL(safe_state); } - if (node->string.bad_character_offset) + if (node->string.bad_character_offset) { + /* Start with a fast search. This will find the string if it's complete + * (i.e. not truncated). + */ found_pos = fast_string_search_ign(state, node, text_pos, limit); - else - found_pos = simple_string_search_ign(state, node, text_pos, limit); + if (found_pos < 0 && state->partial_side == RE_PARTIAL_RIGHT) + /* We didn't find the string, but it could've been truncated, so + * try again, starting close to the end. + */ + found_pos = simple_string_search_ign(state, node, limit - + (Py_ssize_t)(node->value_count - 1), limit, is_partial); + } else + found_pos = simple_string_search_ign(state, node, text_pos, limit, + is_partial); return found_pos; } -/* Performs a string search backwards, ignoring case. */ +/* Performs a string search, backwards, ignoring case. */ Py_LOCAL_INLINE(Py_ssize_t) string_search_ign_rev(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit) { + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { RE_State* state; Py_ssize_t found_pos; state = safe_state->re_state; - /* Can the string fit the available space? */ - if (text_pos - (Py_ssize_t)node->value_count < limit) - return -1; + *is_partial = FALSE; /* Has the node been initialised for fast searching, if necessary? */ if (!(node->status & RE_STATUS_FAST_INIT)) { @@ -5488,32 +6106,40 @@ Py_LOCAL_INLINE(Py_ssize_t) string_search_ign_rev(RE_SafeState* safe_state, /* Double-check because of multithreading. 
*/ if (!(node->status & RE_STATUS_FAST_INIT)) { - build_fast_tables_rev(state->encoding, node, TRUE); + build_fast_tables_rev(state, node, TRUE); node->status |= RE_STATUS_FAST_INIT; } release_GIL(safe_state); } - if (node->string.bad_character_offset) + if (node->string.bad_character_offset) { + /* Start with a fast search. This will find the string if it's complete + * (i.e. not truncated). + */ found_pos = fast_string_search_ign_rev(state, node, text_pos, limit); - else - found_pos = simple_string_search_ign_rev(state, node, text_pos, limit); + if (found_pos < 0 && state->partial_side == RE_PARTIAL_LEFT) + /* We didn't find the string, but it could've been truncated, so + * try again, starting close to the end. + */ + found_pos = simple_string_search_ign_rev(state, node, limit + + (Py_ssize_t)(node->value_count - 1), limit, is_partial); + } else + found_pos = simple_string_search_ign_rev(state, node, text_pos, limit, + is_partial); return found_pos; } -/* Performs a string search backwards. */ +/* Performs a string search, backwards. */ Py_LOCAL_INLINE(Py_ssize_t) string_search_rev(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit) { + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { RE_State* state; Py_ssize_t found_pos; state = safe_state->re_state; - /* Can the string fit the available space? */ - if (text_pos - (Py_ssize_t)node->value_count < limit) - return -1; + *is_partial = FALSE; /* Has the node been initialised for fast searching, if necessary? */ if (!(node->status & RE_STATUS_FAST_INIT)) { @@ -5524,23 +6150,33 @@ Py_LOCAL_INLINE(Py_ssize_t) string_search_rev(RE_SafeState* safe_state, /* Double-check because of multithreading. 
*/ if (!(node->status & RE_STATUS_FAST_INIT)) { - build_fast_tables_rev(state->encoding, node, FALSE); + build_fast_tables_rev(state, node, FALSE); node->status |= RE_STATUS_FAST_INIT; } release_GIL(safe_state); } - if (node->string.bad_character_offset) + if (node->string.bad_character_offset) { + /* Start with a fast search. This will find the string if it's complete + * (i.e. not truncated). + */ found_pos = fast_string_search_rev(state, node, text_pos, limit); - else - found_pos = simple_string_search_rev(state, node, text_pos, limit); + if (found_pos < 0 && state->partial_side == RE_PARTIAL_LEFT) + /* We didn't find the string, but it could've been truncated, so + * try again, starting close to the end. + */ + found_pos = simple_string_search_rev(state, node, limit + + (Py_ssize_t)(node->value_count - 1), limit, is_partial); + } else + found_pos = simple_string_search_rev(state, node, text_pos, limit, + is_partial); return found_pos; } /* Returns how many characters there could be before full case-folding. */ -Py_LOCAL_INLINE(size_t) possible_unfolded_length(size_t length) { +Py_LOCAL_INLINE(Py_ssize_t) possible_unfolded_length(Py_ssize_t length) { if (length == 0) return 0; @@ -5550,477 +6186,1449 @@ Py_LOCAL_INLINE(size_t) possible_unfolded_length(size_t length) { return length / RE_MAX_FOLDED; } +/* Checks whether there's any character except a newline at a position. */ +Py_LOCAL_INLINE(int) try_match_ANY(RE_State* state, RE_Node* node, Py_ssize_t + text_pos) { + if (text_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos < state->slice_end && + matches_ANY(state->encoding, node, state->char_at(state->text, + text_pos))); +} + +/* Checks whether there's any character at all at a position. 
*/ +Py_LOCAL_INLINE(int) try_match_ANY_ALL(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + if (text_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos < state->slice_end); +} + +/* Checks whether there's any character at all at a position, backwards. */ +Py_LOCAL_INLINE(int) try_match_ANY_ALL_REV(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + if (text_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos > state->slice_start); +} + +/* Checks whether there's any character except a newline at a position, + * backwards. + */ +Py_LOCAL_INLINE(int) try_match_ANY_REV(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + if (text_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos > state->slice_start && + matches_ANY(state->encoding, node, state->char_at(state->text, text_pos - + 1))); +} + +/* Checks whether there's any character except a line separator at a position. + */ +Py_LOCAL_INLINE(int) try_match_ANY_U(RE_State* state, RE_Node* node, Py_ssize_t + text_pos) { + if (text_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos < state->slice_end && + matches_ANY_U(state->encoding, node, state->char_at(state->text, + text_pos))); +} + +/* Checks whether there's any character except a line separator at a position, + * backwards. 
+ */ +Py_LOCAL_INLINE(int) try_match_ANY_U_REV(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + if (text_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos > state->slice_start && + matches_ANY_U(state->encoding, node, state->char_at(state->text, text_pos + - 1))); +} + +/* Checks whether a position is on a word boundary. */ +Py_LOCAL_INLINE(int) try_match_BOUNDARY(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + return bool_as_status(state->encoding->at_boundary(state, text_pos) == + node->match); +} + +/* Checks whether there's a character at a position. */ +Py_LOCAL_INLINE(int) try_match_CHARACTER(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + if (text_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos < state->slice_end && + matches_CHARACTER(state->encoding, state->locale_info, node, + state->char_at(state->text, text_pos)) == node->match); +} + +/* Checks whether there's a character at a position, ignoring case. */ +Py_LOCAL_INLINE(int) try_match_CHARACTER_IGN(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + if (text_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos < state->slice_end && + matches_CHARACTER_IGN(state->encoding, state->locale_info, node, + state->char_at(state->text, text_pos)) == node->match); +} + +/* Checks whether there's a character at a position, ignoring case, backwards. 
+ */ +Py_LOCAL_INLINE(int) try_match_CHARACTER_IGN_REV(RE_State* state, RE_Node* + node, Py_ssize_t text_pos) { + if (text_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos > state->slice_start && + matches_CHARACTER_IGN(state->encoding, state->locale_info, node, + state->char_at(state->text, text_pos - 1)) == node->match); +} + +/* Checks whether there's a character at a position, backwards. */ +Py_LOCAL_INLINE(int) try_match_CHARACTER_REV(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + if (text_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos > state->slice_start && + matches_CHARACTER(state->encoding, state->locale_info, node, + state->char_at(state->text, text_pos - 1)) == node->match); +} + +/* Checks whether a position is on a default word boundary. */ +Py_LOCAL_INLINE(int) try_match_DEFAULT_BOUNDARY(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + return bool_as_status(state->encoding->at_default_boundary(state, text_pos) + == node->match); +} + +/* Checks whether a position is at the default end of a word. */ +Py_LOCAL_INLINE(int) try_match_DEFAULT_END_OF_WORD(RE_State* state, RE_Node* + node, Py_ssize_t text_pos) { + return bool_as_status(state->encoding->at_default_word_end(state, + text_pos)); +} + +/* Checks whether a position is at the default start of a word. */ +Py_LOCAL_INLINE(int) try_match_DEFAULT_START_OF_WORD(RE_State* state, RE_Node* + node, Py_ssize_t text_pos) { + return bool_as_status(state->encoding->at_default_word_start(state, + text_pos)); +} + +/* Checks whether a position is at the end of a line. 
*/ +Py_LOCAL_INLINE(int) try_match_END_OF_LINE(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + return bool_as_status(text_pos >= state->slice_end || + state->char_at(state->text, text_pos) == '\n'); +} + +/* Checks whether a position is at the end of a line. */ +Py_LOCAL_INLINE(int) try_match_END_OF_LINE_U(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + return bool_as_status(state->encoding->at_line_end(state, text_pos)); +} + +/* Checks whether a position is at the end of the string. */ +Py_LOCAL_INLINE(int) try_match_END_OF_STRING(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + return bool_as_status(text_pos >= state->text_length); +} + +/* Checks whether a position is at the end of a line or the string. */ +Py_LOCAL_INLINE(int) try_match_END_OF_STRING_LINE(RE_State* state, RE_Node* + node, Py_ssize_t text_pos) { + return bool_as_status(text_pos >= state->text_length || text_pos == + state->final_newline); +} + +/* Checks whether a position is at the end of the string. */ +Py_LOCAL_INLINE(int) try_match_END_OF_STRING_LINE_U(RE_State* state, RE_Node* + node, Py_ssize_t text_pos) { + return bool_as_status(text_pos >= state->text_length || text_pos == + state->final_line_sep); +} + +/* Checks whether a position is at the end of a word. */ +Py_LOCAL_INLINE(int) try_match_END_OF_WORD(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + return bool_as_status(state->encoding->at_word_end(state, text_pos)); +} + +/* Checks whether a position is on a grapheme boundary. */ +Py_LOCAL_INLINE(int) try_match_GRAPHEME_BOUNDARY(RE_State* state, RE_Node* + node, Py_ssize_t text_pos) { + return bool_as_status(state->encoding->at_grapheme_boundary(state, + text_pos)); +} + +/* Checks whether there's a character with a certain property at a position. 
*/ +Py_LOCAL_INLINE(int) try_match_PROPERTY(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + if (text_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos < state->slice_end && + matches_PROPERTY(state->encoding, state->locale_info, node, + state->char_at(state->text, text_pos)) == node->match); +} + +/* Checks whether there's a character with a certain property at a position, + * ignoring case. + */ +Py_LOCAL_INLINE(int) try_match_PROPERTY_IGN(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + if (text_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos < state->slice_end && + matches_PROPERTY_IGN(state->encoding, state->locale_info, node, + state->char_at(state->text, text_pos)) == node->match); +} + +/* Checks whether there's a character with a certain property at a position, + * ignoring case, backwards. + */ +Py_LOCAL_INLINE(int) try_match_PROPERTY_IGN_REV(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + if (text_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos > state->slice_start && + matches_PROPERTY_IGN(state->encoding, state->locale_info, node, + state->char_at(state->text, text_pos - 1)) == node->match); +} + +/* Checks whether there's a character with a certain property at a position, + * backwards. 
+ */ +Py_LOCAL_INLINE(int) try_match_PROPERTY_REV(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + if (text_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos > state->slice_start && + matches_PROPERTY(state->encoding, state->locale_info, node, + state->char_at(state->text, text_pos - 1)) == node->match); +} + +/* Checks whether there's a character in a certain range at a position. */ +Py_LOCAL_INLINE(int) try_match_RANGE(RE_State* state, RE_Node* node, Py_ssize_t + text_pos) { + if (text_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos < state->slice_end && + matches_RANGE(state->encoding, state->locale_info, node, + state->char_at(state->text, text_pos)) == node->match); +} + +/* Checks whether there's a character in a certain range at a position, + * ignoring case. + */ +Py_LOCAL_INLINE(int) try_match_RANGE_IGN(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + if (text_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos < state->slice_end && + matches_RANGE_IGN(state->encoding, state->locale_info, node, + state->char_at(state->text, text_pos)) == node->match); +} + +/* Checks whether there's a character in a certain range at a position, + * ignoring case, backwards. 
+ */ +Py_LOCAL_INLINE(int) try_match_RANGE_IGN_REV(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + if (text_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos > state->slice_start && + matches_RANGE_IGN(state->encoding, state->locale_info, node, + state->char_at(state->text, text_pos - 1)) == node->match); +} + +/* Checks whether there's a character in a certain range at a position, + * backwards. + */ +Py_LOCAL_INLINE(int) try_match_RANGE_REV(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + if (text_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos > state->slice_start && + matches_RANGE(state->encoding, state->locale_info, node, + state->char_at(state->text, text_pos - 1)) == node->match); +} + +/* Checks whether a position is at the search anchor. */ +Py_LOCAL_INLINE(int) try_match_SEARCH_ANCHOR(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + return bool_as_status(text_pos == state->search_anchor); +} + +/* Checks whether there's a character in a certain set at a position. */ +Py_LOCAL_INLINE(int) try_match_SET(RE_State* state, RE_Node* node, Py_ssize_t + text_pos) { + if (text_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos < state->slice_end && + matches_SET(state->encoding, state->locale_info, node, + state->char_at(state->text, text_pos)) == node->match); +} + +/* Checks whether there's a character in a certain set at a position, ignoring + * case. 
+ */ +Py_LOCAL_INLINE(int) try_match_SET_IGN(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + if (text_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos < state->slice_end && + matches_SET_IGN(state->encoding, state->locale_info, node, + state->char_at(state->text, text_pos)) == node->match); +} + +/* Checks whether there's a character in a certain set at a position, ignoring + * case, backwards. + */ +Py_LOCAL_INLINE(int) try_match_SET_IGN_REV(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + if (text_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos > state->slice_start && + matches_SET_IGN(state->encoding, state->locale_info, node, + state->char_at(state->text, text_pos - 1)) == node->match); +} + +/* Checks whether there's a character in a certain set at a position, + * backwards. + */ +Py_LOCAL_INLINE(int) try_match_SET_REV(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + if (text_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos > state->slice_start && + matches_SET(state->encoding, state->locale_info, node, + state->char_at(state->text, text_pos - 1)) == node->match); +} + +/* Checks whether a position is at the start of a line. */ +Py_LOCAL_INLINE(int) try_match_START_OF_LINE(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + return bool_as_status(text_pos <= 0 || state->char_at(state->text, text_pos + - 1) == '\n'); +} + +/* Checks whether a position is at the start of a line. 
*/ +Py_LOCAL_INLINE(int) try_match_START_OF_LINE_U(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + return bool_as_status(state->encoding->at_line_start(state, text_pos)); +} + +/* Checks whether a position is at the start of the string. */ +Py_LOCAL_INLINE(int) try_match_START_OF_STRING(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + return bool_as_status(text_pos <= 0); +} + +/* Checks whether a position is at the start of a word. */ +Py_LOCAL_INLINE(int) try_match_START_OF_WORD(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + return bool_as_status(state->encoding->at_word_start(state, text_pos)); +} + +/* Checks whether there's a certain string at a position. */ +Py_LOCAL_INLINE(int) try_match_STRING(RE_State* state, RE_NextNode* next, + RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) { + Py_ssize_t length; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + RE_CODE* values; + Py_ssize_t s_pos; + + length = (Py_ssize_t)node->value_count; + char_at = state->char_at; + values = node->values; + + for (s_pos = 0; s_pos < length; s_pos++) { + if (text_pos + s_pos >= state->slice_end) { + if (state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + if (!same_char(char_at(state->text, text_pos + s_pos), values[s_pos])) + return RE_ERROR_FAILURE; + } + + next_position->node = next->match_next; + next_position->text_pos = text_pos + next->match_step; + + return RE_ERROR_SUCCESS; +} + +/* Checks whether there's a certain string at a position, ignoring case. 
*/ +Py_LOCAL_INLINE(int) try_match_STRING_FLD(RE_State* state, RE_NextNode* next, + RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) { + Py_ssize_t length; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + RE_EncodingTable* encoding; + RE_LocaleInfo* locale_info; + int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* + folded); + Py_ssize_t s_pos; + RE_CODE* values; + int folded_len; + int f_pos; + Py_ssize_t start_pos; + Py_UCS4 folded[RE_MAX_FOLDED]; + + length = (Py_ssize_t)node->value_count; + char_at = state->char_at; + encoding = state->encoding; + locale_info = state->locale_info; + full_case_fold = encoding->full_case_fold; + + s_pos = 0; + values = node->values; + folded_len = 0; + f_pos = 0; + start_pos = text_pos; + + while (s_pos < length) { + if (f_pos >= folded_len) { + /* Fetch and casefold another character. */ + if (text_pos >= state->slice_end) { + if (state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + folded_len = full_case_fold(locale_info, char_at(state->text, + text_pos), folded); + f_pos = 0; + } + + if (!same_char_ign(encoding, locale_info, folded[f_pos], + values[s_pos])) + return RE_ERROR_FAILURE; + + ++s_pos; + ++f_pos; + + if (f_pos >= folded_len) + ++text_pos; + } + + if (f_pos < folded_len) + return RE_ERROR_FAILURE; + + next_position->node = next->match_next; + if (next->match_step == 0) + next_position->text_pos = start_pos; + else + next_position->text_pos = text_pos; + + return RE_ERROR_SUCCESS; +} + +/* Checks whether there's a certain string at a position, ignoring case, + * backwards. 
+ */ +Py_LOCAL_INLINE(int) try_match_STRING_FLD_REV(RE_State* state, RE_NextNode* + next, RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) { + Py_ssize_t length; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + RE_EncodingTable* encoding; + RE_LocaleInfo* locale_info; + int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* + folded); + Py_ssize_t s_pos; + RE_CODE* values; + int folded_len; + int f_pos; + Py_ssize_t start_pos; + Py_UCS4 folded[RE_MAX_FOLDED]; + + length = (Py_ssize_t)node->value_count; + char_at = state->char_at; + encoding = state->encoding; + locale_info = state->locale_info; + full_case_fold = encoding->full_case_fold; + + s_pos = 0; + values = node->values; + folded_len = 0; + f_pos = 0; + start_pos = text_pos; + + while (s_pos < length) { + if (f_pos >= folded_len) { + /* Fetch and casefold another character. */ + if (text_pos <= state->slice_start) { + if (state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + folded_len = full_case_fold(locale_info, char_at(state->text, + text_pos - 1), folded); + f_pos = 0; + } + + if (!same_char_ign(encoding, locale_info, folded[folded_len - f_pos - + 1], values[length - s_pos - 1])) + return RE_ERROR_FAILURE; + + ++s_pos; + ++f_pos; + + if (f_pos >= folded_len) + --text_pos; + } + + if (f_pos < folded_len) + return RE_ERROR_FAILURE; + + next_position->node = next->match_next; + if (next->match_step == 0) + next_position->text_pos = start_pos; + else + next_position->text_pos = text_pos; + + return RE_ERROR_SUCCESS; +} + +/* Checks whether there's a certain string at a position, ignoring case. 
*/ +Py_LOCAL_INLINE(int) try_match_STRING_IGN(RE_State* state, RE_NextNode* next, + RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) { + Py_ssize_t length; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + RE_EncodingTable* encoding; + RE_LocaleInfo* locale_info; + RE_CODE* values; + Py_ssize_t s_pos; + + length = (Py_ssize_t)node->value_count; + char_at = state->char_at; + encoding = state->encoding; + locale_info = state->locale_info; + values = node->values; + + for (s_pos = 0; s_pos < length; s_pos++) { + if (text_pos + s_pos >= state->slice_end) { + if (state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + if (!same_char_ign(encoding, locale_info, char_at(state->text, text_pos + + s_pos), values[s_pos])) + return RE_ERROR_FAILURE; + } + + next_position->node = next->match_next; + next_position->text_pos = text_pos + next->match_step; + + return RE_ERROR_SUCCESS; +} + +/* Checks whether there's a certain string at a position, ignoring case, + * backwards. 
+ */ +Py_LOCAL_INLINE(int) try_match_STRING_IGN_REV(RE_State* state, RE_NextNode* + next, RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) { + Py_ssize_t length; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + RE_EncodingTable* encoding; + RE_LocaleInfo* locale_info; + RE_CODE* values; + Py_ssize_t s_pos; + + length = (Py_ssize_t)node->value_count; + char_at = state->char_at; + encoding = state->encoding; + locale_info = state->locale_info; + values = node->values; + + for (s_pos = 0; s_pos < length; s_pos++) { + if (text_pos - s_pos <= state->slice_start) { + if (state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + if (!same_char_ign(encoding, locale_info, char_at(state->text, text_pos + - s_pos - 1), values[length - s_pos - 1])) + return RE_ERROR_FAILURE; + } + + next_position->node = next->match_next; + next_position->text_pos = text_pos + next->match_step; + + return RE_ERROR_SUCCESS; +} + +/* Checks whether there's a certain string at a position, backwards. */ +Py_LOCAL_INLINE(int) try_match_STRING_REV(RE_State* state, RE_NextNode* next, + RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) { + Py_ssize_t length; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + RE_CODE* values; + Py_ssize_t s_pos; + + length = (Py_ssize_t)node->value_count; + char_at = state->char_at; + values = node->values; + + for (s_pos = 0; s_pos < length; s_pos++) { + if (text_pos - s_pos <= state->slice_start) { + if (state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + if (!same_char(char_at(state->text, text_pos - s_pos - 1), + values[length - s_pos - 1])) + return RE_ERROR_FAILURE; + } + + next_position->node = next->match_next; + next_position->text_pos = text_pos + next->match_step; + + return RE_ERROR_SUCCESS; +} + /* Tries a match at the current text position. * - * Returns TRUE and the next node and text position if the match succeeds. 
+ * Returns the next node and text position if the match succeeds. */ -Py_LOCAL_INLINE(BOOL) try_match(RE_State* state, RE_NextNode* next, Py_ssize_t +Py_LOCAL_INLINE(int) try_match(RE_State* state, RE_NextNode* next, Py_ssize_t text_pos, RE_Position* next_position) { RE_Node* test; - void* text; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + int status; test = next->test; if (test->status & RE_STATUS_FUZZY) { next_position->node = next->node; next_position->text_pos = text_pos; - return TRUE; + return RE_ERROR_SUCCESS; } - text = state->text; - char_at = state->char_at; - switch (test->op) { - case RE_OP_ANY: /* Any character, except a newline. */ - if (text_pos >= state->slice_end || char_at(text, text_pos) == '\n') - return FALSE; + case RE_OP_ANY: + status = try_match_ANY(state, test, text_pos); break; - case RE_OP_ANY_ALL: /* Any character at all. */ - if (text_pos >= state->slice_end) - return FALSE; + case RE_OP_ANY_ALL: + status = try_match_ANY_ALL(state, test, text_pos); break; - case RE_OP_ANY_ALL_REV: /* Any character at all. */ - if (text_pos <= state->slice_start) - return FALSE; + case RE_OP_ANY_ALL_REV: + status = try_match_ANY_ALL_REV(state, test, text_pos); break; - case RE_OP_ANY_REV: /* Any character, except a newline. */ - if (text_pos <= state->slice_start || char_at(text, text_pos - 1) == - '\n') - return FALSE; + case RE_OP_ANY_REV: + status = try_match_ANY_REV(state, test, text_pos); break; - case RE_OP_ANY_U: /* Any character, except a line separator. */ - if (text_pos >= state->slice_end || - state->encoding->is_line_sep(char_at(text, text_pos))) - return FALSE; + case RE_OP_ANY_U: + status = try_match_ANY_U(state, test, text_pos); break; - case RE_OP_ANY_U_REV: /* Any character, except a line separator. 
*/ - if (text_pos <= state->slice_start || - state->encoding->is_line_sep(char_at(text, text_pos - 1))) - return FALSE; + case RE_OP_ANY_U_REV: + status = try_match_ANY_U_REV(state, test, text_pos); break; - case RE_OP_BOUNDARY: /* At a word boundary. */ - if (state->encoding->at_boundary(state, text_pos) != test->match) - return FALSE; + case RE_OP_BOUNDARY: + status = try_match_BOUNDARY(state, test, text_pos); break; - case RE_OP_BRANCH: /* 2-way branch. */ - if (!try_match(state, &test->next_1, text_pos, next_position) && - !try_match(state, &test->nonstring.next_2, text_pos, next_position)) - return FALSE; + case RE_OP_BRANCH: + status = try_match(state, &test->next_1, text_pos, next_position); + if (status == RE_ERROR_FAILURE) + status = try_match(state, &test->nonstring.next_2, text_pos, + next_position); break; - case RE_OP_CHARACTER: /* A character literal. */ - if (text_pos >= state->slice_end || (char_at(text, text_pos) == - test->values[0]) != test->match) - return FALSE; + case RE_OP_CHARACTER: + status = try_match_CHARACTER(state, test, text_pos); break; - case RE_OP_CHARACTER_IGN: /* A character literal, ignoring case. */ - if (text_pos >= state->slice_end || same_char_ign(state->encoding, - char_at(text, text_pos), test->values[0]) != test->match) - return FALSE; + case RE_OP_CHARACTER_IGN: + status = try_match_CHARACTER_IGN(state, test, text_pos); break; - case RE_OP_CHARACTER_IGN_REV: /* A character literal, ignoring case. */ - if (text_pos <= state->slice_start || same_char_ign(state->encoding, - char_at(text, text_pos - 1), test->values[0]) != test->match) - return FALSE; + case RE_OP_CHARACTER_IGN_REV: + status = try_match_CHARACTER_IGN_REV(state, test, text_pos); break; - case RE_OP_CHARACTER_REV: /* A character literal. 
*/ - if (text_pos <= state->slice_start || (char_at(text, text_pos - 1) == - test->values[0]) != test->match) - return FALSE; + case RE_OP_CHARACTER_REV: + status = try_match_CHARACTER_REV(state, test, text_pos); break; - case RE_OP_DEFAULT_BOUNDARY: /* At a default word boundary. */ - if (state->encoding->at_default_boundary(state, text_pos) != - test->match) - return FALSE; + case RE_OP_DEFAULT_BOUNDARY: + status = try_match_DEFAULT_BOUNDARY(state, test, text_pos); break; - case RE_OP_DEFAULT_END_OF_WORD: /* At a default end of a word. */ - if (state->encoding->at_default_word_end(state, text_pos) != - test->match) - return FALSE; + case RE_OP_DEFAULT_END_OF_WORD: + status = try_match_DEFAULT_END_OF_WORD(state, test, text_pos); break; - case RE_OP_DEFAULT_START_OF_WORD: /* At a default start of a word. */ - if (state->encoding->at_default_word_start(state, text_pos) != - test->match) - return FALSE; + case RE_OP_DEFAULT_START_OF_WORD: + status = try_match_DEFAULT_START_OF_WORD(state, test, text_pos); break; - case RE_OP_END_OF_LINE: /* At the end of a line. */ - if (text_pos != state->text_length && char_at(text, text_pos) != '\n') - return FALSE; + case RE_OP_END_OF_LINE: + status = try_match_END_OF_LINE(state, test, text_pos); break; - case RE_OP_END_OF_LINE_U: /* At the end of a line. */ - if (!state->encoding->at_line_end(state, text_pos)) - return FALSE; + case RE_OP_END_OF_LINE_U: + status = try_match_END_OF_LINE_U(state, test, text_pos); break; - case RE_OP_END_OF_STRING: /* At the end of the string. */ - if (text_pos != state->text_length) - return FALSE; + case RE_OP_END_OF_STRING: + status = try_match_END_OF_STRING(state, test, text_pos); break; - case RE_OP_END_OF_STRING_LINE: /* At the end of the string or the final newline. 
*/ - if (text_pos != state->text_length && text_pos != state->final_newline) - return FALSE; + case RE_OP_END_OF_STRING_LINE: + status = try_match_END_OF_STRING_LINE(state, test, text_pos); break; - case RE_OP_END_OF_STRING_LINE_U: /* At the end of the string or the final newline. */ - if (text_pos != state->text_length && text_pos != - state->final_line_sep) - return FALSE; + case RE_OP_END_OF_STRING_LINE_U: + status = try_match_END_OF_STRING_LINE_U(state, test, text_pos); break; - case RE_OP_END_OF_WORD: /* At end of a word. */ - if (state->encoding->at_word_end(state, text_pos) != test->match) - return FALSE; + case RE_OP_END_OF_WORD: + status = try_match_END_OF_WORD(state, test, text_pos); break; - case RE_OP_GRAPHEME_BOUNDARY: /* At a grapheme boundary. */ - if (state->encoding->at_grapheme_boundary(state, text_pos) != - test->match) - return FALSE; + case RE_OP_GRAPHEME_BOUNDARY: + status = try_match_GRAPHEME_BOUNDARY(state, test, text_pos); break; - case RE_OP_PROPERTY: /* A character property. */ - /* values are: property */ - if (text_pos >= state->slice_end || - state->encoding->has_property(test->values[0], char_at(text, - text_pos)) != test->match) - return FALSE; + case RE_OP_PROPERTY: + status = try_match_PROPERTY(state, test, text_pos); break; - case RE_OP_PROPERTY_IGN: /* A character property, ignoring case. */ - /* values are: property */ - if (text_pos >= state->slice_end || !has_property_ign(state->encoding, - test->values[0], char_at(text, text_pos)) != test->match) - return FALSE; + case RE_OP_PROPERTY_IGN: + status = try_match_PROPERTY_IGN(state, test, text_pos); break; - case RE_OP_PROPERTY_IGN_REV: /* A character property, ignoring case. 
*/ - /* values are: property */ - if (text_pos <= state->slice_start || - !has_property_ign(state->encoding, test->values[0], char_at(text, - text_pos - 1)) != test->match) - return FALSE; + case RE_OP_PROPERTY_IGN_REV: + status = try_match_PROPERTY_IGN_REV(state, test, text_pos); break; - case RE_OP_PROPERTY_REV: /* A character property. */ - /* values are: property */ - if (text_pos <= state->slice_start || - state->encoding->has_property(test->values[0], char_at(text, text_pos - - 1)) != test->match) - return FALSE; + case RE_OP_PROPERTY_REV: + status = try_match_PROPERTY_REV(state, test, text_pos); break; - case RE_OP_RANGE: /* A range. */ - /* values are: range */ - if (text_pos >= state->slice_end || in_range(test->values[0], - test->values[1], char_at(text, text_pos)) != test->match) - return FALSE; + case RE_OP_RANGE: + status = try_match_RANGE(state, test, text_pos); break; - case RE_OP_RANGE_IGN: /* A range, ignoring case. */ - /* values are: range */ - if (text_pos >= state->slice_end || in_range_ign(state->encoding, - test->values[0], test->values[1], char_at(text, text_pos)) != - test->match) - return FALSE; + case RE_OP_RANGE_IGN: + status = try_match_RANGE_IGN(state, test, text_pos); break; - case RE_OP_RANGE_IGN_REV: /* A range, ignoring case. */ - /* values are: range */ - if (text_pos <= state->slice_start || in_range_ign(state->encoding, - test->values[0], test->values[1], char_at(text, text_pos - 1)) != - test->match) - return FALSE; + case RE_OP_RANGE_IGN_REV: + status = try_match_RANGE_IGN_REV(state, test, text_pos); break; - case RE_OP_RANGE_REV: /* A range. */ - /* values are: range */ - if (text_pos <= state->slice_start || in_range(test->values[0], - test->values[1], char_at(text, text_pos - 1)) != test->match) - return FALSE; + case RE_OP_RANGE_REV: + status = try_match_RANGE_REV(state, test, text_pos); break; - case RE_OP_SEARCH_ANCHOR: /* At the start of the search. 
*/ - if (text_pos != state->search_anchor) - return FALSE; + case RE_OP_SEARCH_ANCHOR: + status = try_match_SEARCH_ANCHOR(state, test, text_pos); break; - case RE_OP_SET_DIFF: /* Character set. */ + case RE_OP_SET_DIFF: case RE_OP_SET_INTER: case RE_OP_SET_SYM_DIFF: case RE_OP_SET_UNION: - if (text_pos >= state->slice_end || in_set(state->encoding, test, - char_at(text, text_pos)) != test->match) - return FALSE; + status = try_match_SET(state, test, text_pos); break; - case RE_OP_SET_DIFF_IGN: /* Character set, ignoring case. */ + case RE_OP_SET_DIFF_IGN: case RE_OP_SET_INTER_IGN: case RE_OP_SET_SYM_DIFF_IGN: case RE_OP_SET_UNION_IGN: - if (text_pos >= state->slice_end || in_set_ign(state->encoding, test, - char_at(text, text_pos)) != test->match) - return FALSE; + status = try_match_SET_IGN(state, test, text_pos); break; - case RE_OP_SET_DIFF_IGN_REV: /* Character set, ignoring case. */ + case RE_OP_SET_DIFF_IGN_REV: case RE_OP_SET_INTER_IGN_REV: case RE_OP_SET_SYM_DIFF_IGN_REV: case RE_OP_SET_UNION_IGN_REV: - if (text_pos <= state->slice_start || in_set_ign(state->encoding, test, - char_at(text, text_pos - 1)) != test->match) - return FALSE; + status = try_match_SET_IGN_REV(state, test, text_pos); break; - case RE_OP_SET_DIFF_REV: /* Character set. */ + case RE_OP_SET_DIFF_REV: case RE_OP_SET_INTER_REV: case RE_OP_SET_SYM_DIFF_REV: case RE_OP_SET_UNION_REV: - if (text_pos <= state->slice_start || in_set(state->encoding, test, - char_at(text, text_pos - 1)) != test->match) - return FALSE; + status = try_match_SET_REV(state, test, text_pos); break; - case RE_OP_START_OF_LINE: /* At the start of a line. */ - if (text_pos != 0 && char_at(text, text_pos - 1) != '\n') - return FALSE; + case RE_OP_START_OF_LINE: + status = try_match_START_OF_LINE(state, test, text_pos); break; - case RE_OP_START_OF_LINE_U: /* At the start of a line. 
*/ - if (!state->encoding->at_line_start(state, text_pos)) - return FALSE; + case RE_OP_START_OF_LINE_U: + status = try_match_START_OF_LINE_U(state, test, text_pos); break; - case RE_OP_START_OF_STRING: /* At the start of the string. */ - if (text_pos != 0) - return FALSE; + case RE_OP_START_OF_STRING: + status = try_match_START_OF_STRING(state, test, text_pos); break; - case RE_OP_START_OF_WORD: /* At start of a word. */ - if (state->encoding->at_word_start(state, text_pos) != test->match) - return FALSE; + case RE_OP_START_OF_WORD: + status = try_match_START_OF_WORD(state, test, text_pos); break; - case RE_OP_STRING: /* A string literal. */ - { - size_t length; - size_t available; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - RE_CODE* values; - size_t i; - - length = test->value_count; - available = state->slice_end - text_pos; - if (length > available) - return FALSE; - - char_at = state->char_at; - values = test->values; - - for (i = 0; i < length; i++) { - if (char_at(text, text_pos + i) != values[i]) - return FALSE; - } - break; - } - case RE_OP_STRING_FLD: /* A string literal, ignoring case. 
*/ - { - size_t length; - Py_ssize_t available; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - RE_EncodingTable* encoding; - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - Py_ssize_t pos; - size_t string_pos; - RE_CODE* values; - int folded_len; - int folded_pos; - Py_UCS4 folded[RE_MAX_FOLDED]; - - length = test->value_count; - available = state->slice_end - text_pos; - if ((Py_ssize_t)possible_unfolded_length(length) > available) - return FALSE; - - char_at = state->char_at; - encoding = state->encoding; - full_case_fold = encoding->full_case_fold; - pos = text_pos; - string_pos = 0; - values = test->values; - folded_len = 0; - folded_pos = 0; - - while (string_pos < length) { - if (folded_pos >= folded_len) { - if (pos >= state->slice_end) - return FALSE; - - folded_len = full_case_fold(char_at(text, pos), folded); - folded_pos = 0; - } - - if (!same_char_ign(encoding, folded[folded_pos], - values[string_pos])) - return FALSE; - - ++string_pos; - ++folded_pos; - - if (folded_pos >= folded_len) - ++pos; - } - - if (folded_pos < folded_len) - return FALSE; - - next_position->node = next->match_next; - if (next->match_step == 0) - next_position->text_pos = text_pos; - else - next_position->text_pos = pos; - - return TRUE; - } - case RE_OP_STRING_FLD_REV: /* A string literal, ignoring case. 
*/ - { - size_t length; - Py_ssize_t available; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - RE_EncodingTable* encoding; - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - Py_ssize_t pos; - size_t string_pos; - RE_CODE* values; - int folded_len; - int folded_pos; - Py_UCS4 folded[RE_MAX_FOLDED]; - - length = test->value_count; - available = text_pos - state->slice_start; - if ((Py_ssize_t)possible_unfolded_length(length) > available) - return FALSE; - - char_at = state->char_at; - encoding = state->encoding; - full_case_fold = encoding->full_case_fold; - pos = text_pos; - string_pos = length; - values = test->values; - folded_len = 0; - folded_pos = folded_len; - - while (string_pos > 0) { - if (folded_pos <= 0) { - if (pos <= state->slice_start) - return FALSE; - - folded_len = full_case_fold(char_at(text, pos - 1), folded); - folded_pos = folded_len; - } - - if (!same_char_ign(encoding, folded[folded_pos - 1], - values[string_pos - 1])) - return FALSE; - - --string_pos; - --folded_pos; - - if (folded_pos <= 0) - --pos; - } - - if (folded_pos > 0) - return FALSE; - - next_position->node = next->match_next; - if (next->match_step == 0) - next_position->text_pos = text_pos; - else - next_position->text_pos = pos; - - return TRUE; - } - case RE_OP_STRING_IGN: /* A string literal, ignoring case. */ - { - size_t length; - size_t available; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - RE_EncodingTable* encoding; - RE_CODE* values; - size_t i; - - length = test->value_count; - available = state->slice_end - text_pos; - if (length > available) - return FALSE; - - char_at = state->char_at; - encoding = state->encoding; - values = test->values; - - for (i = 0; i < length; i++) { - if (!same_char_ign(encoding, char_at(text, text_pos + i), - values[i])) - return FALSE; - } - break; - } - case RE_OP_STRING_IGN_REV: /* A string literal, ignoring case. 
*/ - { - size_t length; - size_t available; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - RE_EncodingTable* encoding; - RE_CODE* values; - size_t i; - - length = test->value_count; - available = text_pos - state->slice_start; - if (length > available) - return FALSE; - - char_at = state->char_at; - encoding = state->encoding; - values = test->values; - text_pos -= length; - - for (i = 0; i < length; i++) { - if (!same_char_ign(encoding, char_at(text, text_pos + i), - values[i])) - return FALSE; - } - - text_pos += length; - break; - } - case RE_OP_STRING_REV: /* A string literal. */ - { - size_t length; - size_t available; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - RE_CODE* values; - size_t i; - - length = test->value_count; - available = text_pos - state->slice_start; - if (length > available) - return FALSE; - - char_at = state->char_at; - values = test->values; - text_pos -= length; - - for (i = 0; i < length; i++) { - if (char_at(text, text_pos + i) != values[i]) - return FALSE; - } - - text_pos += length; - break; - } + case RE_OP_STRING: + return try_match_STRING(state, next, test, text_pos, next_position); + case RE_OP_STRING_FLD: + return try_match_STRING_FLD(state, next, test, text_pos, + next_position); + case RE_OP_STRING_FLD_REV: + return try_match_STRING_FLD_REV(state, next, test, text_pos, + next_position); + case RE_OP_STRING_IGN: + return try_match_STRING_IGN(state, next, test, text_pos, + next_position); + case RE_OP_STRING_IGN_REV: + return try_match_STRING_IGN_REV(state, next, test, text_pos, + next_position); + case RE_OP_STRING_REV: + return try_match_STRING_REV(state, next, test, text_pos, + next_position); default: next_position->node = next->node; next_position->text_pos = text_pos; - return TRUE; + return RE_ERROR_SUCCESS; } + if (status != RE_ERROR_SUCCESS) + return status; + next_position->node = next->match_next; next_position->text_pos = text_pos + next->match_step; - return TRUE; + return RE_ERROR_SUCCESS; } 
-Py_LOCAL_INLINE(BOOL) search_start(RE_SafeState* safe_state, RE_NextNode* next, - RE_Position* new_position, int search_index); +/* Searches for a word boundary. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_BOUNDARY(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_boundary)(RE_State* state, Py_ssize_t text_pos); + + at_boundary = state->encoding->at_boundary; + + *is_partial = FALSE; + + for (;;) { + if (at_boundary(state, text_pos) == node->match) + return text_pos; + + if (text_pos >= state->slice_end) + return -1; + + ++text_pos; + } +} + +/* Searches for a word boundary, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_BOUNDARY_rev(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_boundary)(RE_State* state, Py_ssize_t text_pos); + + at_boundary = state->encoding->at_boundary; + + *is_partial = FALSE; + + for (;;) { + if (at_boundary(state, text_pos) == node->match) + return text_pos; + + if (text_pos <= state->slice_start) + return -1; + + --text_pos; + } +} + +/* Searches for a default word boundary. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_BOUNDARY(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_default_boundary)(RE_State* state, Py_ssize_t text_pos); + + at_default_boundary = state->encoding->at_default_boundary; + + *is_partial = FALSE; + + for (;;) { + if (at_default_boundary(state, text_pos) == node->match) + return text_pos; + + if (text_pos >= state->slice_end) + return -1; + + ++text_pos; + } +} + +/* Searches for a default word boundary, backwards. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_BOUNDARY_rev(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_default_boundary)(RE_State* state, Py_ssize_t text_pos); + + at_default_boundary = state->encoding->at_default_boundary; + + *is_partial = FALSE; + + for (;;) { + if (at_default_boundary(state, text_pos) == node->match) + return text_pos; + + if (text_pos <= state->slice_start) + return -1; + + --text_pos; + } +} + +/* Searches for the default end of a word. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_END_OF_WORD(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_default_word_end)(RE_State* state, Py_ssize_t text_pos); + + at_default_word_end = state->encoding->at_default_word_end; + + *is_partial = FALSE; + + for (;;) { + if (at_default_word_end(state, text_pos) == node->match) + return text_pos; + + if (text_pos >= state->slice_end) + return -1; + + ++text_pos; + } +} + +/* Searches for the default end of a word, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_END_OF_WORD_rev(RE_State* + state, RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_default_word_end)(RE_State* state, Py_ssize_t text_pos); + + at_default_word_end = state->encoding->at_default_word_end; + + *is_partial = FALSE; + + for (;;) { + if (at_default_word_end(state, text_pos) == node->match) + return text_pos; + + if (text_pos <= state->slice_start) + return -1; + + --text_pos; + } +} + +/* Searches for the default start of a word. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_START_OF_WORD(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_default_word_start)(RE_State* state, Py_ssize_t text_pos); + + at_default_word_start = state->encoding->at_default_word_start; + + *is_partial = FALSE; + + for (;;) { + if (at_default_word_start(state, text_pos) == node->match) + return text_pos; + + if (text_pos >= state->slice_end) + return -1; + + ++text_pos; + } +} + +/* Searches for the default start of a word, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_START_OF_WORD_rev(RE_State* + state, RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_default_word_start)(RE_State* state, Py_ssize_t text_pos); + + at_default_word_start = state->encoding->at_default_word_start; + + *is_partial = FALSE; + + for (;;) { + if (at_default_word_start(state, text_pos) == node->match) + return text_pos; + + if (text_pos <= state->slice_start) + return -1; + + --text_pos; + } +} + +/* Searches for the end of line. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_LINE(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + for (;;) { + if (text_pos >= state->text_length || state->char_at(state->text, + text_pos) == '\n') + return text_pos; + + if (text_pos >= state->slice_end) + return -1; + + ++text_pos; + } +} + +/* Searches for the end of line, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_LINE_rev(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + for (;;) { + if (text_pos >= state->text_length || state->char_at(state->text, + text_pos) == '\n') + return text_pos; + + if (text_pos <= state->slice_start) + return -1; + + --text_pos; + } +} + +/* Searches for the end of the string. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_STRING(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + if (state->slice_end >= state->text_length) + return state->text_length; + + return -1; +} + +/* Searches for the end of the string, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_STRING_rev(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + if (text_pos >= state->text_length) + return text_pos; + + return -1; +} + +/* Searches for the end of the string or line. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_STRING_LINE(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + if (text_pos <= state->final_newline) + text_pos = state->final_newline; + else if (text_pos <= state->text_length) + text_pos = state->text_length; + + if (text_pos > state->slice_end) + return -1; + + if (text_pos >= state->text_length) + return text_pos; + + return text_pos; +} + +/* Searches for the end of the string or line, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_STRING_LINE_rev(RE_State* + state, RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + if (text_pos >= state->text_length) + text_pos = state->text_length; + else if (text_pos >= state->final_newline) + text_pos = state->final_newline; + else + return -1; + + if (text_pos < state->slice_start) + return -1; + + if (text_pos <= 0) + return text_pos; + + return text_pos; +} + +/* Searches for the end of a word. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_WORD(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_word_end)(RE_State* state, Py_ssize_t text_pos); + + at_word_end = state->encoding->at_word_end; + + *is_partial = FALSE; + + for (;;) { + if (at_word_end(state, text_pos) == node->match) + return text_pos; + + if (text_pos >= state->slice_end) + return -1; + + ++text_pos; + } +} + +/* Searches for the end of a word, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_WORD_rev(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_word_end)(RE_State* state, Py_ssize_t text_pos); + + at_word_end = state->encoding->at_word_end; + + *is_partial = FALSE; + + for (;;) { + if (at_word_end(state, text_pos) == node->match) + return text_pos; + + if (text_pos <= state->slice_start) + return -1; + + --text_pos; + } +} + +/* Searches for a grapheme boundary. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_GRAPHEME_BOUNDARY(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_grapheme_boundary)(RE_State* state, Py_ssize_t text_pos); + + at_grapheme_boundary = state->encoding->at_grapheme_boundary; + + *is_partial = FALSE; + + for (;;) { + if (at_grapheme_boundary(state, text_pos) == node->match) + return text_pos; + + if (text_pos >= state->slice_end) + return -1; + + ++text_pos; + } +} + +/* Searches for a grapheme boundary, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_GRAPHEME_BOUNDARY_rev(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_grapheme_boundary)(RE_State* state, Py_ssize_t text_pos); + + at_grapheme_boundary = state->encoding->at_grapheme_boundary; + + *is_partial = FALSE; + + for (;;) { + if (at_grapheme_boundary(state, text_pos) == node->match) + return text_pos; + + if (text_pos <= state->slice_start) + return -1; + + --text_pos; + } +} + +/* Searches for the start of line. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_LINE(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + for (;;) { + if (text_pos <= 0 || state->char_at(state->text, text_pos - 1) == '\n') + return text_pos; + + if (text_pos >= state->slice_end) + return -1; + + ++text_pos; + } +} + +/* Searches for the start of line, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_LINE_rev(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + for (;;) { + if (text_pos <= 0 || state->char_at(state->text, text_pos - 1) == '\n') + return text_pos; + + if (text_pos <= state->slice_start) + return -1; + + --text_pos; + } +} + +/* Searches for the start of the string. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_STRING(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + if (text_pos <= 0) + return text_pos; + + return -1; +} + +/* Searches for the start of the string, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_STRING_rev(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + if (state->slice_start <= 0) + return 0; + + return -1; +} + +/* Searches for the start of a word. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_WORD(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_word_start)(RE_State* state, Py_ssize_t text_pos); + + at_word_start = state->encoding->at_word_start; + + *is_partial = FALSE; + + for (;;) { + if (at_word_start(state, text_pos) == node->match) + return text_pos; + + if (text_pos >= state->slice_end) + return -1; + + ++text_pos; + } +} + +/* Searches for the start of a word, backwards. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_WORD_rev(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_word_start)(RE_State* state, Py_ssize_t text_pos); + + at_word_start = state->encoding->at_word_start; + + *is_partial = FALSE; + + for (;;) { + if (at_word_start(state, text_pos) == node->match) + return text_pos; + + if (text_pos <= state->slice_start) + return -1; + + --text_pos; + } +} + +/* Searches for a string. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING(RE_SafeState* safe_state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + RE_State* state; + + state = safe_state->re_state; + + *is_partial = FALSE; + + if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) + return text_pos; + + return string_search(safe_state, node, text_pos, state->slice_end, + is_partial); +} + +/* Searches for a string, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_FLD(RE_SafeState* safe_state, + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t* new_pos, BOOL* is_partial) { + RE_State* state; + + state = safe_state->re_state; + + *is_partial = FALSE; + + if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) { + *new_pos = state->req_end; + return text_pos; + } + + return string_search_fld(safe_state, node, text_pos, state->slice_end, + new_pos, is_partial); +} + +/* Searches for a string, ignoring case, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_FLD_REV(RE_SafeState* + safe_state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t* new_pos, BOOL* + is_partial) { + RE_State* state; + + state = safe_state->re_state; + + *is_partial = FALSE; + + if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) { + *new_pos = state->req_end; + return text_pos; + } + + return string_search_fld_rev(safe_state, node, text_pos, + state->slice_start, new_pos, is_partial); +} + +/* Searches for a string, ignoring case. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_IGN(RE_SafeState* safe_state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + RE_State* state; + + state = safe_state->re_state; + + *is_partial = FALSE; + + if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) + return text_pos; + + return string_search_ign(safe_state, node, text_pos, state->slice_end, + is_partial); +} + +/* Searches for a string, ignoring case, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_IGN_REV(RE_SafeState* + safe_state, RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + RE_State* state; + + state = safe_state->re_state; + + *is_partial = FALSE; + + if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) + return text_pos; + + return string_search_ign_rev(safe_state, node, text_pos, + state->slice_start, is_partial); +} + +/* Searches for a string, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_REV(RE_SafeState* safe_state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + RE_State* state; + + state = safe_state->re_state; + + *is_partial = FALSE; + + if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) + return text_pos; + + return string_search_rev(safe_state, node, text_pos, state->slice_start, + is_partial); +} /* Searches for the start of a match. 
*/ -Py_LOCAL_INLINE(BOOL) search_start(RE_SafeState* safe_state, RE_NextNode* next, +Py_LOCAL_INLINE(int) search_start(RE_SafeState* safe_state, RE_NextNode* next, RE_Position* new_position, int search_index) { RE_State* state; Py_ssize_t text_pos; RE_Node* test; RE_Node* node; Py_ssize_t start_pos; - Py_ssize_t step; - Py_ssize_t limit; RE_SearchPosition* info; state = safe_state->re_state; @@ -6032,17 +7640,21 @@ Py_LOCAL_INLINE(BOOL) search_start(RE_SafeState* safe_state, RE_NextNode* next, node = next->node; if (state->reverse) { - if (start_pos < state->slice_start) - return FALSE; + if (start_pos < state->slice_start) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = state->slice_start; + return RE_ERROR_PARTIAL; + } - limit = state->slice_start; - step = -1; + return RE_ERROR_FAILURE; + } } else { - if (start_pos > state->slice_end) - return FALSE; - - limit = state->slice_end; - step = 1; + if (start_pos > state->slice_end) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = state->slice_end; + return RE_ERROR_PARTIAL; + } + } } if (test->status & RE_STATUS_FUZZY) { @@ -6053,17 +7665,17 @@ Py_LOCAL_INLINE(BOOL) search_start(RE_SafeState* safe_state, RE_NextNode* next, new_position->node = node; new_position->text_pos = start_pos; - return TRUE; + return RE_ERROR_SUCCESS; } again: - if (!state->pattern->is_fuzzy) { + if (!state->pattern->is_fuzzy && state->partial_side == RE_PARTIAL_NONE) { if (state->reverse) { - if (start_pos - (Py_ssize_t)state->min_width < limit) - return FALSE; + if (start_pos - state->min_width < state->slice_start) + return RE_ERROR_FAILURE; } else { - if (start_pos + (Py_ssize_t)state->min_width > limit) - return FALSE; + if (start_pos + state->min_width > state->slice_end) + return RE_ERROR_FAILURE; } } @@ -6072,499 +7684,741 @@ again: if (state->reverse) { if (info->start_pos >= 0 && info->start_pos >= start_pos && start_pos >= info->match_pos) { - state->match_pos = 
info->match_pos; new_position->text_pos = state->match_pos; new_position->node = node; - return TRUE; + return RE_ERROR_SUCCESS; } } else { if (info->start_pos >= 0 && info->start_pos <= start_pos && start_pos <= info->match_pos) { - state->match_pos = info->match_pos; new_position->text_pos = state->match_pos; new_position->node = node; - return TRUE; + return RE_ERROR_SUCCESS; } } } else info = NULL; switch (test->op) { - case RE_OP_ANY: /* Any character, except a newline. */ - start_pos = match_many_ANY(state, start_pos, limit, FALSE); - if (start_pos >= limit) - return FALSE; + case RE_OP_ANY: + start_pos = match_many_ANY(state, test, start_pos, state->slice_end, + FALSE); + + if (start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) + return RE_ERROR_FAILURE; break; - case RE_OP_ANY_ALL: /* Any character at all. */ + case RE_OP_ANY_ALL: break; - case RE_OP_ANY_ALL_REV: /* Any character at all backwards. */ + case RE_OP_ANY_ALL_REV: break; - case RE_OP_ANY_REV: /* Any character backwards, except a newline. */ - start_pos = match_many_ANY_REV(state, start_pos, limit, FALSE); - if (start_pos <= limit) - return FALSE; + case RE_OP_ANY_REV: + start_pos = match_many_ANY_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) + return RE_ERROR_FAILURE; break; - case RE_OP_ANY_U: /* Any character, except a line separator. 
*/ - start_pos = match_many_ANY_U(state, start_pos, limit, FALSE); - if (start_pos >= limit) - return FALSE; + case RE_OP_ANY_U: + start_pos = match_many_ANY_U(state, test, start_pos, state->slice_end, + FALSE); + + if (start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) + return RE_ERROR_FAILURE; break; - case RE_OP_ANY_U_REV: /* Any character backwards, except a line separator. */ - start_pos = match_many_ANY_U_REV(state, start_pos, limit, FALSE); - if (start_pos <= limit) - return FALSE; + case RE_OP_ANY_U_REV: + start_pos = match_many_ANY_U_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) + return RE_ERROR_FAILURE; break; - case RE_OP_BOUNDARY: /* At a word boundary. */ + case RE_OP_BOUNDARY: { - BOOL match; - Py_ssize_t step; - BOOL (*at_boundary)(RE_State* state, Py_ssize_t start_pos); + BOOL is_partial; - match = test->match; - step = state->reverse ? -1 : 1; - at_boundary = state->encoding->at_boundary; + if (state->reverse) + start_pos = search_start_BOUNDARY_rev(state, test, start_pos, + &is_partial); + else + start_pos = search_start_BOUNDARY(state, test, start_pos, + &is_partial); - for (;;) { - if (at_boundary(state, start_pos) == match) - break; - if (start_pos == limit) - return FALSE; - start_pos += step; + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; } break; } - case RE_OP_CHARACTER: /* A character literal. 
*/ - start_pos = match_many_CHARACTER(state, test, start_pos, limit, FALSE); - if (start_pos >= limit) - return FALSE; + case RE_OP_CHARACTER: + start_pos = match_many_CHARACTER(state, test, start_pos, + state->slice_end, FALSE); + + if (start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) + return RE_ERROR_FAILURE; break; - case RE_OP_CHARACTER_IGN: /* A character literal, ignoring case. */ - start_pos = match_many_CHARACTER_IGN(state, test, start_pos, limit, - FALSE); - if (start_pos >= limit) - return FALSE; + case RE_OP_CHARACTER_IGN: + start_pos = match_many_CHARACTER_IGN(state, test, start_pos, + state->slice_end, FALSE); + + if (start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) + return RE_ERROR_FAILURE; break; - case RE_OP_CHARACTER_IGN_REV: /* A character literal backwards, ignoring case. */ - start_pos = match_many_CHARACTER_IGN_REV(state, test, start_pos, limit, - FALSE); - if (start_pos <= limit) - return FALSE; + case RE_OP_CHARACTER_IGN_REV: + start_pos = match_many_CHARACTER_IGN_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) + return RE_ERROR_FAILURE; break; - case RE_OP_CHARACTER_REV: /* A character literal backwards. 
*/ - start_pos = match_many_CHARACTER_REV(state, test, start_pos, limit, - FALSE); - if (start_pos <= limit) - return FALSE; + case RE_OP_CHARACTER_REV: + start_pos = match_many_CHARACTER_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) + return RE_ERROR_FAILURE; break; - case RE_OP_DEFAULT_BOUNDARY: /* At a default word boundary. */ + case RE_OP_DEFAULT_BOUNDARY: { - BOOL match; - Py_ssize_t step; - BOOL (*at_default_boundary)(RE_State* state, Py_ssize_t start_pos); + BOOL is_partial; - match = test->match; - step = state->reverse ? -1 : 1; - at_default_boundary = state->encoding->at_default_boundary; + if (state->reverse) + start_pos = search_start_DEFAULT_BOUNDARY_rev(state, test, + start_pos, &is_partial); + else + start_pos = search_start_DEFAULT_BOUNDARY(state, test, start_pos, + &is_partial); - for (;;) { - if (at_default_boundary(state, start_pos) == match) - break; - if (start_pos == limit) - return FALSE; - start_pos += step; + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; } break; } - case RE_OP_DEFAULT_END_OF_WORD: /* At a default end of a word. */ + case RE_OP_DEFAULT_END_OF_WORD: { - BOOL match; - Py_ssize_t step; - BOOL (*at_default_word_end)(RE_State* state, Py_ssize_t start_pos); + BOOL is_partial; - match = test->match; - step = state->reverse ? 
-1 : 1; - at_default_word_end = state->encoding->at_default_word_end; + if (state->reverse) + start_pos = search_start_DEFAULT_END_OF_WORD_rev(state, test, + start_pos, &is_partial); + else + start_pos = search_start_DEFAULT_END_OF_WORD(state, test, + start_pos, &is_partial); - for (;;) { - if (at_default_word_end(state, start_pos) == match) - break; - if (start_pos == limit) - return FALSE; - start_pos += step; + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; } break; } - case RE_OP_DEFAULT_START_OF_WORD: /* At a default start of a word. */ + case RE_OP_DEFAULT_START_OF_WORD: { - BOOL match; - Py_ssize_t step; - BOOL (*at_default_word_start)(RE_State* state, Py_ssize_t start_pos); + BOOL is_partial; - match = test->match; - step = state->reverse ? -1 : 1; - at_default_word_start = state->encoding->at_default_word_start; + if (state->reverse) + start_pos = search_start_DEFAULT_START_OF_WORD_rev(state, test, + start_pos, &is_partial); + else + start_pos = search_start_DEFAULT_START_OF_WORD(state, test, + start_pos, &is_partial); - for (;;) { - if (at_default_word_start(state, start_pos) == match) - break; - if (start_pos == limit) - return FALSE; - start_pos += step; + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; } break; } - case RE_OP_END_OF_LINE: /* At the end of a line. */ + case RE_OP_END_OF_LINE: { - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - void* text; - Py_ssize_t step; + BOOL is_partial; - char_at = state->char_at; - text = state->text; - text_pos = start_pos; - step = state->reverse ? 
-1 : 1; + if (state->reverse) + start_pos = search_start_END_OF_LINE_rev(state, test, start_pos, + &is_partial); + else + start_pos = search_start_END_OF_LINE(state, test, start_pos, + &is_partial); - for (;;) { - if (text_pos == state->text_length || char_at(text, text_pos) == - '\n') - break; - if (text_pos == limit) - return FALSE; - text_pos += step; - } + if (start_pos < 0) + return RE_ERROR_FAILURE; - start_pos = text_pos; - break; - } - case RE_OP_END_OF_STRING: /* At the end of the string. */ - if (state->reverse) { - if (start_pos != state->text_length) - return FALSE; - } else { - if (state->slice_end != state->text_length) - return FALSE; - } - - start_pos = state->text_length; - break; - case RE_OP_END_OF_STRING_LINE: /* At end of string or final newline. */ - if (state->reverse) { - if (start_pos >= state->text_length) - start_pos = state->text_length; - else if (start_pos >= state->final_newline) - start_pos = state->final_newline; - else - return FALSE; - - if (start_pos < state->slice_start) - return FALSE; - } else { - if (start_pos <= state->final_newline) - start_pos = state->final_newline; - else if (start_pos <= state->text_length) - start_pos = state->text_length; - else - return FALSE; - - if (start_pos > state->slice_end) - return FALSE; - } - break; - case RE_OP_END_OF_WORD: /* At end of a word. */ - { - BOOL match; - Py_ssize_t step; - BOOL (*at_word_end)(RE_State* state, Py_ssize_t start_pos); - - match = test->match; - step = state->reverse ? -1 : 1; - at_word_end = state->encoding->at_word_end; - - for (;;) { - if (at_word_end(state, start_pos) == match) - break; - if (start_pos == limit) - return FALSE; - start_pos += step; + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; } break; } - case RE_OP_GRAPHEME_BOUNDARY: /* At a grapheme boundary. 
*/ + case RE_OP_END_OF_STRING: { - BOOL match; - Py_ssize_t step; - BOOL (*at_boundary)(RE_State* state, Py_ssize_t start_pos); + BOOL is_partial; - match = test->match; - step = state->reverse ? -1 : 1; - at_boundary = state->encoding->at_grapheme_boundary; + if (state->reverse) + start_pos = search_start_END_OF_STRING_rev(state, test, start_pos, + &is_partial); + else + start_pos = search_start_END_OF_STRING(state, test, start_pos, + &is_partial); - for (;;) { - if (at_boundary(state, start_pos) == match) - break; - if (start_pos == limit) - return FALSE; - start_pos += step; + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; } break; } - case RE_OP_PROPERTY: /* A character property. */ - start_pos = match_many_PROPERTY(state, test, start_pos, limit, FALSE); - if (start_pos >= limit) - return FALSE; + case RE_OP_END_OF_STRING_LINE: + { + BOOL is_partial; + + if (state->reverse) + start_pos = search_start_END_OF_STRING_LINE_rev(state, test, + start_pos, &is_partial); + else + start_pos = search_start_END_OF_STRING_LINE(state, test, start_pos, + &is_partial); + + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } break; - case RE_OP_PROPERTY_IGN: /* A character property, ignoring case. 
*/ - start_pos = match_many_PROPERTY_IGN(state, test, start_pos, limit, + } + case RE_OP_END_OF_WORD: + { + BOOL is_partial; + + if (state->reverse) + start_pos = search_start_END_OF_WORD_rev(state, test, start_pos, + &is_partial); + else + start_pos = search_start_END_OF_WORD(state, test, start_pos, + &is_partial); + + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + break; + } + case RE_OP_GRAPHEME_BOUNDARY: + { + BOOL is_partial; + + if (state->reverse) + start_pos = search_start_GRAPHEME_BOUNDARY_rev(state, test, + start_pos, &is_partial); + else + start_pos = search_start_GRAPHEME_BOUNDARY(state, test, start_pos, + &is_partial); + + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + break; + } + case RE_OP_PROPERTY: + start_pos = match_many_PROPERTY(state, test, start_pos, + state->slice_end, FALSE); + + if (start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) + return RE_ERROR_FAILURE; + break; + case RE_OP_PROPERTY_IGN: + start_pos = match_many_PROPERTY_IGN(state, test, start_pos, + state->slice_end, FALSE); + + if (start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) + return RE_ERROR_FAILURE; + break; + case RE_OP_PROPERTY_IGN_REV: + start_pos = match_many_PROPERTY_IGN_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) + return RE_ERROR_FAILURE; + break; + case RE_OP_PROPERTY_REV: + start_pos = 
match_many_PROPERTY_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) + return RE_ERROR_FAILURE; + break; + case RE_OP_RANGE: + start_pos = match_many_RANGE(state, test, start_pos, state->slice_end, FALSE); - if (start_pos >= limit) - return FALSE; + + if (start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) + return RE_ERROR_FAILURE; break; - case RE_OP_PROPERTY_IGN_REV: /* A character property backwards, ignoring case. */ - start_pos = match_many_PROPERTY_IGN_REV(state, test, start_pos, limit, - FALSE); - if (start_pos <= limit) - return FALSE; + case RE_OP_RANGE_IGN: + start_pos = match_many_RANGE_IGN(state, test, start_pos, + state->slice_end, FALSE); + + if (start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) + return RE_ERROR_FAILURE; break; - case RE_OP_PROPERTY_REV: /* A character property backwards. */ - start_pos = match_many_PROPERTY_REV(state, test, start_pos, limit, - FALSE); - if (start_pos <= limit) - return FALSE; + case RE_OP_RANGE_IGN_REV: + start_pos = match_many_RANGE_IGN_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) + return RE_ERROR_FAILURE; break; - case RE_OP_RANGE: /* A range. 
*/ - start_pos = match_many_RANGE(state, test, start_pos, limit, FALSE); - if (start_pos >= limit) - return FALSE; + case RE_OP_RANGE_REV: + start_pos = match_many_RANGE_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) + return RE_ERROR_FAILURE; break; - case RE_OP_RANGE_IGN: /* A range, ignoring case. */ - start_pos = match_many_RANGE_IGN(state, test, start_pos, limit, FALSE); - if (start_pos >= limit) - return FALSE; - break; - case RE_OP_RANGE_IGN_REV: /* A range backwards, ignoring case. */ - start_pos = match_many_RANGE_IGN_REV(state, test, start_pos, limit, - FALSE); - if (start_pos <= limit) - return FALSE; - break; - case RE_OP_RANGE_REV: /* A range backwards. */ - start_pos = match_many_RANGE_REV(state, test, start_pos, limit, FALSE); - if (start_pos <= limit) - return FALSE; - break; - case RE_OP_SEARCH_ANCHOR: /* At the start of the search. */ + case RE_OP_SEARCH_ANCHOR: if (state->reverse) { if (start_pos < state->search_anchor) - return FALSE; + return RE_ERROR_FAILURE; } else { if (start_pos > state->search_anchor) - return FALSE; + return RE_ERROR_FAILURE; } start_pos = state->search_anchor; break; - case RE_OP_SET_DIFF: /* A set. */ + case RE_OP_SET_DIFF: case RE_OP_SET_INTER: case RE_OP_SET_SYM_DIFF: case RE_OP_SET_UNION: - start_pos = match_many_SET(state, test, start_pos, limit, FALSE); - if (start_pos >= limit) + start_pos = match_many_SET(state, test, start_pos, state->slice_end, + FALSE); + + if (start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) return FALSE; break; - case RE_OP_SET_DIFF_IGN: /* A set, ignoring case. 
*/ + case RE_OP_SET_DIFF_IGN: case RE_OP_SET_INTER_IGN: case RE_OP_SET_SYM_DIFF_IGN: case RE_OP_SET_UNION_IGN: - start_pos = match_many_SET_IGN(state, test, start_pos, limit, FALSE); - if (start_pos >= limit) + start_pos = match_many_SET_IGN(state, test, start_pos, + state->slice_end, FALSE); + + if (start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) return FALSE; break; - case RE_OP_SET_DIFF_IGN_REV: /* A set backwards, ignoring case. */ + case RE_OP_SET_DIFF_IGN_REV: case RE_OP_SET_INTER_IGN_REV: case RE_OP_SET_SYM_DIFF_IGN_REV: case RE_OP_SET_UNION_IGN_REV: - start_pos = match_many_SET_IGN_REV(state, test, start_pos, limit, - FALSE); - if (start_pos <= limit) + start_pos = match_many_SET_IGN_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) return FALSE; break; - case RE_OP_SET_DIFF_REV: /* A set backwards. */ + case RE_OP_SET_DIFF_REV: case RE_OP_SET_INTER_REV: case RE_OP_SET_SYM_DIFF_REV: case RE_OP_SET_UNION_REV: - start_pos = match_many_SET_REV(state, test, start_pos, limit, FALSE); - if (start_pos <= limit) + start_pos = match_many_SET_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) return FALSE; break; - case RE_OP_START_OF_LINE: /* At the start of a line. */ + case RE_OP_START_OF_LINE: { - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - void* text; - Py_ssize_t step; - Py_ssize_t text_pos; + BOOL is_partial; - char_at = state->char_at; - text = state->text; - step = state->reverse ? 
-1 : 1; - text_pos = start_pos - 1; + if (state->reverse) + start_pos = search_start_START_OF_LINE_rev(state, test, start_pos, + &is_partial); + else + start_pos = search_start_START_OF_LINE(state, test, start_pos, + &is_partial); - --limit; + if (start_pos < 0) + return RE_ERROR_FAILURE; - for (;;) { - if (text_pos < 0 || char_at(text, text_pos) == '\n') - break; - if (text_pos == limit) - return FALSE; - text_pos += step; - } - - ++text_pos; - ++limit; - - start_pos = text_pos; - break; - } - case RE_OP_START_OF_STRING: /* At the start of the string. */ - if (state->reverse) { - if (state->slice_start != 0) - return FALSE; - } else { - if (start_pos != 0) - return FALSE; - } - - start_pos = 0; - break; - case RE_OP_START_OF_WORD: /* At start of a word. */ - { - BOOL match; - Py_ssize_t step; - BOOL (*at_word_start)(RE_State* state, Py_ssize_t start_pos); - - match = test->match; - step = state->reverse ? -1 : 1; - at_word_start = state->encoding->at_word_start; - - for (;;) { - if (at_word_start(state, start_pos) == match) - break; - if (start_pos == limit) - return FALSE; - start_pos += step; + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; } break; } - case RE_OP_STRING: /* A string literal. */ - if (!(test->status & RE_STATUS_REQUIRED) || start_pos != - state->req_pos) { - start_pos = string_search(safe_state, test, start_pos, limit); - if (start_pos < 0) - return FALSE; + case RE_OP_START_OF_STRING: + { + BOOL is_partial; + + if (state->reverse) + start_pos = search_start_START_OF_STRING_rev(state, test, + start_pos, &is_partial); + else + start_pos = search_start_START_OF_STRING(state, test, start_pos, + &is_partial); + + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; } break; - case RE_OP_STRING_FLD: /* A string literal, ignoring case. 
*/ + } + case RE_OP_START_OF_WORD: + { + BOOL is_partial; + + if (state->reverse) + start_pos = search_start_START_OF_WORD_rev(state, test, start_pos, + &is_partial); + else + start_pos = search_start_START_OF_WORD(state, test, start_pos, + &is_partial); + + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + break; + } + case RE_OP_STRING: + { + BOOL is_partial; + + start_pos = search_start_STRING(safe_state, test, start_pos, + &is_partial); + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + break; + } + case RE_OP_STRING_FLD: { Py_ssize_t new_pos; + BOOL is_partial; - if ((test->status & RE_STATUS_REQUIRED) && start_pos == state->req_pos) - new_pos = state->req_end; - else - start_pos = string_search_fld(safe_state, test, start_pos, - state->slice_end, &new_pos); + start_pos = search_start_STRING_FLD(safe_state, test, start_pos, + &new_pos, &is_partial); if (start_pos < 0) - return FALSE; + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } /* Can we look further ahead? */ if (test == node) { - if (test->next_1.node && !try_match(state, &test->next_1, new_pos, - new_position)) { - ++start_pos; + if (test->next_1.node) { + int status; - if (state->reverse) { - if (start_pos < state->slice_start) - return FALSE; - } else { - if (start_pos > state->slice_end) - return FALSE; + status = try_match(state, &test->next_1, new_pos, + new_position); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) { + ++start_pos; + + if (start_pos >= state->slice_end) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = state->slice_start; + return RE_ERROR_PARTIAL; + } + + return RE_ERROR_FAILURE; + } + + goto again; } - - goto again; } /* It's a possible match. 
*/ state->match_pos = start_pos; - return TRUE; + if (info) { + info->start_pos = state->text_pos; + info->match_pos = state->match_pos; + } + + return RE_ERROR_SUCCESS; } break; } - case RE_OP_STRING_FLD_REV: /* A string literal backwards, ignoring case. */ + case RE_OP_STRING_FLD_REV: { Py_ssize_t new_pos; + BOOL is_partial; - if ((test->status & RE_STATUS_REQUIRED) && start_pos == state->req_pos) - new_pos = state->req_end; - else - start_pos = string_search_fld_rev(safe_state, test, start_pos, - state->slice_start, &new_pos); + start_pos = search_start_STRING_FLD_REV(safe_state, test, start_pos, + &new_pos, &is_partial); if (start_pos < 0) - return FALSE; + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } /* Can we look further ahead? */ if (test == node) { - if (test->next_1.node && !try_match(state, &test->next_1, new_pos, - new_position)) { - --start_pos; + if (test->next_1.node) { + int status; - if (state->reverse) { - if (start_pos < state->slice_start) - return FALSE; - } else { - if (start_pos > state->slice_end) - return FALSE; + status = try_match(state, &test->next_1, new_pos, + new_position); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) { + --start_pos; + + if (start_pos <= state->slice_start) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = state->slice_start; + return RE_ERROR_PARTIAL; + } + + return RE_ERROR_FAILURE; + } + + goto again; } - - goto again; } /* It's a possible match. */ state->match_pos = start_pos; - return TRUE; + if (info) { + info->start_pos = state->text_pos; + info->match_pos = state->match_pos; + } + + return RE_ERROR_SUCCESS; } break; } - case RE_OP_STRING_IGN: /* A string literal, ignoring case. 
*/ - if (!(test->status & RE_STATUS_REQUIRED) || start_pos != - state->req_pos) { - start_pos = string_search_ign(safe_state, test, start_pos, limit); - if (start_pos < 0) - return FALSE; + case RE_OP_STRING_IGN: + { + BOOL is_partial; + + start_pos = search_start_STRING_IGN(safe_state, test, start_pos, + &is_partial); + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; } break; - case RE_OP_STRING_IGN_REV: /* A string literal backwards, ignoring case. */ - if (!(test->status & RE_STATUS_REQUIRED) || start_pos != - state->req_pos) { - start_pos = string_search_ign_rev(safe_state, test, start_pos, - limit); - if (start_pos < 0) - return FALSE; + } + case RE_OP_STRING_IGN_REV: + { + BOOL is_partial; + + start_pos = search_start_STRING_IGN_REV(safe_state, test, start_pos, + &is_partial); + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; } break; - case RE_OP_STRING_REV: /* A string literal backwards. */ - if (!(test->status & RE_STATUS_REQUIRED) || start_pos != - state->req_pos) { - start_pos = string_search_rev(safe_state, test, start_pos, limit); - if (start_pos < 0) - return FALSE; + } + case RE_OP_STRING_REV: + { + BOOL is_partial; + + start_pos = search_start_STRING_REV(safe_state, test, start_pos, + &is_partial); + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; } break; + } default: /* Don't call 'search_start' again. */ state->pattern->do_search_start = FALSE; @@ -6572,28 +8426,47 @@ again: state->match_pos = start_pos; new_position->node = node; new_position->text_pos = start_pos; - return TRUE; + return RE_ERROR_SUCCESS; } - text_pos = start_pos; - /* Can we look further ahead? 
*/ if (test == node) { - text_pos += test->step; + text_pos = start_pos + test->step; - if (test->next_1.node && !try_match(state, &test->next_1, text_pos, - new_position)) { - start_pos += step; + if (test->next_1.node) { + int status; - if (state->reverse) { - if (start_pos < state->slice_start) - return FALSE; - } else { - if (start_pos > state->slice_end) - return FALSE; + status = try_match(state, &test->next_1, text_pos, new_position); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) { + if (state->reverse) { + --start_pos; + + if (start_pos < state->slice_start) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = state->slice_start; + return RE_ERROR_PARTIAL; + } + + return RE_ERROR_FAILURE; + } + } else { + ++start_pos; + + if (start_pos > state->slice_end) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = state->slice_end; + return RE_ERROR_PARTIAL; + } + + return RE_ERROR_FAILURE; + } + } + + goto again; } - - goto again; } } else { new_position->node = node; @@ -6608,7 +8481,7 @@ again: info->match_pos = state->match_pos; } - return TRUE; + return RE_ERROR_SUCCESS; } /* Saves a capture group. 
*/ @@ -6639,9 +8512,9 @@ Py_LOCAL_INLINE(BOOL) save_capture(RE_SafeState* safe_state, size_t RE_GroupSpan* new_captures; new_capacity = public_group->capture_capacity * 2; - new_capacity = RE_MAX(new_capacity, RE_INIT_CAPTURE_SIZE); + new_capacity = max_size_t(new_capacity, RE_INIT_CAPTURE_SIZE); new_captures = (RE_GroupSpan*)safe_realloc(safe_state, - public_group->captures,new_capacity * sizeof(RE_GroupSpan)); + public_group->captures, new_capacity * sizeof(RE_GroupSpan)); if (!new_captures) return FALSE; @@ -6695,7 +8568,7 @@ Py_LOCAL_INLINE(BOOL) push_groups(RE_SafeState* safe_state) { new_block->spans = (RE_GroupSpan*)safe_alloc(safe_state, group_count * sizeof(RE_GroupSpan)); new_block->counts = (size_t*)safe_alloc(safe_state, group_count * - sizeof(size_t)); + sizeof(Py_ssize_t)); if (!new_block->spans || !new_block->counts) { safe_dealloc(safe_state, new_block->spans); safe_dealloc(safe_state, new_block->counts); @@ -6754,9 +8627,9 @@ Py_LOCAL_INLINE(void) drop_groups(RE_State* state) { Py_LOCAL_INLINE(BOOL) push_repeats(RE_SafeState* safe_state) { RE_State* state; PatternObject* pattern; - Py_ssize_t repeat_count; + size_t repeat_count; RE_SavedRepeats* current; - Py_ssize_t r; + size_t r; state = safe_state->re_state; pattern = state->pattern; @@ -6815,9 +8688,9 @@ Py_LOCAL_INLINE(BOOL) push_repeats(RE_SafeState* safe_state) { /* Pops the repeats for backtracking. */ Py_LOCAL_INLINE(void) pop_repeats(RE_State* state) { PatternObject* pattern; - Py_ssize_t repeat_count; + size_t repeat_count; RE_SavedRepeats* current; - Py_ssize_t r; + size_t r; pattern = state->pattern; @@ -6835,8 +8708,8 @@ Py_LOCAL_INLINE(void) pop_repeats(RE_State* state) { /* Saves state info before a recusive call by 'basic_match'. 
*/ Py_LOCAL_INLINE(void) save_info(RE_State* state, RE_Info* info) { + info->backtrack_count = state->current_backtrack_block->count; info->current_backtrack_block = state->current_backtrack_block; - info->backtrack_count = info->current_backtrack_block->count; info->current_saved_groups = state->current_saved_groups; info->must_advance = state->must_advance; info->current_group_call_frame = state->current_group_call_frame; @@ -6847,13 +8720,13 @@ Py_LOCAL_INLINE(void) restore_info(RE_State* state, RE_Info* info) { state->current_group_call_frame = info->current_group_call_frame; state->must_advance = info->must_advance; state->current_saved_groups = info->current_saved_groups; - info->current_backtrack_block->count = info->backtrack_count; state->current_backtrack_block = info->current_backtrack_block; + state->current_backtrack_block->count = info->backtrack_count; } /* Inserts a new span in a guard list. */ Py_LOCAL_INLINE(BOOL) insert_guard_span(RE_SafeState* safe_state, RE_GuardList* - guard_list, Py_ssize_t index) { + guard_list, size_t index) { size_t n; if (guard_list->count >= guard_list->capacity) { @@ -6882,8 +8755,8 @@ Py_LOCAL_INLINE(BOOL) insert_guard_span(RE_SafeState* safe_state, RE_GuardList* } /* Deletes a span in a guard list. 
*/ -Py_LOCAL_INLINE(void) delete_guard_span(RE_GuardList* guard_list, Py_ssize_t - index) { +Py_LOCAL_INLINE(void) delete_guard_span(RE_GuardList* guard_list, size_t index) + { size_t n; n = guard_list->count - index - 1; @@ -7029,7 +8902,7 @@ Py_LOCAL_INLINE(void) reset_guards(RE_State* state, RE_CODE* values) { size_t repeat_count; pattern = state->pattern; - repeat_count = (size_t)pattern->repeat_count; + repeat_count = pattern->repeat_count; if (values) { size_t i; @@ -7084,7 +8957,7 @@ Py_LOCAL_INLINE(PyObject*) build_bytes_value(void* buffer, Py_ssize_t len, if (buffer_charsize == 1) return Py_BuildValue("s#", buffer, len); - byte_buffer = re_alloc(len); + byte_buffer = re_alloc((size_t)len); if (!byte_buffer) return NULL; @@ -7108,6 +8981,27 @@ too_wide: return NULL; } +/* Looks for a string in a string set. */ +Py_LOCAL_INLINE(int) string_set_contains(RE_State* state, PyObject* string_set, + Py_ssize_t first, Py_ssize_t last) { + PyObject* string; + int status; + + if (state->is_unicode) + string = build_unicode_value(state->point_to(state->text, first), last + - first, state->charsize); + else + string = build_bytes_value(state->point_to(state->text, first), last - + first, state->charsize); + if (!string) + return RE_ERROR_INTERNAL; + + status = PySet_Contains(string_set, string); + Py_DECREF(string); + + return status; +} + /* Looks for a string in a string set, ignoring case. 
*/ Py_LOCAL_INLINE(int) string_set_contains_ign(RE_State* state, PyObject* string_set, void* buffer, Py_ssize_t index, Py_ssize_t len, Py_ssize_t @@ -7115,7 +9009,8 @@ Py_LOCAL_INLINE(int) string_set_contains_ign(RE_State* state, PyObject* Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); RE_EncodingTable* encoding; - BOOL (*possible_turkic)(Py_UCS4 ch); + RE_LocaleInfo* locale_info; + BOOL (*possible_turkic)(RE_LocaleInfo* locale_info, Py_UCS4 ch); Py_UCS4 codepoints[4]; switch (buffer_charsize) { @@ -7138,10 +9033,12 @@ Py_LOCAL_INLINE(int) string_set_contains_ign(RE_State* state, PyObject* } encoding = state->encoding; + locale_info = state->locale_info; possible_turkic = encoding->possible_turkic; /* Look for a possible Turkic 'I'. */ - while (index < len && !possible_turkic(char_at(buffer, index))) + while (index < len && !possible_turkic(locale_info, char_at(buffer, + index))) ++index; if (index < len) { @@ -7150,7 +9047,8 @@ Py_LOCAL_INLINE(int) string_set_contains_ign(RE_State* state, PyObject* int i; /* Try all the alternatives to the 'I'. */ - count = encoding->all_turkic_i(char_at(buffer, index), codepoints); + count = encoding->all_turkic_i(locale_info, char_at(buffer, index), + codepoints); for (i = 0; i < count; i++) { int status; @@ -7184,50 +9082,473 @@ Py_LOCAL_INLINE(int) string_set_contains_ign(RE_State* state, PyObject* } } +/* Creates a partial string set for truncation at the left or right side. */ +Py_LOCAL_INLINE(int) make_partial_string_set(RE_State* state, RE_Node* node) { + PatternObject* pattern; + int partial_side; + PyObject* string_set; + PyObject* partial_set; + PyObject* iter = NULL; + PyObject* item = NULL; + PyObject* slice = NULL; + + pattern = state->pattern; + partial_side = state->partial_side; + if (partial_side != RE_PARTIAL_LEFT && partial_side != RE_PARTIAL_RIGHT) + return RE_ERROR_INTERNAL; + + /* Fetch the full string set. PyList_GET_ITEM borrows a reference. 
*/ + string_set = PyList_GET_ITEM(pattern->named_list_indexes, node->values[0]); + if (!string_set) + return RE_ERROR_INTERNAL; + + /* Gets the list of partial string sets. */ + if (!pattern->partial_named_lists[partial_side]) { + size_t size; + + size = pattern->named_lists_count * sizeof(PyObject*); + pattern->partial_named_lists[partial_side] = re_alloc(size); + if (!pattern->partial_named_lists[partial_side]) + return RE_ERROR_INTERNAL; + + memset(pattern->partial_named_lists[partial_side], 0, size); + } + + /* Get the partial string set. */ + partial_set = pattern->partial_named_lists[partial_side][node->values[0]]; + if (partial_set) + return 1; + + /* Build the partial string set. */ + partial_set = PySet_New(NULL); + if (!partial_set) + return RE_ERROR_INTERNAL; + + iter = PyObject_GetIter(string_set); + if (!iter) + goto error; + + item = PyIter_Next(iter); + + while (item) { + Py_ssize_t len; + Py_ssize_t first; + Py_ssize_t last; + + len = PySequence_Length(item); + if (len == -1) + goto error; + + first = 0; + last = len; + + while (last - first > 1) { + int status; + + /* Shorten the entry. */ + if (partial_side == RE_PARTIAL_LEFT) + ++first; + else + --last; + + slice = PySequence_GetSlice(item, first, last); + if (!slice) + goto error; + + status = PySet_Add(partial_set, slice); + Py_DECREF(slice); + if (status < 0) + goto error; + } + + Py_DECREF(item); + item = PyIter_Next(iter); + } + + if (PyErr_Occurred()) + goto error; + + Py_DECREF(iter); + + pattern->partial_named_lists[partial_side][node->values[0]] = partial_set; + + return 1; + +error: + Py_XDECREF(item); + Py_XDECREF(iter); + Py_DECREF(partial_set); + + return RE_ERROR_INTERNAL; +} + /* Tries to match a string at the current position with a member of a string - * set, ignoring case, for a forwards or reverse search. + * set, forwards or backwards. 
+ */ +Py_LOCAL_INLINE(int) string_set_match_fwdrev(RE_SafeState* safe_state, RE_Node* + node, BOOL reverse) { + RE_State* state; + Py_ssize_t min_len; + Py_ssize_t max_len; + Py_ssize_t text_available; + Py_ssize_t slice_available; + int partial_side; + Py_ssize_t len; + Py_ssize_t first; + Py_ssize_t last; + int status; + PyObject* string_set; + + state = safe_state->re_state; + + min_len = (Py_ssize_t)node->values[1]; + max_len = (Py_ssize_t)node->values[2]; + + acquire_GIL(safe_state); + + if (reverse) { + text_available = state->text_pos; + slice_available = state->text_pos - state->slice_start; + partial_side = RE_PARTIAL_LEFT; + } else { + text_available = state->text_length - state->text_pos; + slice_available = state->slice_end - state->text_pos; + partial_side = RE_PARTIAL_RIGHT; + } + + /* Get as many characters as we need for the longest possible match. */ + len = min_ssize_t(max_len, slice_available); + + if (reverse) { + first = state->text_pos - len; + last = state->text_pos; + } else { + first = state->text_pos; + last = state->text_pos + len; + } + + /* If we didn't get all of the characters we need, is a partial match + * allowed? + */ + if (len < max_len && len == text_available && state->partial_side == + partial_side) { + if (len == 0) { + /* An empty string is always a possible partial match. */ + status = RE_ERROR_PARTIAL; + goto finished; + } + + /* Make a set of the possible partial matches. */ + status = make_partial_string_set(state, node); + if (status < 0) + goto finished; + + /* Fetch the partial string set. */ + string_set = + state->pattern->partial_named_lists[partial_side][node->values[0]]; + + /* Is the text we have a partial match? */ + status = string_set_contains(state, string_set, first, last); + if (status < 0) + goto finished; + + if (status == 1) { + /* Advance past the match. 
*/ + if (reverse) + state->text_pos -= len; + else + state->text_pos += len; + + status = RE_ERROR_PARTIAL; + goto finished; + } + } + + /* Fetch the string set. PyList_GET_ITEM borrows a reference. */ + string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, + node->values[0]); + if (!string_set) { + status = RE_ERROR_INTERNAL; + goto finished; + } + + /* We've already looked for a partial match (if allowed), but what about a + * complete match? + */ + while (len >= min_len) { + status = string_set_contains(state, string_set, first, last); + + if (status == 1) { + /* Advance past the match. */ + if (reverse) + state->text_pos -= len; + else + state->text_pos += len; + + status = 1; + goto finished; + } + + /* Look for a shorter match. */ + --len; + if (reverse) + ++first; + else + --last; + } + + /* No match. */ + status = 0; + +finished: + release_GIL(safe_state); + + return status; +} + +/* Tries to match a string at the current position with a member of a string + * set, ignoring case, forwards or backwards. + */ +Py_LOCAL_INLINE(int) string_set_match_fld_fwdrev(RE_SafeState* safe_state, + RE_Node* node, BOOL reverse) { + RE_State* state; + int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* + folded); + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + Py_ssize_t folded_charsize; + void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); + Py_ssize_t min_len; + Py_ssize_t max_len; + Py_ssize_t buf_len; + void* folded; + int status; + BOOL* end_of_fold = NULL; + Py_ssize_t text_available; + Py_ssize_t slice_available; + Py_ssize_t t_pos; + Py_ssize_t f_pos; + int step; + int partial_side; + Py_ssize_t len; + Py_ssize_t consumed; + Py_UCS4 codepoints[RE_MAX_FOLDED]; + PyObject* string_set; + Py_ssize_t first; + Py_ssize_t last; + + state = safe_state->re_state; + full_case_fold = state->encoding->full_case_fold; + char_at = state->char_at; + + /* The folded string will have the same width as the original string. 
*/ + folded_charsize = state->charsize; + + switch (folded_charsize) { + case 1: + set_char_at = bytes1_set_char_at; + break; + case 2: + set_char_at = bytes2_set_char_at; + break; + case 4: + set_char_at = bytes4_set_char_at; + break; + default: + return RE_ERROR_INTERNAL; + } + + min_len = (Py_ssize_t)node->values[1]; + max_len = (Py_ssize_t)node->values[2]; + + acquire_GIL(safe_state); + + /* Allocate a buffer for the folded string. */ + buf_len = max_len + RE_MAX_FOLDED; + folded = re_alloc((size_t)(buf_len * folded_charsize)); + if (!folded) { + status = RE_ERROR_MEMORY; + goto finished; + } + + end_of_fold = re_alloc((size_t)buf_len * sizeof(BOOL)); + if (!end_of_fold) { + status = RE_ERROR_MEMORY; + goto finished; + } + + memset(end_of_fold, 0, (size_t)buf_len * sizeof(BOOL)); + + if (reverse) { + text_available = state->text_pos; + slice_available = state->text_pos - state->slice_start; + t_pos = state->text_pos - 1; + f_pos = buf_len; + step = -1; + partial_side = RE_PARTIAL_LEFT; + } else { + text_available = state->text_length - state->text_pos; + slice_available = state->slice_end - state->text_pos; + t_pos = state->text_pos; + f_pos = 0; + step = 1; + partial_side = RE_PARTIAL_RIGHT; + } + + /* We can stop getting characters as soon as the case-folded string is long + * enough (each codepoint from the text can expand to more than one folded + * codepoint). 
+ */ + len = 0; + end_of_fold[len] = TRUE; + + consumed = 0; + while (len < max_len && consumed < slice_available) { + int count; + int j; + + count = full_case_fold(state->locale_info, char_at(state->text, t_pos), + codepoints); + + if (reverse) + f_pos -= count; + + for (j = 0; j < count; j++) + set_char_at(folded, f_pos + j, codepoints[j]); + + if (!reverse) + f_pos += count; + + len += count; + end_of_fold[len] = TRUE; + ++consumed; + t_pos += step; + } + + if (reverse) { + first = f_pos; + last = buf_len; + } else { + first = 0; + last = f_pos; + } + + /* If we didn't get all of the characters we need, is a partial match + * allowed? + */ + if (len < max_len && len == text_available && state->partial_side == + partial_side) { + if (len == 0) { + /* An empty string is always a possible partial match. */ + status = RE_ERROR_PARTIAL; + goto finished; + } + + /* Make a set of the possible partial matches. */ + status = make_partial_string_set(state, node); + if (status < 0) + goto finished; + + /* Fetch the partial string set. */ + string_set = + state->pattern->partial_named_lists[partial_side][node->values[0]]; + + /* Is the text we have a partial match? */ + status = string_set_contains_ign(state, string_set, folded, first, + last, folded_charsize); + if (status < 0) + goto finished; + + if (status == 1) { + /* Advance past the match. */ + if (reverse) + state->text_pos -= consumed; + else + state->text_pos += consumed; + + status = RE_ERROR_PARTIAL; + goto finished; + } + } + + /* Fetch the string set. PyList_GET_ITEM borrows a reference. */ + string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, + node->values[0]); + if (!string_set) { + status = RE_ERROR_INTERNAL; + goto finished; + } + + /* We've already looked for a partial match (if allowed), but what about a + * complete match? 
+ */ + while (len >= min_len) { + if (end_of_fold[len]) { + status = string_set_contains_ign(state, string_set, folded, first, + last, folded_charsize); + + if (status == 1) { + /* Advance past the match. */ + if (reverse) + state->text_pos -= consumed; + else + state->text_pos += consumed; + + status = 1; + goto finished; + } + + --consumed; + } + + /* Look for a shorter match. */ + --len; + if (reverse) + ++first; + else + --last; + } + + /* No match. */ + status = 0; + +finished: + re_dealloc(end_of_fold); + re_dealloc(folded); + + release_GIL(safe_state); + + return status; +} + +/* Tries to match a string at the current position with a member of a string + * set, ignoring case, forwards or backwards. */ Py_LOCAL_INLINE(int) string_set_match_ign_fwdrev(RE_SafeState* safe_state, RE_Node* node, BOOL reverse) { - Py_ssize_t index; - Py_ssize_t min_len; - Py_ssize_t max_len; RE_State* state; - Py_ssize_t available; + Py_UCS4 (*simple_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch); Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - void* text; - Py_ssize_t text_pos; - RE_EncodingTable* encoding; - Py_UCS4 (*simple_case_fold)(Py_UCS4 ch); Py_ssize_t folded_charsize; void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); + Py_ssize_t min_len; + Py_ssize_t max_len; void* folded; - PyObject* string_set; int status; + Py_ssize_t text_available; + Py_ssize_t slice_available; + Py_ssize_t t_pos; + Py_ssize_t f_pos; + int step; + int partial_side; Py_ssize_t len; - - index = node->values[0]; - min_len = (Py_ssize_t)node->values[1]; - max_len = (Py_ssize_t)node->values[2]; + Py_ssize_t i; + Py_ssize_t first; + Py_ssize_t last; + PyObject* string_set; state = safe_state->re_state; - - available = reverse ? state->text_pos - state->slice_start : - state->slice_end - state->text_pos; - - if (min_len > available) - /* Too few characters for any match. 
*/ - return 0; - - max_len = RE_MIN(max_len, available); - + simple_case_fold = state->encoding->simple_case_fold; char_at = state->char_at; - text = state->text; - text_pos = state->text_pos; - encoding = state->encoding; - simple_case_fold = encoding->simple_case_fold; - - acquire_GIL(safe_state); /* The folded string will have the same width as the original string. */ folded_charsize = state->charsize; @@ -7243,499 +9564,138 @@ Py_LOCAL_INLINE(int) string_set_match_ign_fwdrev(RE_SafeState* safe_state, set_char_at = bytes4_set_char_at; break; default: - return 0; + return RE_ERROR_INTERNAL; } + min_len = (Py_ssize_t)node->values[1]; + max_len = (Py_ssize_t)node->values[2]; + + acquire_GIL(safe_state); + /* Allocate a buffer for the folded string. */ - folded = re_alloc(max_len * folded_charsize); - if (!folded) - goto error; + folded = re_alloc((size_t)(max_len * folded_charsize)); + if (!folded) { + status = RE_ERROR_MEMORY; + goto finished; + } - /* Fetch the string set. */ - string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, index); - if (!string_set) - goto error; + if (reverse) { + text_available = state->text_pos; + slice_available = state->text_pos - state->slice_start; + t_pos = state->text_pos - 1; + f_pos = max_len - 1; + step = -1; + partial_side = RE_PARTIAL_LEFT; + } else { + text_available = state->text_length - state->text_pos; + slice_available = state->slice_end - state->text_pos; + t_pos = state->text_pos; + f_pos = 0; + step = 1; + partial_side = RE_PARTIAL_RIGHT; + } - status = 0; + /* Get as many characters as we need for the longest possible match. */ + len = min_ssize_t(max_len, slice_available); - /* Attempt matches for a decreasing length. 
*/ - for (len = max_len; status == 0 && len >= min_len; len--) { - Py_ssize_t offset; - Py_ssize_t inc_len; - int i; + for (i = 0; i < len; i ++) { + Py_UCS4 ch; - if (reverse) { - offset = -len; - inc_len = -len; - } else { - offset = 0; - inc_len = len; + ch = simple_case_fold(state->locale_info, char_at(state->text, t_pos)); + set_char_at(folded, f_pos, ch); + t_pos += step; + f_pos += step; + } + + if (reverse) { + first = f_pos; + last = max_len; + } else { + first = 0; + last = f_pos; + } + + /* If we didn't get all of the characters we need, is a partial match + * allowed? + */ + if (len < max_len && len == text_available && state->partial_side == + partial_side) { + if (len == 0) { + /* An empty string is always a possible partial match. */ + status = RE_ERROR_PARTIAL; + goto finished; } - for (i = 0; i < len; i++) { - Py_UCS4 ch; + /* Make a set of the possible partial matches. */ + status = make_partial_string_set(state, node); + if (status < 0) + goto finished; - ch = simple_case_fold(char_at(text, text_pos + offset + i)); - set_char_at(folded, i, ch); - } + /* Fetch the partial string set. */ + string_set = + state->pattern->partial_named_lists[partial_side][node->values[0]]; - status = string_set_contains_ign(state, string_set, folded, 0, len, - folded_charsize); + /* Is the text we have a partial match? */ + status = string_set_contains_ign(state, string_set, folded, first, + last, folded_charsize); + if (status < 0) + goto finished; - if (status == 1) + if (status == 1) { /* Advance past the match. */ - state->text_pos += inc_len; - } + if (reverse) + state->text_pos -= len; + else + state->text_pos += len; - re_dealloc(folded); + status = RE_ERROR_PARTIAL; + goto finished; + } + } - release_GIL(safe_state); + /* Fetch the string set. PyList_GET_ITEM borrows a reference. 
*/ + string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, + node->values[0]); + if (!string_set) { + status = RE_ERROR_INTERNAL; + goto finished; + } - return status; + /* We've already looked for a partial match (if allowed), but what about a + * complete match? + */ + while (len >= min_len) { + status = string_set_contains_ign(state, string_set, folded, first, + last, folded_charsize); -error: - re_dealloc(folded); + if (status == 1) { + /* Advance past the match. */ + if (reverse) + state->text_pos -= len; + else + state->text_pos += len; - release_GIL(safe_state); + status = 1; + goto finished; + } - return RE_ERROR_INTERNAL; -} - -/* Tries to match a string at the current position with a member of a string - * set. - */ -Py_LOCAL_INLINE(int) string_set_match(RE_SafeState* safe_state, RE_Node* node) - { - Py_ssize_t index; - Py_ssize_t min_len; - Py_ssize_t max_len; - RE_State* state; - Py_ssize_t available; - void* (*point_to)(void* text, Py_ssize_t pos); - void* text; - Py_ssize_t text_pos; - PyObject* string_set; - int status; - Py_ssize_t len; - - index = node->values[0]; - min_len = (Py_ssize_t)node->values[1]; - max_len = (Py_ssize_t)node->values[2]; - - state = safe_state->re_state; - - available = state->slice_end - state->text_pos; - if (min_len > available) - /* Too few characters for any match. */ - return 0; - - max_len = RE_MIN(max_len, available); - - point_to = state->point_to; - text = state->text; - text_pos = state->text_pos; - - acquire_GIL(safe_state); - - /* Fetch the string set. */ - string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, index); - if (!string_set) - goto error; - - status = 0; - - /* Attempt matches for a decreasing length. */ - for (len = max_len; status == 0 && len >= min_len; len--) { - PyObject* string; - - if (state->is_unicode) - string = build_unicode_value(point_to(text, text_pos), len, - state->charsize); + /* Look for a shorter match. 
*/ + --len; + if (reverse) + ++first; else - string = build_bytes_value(point_to(text, text_pos), len, - state->charsize); - if (!string) - goto error; - - status = PySet_Contains(string_set, string); - Py_DECREF(string); - - if (status == 1) - /* Advance past the match. */ - state->text_pos += len; + --last; } - release_GIL(safe_state); - - return status; - -error: - release_GIL(safe_state); - - return RE_ERROR_INTERNAL; -} - -/* Tries to match a string at the current position with a member of a string - * set, ignoring case. - */ -Py_LOCAL_INLINE(int) string_set_match_fld(RE_SafeState* safe_state, RE_Node* - node) { - Py_ssize_t index; - Py_ssize_t min_len; - Py_ssize_t max_len; - RE_State* state; - Py_ssize_t available; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - void* text; - Py_ssize_t text_pos; - RE_EncodingTable* encoding; - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - Py_ssize_t buf_size; - Py_ssize_t folded_charsize; - void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); - void* folded; - PyObject* string_set; - int status; - Py_ssize_t end_fetch; - Py_ssize_t len; - Py_UCS4 codepoints[RE_MAX_FOLDED]; - - index = node->values[0]; - min_len = (Py_ssize_t)node->values[1]; - max_len = (Py_ssize_t)node->values[2]; - - state = safe_state->re_state; - - available = state->slice_end - state->text_pos; - if ((Py_ssize_t)possible_unfolded_length(min_len) > available) - /* Too few characters for any match. */ - return 0; - - char_at = state->char_at; - text = state->text; - text_pos = state->text_pos; - encoding = state->encoding; - full_case_fold = encoding->full_case_fold; - - /* The folded string will have the same width as the original string. 
*/ - folded_charsize = state->charsize; - - switch (folded_charsize) { - case 1: - set_char_at = bytes1_set_char_at; - break; - case 2: - set_char_at = bytes2_set_char_at; - break; - case 4: - set_char_at = bytes4_set_char_at; - break; - default: - return 0; - } - - acquire_GIL(safe_state); - - /* Allocate a buffer for the folded string, plus a little extra. */ - buf_size = max_len + RE_MAX_FOLDED; - folded = re_alloc(buf_size * folded_charsize); - if (!folded) - goto error; - - /* Fetch the string set. */ - string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, index); - if (!string_set) - goto error; - + /* No match. */ status = 0; - /* Attempt matches for a decreasing length. */ - end_fetch = text_pos + max_len; - - for (len = max_len; status == 0 && len >= min_len; len--) { - Py_ssize_t pos; - int folded_len; - - pos = text_pos; - folded_len = 0; - - /* Fetch until we have enough characters. */ - while (pos < end_fetch && folded_len < len) { - int count; - int i; - - count = full_case_fold(char_at(text, pos), codepoints); - - for (i = 0; i < count; i++) { - Py_UCS4 ch; - - ch = codepoints[i]; - set_char_at(folded, folded_len + i, ch); - } - - folded_len += count; - - ++pos; - } - - /* Do we have an acceptable number? */ - if (min_len <= folded_len && folded_len <= len) { - status = string_set_contains_ign(state, string_set, folded, 0, - folded_len, folded_charsize); - - if (status == 1) - /* Advance past the match. */ - state->text_pos = pos; - } - - /* If we got fewer than expected, next time we want still fewer. */ - len = RE_MIN(len, folded_len); - - /* Fetch one fewer next time. */ - end_fetch = pos - 1; - } - +finished: re_dealloc(folded); release_GIL(safe_state); return status; - -error: - re_dealloc(folded); - - release_GIL(safe_state); - - return RE_ERROR_INTERNAL; -} - -/* Tries to match a string at the current position with a member of a string - * set, ignoring case. 
- */ -Py_LOCAL_INLINE(int) string_set_match_fld_rev(RE_SafeState* safe_state, - RE_Node* node) { - Py_ssize_t index; - Py_ssize_t min_len; - Py_ssize_t max_len; - RE_State* state; - Py_ssize_t available; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - void* text; - Py_ssize_t text_pos; - RE_EncodingTable* encoding; - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - Py_ssize_t buf_size; - Py_ssize_t folded_charsize; - void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); - void* (*point_to)(void* text, Py_ssize_t pos); - void* folded; - PyObject* string_set; - int status; - Py_ssize_t end_fetch; - Py_ssize_t len; - Py_UCS4 codepoints[RE_MAX_FOLDED]; - - index = node->values[0]; - min_len = (Py_ssize_t)node->values[1]; - max_len = (Py_ssize_t)node->values[2]; - - state = safe_state->re_state; - - available = state->text_pos - state->slice_start; - if ((Py_ssize_t)possible_unfolded_length(min_len) > available) - /* Too few characters for any match. */ - return 0; - - char_at = state->char_at; - text = state->text; - text_pos = state->text_pos; - encoding = state->encoding; - full_case_fold = encoding->full_case_fold; - - /* The folded string will have the same width as the original string. */ - folded_charsize = state->charsize; - - switch (folded_charsize) { - case 1: - set_char_at = bytes1_set_char_at; - point_to = bytes1_point_to; - break; - case 2: - set_char_at = bytes2_set_char_at; - point_to = bytes2_point_to; - break; - case 4: - set_char_at = bytes4_set_char_at; - point_to = bytes4_point_to; - break; - default: - return 0; - } - - acquire_GIL(safe_state); - - /* Allocate a buffer for the folded string, plus a little extra. */ - buf_size = max_len + RE_MAX_FOLDED; - folded = re_alloc(buf_size * folded_charsize); - if (!folded) - goto error; - - /* Fetch the string set. */ - string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, index); - if (!string_set) - goto error; - - status = 0; - - /* Attempt matches for a decreasing length. 
*/ - end_fetch = text_pos - max_len; - - for (len = max_len; status == 0 && len >= min_len; len--) { - Py_ssize_t pos; - int folded_len; - - pos = text_pos; - folded_len = 0; - - /* Fetch until we have enough characters. */ - while (pos > end_fetch && folded_len < len) { - int count; - int i; - - count = full_case_fold(char_at(text, pos - 1), codepoints); - - folded_len += count; - - for (i = 0; i < count; i++) { - Py_UCS4 ch; - - ch = codepoints[i]; - set_char_at(folded, buf_size - folded_len + i, ch); - } - - --pos; - } - - /* Do we have an acceptable number? */ - if (min_len <= folded_len && folded_len <= len) { - status = string_set_contains_ign(state, string_set, - point_to(folded, buf_size - len), 0, folded_len, - folded_charsize); - - if (status == 1) - /* Advance past the match. */ - state->text_pos = pos; - } - - /* If we got fewer than expected, next time we want still fewer. */ - len = RE_MIN(len, folded_len); - - /* Fetch one fewer next time. */ - end_fetch = pos + 1; - } - - re_dealloc(folded); - - release_GIL(safe_state); - - return status; - -error: - re_dealloc(folded); - - release_GIL(safe_state); - - return RE_ERROR_INTERNAL; -} - -/* Tries to match a string at the current position with a member of a string - * set, ignoring case, for a forwards search. - */ -Py_LOCAL_INLINE(int) string_set_match_ign(RE_SafeState* safe_state, RE_Node* - node) { - return string_set_match_ign_fwdrev(safe_state, node, FALSE); -} - -/* Tries to match a string at the current position with a member of a string - * set, ignoring case, for a reverse search. - */ -Py_LOCAL_INLINE(int) string_set_match_ign_rev(RE_SafeState* safe_state, - RE_Node* node) { - return string_set_match_ign_fwdrev(safe_state, node, TRUE); -} - -/* Tries to match a string at the current position with a member of a string - * set. 
- */ -Py_LOCAL_INLINE(int) string_set_match_rev(RE_SafeState* safe_state, RE_Node* - node) { - Py_ssize_t index; - Py_ssize_t min_len; - Py_ssize_t max_len; - RE_State* state; - Py_ssize_t available; - void* (*point_to)(void* text, Py_ssize_t pos); - void* text; - Py_ssize_t text_pos; - PyObject* string_set; - int status; - Py_ssize_t len; - - index = node->values[0]; - min_len = (Py_ssize_t)node->values[1]; - max_len = (Py_ssize_t)node->values[2]; - - state = safe_state->re_state; - - available = state->text_pos - state->slice_start; - if (min_len > available) - /* Too few characters for any match. */ - return 0; - - max_len = RE_MIN(max_len, available); - - point_to = state->point_to; - text = state->text; - text_pos = state->text_pos; - - acquire_GIL(safe_state); - - /* Fetch the string set. */ - string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, index); - if (!string_set) - goto error; - - status = 0; - - /* Attempt matches for a decreasing length. */ - for (len = max_len; status == 0 && len >= min_len; len--) { - PyObject* string; - - if (state->is_unicode) - string = build_unicode_value(point_to(text, text_pos - len), len, - state->charsize); - else - string = build_bytes_value(point_to(text, text_pos - len), len, - state->charsize); - if (!string) - goto error; - - status = PySet_Contains(string_set, string); - Py_DECREF(string); - - if (status == 1) - /* Advance past the match. */ - state->text_pos -= len; - } - - release_GIL(safe_state); - - return status; - -error: - release_GIL(safe_state); - - return RE_ERROR_INTERNAL; } /* Checks whether any additional fuzzy error is permitted. */ @@ -7765,42 +9725,72 @@ Py_LOCAL_INLINE(BOOL) this_error_permitted(RE_State* state, int fuzzy_type) { values[RE_FUZZY_VAL_COST_BASE + fuzzy_type] <= state->max_cost; } -Py_LOCAL_INLINE(BOOL) next_fuzzy_match_one(RE_State* state, RE_FuzzyData* data) +/* Checks whether we've reached the end of the text during a fuzzy partial + * match. 
+ */ +Py_LOCAL_INLINE(int) check_fuzzy_partial(RE_State* state, Py_ssize_t text_pos) { + switch (state->partial_side) { + case RE_PARTIAL_LEFT: + if (text_pos < 0) + return RE_ERROR_PARTIAL; + break; + case RE_PARTIAL_RIGHT: + if (text_pos > state->text_length) + return RE_ERROR_PARTIAL; + break; + } + + return RE_ERROR_FAILURE; +} + +/* Checks a fuzzy match of an item. */ +Py_LOCAL_INLINE(int) next_fuzzy_match_item(RE_State* state, RE_FuzzyData* data, + BOOL is_string, int step) { Py_ssize_t new_pos; if (this_error_permitted(state, data->fuzzy_type)) { switch (data->fuzzy_type) { case RE_FUZZY_DEL: /* Could a character at text_pos have been deleted? */ - data->new_node = data->new_node->next_1.node; - return TRUE; + if (is_string) + data->new_string_pos += step; + else + data->new_node = data->new_node->next_1.node; + return RE_ERROR_SUCCESS; case RE_FUZZY_INS: /* Could the character at text_pos have been inserted? */ - new_pos = data->new_text_pos + data->step; - if (data->permit_insertion && state->slice_start <= new_pos && - new_pos <= state->slice_end) { - data->new_text_pos = new_pos; - return TRUE; - } - break; - case RE_FUZZY_SUB: - /* Could the character at text_pos have been substituted? */ - new_pos = data->new_text_pos + data->step; + if (!data->permit_insertion) + return RE_ERROR_FAILURE; + + new_pos = data->new_text_pos + step; if (state->slice_start <= new_pos && new_pos <= state->slice_end) { data->new_text_pos = new_pos; - data->new_node = data->new_node->next_1.node; - return TRUE; + return RE_ERROR_SUCCESS; } - break; + + return check_fuzzy_partial(state, new_pos); + case RE_FUZZY_SUB: + /* Could the character at text_pos have been substituted? 
*/ + new_pos = data->new_text_pos + step; + if (state->slice_start <= new_pos && new_pos <= state->slice_end) { + data->new_text_pos = new_pos; + if (is_string) + data->new_string_pos += step; + else + data->new_node = data->new_node->next_1.node; + return RE_ERROR_SUCCESS; + } + + return check_fuzzy_partial(state, new_pos); } } - return FALSE; + return RE_ERROR_FAILURE; } -/* Tries a fuzzy match of a single-character item. */ -Py_LOCAL_INLINE(BOOL) fuzzy_match_one(RE_SafeState* safe_state, BOOL search, +/* Tries a fuzzy match of an item of width 0 or 1. */ +Py_LOCAL_INLINE(int) fuzzy_match_item(RE_SafeState* safe_state, BOOL search, Py_ssize_t* text_pos, RE_Node** node, int step) { RE_State* state; RE_FuzzyData data; @@ -7812,165 +9802,25 @@ Py_LOCAL_INLINE(BOOL) fuzzy_match_one(RE_SafeState* safe_state, BOOL search, if (!any_error_permitted(state)) { *node = NULL; - return TRUE; + return RE_ERROR_SUCCESS; } data.new_text_pos = *text_pos; data.new_node = *node; - data.step = step; fuzzy_info = &state->fuzzy_info; values = fuzzy_info->node->values; - /* Permit insertion except initially when searching (it's better just to - * start searching one character later). 
- */ - data.permit_insertion = !search || data.new_text_pos != - state->search_anchor; - - for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; - data.fuzzy_type++) { - if (next_fuzzy_match_one(state, &data)) - goto found; - } - - *node = NULL; - return TRUE; - -found: - if (!add_backtrack(safe_state, (*node)->op)) - return FALSE; - bt_data = state->backtrack; - bt_data->fuzzy_one.position.text_pos = *text_pos; - bt_data->fuzzy_one.position.node = *node; - bt_data->fuzzy_one.fuzzy_type = (RE_INT8)data.fuzzy_type; - bt_data->fuzzy_one.step = (RE_INT8)step; - - ++fuzzy_info->counts[data.fuzzy_type]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - ++state->total_errors; - state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - - *text_pos = data.new_text_pos; - *node = data.new_node; - - return TRUE; -} - -/* Retries a fuzzy match of a single-character item. */ -Py_LOCAL_INLINE(BOOL) retry_fuzzy_match_one(RE_SafeState* safe_state, BOOL - search, Py_ssize_t* text_pos, RE_Node** node) { - RE_State* state; - RE_FuzzyData data; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - RE_BacktrackData* bt_data; - - state = safe_state->re_state; - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - bt_data = state->backtrack; - data.new_text_pos = bt_data->fuzzy_one.position.text_pos; - data.new_node = bt_data->fuzzy_one.position.node; - data.fuzzy_type = bt_data->fuzzy_one.fuzzy_type; - data.step = bt_data->fuzzy_one.step; - - --fuzzy_info->counts[data.fuzzy_type]; - --fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - --state->total_errors; - state->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - - /* Permit insertion except initially when searching (it's better just to - * start searching one character later). 
- */ - data.permit_insertion = !search || data.new_text_pos != - state->search_anchor; - - for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; - data.fuzzy_type++) { - if (next_fuzzy_match_one(state, &data)) - goto found; - } - - discard_backtrack(state); - *node = NULL; - return TRUE; - -found: - bt_data->fuzzy_one.fuzzy_type = (RE_INT8)data.fuzzy_type; - - ++fuzzy_info->counts[data.fuzzy_type]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - ++state->total_errors; - state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - - *text_pos = data.new_text_pos; - *node = data.new_node; - - return TRUE; -} - -Py_LOCAL_INLINE(BOOL) next_fuzzy_match_zero(RE_State* state, RE_FuzzyData* - data) { - if (this_error_permitted(state, data->fuzzy_type)) { - switch (data->fuzzy_type) { - case RE_FUZZY_DEL: - /* Could a character at text_pos have been deleted? */ - data->new_node = data->new_node->next_1.node; - return TRUE; - case RE_FUZZY_INS: - /* Could the character at text_pos have been inserted? */ - if (data->permit_insertion && data->new_text_pos != data->limit) { - data->new_text_pos += data->step; - return TRUE; - } - break; - case RE_FUZZY_SUB: - /* Could the character at text_pos have been substituted? */ - if (data->new_text_pos != data->limit) { - data->new_node = data->new_node->next_1.node; - return TRUE; - } - break; + if (step == 0) { + if (data.new_node->status & RE_STATUS_REVERSE) { + data.step = -1; + data.limit = state->slice_start; + } else { + data.step = 1; + data.limit = state->slice_end; } - } - - return FALSE; -} - -/* Tries a fuzzy match of a zero-width item. 
*/ -Py_LOCAL_INLINE(BOOL) fuzzy_match_zero(RE_SafeState* safe_state, BOOL search, - Py_ssize_t* text_pos, RE_Node** node) { - RE_State* state; - RE_FuzzyData data; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - RE_BacktrackData* bt_data; - - state = safe_state->re_state; - - if (!any_error_permitted(state)) { - *node = NULL; - return TRUE; - } - - data.new_text_pos = *text_pos; - data.new_node = *node; - - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - if (data.new_node->status & RE_STATUS_REVERSE) { - data.step = -1; - data.limit = state->slice_start; - } else { - data.step = 1; - data.limit = state->slice_end; - } + } else + data.step = step; /* Permit insertion except initially when searching (it's better just to * start searching one character later). @@ -7980,20 +9830,27 @@ Py_LOCAL_INLINE(BOOL) fuzzy_match_zero(RE_SafeState* safe_state, BOOL search, for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; data.fuzzy_type++) { - if (next_fuzzy_match_zero(state, &data)) + int status; + + status = next_fuzzy_match_item(state, &data, FALSE, step); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) goto found; } *node = NULL; - return TRUE; + return RE_ERROR_SUCCESS; found: if (!add_backtrack(safe_state, (*node)->op)) - return FALSE; + return RE_ERROR_FAILURE; bt_data = state->backtrack; - bt_data->fuzzy_zero.position.text_pos = *text_pos; - bt_data->fuzzy_zero.position.node = *node; - bt_data->fuzzy_zero.fuzzy_type = (RE_INT8)data.fuzzy_type; + bt_data->fuzzy_item.position.text_pos = *text_pos; + bt_data->fuzzy_item.position.node = *node; + bt_data->fuzzy_item.fuzzy_type = (RE_INT8)data.fuzzy_type; + bt_data->fuzzy_item.step = (RE_INT8)step; ++fuzzy_info->counts[data.fuzzy_type]; ++fuzzy_info->counts[RE_FUZZY_ERR]; @@ -8004,34 +9861,28 @@ found: *text_pos = data.new_text_pos; *node = data.new_node; - return TRUE; + return RE_ERROR_SUCCESS; } -/* Retries a fuzzy match of a zero-width item. 
*/ -Py_LOCAL_INLINE(BOOL) retry_fuzzy_match_zero(RE_SafeState* safe_state, BOOL - search, Py_ssize_t* text_pos, RE_Node** node) { +/* Retries a fuzzy match of an item of width 0 or 1. */ +Py_LOCAL_INLINE(int) retry_fuzzy_match_item(RE_SafeState* safe_state, BOOL + search, Py_ssize_t* text_pos, RE_Node** node, BOOL advance) { RE_State* state; RE_FuzzyData data; RE_FuzzyInfo* fuzzy_info; RE_CODE* values; RE_BacktrackData* bt_data; + int step; state = safe_state->re_state; fuzzy_info = &state->fuzzy_info; values = fuzzy_info->node->values; bt_data = state->backtrack; - data.new_text_pos = bt_data->fuzzy_zero.position.text_pos; - data.new_node = bt_data->fuzzy_zero.position.node; - data.fuzzy_type = bt_data->fuzzy_zero.fuzzy_type; - - if (data.new_node->status & RE_STATUS_REVERSE) { - data.step = -1; - data.limit = state->slice_start; - } else { - data.step = 1; - data.limit = state->slice_end; - } + data.new_text_pos = bt_data->fuzzy_item.position.text_pos; + data.new_node = bt_data->fuzzy_item.position.node; + data.fuzzy_type = bt_data->fuzzy_item.fuzzy_type; + data.step = bt_data->fuzzy_item.step; if (data.fuzzy_type >= 0) { --fuzzy_info->counts[data.fuzzy_type]; @@ -8048,18 +9899,26 @@ Py_LOCAL_INLINE(BOOL) retry_fuzzy_match_zero(RE_SafeState* safe_state, BOOL data.permit_insertion = !search || data.new_text_pos != state->search_anchor; + step = advance ? 
data.step : 0; + for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; data.fuzzy_type++) { - if (next_fuzzy_match_zero(state, &data)) + int status; + + status = next_fuzzy_match_item(state, &data, FALSE, step); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) goto found; } discard_backtrack(state); *node = NULL; - return TRUE; + return RE_ERROR_SUCCESS; found: - bt_data->fuzzy_zero.fuzzy_type = (RE_INT8)data.fuzzy_type; + bt_data->fuzzy_item.fuzzy_type = (RE_INT8)data.fuzzy_type; ++fuzzy_info->counts[data.fuzzy_type]; ++fuzzy_info->counts[RE_FUZZY_ERR]; @@ -8070,11 +9929,11 @@ found: *text_pos = data.new_text_pos; *node = data.new_node; - return TRUE; + return RE_ERROR_SUCCESS; } /* Tries a fuzzy insertion. */ -Py_LOCAL_INLINE(BOOL) fuzzy_insert(RE_SafeState* safe_state, Py_ssize_t +Py_LOCAL_INLINE(int) fuzzy_insert(RE_SafeState* safe_state, Py_ssize_t text_pos, RE_Node* node) { RE_State* state; RE_BacktrackData* bt_data; @@ -8085,7 +9944,7 @@ Py_LOCAL_INLINE(BOOL) fuzzy_insert(RE_SafeState* safe_state, Py_ssize_t /* No insertion or deletion. */ if (!add_backtrack(safe_state, node->op)) - return FALSE; + return RE_ERROR_FAILURE; bt_data = state->backtrack; bt_data->fuzzy_insert.position.text_pos = text_pos; bt_data->fuzzy_insert.position.node = node; @@ -8103,13 +9962,13 @@ Py_LOCAL_INLINE(BOOL) fuzzy_insert(RE_SafeState* safe_state, Py_ssize_t fuzzy_info->counts[RE_FUZZY_INS] < values[RE_FUZZY_VAL_MIN_INS] || fuzzy_info->counts[RE_FUZZY_SUB] < values[RE_FUZZY_VAL_MIN_SUB] || fuzzy_info->counts[RE_FUZZY_ERR] < values[RE_FUZZY_VAL_MIN_ERR]) - state->too_few_errors = TRUE; + state->too_few_errors = RE_ERROR_SUCCESS; - return TRUE; + return RE_ERROR_SUCCESS; } /* Retries a fuzzy insertion. 
*/ -Py_LOCAL_INLINE(BOOL) retry_fuzzy_insert(RE_SafeState* safe_state, Py_ssize_t* +Py_LOCAL_INLINE(int) retry_fuzzy_insert(RE_SafeState* safe_state, Py_ssize_t* text_pos, RE_Node** node) { RE_State* state; RE_FuzzyInfo* fuzzy_info; @@ -8139,7 +9998,7 @@ Py_LOCAL_INLINE(BOOL) retry_fuzzy_insert(RE_SafeState* safe_state, Py_ssize_t* /* Could the character at text_pos have been inserted? */ if (!this_error_permitted(state, RE_FUZZY_INS) || new_text_pos == limit) { - Py_ssize_t count; + size_t count; count = bt_data->fuzzy_insert.count; @@ -8152,7 +10011,7 @@ Py_LOCAL_INLINE(BOOL) retry_fuzzy_insert(RE_SafeState* safe_state, Py_ssize_t* discard_backtrack(state); *node = NULL; - return TRUE; + return RE_ERROR_SUCCESS; } ++bt_data->fuzzy_insert.count; @@ -8171,50 +10030,16 @@ Py_LOCAL_INLINE(BOOL) retry_fuzzy_insert(RE_SafeState* safe_state, Py_ssize_t* fuzzy_info->counts[RE_FUZZY_INS] < values[RE_FUZZY_VAL_MIN_INS] || fuzzy_info->counts[RE_FUZZY_SUB] < values[RE_FUZZY_VAL_MIN_SUB] || fuzzy_info->counts[RE_FUZZY_ERR] < values[RE_FUZZY_VAL_MIN_ERR]) - state->too_few_errors = TRUE; + state->too_few_errors = RE_ERROR_SUCCESS; - *text_pos = new_text_pos + step * bt_data->fuzzy_insert.count; + *text_pos = new_text_pos + step * (Py_ssize_t)bt_data->fuzzy_insert.count; *node = new_node; - return TRUE; -} - -Py_LOCAL_INLINE(BOOL) next_fuzzy_match_string(RE_State* state, RE_FuzzyData* - data) { - Py_ssize_t new_pos; - - if (this_error_permitted(state, data->fuzzy_type)) { - switch (data->fuzzy_type) { - case RE_FUZZY_DEL: - /* Could a character at text_pos have been deleted? */ - data->new_string_pos += data->step; - return TRUE; - case RE_FUZZY_INS: - /* Could the character at text_pos have been inserted? 
*/ - new_pos = data->new_text_pos + data->step; - if (data->permit_insertion && state->slice_start <= new_pos && - new_pos <= state->slice_end) { - data->new_text_pos = new_pos; - return TRUE; - } - break; - case RE_FUZZY_SUB: - /* Could the character at text_pos have been substituted? */ - new_pos = data->new_text_pos + data->step; - if (state->slice_start <= new_pos && new_pos <= state->slice_end) { - data->new_text_pos = new_pos; - data->new_string_pos += data->step; - return TRUE; - } - break; - } - } - - return FALSE; + return RE_ERROR_SUCCESS; } /* Tries a fuzzy match of a string. */ -Py_LOCAL_INLINE(BOOL) fuzzy_match_string(RE_SafeState* safe_state, BOOL search, +Py_LOCAL_INLINE(int) fuzzy_match_string(RE_SafeState* safe_state, BOOL search, Py_ssize_t* text_pos, RE_Node* node, Py_ssize_t* string_pos, BOOL* matched, int step) { RE_State* state; @@ -8227,7 +10052,7 @@ Py_LOCAL_INLINE(BOOL) fuzzy_match_string(RE_SafeState* safe_state, BOOL search, if (!any_error_permitted(state)) { *matched = FALSE; - return TRUE; + return RE_ERROR_SUCCESS; } data.new_text_pos = *text_pos; @@ -8245,16 +10070,22 @@ Py_LOCAL_INLINE(BOOL) fuzzy_match_string(RE_SafeState* safe_state, BOOL search, for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; data.fuzzy_type++) { - if (next_fuzzy_match_string(state, &data)) + int status; + + status = next_fuzzy_match_item(state, &data, TRUE, data.step); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) goto found; } *matched = FALSE; - return TRUE; + return RE_ERROR_SUCCESS; found: if (!add_backtrack(safe_state, node->op)) - return FALSE; + return RE_ERROR_FAILURE; bt_data = state->backtrack; bt_data->fuzzy_string.position.text_pos = *text_pos; bt_data->fuzzy_string.position.node = node; @@ -8272,11 +10103,11 @@ found: *string_pos = data.new_string_pos; *matched = TRUE; - return TRUE; + return RE_ERROR_SUCCESS; } /* Retries a fuzzy match of a string. 
*/ -Py_LOCAL_INLINE(BOOL) retry_fuzzy_match_string(RE_SafeState* safe_state, BOOL +Py_LOCAL_INLINE(int) retry_fuzzy_match_string(RE_SafeState* safe_state, BOOL search, Py_ssize_t* text_pos, RE_Node** node, Py_ssize_t* string_pos, BOOL* matched) { RE_State* state; @@ -8311,13 +10142,19 @@ Py_LOCAL_INLINE(BOOL) retry_fuzzy_match_string(RE_SafeState* safe_state, BOOL for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; data.fuzzy_type++) { - if (next_fuzzy_match_string(state, &data)) + int status; + + status = next_fuzzy_match_item(state, &data, TRUE, data.step); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) goto found; } discard_backtrack(state); *matched = FALSE; - return TRUE; + return RE_ERROR_SUCCESS; found: bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; @@ -8333,11 +10170,12 @@ found: *string_pos = data.new_string_pos; *matched = TRUE; - return TRUE; + return RE_ERROR_SUCCESS; } -Py_LOCAL_INLINE(BOOL) next_fuzzy_match_string_fld(RE_State* state, - RE_FuzzyData* data) { +/* Checks a fuzzy match of a string. */ +Py_LOCAL_INLINE(int) next_fuzzy_match_string_fld(RE_State* state, RE_FuzzyData* + data) { int new_pos; if (this_error_permitted(state, data->fuzzy_type)) { @@ -8345,33 +10183,37 @@ Py_LOCAL_INLINE(BOOL) next_fuzzy_match_string_fld(RE_State* state, case RE_FUZZY_DEL: /* Could a character at text_pos have been deleted? */ data->new_string_pos += data->step; - return TRUE; + return RE_ERROR_SUCCESS; case RE_FUZZY_INS: /* Could the character at text_pos have been inserted? */ + if (!data->permit_insertion) + return RE_ERROR_FAILURE; + new_pos = data->new_folded_pos + data->step; - if (data->permit_insertion && 0 <= new_pos && new_pos <= - data->folded_len) { + if (0 <= new_pos && new_pos <= data->folded_len) { data->new_folded_pos = new_pos; - return TRUE; + return RE_ERROR_SUCCESS; } - break; + + return check_fuzzy_partial(state, new_pos); case RE_FUZZY_SUB: /* Could the character at text_pos have been substituted? 
*/ new_pos = data->new_folded_pos + data->step; if (0 <= new_pos && new_pos <= data->folded_len) { data->new_folded_pos = new_pos; data->new_string_pos += data->step; - return TRUE; + return RE_ERROR_SUCCESS; } - break; + + return check_fuzzy_partial(state, new_pos); } } - return FALSE; + return RE_ERROR_FAILURE; } /* Tries a fuzzy match of a string, ignoring case. */ -Py_LOCAL_INLINE(BOOL) fuzzy_match_string_fld(RE_SafeState* safe_state, BOOL +Py_LOCAL_INLINE(int) fuzzy_match_string_fld(RE_SafeState* safe_state, BOOL search, Py_ssize_t* text_pos, RE_Node* node, Py_ssize_t* string_pos, int* folded_pos, int folded_len, BOOL* matched, int step) { RE_State* state; @@ -8385,7 +10227,7 @@ Py_LOCAL_INLINE(BOOL) fuzzy_match_string_fld(RE_SafeState* safe_state, BOOL if (!any_error_permitted(state)) { *matched = FALSE; - return TRUE; + return RE_ERROR_SUCCESS; } new_text_pos = *text_pos; @@ -8401,23 +10243,32 @@ Py_LOCAL_INLINE(BOOL) fuzzy_match_string_fld(RE_SafeState* safe_state, BOOL * start searching one character later). 
*/ data.permit_insertion = !search || new_text_pos != state->search_anchor; - if (step > 0) - data.permit_insertion |= data.new_folded_pos != 0; - else - data.permit_insertion |= data.new_folded_pos != folded_len; + if (step > 0) { + if (data.new_folded_pos != 0) + data.permit_insertion = RE_ERROR_SUCCESS; + } else { + if (data.new_folded_pos != folded_len) + data.permit_insertion = RE_ERROR_SUCCESS; + } for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; data.fuzzy_type++) { - if (next_fuzzy_match_string_fld(state, &data)) + int status; + + status = next_fuzzy_match_string_fld(state, &data); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) goto found; } *matched = FALSE; - return TRUE; + return RE_ERROR_SUCCESS; found: if (!add_backtrack(safe_state, node->op)) - return FALSE; + return RE_ERROR_FAILURE; bt_data = state->backtrack; bt_data->fuzzy_string.position.text_pos = *text_pos; bt_data->fuzzy_string.position.node = node; @@ -8438,11 +10289,11 @@ found: *folded_pos = data.new_folded_pos; *matched = TRUE; - return TRUE; + return RE_ERROR_SUCCESS; } /* Retries a fuzzy match of a string, ignoring case. */ -Py_LOCAL_INLINE(BOOL) retry_fuzzy_match_string_fld(RE_SafeState* safe_state, +Py_LOCAL_INLINE(int) retry_fuzzy_match_string_fld(RE_SafeState* safe_state, BOOL search, Py_ssize_t* text_pos, RE_Node** node, Py_ssize_t* string_pos, int* folded_pos, BOOL* matched) { RE_State* state; @@ -8476,21 +10327,29 @@ Py_LOCAL_INLINE(BOOL) retry_fuzzy_match_string_fld(RE_SafeState* safe_state, * start searching one character later). 
*/ data.permit_insertion = !search || new_text_pos != state->search_anchor; - if (data.step > 0) - data.permit_insertion |= data.new_folded_pos != 0; - else - data.permit_insertion |= data.new_folded_pos != - bt_data->fuzzy_string.folded_len; + if (data.step > 0) { + if (data.new_folded_pos != 0) + data.permit_insertion = RE_ERROR_SUCCESS; + } else { + if (data.new_folded_pos != bt_data->fuzzy_string.folded_len) + data.permit_insertion = RE_ERROR_SUCCESS; + } for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; data.fuzzy_type++) { - if (next_fuzzy_match_string_fld(state, &data)) + int status; + + status = next_fuzzy_match_string_fld(state, &data); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) goto found; } discard_backtrack(state); *matched = FALSE; - return TRUE; + return RE_ERROR_SUCCESS; found: bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; @@ -8507,11 +10366,12 @@ found: *folded_pos = data.new_folded_pos; *matched = TRUE; - return TRUE; + return RE_ERROR_SUCCESS; } -Py_LOCAL_INLINE(BOOL) next_fuzzy_match_string_fld2(RE_State* state, - RE_FuzzyData* data) { +/* Checks a fuzzy match of a atring. */ +Py_LOCAL_INLINE(int) next_fuzzy_match_group_fld(RE_State* state, RE_FuzzyData* + data) { int new_pos; if (this_error_permitted(state, data->fuzzy_type)) { @@ -8519,33 +10379,37 @@ Py_LOCAL_INLINE(BOOL) next_fuzzy_match_string_fld2(RE_State* state, case RE_FUZZY_DEL: /* Could a character at text_pos have been deleted? */ data->new_gfolded_pos += data->step; - return TRUE; + return RE_ERROR_SUCCESS; case RE_FUZZY_INS: /* Could the character at text_pos have been inserted? 
*/ + if (!data->permit_insertion) + return RE_ERROR_FAILURE; + new_pos = data->new_folded_pos + data->step; - if (data->permit_insertion && 0 <= new_pos && new_pos <= - data->folded_len) { + if (0 <= new_pos && new_pos <= data->folded_len) { data->new_folded_pos = new_pos; - return TRUE; + return RE_ERROR_SUCCESS; } - break; + + return check_fuzzy_partial(state, new_pos); case RE_FUZZY_SUB: /* Could the character at text_pos have been substituted? */ new_pos = data->new_folded_pos + data->step; if (0 <= new_pos && new_pos <= data->folded_len) { data->new_folded_pos = new_pos; data->new_gfolded_pos += data->step; - return TRUE; + return RE_ERROR_SUCCESS; } - break; + + return check_fuzzy_partial(state, new_pos); } } - return FALSE; + return RE_ERROR_FAILURE; } /* Tries a fuzzy match of a group reference, ignoring case. */ -Py_LOCAL_INLINE(BOOL) fuzzy_match_string_fld2(RE_SafeState* safe_state, BOOL +Py_LOCAL_INLINE(int) fuzzy_match_group_fld(RE_SafeState* safe_state, BOOL search, Py_ssize_t* text_pos, RE_Node* node, int* folded_pos, int folded_len, Py_ssize_t* group_pos, int* gfolded_pos, int gfolded_len, BOOL* matched, int step) { @@ -8561,7 +10425,7 @@ Py_LOCAL_INLINE(BOOL) fuzzy_match_string_fld2(RE_SafeState* safe_state, BOOL if (!any_error_permitted(state)) { *matched = FALSE; - return TRUE; + return RE_ERROR_SUCCESS; } new_text_pos = *text_pos; @@ -8578,23 +10442,32 @@ Py_LOCAL_INLINE(BOOL) fuzzy_match_string_fld2(RE_SafeState* safe_state, BOOL * start searching one character later). 
*/ data.permit_insertion = !search || new_text_pos != state->search_anchor; - if (step > 0) - data.permit_insertion |= data.new_folded_pos != 0; - else - data.permit_insertion |= data.new_folded_pos != folded_len; + if (data.step > 0) { + if (data.new_folded_pos != 0) + data.permit_insertion = RE_ERROR_SUCCESS; + } else { + if (data.new_folded_pos != folded_len) + data.permit_insertion = RE_ERROR_SUCCESS; + } for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; data.fuzzy_type++) { - if (next_fuzzy_match_string_fld2(state, &data)) + int status; + + status = next_fuzzy_match_group_fld(state, &data); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) goto found; } *matched = FALSE; - return TRUE; + return RE_ERROR_SUCCESS; found: if (!add_backtrack(safe_state, node->op)) - return FALSE; + return RE_ERROR_FAILURE; bt_data = state->backtrack; bt_data->fuzzy_string.position.text_pos = *text_pos; bt_data->fuzzy_string.position.node = node; @@ -8618,13 +10491,13 @@ found: *gfolded_pos = data.new_gfolded_pos; *matched = TRUE; - return TRUE; + return RE_ERROR_SUCCESS; } /* Retries a fuzzy match of a group reference, ignoring case. 
*/ -Py_LOCAL_INLINE(BOOL) retry_fuzzy_match_string_fld2(RE_SafeState* safe_state, - BOOL search, Py_ssize_t* text_pos, RE_Node** node, int* folded_pos, - Py_ssize_t* group_pos, int* gfolded_pos, BOOL* matched) { +Py_LOCAL_INLINE(int) retry_fuzzy_match_group_fld(RE_SafeState* safe_state, BOOL + search, Py_ssize_t* text_pos, RE_Node** node, int* folded_pos, Py_ssize_t* + group_pos, int* gfolded_pos, BOOL* matched) { RE_State* state; RE_FuzzyData data; RE_FuzzyInfo* fuzzy_info; @@ -8662,13 +10535,19 @@ Py_LOCAL_INLINE(BOOL) retry_fuzzy_match_string_fld2(RE_SafeState* safe_state, for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; data.fuzzy_type++) { - if (next_fuzzy_match_string_fld2(state, &data)) + int status; + + status = next_fuzzy_match_group_fld(state, &data); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) goto found; } discard_backtrack(state); *matched = FALSE; - return TRUE; + return RE_ERROR_SUCCESS; found: bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; @@ -8686,7 +10565,7 @@ found: *gfolded_pos = data.new_gfolded_pos; *matched = TRUE; - return TRUE; + return RE_ERROR_SUCCESS; } /* Locates the required string, if there's one. */ @@ -8711,15 +10590,23 @@ Py_LOCAL_INLINE(Py_ssize_t) locate_required_string(RE_SafeState* safe_state) { /* Search for the required string and calculate where to start matching. */ switch (pattern->req_string->op) { case RE_OP_STRING: + { + BOOL is_partial; + found_pos = string_search(safe_state, pattern->req_string, - state->text_pos, state->slice_end); + state->text_pos, state->slice_end, &is_partial); if (found_pos < 0) /* The required string wasn't found. */ return -1; + if (is_partial) + /* We found a partial match, so start matching from there. */ + return found_pos; + /* Record where the required string matched. 
*/ state->req_pos = found_pos; - state->req_end = found_pos + pattern->req_string->value_count; + state->req_end = found_pos + + (Py_ssize_t)pattern->req_string->value_count; if (pattern->req_offset >= 0) { /* Step back from the required string to where we should start @@ -8730,13 +10617,21 @@ Py_LOCAL_INLINE(Py_ssize_t) locate_required_string(RE_SafeState* safe_state) { return found_pos; } break; + } case RE_OP_STRING_FLD: + { + BOOL is_partial; + found_pos = string_search_fld(safe_state, pattern->req_string, - state->text_pos, state->slice_end, &end_pos); + state->text_pos, state->slice_end, &end_pos, &is_partial); if (found_pos < 0) /* The required string wasn't found. */ return -1; + if (is_partial) + /* We found a partial match, so start matching from there. */ + return found_pos; + /* Record where the required string matched. */ state->req_pos = found_pos; state->req_end = end_pos; @@ -8750,77 +10645,25 @@ Py_LOCAL_INLINE(Py_ssize_t) locate_required_string(RE_SafeState* safe_state) { return found_pos; } break; + } case RE_OP_STRING_FLD_REV: + { + BOOL is_partial; + found_pos = string_search_fld_rev(safe_state, pattern->req_string, - state->text_pos, state->slice_start, &end_pos); + state->text_pos, state->slice_start, &end_pos, &is_partial); if (found_pos < 0) /* The required string wasn't found. */ return -1; + if (is_partial) + /* We found a partial match, so start matching from there. */ + return found_pos; + /* Record where the required string matched. */ state->req_pos = found_pos; state->req_end = end_pos; - if (pattern->req_offset >= 0) { - /* Step back from the required string to where we should start - * matching. - */ - found_pos += pattern->req_offset; - if (found_pos <= state->text_pos) - return found_pos; - } - break; - case RE_OP_STRING_IGN: - found_pos = string_search_ign(safe_state, pattern->req_string, - state->text_pos, state->slice_end); - if (found_pos < 0) - /* The required string wasn't found. 
*/ - return -1; - - /* Record where the required string matched. */ - state->req_pos = found_pos; - state->req_end = found_pos + pattern->req_string->value_count; - - if (pattern->req_offset >= 0) { - /* Step back from the required string to where we should start - * matching. - */ - found_pos -= pattern->req_offset; - if (found_pos >= state->text_pos) - return found_pos; - } - break; - case RE_OP_STRING_IGN_REV: - found_pos = string_search_ign_rev(safe_state, pattern->req_string, - state->text_pos, state->slice_start); - if (found_pos < 0) - /* The required string wasn't found. */ - return -1; - - /* Record where the required string matched. */ - state->req_pos = found_pos; - state->req_end = found_pos - pattern->req_string->value_count; - - if (pattern->req_offset >= 0) { - /* Step back from the required string to where we should start - * matching. - */ - found_pos += pattern->req_offset; - if (found_pos <= state->text_pos) - return found_pos; - } - break; - case RE_OP_STRING_REV: - found_pos = string_search_rev(safe_state, pattern->req_string, - state->text_pos, state->slice_start); - if (found_pos < 0) - /* The required string wasn't found. */ - return -1; - - /* Record where the required string matched. */ - state->req_pos = found_pos; - state->req_end = found_pos - pattern->req_string->value_count; - if (pattern->req_offset >= 0) { /* Step back from the required string to where we should start * matching. @@ -8831,25 +10674,173 @@ Py_LOCAL_INLINE(Py_ssize_t) locate_required_string(RE_SafeState* safe_state) { } break; } + case RE_OP_STRING_IGN: + { + BOOL is_partial; + + found_pos = string_search_ign(safe_state, pattern->req_string, + state->text_pos, state->slice_end, &is_partial); + if (found_pos < 0) + /* The required string wasn't found. */ + return -1; + + if (is_partial) + /* We found a partial match, so start matching from there. */ + return found_pos; + + /* Record where the required string matched. 
*/ + state->req_pos = found_pos; + state->req_end = found_pos + + (Py_ssize_t)pattern->req_string->value_count; + + if (pattern->req_offset >= 0) { + /* Step back from the required string to where we should start + * matching. + */ + found_pos -= pattern->req_offset; + if (found_pos >= state->text_pos) + return found_pos; + } + break; + } + case RE_OP_STRING_IGN_REV: + { + BOOL is_partial; + + found_pos = string_search_ign_rev(safe_state, pattern->req_string, + state->text_pos, state->slice_start, &is_partial); + if (found_pos < 0) + /* The required string wasn't found. */ + return -1; + + if (is_partial) + /* We found a partial match, so start matching from there. */ + return found_pos; + + /* Record where the required string matched. */ + state->req_pos = found_pos; + state->req_end = found_pos - + (Py_ssize_t)pattern->req_string->value_count; + + if (pattern->req_offset >= 0) { + /* Step back from the required string to where we should start + * matching. + */ + found_pos += pattern->req_offset; + if (found_pos <= state->text_pos) + return found_pos; + } + break; + } + case RE_OP_STRING_REV: + { + BOOL is_partial; + + found_pos = string_search_rev(safe_state, pattern->req_string, + state->text_pos, state->slice_start, &is_partial); + if (found_pos < 0) + /* The required string wasn't found. */ + return -1; + + if (is_partial) + /* We found a partial match, so start matching from there. */ + return found_pos; + + /* Record where the required string matched. */ + state->req_pos = found_pos; + state->req_end = found_pos - + (Py_ssize_t)pattern->req_string->value_count; + + if (pattern->req_offset >= 0) { + /* Step back from the required string to where we should start + * matching. + */ + found_pos += pattern->req_offset; + if (found_pos <= state->text_pos) + return found_pos; + } + break; + } + } /* Start matching from the current position. */ return state->text_pos; } +/* Tries to match a character pattern. 
*/ +Py_LOCAL_INLINE(int) match_one(RE_State* state, RE_Node* node, Py_ssize_t + text_pos) { + switch (node->op) { + case RE_OP_ANY: + return try_match_ANY(state, node, text_pos); + case RE_OP_ANY_ALL: + return try_match_ANY_ALL(state, node, text_pos); + case RE_OP_ANY_ALL_REV: + return try_match_ANY_ALL_REV(state, node, text_pos); + case RE_OP_ANY_REV: + return try_match_ANY_REV(state, node, text_pos); + case RE_OP_ANY_U: + return try_match_ANY_U(state, node, text_pos); + case RE_OP_ANY_U_REV: + return try_match_ANY_U_REV(state, node, text_pos); + case RE_OP_CHARACTER: + return try_match_CHARACTER(state, node, text_pos); + case RE_OP_CHARACTER_IGN: + return try_match_CHARACTER_IGN(state, node, text_pos); + case RE_OP_CHARACTER_IGN_REV: + return try_match_CHARACTER_IGN_REV(state, node, text_pos); + case RE_OP_CHARACTER_REV: + return try_match_CHARACTER_REV(state, node, text_pos); + case RE_OP_PROPERTY: + return try_match_PROPERTY(state, node, text_pos); + case RE_OP_PROPERTY_IGN: + return try_match_PROPERTY_IGN(state, node, text_pos); + case RE_OP_PROPERTY_IGN_REV: + return try_match_PROPERTY_IGN_REV(state, node, text_pos); + case RE_OP_PROPERTY_REV: + return try_match_PROPERTY_REV(state, node, text_pos); + case RE_OP_RANGE: + return try_match_RANGE(state, node, text_pos); + case RE_OP_RANGE_IGN: + return try_match_RANGE_IGN(state, node, text_pos); + case RE_OP_RANGE_IGN_REV: + return try_match_RANGE_IGN_REV(state, node, text_pos); + case RE_OP_RANGE_REV: + return try_match_RANGE_REV(state, node, text_pos); + case RE_OP_SET_DIFF: + case RE_OP_SET_INTER: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_UNION: + return try_match_SET(state, node, text_pos); + case RE_OP_SET_DIFF_IGN: + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_UNION_IGN: + return try_match_SET_IGN(state, node, text_pos); + case RE_OP_SET_DIFF_IGN_REV: + case RE_OP_SET_INTER_IGN_REV: + case RE_OP_SET_SYM_DIFF_IGN_REV: + case RE_OP_SET_UNION_IGN_REV: + return 
try_match_SET_IGN_REV(state, node, text_pos); + case RE_OP_SET_DIFF_REV: + case RE_OP_SET_INTER_REV: + case RE_OP_SET_SYM_DIFF_REV: + case RE_OP_SET_UNION_REV: + return try_match_SET_REV(state, node, text_pos); + } + + return FALSE; +} + /* Performs a depth-first match or search from the context. */ Py_LOCAL_INLINE(int) basic_match(RE_SafeState* safe_state, RE_Node* start_node, BOOL search, BOOL recursive_call) { RE_State* state; - Py_ssize_t slice_start; - Py_ssize_t slice_end; - Py_ssize_t text_pos; RE_EncodingTable* encoding; + RE_LocaleInfo* locale_info; PatternObject* pattern; - Py_ssize_t text_length; RE_NextNode start_pair; - void* text; Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - BOOL (*has_property)(RE_CODE property, Py_UCS4 ch); Py_ssize_t pattern_step; /* The overall step of the pattern (forwards or backwards). */ Py_ssize_t string_pos; BOOL do_search_start; @@ -8857,20 +10848,14 @@ Py_LOCAL_INLINE(int) basic_match(RE_SafeState* safe_state, RE_Node* start_node, int folded_pos; int gfolded_pos; RE_Node* node; + int status; TRACE(("<>\n")) state = safe_state->re_state; - - slice_start = state->slice_start; - slice_end = state->slice_end; - text_pos = state->text_pos; - encoding = state->encoding; - + locale_info = state->locale_info; pattern = state->pattern; - text_length = state->text_length; - /* Look beyond any initial group node. */ start_pair.node = start_node; if (recursive_call) @@ -8883,7 +10868,7 @@ Py_LOCAL_INLINE(int) basic_match(RE_SafeState* safe_state, RE_Node* start_node, case RE_OP_END_OF_STRING: if (state->reverse) { /* Searching backwards. */ - if (text_pos != text_length) + if (state->text_pos != state->text_length) return RE_ERROR_FAILURE; /* Don't bother to search further because it's anchored. */ @@ -8893,7 +10878,7 @@ Py_LOCAL_INLINE(int) basic_match(RE_SafeState* safe_state, RE_Node* start_node, case RE_OP_START_OF_STRING: if (!state->reverse) { /* Searching forwards. 
*/ - if (text_pos != 0) + if (state->text_pos != 0) return RE_ERROR_FAILURE; /* Don't bother to search further because it's anchored. */ @@ -8902,9 +10887,7 @@ Py_LOCAL_INLINE(int) basic_match(RE_SafeState* safe_state, RE_Node* start_node, break; } - text = state->text; char_at = state->char_at; - has_property = encoding->has_property; pattern_step = state->reverse ? -1 : 1; string_pos = -1; do_search_start = pattern->do_search_start; @@ -8947,18 +10930,18 @@ next_match_1: * a fast search for the next possible match. This enables us to * avoid the overhead of the call subsequently. */ - if (!search_start(safe_state, &start_pair, &new_position, 0)) - return RE_ERROR_FAILURE; + status = search_start(safe_state, &start_pair, &new_position, 0); + if (status != RE_ERROR_SUCCESS) + return status; node = new_position.node; - text_pos = new_position.text_pos; + state->text_pos = new_position.text_pos; if (node->op == RE_OP_SUCCESS) { /* Must the match advance past its start? */ - if (text_pos != state->search_anchor || !state->must_advance) { - state->text_pos = text_pos; + if (state->text_pos != state->search_anchor || + !state->must_advance) return RE_ERROR_SUCCESS; - } state->text_pos = state->match_pos + pattern_step; goto next_match_1; @@ -8971,40 +10954,46 @@ next_match_1: * search for the next possible match. */ node = start_node; - text_pos = state->text_pos; next_match_2: if (state->reverse) { - if (text_pos < slice_start) + if (state->text_pos < state->slice_start) { + if (state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + return RE_ERROR_FAILURE; + } } else { - if (text_pos > slice_end) + if (state->text_pos > state->slice_end) { + if (state-> partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + return RE_ERROR_FAILURE; + } } - state->match_pos = text_pos; + state->match_pos = state->text_pos; if (node->op == RE_OP_SUCCESS) { /* Must the match advance past its start? 
*/ - if (text_pos != state->search_anchor || !state->must_advance) { + if (state->text_pos != state->search_anchor || + !state->must_advance) { BOOL success; if (state->match_all && !recursive_call) { /* We want to match all of the slice. */ if (state->reverse) - success = text_pos == slice_start; + success = state->text_pos == state->slice_start; else - success = text_pos == slice_end; + success = state->text_pos == state->slice_end; } else success = TRUE; - if (success) { - state->text_pos = text_pos; + if (success) return RE_ERROR_SUCCESS; - } } - text_pos = state->match_pos + pattern_step; + state->text_pos = state->match_pos + pattern_step; goto next_match_2; } } @@ -9019,7 +11008,7 @@ next_match_2: advance: /* The main matching loop. */ for (;;) { - TRACE(("%d|", text_pos)) + TRACE(("%d|", state->text_pos)) /* Should we abort the matching? */ ++state->iterations; @@ -9028,15 +11017,22 @@ advance: return RE_ERROR_INTERRUPTED; switch (node->op) { - case RE_OP_ANY: /* Any character, except a newline. */ + case RE_OP_ANY: /* Any character except a newline. */ TRACE(("%s\n", re_op_text[node->op])) - if (text_pos < slice_end && char_at(text, text_pos) != '\n') { - ++text_pos; + status = try_match_ANY(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) { + ++state->text_pos; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return status; + if (!node) goto backtrack; } else @@ -9045,71 +11041,103 @@ advance: case RE_OP_ANY_ALL: /* Any character at all. 
*/ TRACE(("%s\n", re_op_text[node->op])) - if (text_pos < slice_end) { - ++text_pos; + status = try_match_ANY_ALL(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) { + ++state->text_pos; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return status; + if (!node) goto backtrack; } else goto backtrack; break; - case RE_OP_ANY_ALL_REV: /* Any character at all backwards. */ + case RE_OP_ANY_ALL_REV: /* Any character at all, backwards. */ TRACE(("%s\n", re_op_text[node->op])) - if (text_pos > slice_start) { - --text_pos; + status = try_match_ANY_ALL_REV(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) { + --state->text_pos; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return status; + if (!node) goto backtrack; } else goto backtrack; break; - case RE_OP_ANY_REV: /* Any character backwards, except a newline. */ + case RE_OP_ANY_REV: /* Any character except a newline, backwards. 
*/ TRACE(("%s\n", re_op_text[node->op])) - if (text_pos > slice_start && char_at(text, text_pos - 1) != '\n') - { - --text_pos; + status = try_match_ANY_REV(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) { + --state->text_pos; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return status; + if (!node) goto backtrack; } else goto backtrack; break; - case RE_OP_ANY_U: /* Any character, except a line separator. */ + case RE_OP_ANY_U: /* Any character except a line separator. */ TRACE(("%s\n", re_op_text[node->op])) - if (text_pos < slice_end && - !state->encoding->is_line_sep(char_at(text, text_pos))) { - ++text_pos; + status = try_match_ANY_U(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) { + ++state->text_pos; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return status; + if (!node) goto backtrack; } else goto backtrack; break; - case RE_OP_ANY_U_REV: /* Any character backwards, except a line separator. */ + case RE_OP_ANY_U_REV: /* Any character except a line separator, backwards. 
*/ TRACE(("%s\n", re_op_text[node->op])) - if (text_pos > slice_start && - !state->encoding->is_line_sep(char_at(text, text_pos - 1))) { - --text_pos; + status = try_match_ANY_U_REV(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) { + --state->text_pos; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return status; + if (!node) goto backtrack; } else @@ -9132,7 +11160,6 @@ advance: save_info(state, &info); - state->text_pos = text_pos; state->must_advance = FALSE; status = basic_match(safe_state, node->nonstring.next_2.node, @@ -9148,17 +11175,23 @@ advance: goto backtrack; node = node->next_1.node; - text_pos = state->text_pos; break; } - case RE_OP_BOUNDARY: /* At a word boundary. */ + case RE_OP_BOUNDARY: /* On a word boundary. 
*/ TRACE(("%s %d\n", re_op_text[node->op], node->match)) - if (encoding->at_boundary(state, text_pos) == node->match) + status = try_match_BOUNDARY(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) node = node->next_1.node; else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + if (!node) goto backtrack; } else @@ -9169,15 +11202,20 @@ advance: RE_Position next_position; TRACE(("%s\n", re_op_text[node->op])) - if (try_match(state, &node->next_1, text_pos, &next_position)) { + status = try_match(state, &node->next_1, state->text_pos, + &next_position); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) { if (!add_backtrack(safe_state, RE_OP_BRANCH)) return RE_ERROR_BACKTRACKING; state->backtrack->branch.position.node = node->nonstring.next_2.node; - state->backtrack->branch.position.text_pos = text_pos; + state->backtrack->branch.position.text_pos = state->text_pos; node = next_position.node; - text_pos = next_position.text_pos; + state->text_pos = next_position.text_pos; } else node = node->nonstring.next_2.node; break; @@ -9195,105 +11233,157 @@ advance: node = node->next_1.node; break; } - case RE_OP_CHARACTER: /* A character literal. */ + case RE_OP_CHARACTER: /* A character. 
*/ TRACE(("%s %d %d\n", re_op_text[node->op], node->match, node->values[0])) - if (text_pos < slice_end && (char_at(text, text_pos) == - node->values[0]) == node->match) { - text_pos += node->step; + if (state->text_pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && + matches_CHARACTER(encoding, locale_info, node, + char_at(state->text, state->text_pos)) == node->match) { + state->text_pos += node->step; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!node) goto backtrack; } else goto backtrack; break; - case RE_OP_CHARACTER_IGN: /* A character literal, ignoring case. */ + case RE_OP_CHARACTER_IGN: /* A character, ignoring case. */ TRACE(("%s %d %d\n", re_op_text[node->op], node->match, node->values[0])) - if (text_pos < slice_end && same_char_ign(encoding, char_at(text, - text_pos), node->values[0]) == node->match) { - text_pos += node->step; + if (state->text_pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && + matches_CHARACTER_IGN(encoding, locale_info, node, + char_at(state->text, state->text_pos)) == node->match) { + state->text_pos += node->step; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!node) goto backtrack; } else goto backtrack; break; - case RE_OP_CHARACTER_IGN_REV: /* A character literal backwards, ignoring case. 
*/ + case RE_OP_CHARACTER_IGN_REV: /* A character, backwards, ignoring case. */ TRACE(("%s %d %d\n", re_op_text[node->op], node->match, node->values[0])) - if (text_pos > slice_start && same_char_ign(encoding, char_at(text, - text_pos - 1), node->values[0]) == node->match) { - text_pos += node->step; + if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && + matches_CHARACTER_IGN(encoding, locale_info, node, + char_at(state->text, state->text_pos - 1)) == node->match) { + state->text_pos += node->step; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!node) goto backtrack; } else goto backtrack; break; - case RE_OP_CHARACTER_REV: /* A character literal backwards. */ + case RE_OP_CHARACTER_REV: /* A character, backwards. */ TRACE(("%s %d %d\n", re_op_text[node->op], node->match, node->values[0])) - if (text_pos > slice_start && (char_at(text, text_pos - 1) == - node->values[0]) == node->match) { - text_pos += node->step; + if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && + matches_CHARACTER(encoding, locale_info, node, + char_at(state->text, state->text_pos - 1)) == node->match) { + state->text_pos += node->step; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!node) goto backtrack; } else goto backtrack; break; - case RE_OP_DEFAULT_BOUNDARY: /* At a default word boundary. 
*/ + case RE_OP_DEFAULT_BOUNDARY: /* On a default word boundary. */ TRACE(("%s %d\n", re_op_text[node->op], node->match)) - if (encoding->at_default_boundary(state, text_pos) == node->match) + status = try_match_DEFAULT_BOUNDARY(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) node = node->next_1.node; else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + if (!node) goto backtrack; } else goto backtrack; break; - case RE_OP_DEFAULT_END_OF_WORD: /* At a default end of a word. */ - TRACE(("%s %d\n", re_op_text[node->op], node->match)) + case RE_OP_DEFAULT_END_OF_WORD: /* At the default end of a word. */ + TRACE(("%s\n", re_op_text[node->op])) - if (encoding->at_default_word_end(state, text_pos) == node->match) + status = try_match_DEFAULT_END_OF_WORD(state, node, + state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) node = node->next_1.node; else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + if (!node) goto backtrack; } else goto backtrack; break; - case RE_OP_DEFAULT_START_OF_WORD: /* At a default start of a word. */ - TRACE(("%s %d\n", re_op_text[node->op], node->match)) + case RE_OP_DEFAULT_START_OF_WORD: /* At the default start of a word. 
*/ + TRACE(("%s\n", re_op_text[node->op])) - if (encoding->at_default_word_start(state, text_pos) == - node->match) + status = try_match_DEFAULT_START_OF_WORD(state, node, + state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) node = node->next_1.node; else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + if (!node) goto backtrack; } else @@ -9302,7 +11392,7 @@ advance: case RE_OP_END_FUZZY: /* End of fuzzy matching. */ TRACE(("%s\n", re_op_text[node->op])) - if (!fuzzy_insert(safe_state, text_pos, node)) + if (!fuzzy_insert(safe_state, state->text_pos, node)) return RE_ERROR_BACKTRACKING; /* If there were too few errors, in the fuzzy section, try again. @@ -9312,6 +11402,13 @@ advance: goto backtrack; } + state->total_fuzzy_counts[RE_FUZZY_SUB] += + state->fuzzy_info.counts[RE_FUZZY_SUB]; + state->total_fuzzy_counts[RE_FUZZY_INS] += + state->fuzzy_info.counts[RE_FUZZY_INS]; + state->total_fuzzy_counts[RE_FUZZY_DEL] += + state->fuzzy_info.counts[RE_FUZZY_DEL]; + node = node->next_1.node; break; case RE_OP_END_GREEDY_REPEAT: /* End of a greedy repeat. */ @@ -9320,8 +11417,10 @@ advance: RE_RepeatData* rp_data; BOOL changed; BOOL try_body; + int body_status; RE_Position next_body_position; BOOL try_tail; + int tail_status; RE_Position next_tail_position; RE_BacktrackData* bt_data; TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) @@ -9340,7 +11439,7 @@ advance: /* Have we advanced through the text or has a capture group change? */ changed = rp_data->capture_change != state->capture_change || - text_pos != rp_data->start; + state->text_pos != rp_data->start; /* The counts are of type size_t, so the format needs to specify * that. @@ -9352,18 +11451,37 @@ advance: /* Could the body or tail match? 
*/ try_body = changed && (rp_data->count < node->values[2] || ~node->values[2] == 0) && !is_repeat_guarded(safe_state, index, - text_pos, RE_STATUS_BODY) && try_match(state, &node->next_1, - text_pos, &next_body_position); + state->text_pos, RE_STATUS_BODY); + if (try_body) { + body_status = try_match(state, &node->next_1, state->text_pos, + &next_body_position); + + if (body_status == RE_ERROR_FAILURE) + try_body = FALSE; + } else + body_status = RE_ERROR_FAILURE; + try_tail = (!changed || rp_data->count >= node->values[1]) && - !is_repeat_guarded(safe_state, index, text_pos, RE_STATUS_TAIL) - && try_match(state, &node->nonstring.next_2, text_pos, - &next_tail_position); + !is_repeat_guarded(safe_state, index, state->text_pos, + RE_STATUS_TAIL); + if(try_tail) { + tail_status = try_match(state, &node->nonstring.next_2, + state->text_pos, &next_tail_position); + + if (tail_status == RE_ERROR_FAILURE) + try_tail = FALSE; + } else + tail_status = RE_ERROR_FAILURE; + if (!try_body && !try_tail) { /* Neither the body nor the tail could match. */ --rp_data->count; goto backtrack; } + if (body_status < 0 || (body_status == 0 && tail_status < 0)) + return RE_ERROR_PARTIAL; + /* Record info in case we backtrack into the body. */ if (!add_backtrack(safe_state, RE_OP_BODY_END)) return RE_ERROR_BACKTRACKING; @@ -9374,10 +11492,11 @@ advance: bt_data->repeat.capture_change = rp_data->capture_change; if (try_body) { + /* Both the body and the tail could match. */ if (try_tail) { - /* Both the body and the tail could match, but the body - * takes precedence. If the body fails to match then we - * want to try the tail before backtracking further. + /* The body takes precedence. If the body fails to match + * then we want to try the tail before backtracking + * further. */ /* Record backtracking info for matching the tail. 
*/ @@ -9389,7 +11508,7 @@ advance: bt_data->repeat.count = rp_data->count; bt_data->repeat.start = rp_data->start; bt_data->repeat.capture_change = rp_data->capture_change; - bt_data->repeat.text_pos = text_pos; + bt_data->repeat.text_pos = state->text_pos; } /* Record backtracking info in case the body fails to match. */ @@ -9397,20 +11516,20 @@ advance: return RE_ERROR_BACKTRACKING; bt_data = state->backtrack; bt_data->repeat.index = index; - bt_data->repeat.text_pos = text_pos; + bt_data->repeat.text_pos = state->text_pos; rp_data->capture_change = state->capture_change; - rp_data->start = text_pos; + rp_data->start = state->text_pos; /* Advance into the body. */ node = next_body_position.node; - text_pos = next_body_position.text_pos; + state->text_pos = next_body_position.text_pos; } else { /* Only the tail could match. */ /* Advance into the tail. */ node = next_tail_position.node; - text_pos = next_tail_position.text_pos; + state->text_pos = next_tail_position.text_pos; } break; } @@ -9439,13 +11558,13 @@ advance: bt_data->group.current_capture = group->current_capture; if (pattern->group_info[private_index - 1].referenced && - group->span.end != text_pos) + group->span.end != state->text_pos) ++state->capture_change; - group->span.end = text_pos; + group->span.end = state->text_pos; /* Save the capture? */ if (node->values[2]) { - group->current_capture = group->capture_count; + group->current_capture = (Py_ssize_t)group->capture_count; if (!save_capture(safe_state, private_index, public_index)) return RE_ERROR_MEMORY; } @@ -9459,8 +11578,10 @@ advance: RE_RepeatData* rp_data; BOOL changed; BOOL try_body; + int body_status; RE_Position next_body_position; BOOL try_tail; + int tail_status; RE_Position next_tail_position; RE_BacktrackData* bt_data; TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) @@ -9479,7 +11600,7 @@ advance: /* Have we advanced through the text or has a capture group change? 
*/ changed = rp_data->capture_change != state->capture_change || - text_pos != rp_data->start; + state->text_pos != rp_data->start; /* The counts are of type size_t, so the format needs to specify * that. @@ -9491,17 +11612,35 @@ advance: /* Could the body or tail match? */ try_body = changed && (rp_data->count < node->values[2] || ~node->values[2] == 0) && !is_repeat_guarded(safe_state, index, - text_pos, RE_STATUS_BODY) && try_match(state, &node->next_1, - text_pos, &next_body_position); - try_tail = (!changed || rp_data->count >= node->values[1]) && - try_match(state, &node->nonstring.next_2, text_pos, - &next_tail_position); + state->text_pos, RE_STATUS_BODY); + if (try_body) { + body_status = try_match(state, &node->next_1, state->text_pos, + &next_body_position); + + if (body_status == RE_ERROR_FAILURE) + try_body = FALSE; + } else + body_status = RE_ERROR_FAILURE; + + try_tail = (!changed || rp_data->count >= node->values[1]); + if (try_tail) { + tail_status = try_match(state, &node->nonstring.next_2, + state->text_pos, &next_tail_position); + + if (tail_status == RE_ERROR_FAILURE) + try_tail = FALSE; + } else + tail_status = RE_ERROR_FAILURE; + if (!try_body && !try_tail) { /* Neither the body nor the tail could match. */ --rp_data->count; goto backtrack; } + if (body_status < 0 || (body_status == 0 && tail_status < 0)) + return RE_ERROR_PARTIAL; + /* Record info in case we backtrack into the body. */ if (!add_backtrack(safe_state, RE_OP_BODY_END)) return RE_ERROR_BACKTRACKING; @@ -9512,10 +11651,11 @@ advance: bt_data->repeat.capture_change = rp_data->capture_change; if (try_body) { + /* Both the body and the tail could match. */ if (try_tail) { - /* Both the body and the tail could match, but the tail - * takes precedence. If the tail fails to match then we - * want to try the body before backtracking further. + /* The tail takes precedence. If the tail fails to match + * then we want to try the body before backtracking + * further. 
*/ /* Record backtracking info for matching the body. */ @@ -9527,11 +11667,11 @@ advance: bt_data->repeat.count = rp_data->count; bt_data->repeat.start = rp_data->start; bt_data->repeat.capture_change = rp_data->capture_change; - bt_data->repeat.text_pos = text_pos; + bt_data->repeat.text_pos = state->text_pos; /* Advance into the tail. */ node = next_tail_position.node; - text_pos = next_tail_position.text_pos; + state->text_pos = next_tail_position.text_pos; } else { /* Only the body could match. */ @@ -9542,32 +11682,39 @@ advance: return RE_ERROR_BACKTRACKING; bt_data = state->backtrack; bt_data->repeat.index = index; - bt_data->repeat.text_pos = text_pos; + bt_data->repeat.text_pos = state->text_pos; rp_data->capture_change = state->capture_change; - rp_data->start = text_pos; + rp_data->start = state->text_pos; /* Advance into the body. */ node = next_body_position.node; - text_pos = next_body_position.text_pos; + state->text_pos = next_body_position.text_pos; } } else { /* Only the tail could match. */ /* Advance into the tail. */ node = next_tail_position.node; - text_pos = next_tail_position.text_pos; + state->text_pos = next_tail_position.text_pos; } break; } case RE_OP_END_OF_LINE: /* At the end of a line. */ TRACE(("%s\n", re_op_text[node->op])) - if (text_pos == text_length || char_at(text, text_pos) == '\n') + status = try_match_END_OF_LINE(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) node = node->next_1.node; else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + if (!node) goto backtrack; } else @@ -9576,11 +11723,18 @@ advance: case RE_OP_END_OF_LINE_U: /* At the end of a line. 
*/ TRACE(("%s\n", re_op_text[node->op])) - if (encoding->at_line_end(state, text_pos)) + status = try_match_END_OF_LINE_U(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) node = node->next_1.node; else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + if (!node) goto backtrack; } else @@ -9589,11 +11743,18 @@ advance: case RE_OP_END_OF_STRING: /* At the end of the string. */ TRACE(("%s\n", re_op_text[node->op])) - if (text_pos == text_length) + status = try_match_END_OF_STRING(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) node = node->next_1.node; else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + if (!node) goto backtrack; } else @@ -9602,11 +11763,19 @@ advance: case RE_OP_END_OF_STRING_LINE: /* At end of string or final newline. */ TRACE(("%s\n", re_op_text[node->op])) - if (text_pos == text_length || text_pos == state->final_newline) + status = try_match_END_OF_STRING_LINE(state, node, + state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) node = node->next_1.node; else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + if (!node) goto backtrack; } else @@ -9615,24 +11784,39 @@ advance: case RE_OP_END_OF_STRING_LINE_U: /* At end of string or final newline. 
*/ TRACE(("%s\n", re_op_text[node->op])) - if (text_pos == text_length || text_pos == state->final_line_sep) + status = try_match_END_OF_STRING_LINE_U(state, node, + state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) node = node->next_1.node; else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + if (!node) goto backtrack; } else goto backtrack; break; - case RE_OP_END_OF_WORD: /* At end of a word. */ - TRACE(("%s %d\n", re_op_text[node->op], node->match)) + case RE_OP_END_OF_WORD: /* At the end of a word. */ + TRACE(("%s\n", re_op_text[node->op])) - if (encoding->at_word_end(state, text_pos) == node->match) + status = try_match_END_OF_WORD(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) node = node->next_1.node; else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + if (!node) goto backtrack; } else @@ -9653,7 +11837,7 @@ advance: memmove(&bt_data->fuzzy.fuzzy_info, fuzzy_info, sizeof(RE_FuzzyInfo)); bt_data->fuzzy.index = node->values[0]; - bt_data->fuzzy.text_pos = text_pos; + bt_data->fuzzy.text_pos = state->text_pos; /* Initialise the new fuzzy info. */ memset(fuzzy_info->counts, 0, 4 * sizeof(fuzzy_info->counts[0])); @@ -9663,14 +11847,21 @@ advance: node = node->next_1.node; break; } - case RE_OP_GRAPHEME_BOUNDARY: /* At a grapheme boundary. */ - TRACE(("%s %d\n", re_op_text[node->op], node->match)) + case RE_OP_GRAPHEME_BOUNDARY: /* On a grapheme boundary. 
*/ + TRACE(("%s\n", re_op_text[node->op])) - if (encoding->at_grapheme_boundary(state, text_pos) == node->match) + status = try_match_GRAPHEME_BOUNDARY(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) node = node->next_1.node; else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + if (!node) goto backtrack; } else @@ -9682,8 +11873,10 @@ advance: RE_RepeatData* rp_data; RE_BacktrackData* bt_data; BOOL try_body; + int body_status; RE_Position next_body_position; BOOL try_tail; + int tail_status; RE_Position next_tail_position; TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) @@ -9701,23 +11894,41 @@ advance: bt_data->repeat.count = rp_data->count; bt_data->repeat.start = rp_data->start; bt_data->repeat.capture_change = rp_data->capture_change; - bt_data->repeat.text_pos = text_pos; + bt_data->repeat.text_pos = state->text_pos; /* Initialise the new repeat. */ rp_data->count = 0; - rp_data->start = text_pos; + rp_data->start = state->text_pos; rp_data->capture_change = state->capture_change; /* Could the body or tail match? 
*/ try_body = node->values[2] > 0 && !is_repeat_guarded(safe_state, - index, text_pos, RE_STATUS_BODY) && try_match(state, - &node->next_1, text_pos, &next_body_position); - try_tail = node->values[1] == 0 && try_match(state, - &node->nonstring.next_2, text_pos, &next_tail_position); + index, state->text_pos, RE_STATUS_BODY); + if (try_body) { + body_status = try_match(state, &node->next_1, state->text_pos, + &next_body_position); + + if (body_status == RE_ERROR_FAILURE) + try_body = FALSE; + } else + body_status = RE_ERROR_FAILURE; + + try_tail = node->values[1] == 0; + if (try_tail) { + tail_status = try_match(state, &node->nonstring.next_2, + state->text_pos, &next_tail_position); + + if (tail_status == RE_ERROR_FAILURE) + try_tail = FALSE; + } else + tail_status = RE_ERROR_FAILURE; if (!try_body && !try_tail) /* Neither the body nor the tail could match. */ goto backtrack; + if (body_status < 0 || (body_status == 0 && tail_status < 0)) + return RE_ERROR_PARTIAL; + if (try_body) { if (try_tail) { /* Both the body and the tail could match, but the body @@ -9734,18 +11945,18 @@ advance: bt_data->repeat.count = rp_data->count; bt_data->repeat.start = rp_data->start; bt_data->repeat.capture_change = rp_data->capture_change; - bt_data->repeat.text_pos = text_pos; + bt_data->repeat.text_pos = state->text_pos; } /* Advance into the body. */ node = next_body_position.node; - text_pos = next_body_position.text_pos; + state->text_pos = next_body_position.text_pos; } else { /* Only the tail could match. */ /* Advance into the tail. 
*/ node = next_tail_position.node; - text_pos = next_tail_position.text_pos; + state->text_pos = next_tail_position.text_pos; } break; } @@ -9754,6 +11965,7 @@ advance: RE_CODE index; RE_RepeatData* rp_data; size_t count; + BOOL is_partial; BOOL match; RE_BacktrackData* bt_data; TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) @@ -9762,13 +11974,18 @@ advance: index = node->values[0]; rp_data = &state->repeats[index]; - if (is_repeat_guarded(safe_state, index, text_pos, RE_STATUS_BODY)) + if (is_repeat_guarded(safe_state, index, state->text_pos, + RE_STATUS_BODY)) goto backtrack; /* Count how many times the character repeats, up to the maximum. */ - count = count_one(state, node->nonstring.next_2.node, text_pos, - (size_t)node->values[2]); + count = count_one(state, node->nonstring.next_2.node, + state->text_pos, node->values[2], &is_partial); + if (is_partial) { + state->text_pos += (Py_ssize_t)count * node->step; + return RE_ERROR_PARTIAL; + } /* Unmatch until it's not guarded. */ match = FALSE; @@ -9777,7 +11994,7 @@ advance: /* The number of repeats is below the minimum. */ break; - if (!is_repeat_guarded(safe_state, index, text_pos + + if (!is_repeat_guarded(safe_state, index, state->text_pos + (Py_ssize_t)count * node->step, RE_STATUS_TAIL)) { /* It's not guarded at this position. */ match = TRUE; @@ -9792,8 +12009,8 @@ advance: if (!match) { /* The repeat has failed to match at this position. */ - if (!guard_repeat(safe_state, index, text_pos, RE_STATUS_BODY, - TRUE)) + if (!guard_repeat(safe_state, index, state->text_pos, + RE_STATUS_BODY, TRUE)) return RE_ERROR_MEMORY; goto backtrack; } @@ -9807,17 +12024,17 @@ advance: bt_data->repeat.text_pos = rp_data->start; bt_data->repeat.count = rp_data->count; - rp_data->start = text_pos; + rp_data->start = state->text_pos; rp_data->count = count; /* Advance into the tail. 
*/ - text_pos += (Py_ssize_t)count * node->step; + state->text_pos += (Py_ssize_t)count * node->step; node = node->next_1.node; break; } case RE_OP_GROUP_CALL: /* Group call. */ { - RE_CODE index; + size_t index; size_t g; size_t r; TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) @@ -9916,8 +12133,10 @@ advance: RE_RepeatData* rp_data; RE_BacktrackData* bt_data; BOOL try_body; + int body_status; RE_Position next_body_position; BOOL try_tail; + int tail_status; RE_Position next_tail_position; TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) @@ -9935,23 +12154,42 @@ advance: bt_data->repeat.count = rp_data->count; bt_data->repeat.start = rp_data->start; bt_data->repeat.capture_change = rp_data->capture_change; - bt_data->repeat.text_pos = text_pos; + bt_data->repeat.text_pos = state->text_pos; /* Initialise the new repeat. */ rp_data->count = 0; - rp_data->start = text_pos; + rp_data->start = state->text_pos; rp_data->capture_change = state->capture_change; /* Could the body or tail match? */ try_body = node->values[2] > 0 && !is_repeat_guarded(safe_state, - index, text_pos, RE_STATUS_BODY) && try_match(state, - &node->next_1, text_pos, &next_body_position); - try_tail = node->values[1] == 0 && try_match(state, - &node->nonstring.next_2, text_pos, &next_tail_position); + index, state->text_pos, RE_STATUS_BODY); + if (try_body) { + body_status = try_match(state, &node->next_1, state->text_pos, + &next_body_position); + + if (body_status == RE_ERROR_FAILURE) + try_body = FALSE; + } else + body_status = RE_ERROR_FAILURE; + + try_tail = node->values[1] == 0; + if(try_tail) { + tail_status = try_match(state, &node->nonstring.next_2, + state->text_pos, &next_tail_position); + + if (tail_status == RE_ERROR_FAILURE) + try_tail = FALSE; + } else + tail_status = RE_ERROR_FAILURE; + if (!try_body && !try_tail) /* Neither the body nor the tail could match. 
*/ goto backtrack; + if (body_status < 0 || (body_status == 0 && tail_status < 0)) + return RE_ERROR_PARTIAL; + if (try_body) { if (try_tail) { /* Both the body and the tail could match, but the tail @@ -9968,22 +12206,22 @@ advance: bt_data->repeat.count = rp_data->count; bt_data->repeat.start = rp_data->start; bt_data->repeat.capture_change = rp_data->capture_change; - bt_data->repeat.text_pos = text_pos; + bt_data->repeat.text_pos = state->text_pos; /* Advance into the tail. */ node = next_tail_position.node; - text_pos = next_tail_position.text_pos; + state->text_pos = next_tail_position.text_pos; } else { /* Advance into the body. */ node = next_body_position.node; - text_pos = next_body_position.text_pos; + state->text_pos = next_body_position.text_pos; } } else { /* Only the tail could match. */ /* Advance into the tail. */ node = next_tail_position.node; - text_pos = next_tail_position.text_pos; + state->text_pos = next_tail_position.text_pos; } break; } @@ -9992,25 +12230,31 @@ advance: RE_CODE index; RE_RepeatData* rp_data; size_t count; + BOOL is_partial; TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) /* Repeat indexes are 0-based. */ index = node->values[0]; rp_data = &state->repeats[index]; - if (is_repeat_guarded(safe_state, index, text_pos, RE_STATUS_BODY)) + if (is_repeat_guarded(safe_state, index, state->text_pos, + RE_STATUS_BODY)) goto backtrack; /* Count how many times the character repeats, up to the minimum. */ - count = count_one(state, node->nonstring.next_2.node, text_pos, - (size_t)node->values[1]); + count = count_one(state, node->nonstring.next_2.node, + state->text_pos, node->values[1], &is_partial); + if (is_partial) { + state->text_pos += (Py_ssize_t)count * node->step; + return RE_ERROR_PARTIAL; + } /* Have we matched at least the minimum? */ if (count < node->values[1]) { /* The repeat has failed to match at this position. 
*/ - if (!guard_repeat(safe_state, index, text_pos, RE_STATUS_BODY, - TRUE)) + if (!guard_repeat(safe_state, index, state->text_pos, + RE_STATUS_BODY, TRUE)) return RE_ERROR_MEMORY; goto backtrack; } @@ -10031,12 +12275,12 @@ advance: bt_data->repeat.text_pos = rp_data->start; bt_data->repeat.count = rp_data->count; - rp_data->start = text_pos; + rp_data->start = state->text_pos; rp_data->count = count; } /* Advance into the tail. */ - text_pos += (Py_ssize_t)count * node->step; + state->text_pos += (Py_ssize_t)count * node->step; node = node->next_1.node; break; } @@ -10044,6 +12288,9 @@ advance: { RE_Info info; size_t capture_change; + Py_ssize_t saved_slice_start; + Py_ssize_t saved_slice_end; + Py_ssize_t saved_text_pos; BOOL too_few_errors; int status; TRACE(("%s %d\n", re_op_text[node->op], node->match)) @@ -10057,9 +12304,11 @@ advance: /* Save the other info. */ save_info(state, &info); + saved_slice_start = state->slice_start; + saved_slice_end = state->slice_end; + saved_text_pos = state->text_pos; state->slice_start = 0; - state->slice_end = text_length; - state->text_pos = text_pos; + state->slice_end = state->text_length; state->must_advance = FALSE; too_few_errors = state->too_few_errors; @@ -10071,8 +12320,9 @@ advance: reset_guards(state, node->values + 1); - state->slice_end = slice_end; - state->slice_start = slice_start; + state->text_pos = saved_text_pos; + state->slice_end = saved_slice_end; + state->slice_start = saved_slice_start; /* Restore the other info. */ restore_info(state, &info); @@ -10119,137 +12369,188 @@ advance: break; } case RE_OP_PROPERTY: /* A property. 
*/ - /* values are: property */ TRACE(("%s %d %d\n", re_op_text[node->op], node->match, node->values[0])) - if (text_pos < slice_end && has_property(node->values[0], - char_at(text, text_pos)) == node->match) { - text_pos += node->step; + if (state->text_pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && + matches_PROPERTY(encoding, locale_info, node, + char_at(state->text, state->text_pos)) == node->match) { + state->text_pos += node->step; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!node) goto backtrack; } else goto backtrack; break; case RE_OP_PROPERTY_IGN: /* A property, ignoring case. */ - /* values are: property */ TRACE(("%s %d %d\n", re_op_text[node->op], node->match, node->values[0])) - if (text_pos < slice_end && has_property_ign(encoding, - node->values[0], char_at(text, text_pos)) == node->match) { - text_pos += node->step; + if (state->text_pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && + matches_PROPERTY_IGN(encoding, locale_info, node, + char_at(state->text, state->text_pos)) == node->match) { + state->text_pos += node->step; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!node) goto backtrack; } else goto backtrack; break; - case RE_OP_PROPERTY_IGN_REV: /* A property backwards, ignoring case. 
*/ - /* values are: property */ + case RE_OP_PROPERTY_IGN_REV: /* A property, backwards, ignoring case. */ TRACE(("%s %d %d\n", re_op_text[node->op], node->match, node->values[0])) - if (text_pos > slice_start && has_property_ign(encoding, - node->values[0], char_at(text, text_pos - 1)) == node->match) { - text_pos += node->step; + if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && + matches_PROPERTY_IGN(encoding, locale_info, node, + char_at(state->text, state->text_pos - 1)) == node->match) { + state->text_pos += node->step; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!node) goto backtrack; } else goto backtrack; break; - case RE_OP_PROPERTY_REV: /* A property backwards. */ - /* values are: property */ + case RE_OP_PROPERTY_REV: /* A property, backwards. 
*/ TRACE(("%s %d %d\n", re_op_text[node->op], node->match, node->values[0])) - if (text_pos > slice_start && has_property(node->values[0], - char_at(text, text_pos - 1)) == node->match) { - text_pos += node->step; + if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && + matches_PROPERTY(encoding, locale_info, node, + char_at(state->text, state->text_pos - 1)) == node->match) { + state->text_pos += node->step; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!node) goto backtrack; } else goto backtrack; break; case RE_OP_RANGE: /* A range. */ - /* values are: lower, upper */ TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, node->values[0], node->values[1])) - if (text_pos < slice_end && in_range(node->values[0], - node->values[1], char_at(text, text_pos)) == node->match) { - text_pos += node->step; + if (state->text_pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && matches_RANGE(encoding, + locale_info, node, char_at(state->text, state->text_pos)) == + node->match) { + state->text_pos += node->step; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!node) goto backtrack; } else goto backtrack; break; case RE_OP_RANGE_IGN: /* A range, ignoring case. 
*/ - /* values are: lower, upper */ TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, node->values[0], node->values[1])) - if (text_pos < slice_end && in_range_ign(encoding, node->values[0], - node->values[1], char_at(text, text_pos)) == node->match) { - text_pos += node->step; + if (state->text_pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && + matches_RANGE_IGN(encoding, locale_info, node, + char_at(state->text, state->text_pos)) == node->match) { + state->text_pos += node->step; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!node) goto backtrack; } else goto backtrack; break; - case RE_OP_RANGE_IGN_REV: /* A range backwards, ignoring case. */ - /* values are: lower, upper */ + case RE_OP_RANGE_IGN_REV: /* A range, backwards, ignoring case. */ TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, node->values[0], node->values[1])) - if (text_pos > slice_start && in_range_ign(encoding, - node->values[0], node->values[1], char_at(text, text_pos - 1)) == + if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && + matches_RANGE_IGN(encoding, locale_info, node, + char_at(state->text, state->text_pos - 1)) == node->match) { + state->text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_RANGE_REV: /* A range, backwards. 
*/ + TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, + node->values[0], node->values[1])) + + if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && matches_RANGE(encoding, + locale_info, node, char_at(state->text, state->text_pos - 1)) == node->match) { - text_pos += node->step; + state->text_pos += node->step; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) - return RE_ERROR_BACKTRACKING; - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_RANGE_REV: /* A range backwards. */ - /* values are: lower, upper */ - TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, - node->values[0], node->values[1])) + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return RE_ERROR_PARTIAL; - if (text_pos > slice_start && in_range(node->values[0], - node->values[1], char_at(text, text_pos - 1)) == node->match) { - text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) - return RE_ERROR_BACKTRACKING; if (!node) goto backtrack; } else @@ -10259,7 +12560,6 @@ advance: { RE_GroupData* group; RE_GroupSpan* span; - Py_ssize_t available; TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) /* Capture group indexes are 1-based (excluding group 0, which is @@ -10276,27 +12576,28 @@ advance: span = &group->captures[group->current_capture]; - /* Are there enough characters? */ - available = state->slice_end - text_pos; - if (!(node->status & RE_STATUS_FUZZY) && span->end - span->start > - available) - goto backtrack; - if (string_pos < 0) string_pos = span->start; /* Try comparing. 
*/ while (string_pos < span->end) { - if (text_pos < slice_end && char_at(text, text_pos) == - char_at(text, string_pos)) { + if (state->text_pos >= state->text_length && + state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && + same_char(char_at(state->text, state->text_pos), + char_at(state->text, string_pos))) { ++string_pos; - ++text_pos; + ++state->text_pos; } else if (node->status & RE_STATUS_FUZZY) { BOOL matched; - if (!fuzzy_match_string(safe_state, search, &text_pos, - node, &string_pos, &matched, 1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_string(safe_state, search, + &state->text_pos, node, &string_pos, &matched, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!matched) { string_pos = -1; goto backtrack; @@ -10317,7 +12618,8 @@ advance: { RE_GroupData* group; RE_GroupSpan* span; - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, + Py_UCS4* folded); int folded_len; int gfolded_len; Py_UCS4 folded[RE_MAX_FOLDED]; @@ -10347,27 +12649,33 @@ advance: gfolded_pos = 0; gfolded_len = 0; } else { - folded_len = full_case_fold(char_at(text, text_pos), folded); - gfolded_len = full_case_fold(char_at(text, string_pos), - gfolded); + folded_len = full_case_fold(locale_info, char_at(state->text, + state->text_pos), folded); + gfolded_len = full_case_fold(locale_info, char_at(state->text, + string_pos), gfolded); } /* Try comparing. */ while (string_pos < span->end) { /* Case-fold at current position in text. 
*/ if (folded_pos >= folded_len) { - if (text_pos < slice_end) - folded_len = full_case_fold(char_at(text, text_pos), - folded); + if (state->text_pos >= state->text_length && + state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end) + folded_len = full_case_fold(locale_info, + char_at(state->text, state->text_pos), folded); else folded_len = 0; + folded_pos = 0; } /* Case-fold at current position in group. */ if (gfolded_pos >= gfolded_len) { - gfolded_len = full_case_fold(char_at(text, string_pos), - gfolded); + gfolded_len = full_case_fold(locale_info, + char_at(state->text, string_pos), gfolded); gfolded_pos = 0; } @@ -10378,10 +12686,12 @@ advance: } else if (node->status & RE_STATUS_FUZZY) { BOOL matched; - if (!fuzzy_match_string_fld2(safe_state, search, &text_pos, - node, &folded_pos, folded_len, &string_pos, &gfolded_pos, - gfolded_len, &matched, 1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_group_fld(safe_state, search, + &state->text_pos, node, &folded_pos, folded_len, + &string_pos, &gfolded_pos, gfolded_len, &matched, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!matched) { string_pos = -1; goto backtrack; @@ -10392,7 +12702,7 @@ advance: } if (folded_pos >= folded_len && folded_len > 0) - ++text_pos; + ++state->text_pos; if (gfolded_pos >= gfolded_len) ++string_pos; @@ -10411,7 +12721,8 @@ advance: { RE_GroupData* group; RE_GroupSpan* span; - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, + Py_UCS4* folded); int folded_len; int gfolded_len; Py_UCS4 folded[RE_MAX_FOLDED]; @@ -10441,28 +12752,33 @@ advance: gfolded_pos = 0; gfolded_len = 0; } else { - folded_len = full_case_fold(char_at(text, text_pos - 1), - folded); - gfolded_len = full_case_fold(char_at(text, string_pos - 1), - gfolded); + folded_len = full_case_fold(locale_info, char_at(state->text, + state->text_pos - 1), folded); + gfolded_len = 
full_case_fold(locale_info, char_at(state->text, + string_pos - 1), gfolded); } /* Try comparing. */ while (string_pos > span->start) { /* Case-fold at current position in text. */ if (folded_pos <= 0) { - if (text_pos > slice_start) - folded_len = full_case_fold(char_at(text, text_pos - - 1), folded); + if (state->text_pos <= 0 && state->partial_side == + RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start) + folded_len = full_case_fold(locale_info, + char_at(state->text, state->text_pos - 1), folded); else folded_len = 0; + folded_pos = folded_len; } /* Case-fold at current position in group. */ if (gfolded_pos <= 0) { - gfolded_len = full_case_fold(char_at(text, string_pos - 1), - gfolded); + gfolded_len = full_case_fold(locale_info, + char_at(state->text, string_pos - 1), gfolded); gfolded_pos = gfolded_len; } @@ -10473,10 +12789,12 @@ advance: } else if (node->status & RE_STATUS_FUZZY) { BOOL matched; - if (!fuzzy_match_string_fld2(safe_state, search, &text_pos, - node, &folded_pos, folded_len, &string_pos, &gfolded_pos, - gfolded_len, &matched, -1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_group_fld(safe_state, search, + &state->text_pos, node, &folded_pos, folded_len, + &string_pos, &gfolded_pos, gfolded_len, &matched, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!matched) { string_pos = -1; goto backtrack; @@ -10487,7 +12805,7 @@ advance: } if (folded_pos <= 0 && folded_len > 0) - --text_pos; + --state->text_pos; if (gfolded_pos <= 0) --string_pos; @@ -10506,7 +12824,6 @@ advance: { RE_GroupData* group; RE_GroupSpan* span; - Py_ssize_t available; TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) /* Capture group indexes are 1-based (excluding group 0, which is @@ -10523,27 +12840,28 @@ advance: span = &group->captures[group->current_capture]; - /* Are there enough characters? 
*/ - available = state->slice_end - text_pos; - if (!(node->status & RE_STATUS_FUZZY) && span->end - span->start > - available) - goto backtrack; - if (string_pos < 0) string_pos = span->start; /* Try comparing. */ while (string_pos < span->end) { - if (text_pos < slice_end && same_char_ign(encoding, - char_at(text, text_pos), char_at(text, string_pos))) { + if (state->text_pos >= state->text_length && + state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && + same_char_ign(encoding, locale_info, char_at(state->text, + state->text_pos), char_at(state->text, string_pos))) { ++string_pos; - ++text_pos; + ++state->text_pos; } else if (node->status & RE_STATUS_FUZZY) { BOOL matched; - if (!fuzzy_match_string(safe_state, search, &text_pos, - node, &string_pos, &matched, 1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_string(safe_state, search, + &state->text_pos, node, &string_pos, &matched, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!matched) { string_pos = -1; goto backtrack; @@ -10564,7 +12882,6 @@ advance: { RE_GroupData* group; RE_GroupSpan* span; - Py_ssize_t available; TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) /* Capture group indexes are 1-based (excluding group 0, which is @@ -10581,28 +12898,29 @@ advance: span = &group->captures[group->current_capture]; - /* Are there enough characters? */ - available = text_pos - state->slice_start; - if (!(node->status & RE_STATUS_FUZZY) && span->end - span->start > - available) - goto backtrack; - if (string_pos < 0) string_pos = span->end; /* Try comparing. 
*/ while (string_pos > span->start) { - if (text_pos > slice_start && same_char_ign(encoding, - char_at(text, text_pos - 1), char_at(text, string_pos - 1))) + if (state->text_pos <= 0 && state->partial_side == + RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && + same_char_ign(encoding, locale_info, char_at(state->text, + state->text_pos - 1), char_at(state->text, string_pos - 1))) { --string_pos; - --text_pos; + --state->text_pos; } else if (node->status & RE_STATUS_FUZZY) { BOOL matched; - if (!fuzzy_match_string(safe_state, search, &text_pos, - node, &string_pos, &matched, -1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_string(safe_state, search, + &state->text_pos, node, &string_pos, &matched, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!matched) { string_pos = -1; goto backtrack; @@ -10623,7 +12941,6 @@ advance: { RE_GroupData* group; RE_GroupSpan* span; - Py_ssize_t available; TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) /* Capture group indexes are 1-based (excluding group 0, which is @@ -10640,27 +12957,28 @@ advance: span = &group->captures[group->current_capture]; - /* Are there enough characters? */ - available = text_pos - state->slice_start; - if (!(node->status & RE_STATUS_FUZZY) && span->end - span->start > - available) - goto backtrack; - if (string_pos < 0) string_pos = span->end; /* Try comparing. 
*/ while (string_pos > span->start) { - if (text_pos > slice_start && char_at(text, text_pos - 1) == - char_at(text, string_pos - 1)) { + if (state->text_pos <= 0 && state->partial_side == + RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && + same_char(char_at(state->text, state->text_pos - 1), + char_at(state->text, string_pos - 1))) { --string_pos; - --text_pos; + --state->text_pos; } else if (node->status & RE_STATUS_FUZZY) { BOOL matched; - if (!fuzzy_match_string(safe_state, search, &text_pos, - node, &string_pos, &matched, -1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_string(safe_state, search, + &state->text_pos, node, &string_pos, &matched, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!matched) { string_pos = -1; goto backtrack; @@ -10680,11 +12998,14 @@ advance: case RE_OP_SEARCH_ANCHOR: /* At the start of the search. */ TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - if (text_pos == state->search_anchor) + if (state->text_pos == state->search_anchor) node = node->next_1.node; else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + if (!node) goto backtrack; } else @@ -10696,13 +13017,21 @@ advance: case RE_OP_SET_UNION: TRACE(("%s %d\n", re_op_text[node->op], node->match)) - if (text_pos < slice_end && in_set(encoding, node, char_at(text, - text_pos)) == node->match) { - text_pos += node->step; + if (state->text_pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && matches_SET(encoding, + locale_info, node, char_at(state->text, state->text_pos)) == + node->match) { + state->text_pos += node->step; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if 
(!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!node) goto backtrack; } else @@ -10714,13 +13043,21 @@ advance: case RE_OP_SET_UNION_IGN: TRACE(("%s %d\n", re_op_text[node->op], node->match)) - if (text_pos < slice_end && in_set_ign(encoding, node, - char_at(text, text_pos)) == node->match) { - text_pos += node->step; + if (state->text_pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && matches_SET_IGN(encoding, + locale_info, node, char_at(state->text, state->text_pos)) == + node->match) { + state->text_pos += node->step; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, 1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!node) goto backtrack; } else @@ -10732,13 +13069,20 @@ advance: case RE_OP_SET_UNION_IGN_REV: TRACE(("%s %d\n", re_op_text[node->op], node->match)) - if (text_pos > slice_start && in_set_ign(encoding, node, - char_at(text, text_pos - 1)) == node->match) { - text_pos += node->step; + if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && + matches_SET_IGN(encoding, locale_info, node, char_at(state->text, + state->text_pos - 1)) == node->match) { + state->text_pos += node->step; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!node) goto backtrack; } else @@ 
-10750,13 +13094,20 @@ advance: case RE_OP_SET_UNION_REV: TRACE(("%s %d\n", re_op_text[node->op], node->match)) - if (text_pos > slice_start && in_set(encoding, node, char_at(text, - text_pos - 1)) == node->match) { - text_pos += node->step; + if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && matches_SET(encoding, + locale_info, node, char_at(state->text, state->text_pos - 1)) == + node->match) { + state->text_pos += node->step; node = node->next_1.node; } else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_one(safe_state, search, &text_pos, &node, -1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!node) goto backtrack; } else @@ -10787,13 +13138,13 @@ advance: bt_data->group.current_capture = group->current_capture; if (pattern->group_info[private_index - 1].referenced && - group->span.start != text_pos) + group->span.start != state->text_pos) ++state->capture_change; - group->span.start = text_pos; + group->span.start = state->text_pos; /* Save the capture? */ if (node->values[2]) { - group->current_capture = group->capture_count; + group->current_capture = (Py_ssize_t)group->capture_count; if (!save_capture(safe_state, private_index, public_index)) return RE_ERROR_MEMORY; } @@ -10804,11 +13155,18 @@ advance: case RE_OP_START_OF_LINE: /* At the start of a line. 
*/ TRACE(("%s\n", re_op_text[node->op])) - if (text_pos == 0 || char_at(text, text_pos - 1) == '\n') + status = try_match_START_OF_LINE(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) node = node->next_1.node; else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + if (!node) goto backtrack; } else @@ -10817,11 +13175,18 @@ advance: case RE_OP_START_OF_LINE_U: /* At the start of a line. */ TRACE(("%s\n", re_op_text[node->op])) - if (encoding->at_line_start(state, text_pos)) + status = try_match_START_OF_LINE_U(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) node = node->next_1.node; else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + if (!node) goto backtrack; } else @@ -10830,45 +13195,54 @@ advance: case RE_OP_START_OF_STRING: /* At the start of the string. */ TRACE(("%s\n", re_op_text[node->op])) - if (text_pos == 0) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) - return RE_ERROR_BACKTRACKING; - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_START_OF_WORD: /* At start of a word. 
*/ - TRACE(("%s %d\n", re_op_text[node->op], node->match)) + status = try_match_START_OF_STRING(state, node, state->text_pos); + if (status < 0) + return status; - if (encoding->at_word_start(state, text_pos) == node->match) + if (status == RE_ERROR_SUCCESS) node = node->next_1.node; else if (node->status & RE_STATUS_FUZZY) { - if (!fuzzy_match_zero(safe_state, search, &text_pos, &node)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + if (!node) goto backtrack; } else goto backtrack; break; - case RE_OP_STRING: /* A string literal. */ + case RE_OP_START_OF_WORD: /* At the start of a word. */ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_START_OF_WORD(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_STRING: /* A string. */ { Py_ssize_t length; - Py_ssize_t available; RE_CODE* values; TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) - if ((node->status & RE_STATUS_REQUIRED) && text_pos == + if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == state->req_pos && string_pos < 0) - text_pos = state->req_end; + state->text_pos = state->req_end; else { - /* Are there enough characters to match? */ - length = node->value_count; - available = slice_end - text_pos; - if (length > available && !(node->status & RE_STATUS_FUZZY)) - goto backtrack; + length = (Py_ssize_t)node->value_count; if (string_pos < 0) string_pos = 0; @@ -10877,16 +13251,23 @@ advance: /* Try comparing. 
*/ while (string_pos < length) { - if (text_pos < slice_end && char_at(text, text_pos) == - values[string_pos]) { + if (state->text_pos >= state->text_length && + state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && + same_char(char_at(state->text, state->text_pos), + values[string_pos])) { ++string_pos; - ++text_pos; + ++state->text_pos; } else if (node->status & RE_STATUS_FUZZY) { BOOL matched; - if (!fuzzy_match_string(safe_state, search, &text_pos, - node, &string_pos, &matched, 1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_string(safe_state, search, + &state->text_pos, node, &string_pos, &matched, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!matched) { string_pos = -1; goto backtrack; @@ -10904,26 +13285,21 @@ advance: node = node->next_1.node; break; } - case RE_OP_STRING_FLD: /* A string literal, ignoring case. */ + case RE_OP_STRING_FLD: /* A string, ignoring case. */ { Py_ssize_t length; - Py_ssize_t available; - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, + Py_UCS4* folded); RE_CODE* values; int folded_len; Py_UCS4 folded[RE_MAX_FOLDED]; TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) - if ((node->status & RE_STATUS_REQUIRED) && text_pos == + if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == state->req_pos && string_pos < 0) - text_pos = state->req_end; + state->text_pos = state->req_end; else { - /* Are there enough characters to match? 
*/ - length = node->value_count; - available = slice_end - text_pos; - if ((Py_ssize_t)possible_unfolded_length(length) > available && - !(node->status & RE_STATUS_FUZZY)) - goto backtrack; + length = (Py_ssize_t)node->value_count; full_case_fold = encoding->full_case_fold; @@ -10932,13 +13308,13 @@ advance: folded_pos = 0; folded_len = 0; } else { - folded_len = full_case_fold(char_at(text, text_pos), - folded); + folded_len = full_case_fold(locale_info, + char_at(state->text, state->text_pos), folded); if (folded_pos >= folded_len) { - if (text_pos >= slice_end) + if (state->text_pos >= state->slice_end) goto backtrack; - ++text_pos; + ++state->text_pos; folded_pos = 0; folded_len = 0; } @@ -10949,32 +13325,38 @@ advance: /* Try comparing. */ while (string_pos < length) { if (folded_pos >= folded_len) { - folded_len = full_case_fold(char_at(text, text_pos), - folded); + if (state->text_pos >= state->text_length && + state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + folded_len = full_case_fold(locale_info, + char_at(state->text, state->text_pos), folded); folded_pos = 0; } - if (same_char_ign(encoding, folded[folded_pos], - values[string_pos])) { + if (same_char_ign(encoding, locale_info, + folded[folded_pos], values[string_pos])) { ++string_pos; ++folded_pos; if (folded_pos >= folded_len) - ++text_pos; + ++state->text_pos; } else if (node->status & RE_STATUS_FUZZY) { BOOL matched; - if (!fuzzy_match_string_fld(safe_state, search, - &text_pos, node, &string_pos, &folded_pos, - folded_len, &matched, 1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_string_fld(safe_state, search, + &state->text_pos, node, &string_pos, &folded_pos, + folded_len, &matched, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!matched) { string_pos = -1; goto backtrack; } if (folded_pos >= folded_len) - ++text_pos; + ++state->text_pos; } else { string_pos = -1; goto backtrack; @@ -10986,7 +13368,7 @@ advance: BOOL matched; if 
(!fuzzy_match_string_fld(safe_state, search, - &text_pos, node, &string_pos, &folded_pos, + &state->text_pos, node, &string_pos, &folded_pos, folded_len, &matched, 1)) return RE_ERROR_BACKTRACKING; @@ -10996,7 +13378,7 @@ advance: } if (folded_pos >= folded_len) - ++text_pos; + ++state->text_pos; } } @@ -11010,26 +13392,21 @@ advance: node = node->next_1.node; break; } - case RE_OP_STRING_FLD_REV: /* A string literal, ignoring case. */ + case RE_OP_STRING_FLD_REV: /* A string, ignoring case. */ { Py_ssize_t length; - Py_ssize_t available; - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, + Py_UCS4* folded); RE_CODE* values; int folded_len; Py_UCS4 folded[RE_MAX_FOLDED]; TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) - if ((node->status & RE_STATUS_REQUIRED) && text_pos == + if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == state->req_pos && string_pos < 0) - text_pos = state->req_end; + state->text_pos = state->req_end; else { - /* Are there enough characters to match? */ - length = node->value_count; - available = text_pos - slice_start; - if ((Py_ssize_t)possible_unfolded_length(length) > available && - !(node->status & RE_STATUS_FUZZY)) - goto backtrack; + length = (Py_ssize_t)node->value_count; full_case_fold = encoding->full_case_fold; @@ -11038,13 +13415,13 @@ advance: folded_pos = 0; folded_len = 0; } else { - folded_len = full_case_fold(char_at(text, text_pos - 1), - folded); + folded_len = full_case_fold(locale_info, + char_at(state->text, state->text_pos - 1), folded); if (folded_pos <= 0) { - if (text_pos <= slice_start) + if (state->text_pos <= state->slice_start) goto backtrack; - --text_pos; + --state->text_pos; folded_pos = 0; folded_len = 0; } @@ -11055,32 +13432,38 @@ advance: /* Try comparing. 
*/ while (string_pos > 0) { if (folded_pos <= 0) { - folded_len = full_case_fold(char_at(text, text_pos - - 1), folded); + if (state->text_pos <= 0 && state->partial_side == + RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + folded_len = full_case_fold(locale_info, + char_at(state->text, state->text_pos - 1), folded); folded_pos = folded_len; } - if (same_char_ign(encoding, folded[folded_pos - 1], - values[string_pos - 1])) { + if (same_char_ign(encoding, locale_info, folded[folded_pos + - 1], values[string_pos - 1])) { --string_pos; --folded_pos; if (folded_pos <= 0) - --text_pos; + --state->text_pos; } else if (node->status & RE_STATUS_FUZZY) { BOOL matched; - if (!fuzzy_match_string_fld(safe_state, search, - &text_pos, node, &string_pos, &folded_pos, - folded_len, &matched, -1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_string_fld(safe_state, search, + &state->text_pos, node, &string_pos, &folded_pos, + folded_len, &matched, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!matched) { string_pos = -1; goto backtrack; } if (folded_pos <= 0) - --text_pos; + --state->text_pos; } else { string_pos = -1; goto backtrack; @@ -11092,7 +13475,7 @@ advance: BOOL matched; if (!fuzzy_match_string_fld(safe_state, search, - &text_pos, node, &string_pos, &folded_pos, + &state->text_pos, node, &string_pos, &folded_pos, folded_len, &matched, -1)) return RE_ERROR_BACKTRACKING; @@ -11102,7 +13485,7 @@ advance: } if (folded_pos <= 0) - --text_pos; + --state->text_pos; } } @@ -11116,22 +13499,17 @@ advance: node = node->next_1.node; break; } - case RE_OP_STRING_IGN: /* A string literal, ignoring case. */ + case RE_OP_STRING_IGN: /* A string, ignoring case. 
*/ { Py_ssize_t length; - Py_ssize_t available; RE_CODE* values; TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) - if ((node->status & RE_STATUS_REQUIRED) && text_pos == + if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == state->req_pos && string_pos < 0) - text_pos = state->req_end; + state->text_pos = state->req_end; else { - /* Are there enough characters to match? */ - length = node->value_count; - available = slice_end - text_pos; - if (length > available && !(node->status & RE_STATUS_FUZZY)) - goto backtrack; + length = (Py_ssize_t)node->value_count; if (string_pos < 0) string_pos = 0; @@ -11140,16 +13518,23 @@ advance: /* Try comparing. */ while (string_pos < length) { - if (text_pos < slice_end && same_char_ign(encoding, - char_at(text, text_pos), values[string_pos])) { + if (state->text_pos >= state->text_length && + state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && + same_char_ign(encoding, locale_info, char_at(state->text, + state->text_pos), values[string_pos])) { ++string_pos; - ++text_pos; + ++state->text_pos; } else if (node->status & RE_STATUS_FUZZY) { BOOL matched; - if (!fuzzy_match_string(safe_state, search, &text_pos, - node, &string_pos, &matched, 1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_string(safe_state, search, + &state->text_pos, node, &string_pos, &matched, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!matched) { string_pos = -1; goto backtrack; @@ -11167,22 +13552,17 @@ advance: node = node->next_1.node; break; } - case RE_OP_STRING_IGN_REV: /* A string literal, ignoring case. */ + case RE_OP_STRING_IGN_REV: /* A string, ignoring case. 
*/ { Py_ssize_t length; - Py_ssize_t available; RE_CODE* values; TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) - if ((node->status & RE_STATUS_REQUIRED) && text_pos == + if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == state->req_pos && string_pos < 0) - text_pos = state->req_end; + state->text_pos = state->req_end; else { - /* Are there enough characters to match? */ - length = node->value_count; - available = text_pos - slice_start; - if (length > available && !(node->status & RE_STATUS_FUZZY)) - goto backtrack; + length = (Py_ssize_t)node->value_count; if (string_pos < 0) string_pos = length; @@ -11191,16 +13571,23 @@ advance: /* Try comparing. */ while (string_pos > 0) { - if (text_pos > slice_start && same_char_ign(encoding, - char_at(text, text_pos - 1), values[string_pos - 1])) { + if (state->text_pos <= 0 && state->partial_side == + RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && + same_char_ign(encoding, locale_info, char_at(state->text, + state->text_pos - 1), values[string_pos - 1])) { --string_pos; - --text_pos; + --state->text_pos; } else if (node->status & RE_STATUS_FUZZY) { BOOL matched; - if (!fuzzy_match_string(safe_state, search, &text_pos, - node, &string_pos, &matched, -1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_string(safe_state, search, + &state->text_pos, node, &string_pos, &matched, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!matched) { string_pos = -1; goto backtrack; @@ -11218,22 +13605,17 @@ advance: node = node->next_1.node; break; } - case RE_OP_STRING_REV: /* A string literal. */ + case RE_OP_STRING_REV: /* A string. 
*/ { Py_ssize_t length; - Py_ssize_t available; RE_CODE* values; TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) - if ((node->status & RE_STATUS_REQUIRED) && text_pos == + if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == state->req_pos && string_pos < 0) - text_pos = state->req_end; + state->text_pos = state->req_end; else { - /* Are there enough characters to match? */ - length = node->value_count; - available = text_pos - slice_start; - if (length > available && !(node->status & RE_STATUS_FUZZY)) - goto backtrack; + length = (Py_ssize_t)node->value_count; if (string_pos < 0) string_pos = length; @@ -11242,16 +13624,23 @@ advance: /* Try comparing. */ while (string_pos > 0) { - if (text_pos > slice_start && char_at(text, text_pos - 1) - == values[string_pos - 1]) { + if (state->text_pos <= 0 && state->partial_side == + RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && + same_char(char_at(state->text, state->text_pos - 1), + values[string_pos - 1])) { --string_pos; - --text_pos; + --state->text_pos; } else if (node->status & RE_STATUS_FUZZY) { BOOL matched; - if (!fuzzy_match_string(safe_state, search, &text_pos, - node, &string_pos, &matched, -1)) - return RE_ERROR_BACKTRACKING; + status = fuzzy_match_string(safe_state, search, + &state->text_pos, node, &string_pos, &matched, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + if (!matched) { string_pos = -1; goto backtrack; @@ -11274,13 +13663,11 @@ advance: int status; TRACE(("%s\n", re_op_text[node->op])) - state->text_pos = text_pos; - status = string_set_match(safe_state, node); + status = string_set_match_fwdrev(safe_state, node, FALSE); if (status < 0) return status; if (status == 0) goto backtrack; - text_pos = state->text_pos; node = node->next_1.node; break; } @@ -11289,13 +13676,11 @@ advance: int status; TRACE(("%s\n", re_op_text[node->op])) - state->text_pos = text_pos; - status = string_set_match_fld(safe_state, node); + status = 
string_set_match_fld_fwdrev(safe_state, node, FALSE); if (status < 0) return status; if (status == 0) goto backtrack; - text_pos = state->text_pos; node = node->next_1.node; break; } @@ -11304,13 +13689,11 @@ advance: int status; TRACE(("%s\n", re_op_text[node->op])) - state->text_pos = text_pos; - status = string_set_match_fld_rev(safe_state, node); + status = string_set_match_fld_fwdrev(safe_state, node, TRUE); if (status < 0) return status; if (status == 0) goto backtrack; - text_pos = state->text_pos; node = node->next_1.node; break; } @@ -11319,13 +13702,11 @@ advance: int status; TRACE(("%s\n", re_op_text[node->op])) - state->text_pos = text_pos; - status = string_set_match_ign(safe_state, node); + status = string_set_match_ign_fwdrev(safe_state, node, FALSE); if (status < 0) return status; if (status == 0) goto backtrack; - text_pos = state->text_pos; node = node->next_1.node; break; } @@ -11334,13 +13715,11 @@ advance: int status; TRACE(("%s\n", re_op_text[node->op])) - state->text_pos = text_pos; - status = string_set_match_ign_rev(safe_state, node); + status = string_set_match_ign_fwdrev(safe_state, node, TRUE); if (status < 0) return status; if (status == 0) goto backtrack; - text_pos = state->text_pos; node = node->next_1.node; break; } @@ -11349,13 +13728,11 @@ advance: int status; TRACE(("%s\n", re_op_text[node->op])) - state->text_pos = text_pos; - status = string_set_match_rev(safe_state, node); + status = string_set_match_fwdrev(safe_state, node, TRUE); if (status < 0) return status; if (status == 0) goto backtrack; - text_pos = state->text_pos; node = node->next_1.node; break; } @@ -11363,21 +13740,20 @@ advance: /* Must the match advance past its start? */ TRACE(("%s\n", re_op_text[node->op])) - if (text_pos == state->search_anchor && state->must_advance) + if (state->text_pos == state->search_anchor && state->must_advance) goto backtrack; if (state->match_all && !recursive_call) { /* We want to match all of the slice. 
*/ if (state->reverse) { - if (text_pos != slice_start) + if (state->text_pos != state->slice_start) goto backtrack; } else { - if (text_pos != slice_end) + if (state->text_pos != state->slice_end) goto backtrack; } } - state->text_pos = text_pos; return RE_ERROR_SUCCESS; default: /* Illegal opcode! */ TRACE(("UNKNOWN OP %d\n", node->op)) @@ -11399,44 +13775,47 @@ backtrack: bt_data = last_backtrack(state); switch (bt_data->op) { - case RE_OP_ANY: /* Any character, except a newline. */ + case RE_OP_ANY: /* Any character except a newline. */ case RE_OP_ANY_ALL: /* Any character at all. */ - case RE_OP_ANY_ALL_REV: /* Any character at all. */ - case RE_OP_ANY_REV: /* Any character, except a newline. */ - case RE_OP_ANY_U: /* Any character, except a line separator. */ - case RE_OP_ANY_U_REV: /* Any character, except a line separator. */ - case RE_OP_CHARACTER: /* A character literal. */ - case RE_OP_CHARACTER_IGN: /* A character literal, ignoring case. */ - case RE_OP_CHARACTER_IGN_REV: /* A character literal, ignoring case. */ - case RE_OP_CHARACTER_REV: /* A character literal. */ + case RE_OP_ANY_ALL_REV: /* Any character at all, backwards. */ + case RE_OP_ANY_REV: /* Any character except a newline, backwards. */ + case RE_OP_ANY_U: /* Any character except a line separator. */ + case RE_OP_ANY_U_REV: /* Any character except a line separator, backwards. */ + case RE_OP_CHARACTER: /* A character. */ + case RE_OP_CHARACTER_IGN: /* A character, ignoring case. */ + case RE_OP_CHARACTER_IGN_REV: /* A character, ignoring case, backwards. */ + case RE_OP_CHARACTER_REV: /* A character, backwards. */ case RE_OP_PROPERTY: /* A property. */ case RE_OP_PROPERTY_IGN: /* A property, ignoring case. */ - case RE_OP_PROPERTY_IGN_REV: /* A property, ignoring case. */ - case RE_OP_PROPERTY_REV: /* A property. */ + case RE_OP_PROPERTY_IGN_REV: /* A property, ignoring case, backwards. */ + case RE_OP_PROPERTY_REV: /* A property, backwards. */ case RE_OP_RANGE: /* A range. 
*/ case RE_OP_RANGE_IGN: /* A range, ignoring case. */ - case RE_OP_RANGE_IGN_REV: /* A range, ignoring case. */ - case RE_OP_RANGE_REV: /* A range. */ - case RE_OP_SET_DIFF: /* Character set. */ - case RE_OP_SET_DIFF_IGN: /* Character set, ignoring case. */ - case RE_OP_SET_DIFF_IGN_REV: /* Character set, ignoring case. */ - case RE_OP_SET_DIFF_REV: /* Character set. */ - case RE_OP_SET_INTER: /* Character set. */ - case RE_OP_SET_INTER_IGN: /* Character set, ignoring case. */ - case RE_OP_SET_INTER_IGN_REV: /* Character set, ignoring case. */ - case RE_OP_SET_INTER_REV: /* Character set. */ - case RE_OP_SET_SYM_DIFF: /* Character set. */ - case RE_OP_SET_SYM_DIFF_IGN: /* Character set, ignoring case. */ - case RE_OP_SET_SYM_DIFF_IGN_REV: /* Character set, ignoring case. */ - case RE_OP_SET_SYM_DIFF_REV: /* Character set. */ - case RE_OP_SET_UNION: /* Character set. */ - case RE_OP_SET_UNION_IGN: /* Character set, ignoring case. */ - case RE_OP_SET_UNION_IGN_REV: /* Character set, ignoring case. */ - case RE_OP_SET_UNION_REV: /* Character set. */ + case RE_OP_RANGE_IGN_REV: /* A range, ignoring case, backwards. */ + case RE_OP_RANGE_REV: /* A range, backwards. */ + case RE_OP_SET_DIFF: /* Set difference. */ + case RE_OP_SET_DIFF_IGN: /* Set difference, ignoring case. */ + case RE_OP_SET_DIFF_IGN_REV: /* Set difference, ignoring case, backwards. */ + case RE_OP_SET_DIFF_REV: /* Set difference, backwards. */ + case RE_OP_SET_INTER: /* Set intersection. */ + case RE_OP_SET_INTER_IGN: /* Set intersection, ignoring case. */ + case RE_OP_SET_INTER_IGN_REV: /* Set intersection, ignoring case, backwards. */ + case RE_OP_SET_INTER_REV: /* Set intersection, backwards. */ + case RE_OP_SET_SYM_DIFF: /* Set symmetric difference. */ + case RE_OP_SET_SYM_DIFF_IGN: /* Set symmetric difference, ignoring case. */ + case RE_OP_SET_SYM_DIFF_IGN_REV: /* Set symmetric difference, ignoring case, backwards. */ + case RE_OP_SET_SYM_DIFF_REV: /* Set symmetric difference, backwards. 
*/ + case RE_OP_SET_UNION: /* Set union. */ + case RE_OP_SET_UNION_IGN: /* Set union, ignoring case. */ + case RE_OP_SET_UNION_IGN_REV: /* Set union, ignoring case, backwards. */ + case RE_OP_SET_UNION_REV: /* Set union, backwards. */ TRACE(("%s\n", re_op_text[bt_data->op])) - if (!retry_fuzzy_match_one(safe_state, search, &text_pos, &node)) - return RE_ERROR_BACKTRACKING; + status = retry_fuzzy_match_item(safe_state, search, + &state->text_pos, &node, TRUE); + if (status < 0) + return RE_ERROR_PARTIAL; + if (node) goto advance; break; @@ -11477,8 +13856,8 @@ backtrack: discard_backtrack(state); break; } - case RE_OP_BOUNDARY: /* At a word boundary. */ - case RE_OP_DEFAULT_BOUNDARY: /* At a default word boundary. */ + case RE_OP_BOUNDARY: /* On a word boundary. */ + case RE_OP_DEFAULT_BOUNDARY: /* On a default word boundary. */ case RE_OP_DEFAULT_END_OF_WORD: /* At a default end of a word. */ case RE_OP_DEFAULT_START_OF_WORD: /* At a default start of a word. */ case RE_OP_END_OF_LINE: /* At the end of a line. */ @@ -11487,7 +13866,7 @@ backtrack: case RE_OP_END_OF_STRING_LINE: /* At end of string or final newline. */ case RE_OP_END_OF_STRING_LINE_U: /* At end of string or final newline. */ case RE_OP_END_OF_WORD: /* At end of a word. */ - case RE_OP_GRAPHEME_BOUNDARY: /* At a grapheme boundary. */ + case RE_OP_GRAPHEME_BOUNDARY: /* On a grapheme boundary. */ case RE_OP_SEARCH_ANCHOR: /* At the start of the search. */ case RE_OP_START_OF_LINE: /* At the start of a line. */ case RE_OP_START_OF_LINE_U: /* At the start of a line. */ @@ -11495,8 +13874,11 @@ backtrack: case RE_OP_START_OF_WORD: /* At start of a word. 
*/ TRACE(("%s\n", re_op_text[bt_data->op])) - if (!retry_fuzzy_match_zero(safe_state, search, &text_pos, &node)) - return RE_ERROR_BACKTRACKING; + status = retry_fuzzy_match_item(safe_state, search, + &state->text_pos, &node, FALSE); + if (status < 0) + return RE_ERROR_PARTIAL; + if (node) goto advance; break; @@ -11504,7 +13886,7 @@ backtrack: TRACE(("%s\n", re_op_text[bt_data->op])) node = bt_data->branch.position.node; - text_pos = bt_data->branch.position.text_pos; + state->text_pos = bt_data->branch.position.text_pos; discard_backtrack(state); goto advance; case RE_OP_CALL_REF: /* A group call ref. */ @@ -11516,9 +13898,17 @@ backtrack: case RE_OP_END_FUZZY: /* End of fuzzy matching. */ TRACE(("%s\n", re_op_text[bt_data->op])) + state->total_fuzzy_counts[RE_FUZZY_SUB] -= + state->fuzzy_info.counts[RE_FUZZY_SUB]; + state->total_fuzzy_counts[RE_FUZZY_INS] -= + state->fuzzy_info.counts[RE_FUZZY_INS]; + state->total_fuzzy_counts[RE_FUZZY_DEL] -= + state->fuzzy_info.counts[RE_FUZZY_DEL]; + /* We need to retry the fuzzy match. */ - if (!retry_fuzzy_insert(safe_state, &text_pos, &node)) - return RE_ERROR_BACKTRACKING; + status = retry_fuzzy_insert(safe_state, &state->text_pos, &node); + if (status < 0) + return RE_ERROR_PARTIAL; /* If there were too few errors, in the fuzzy section, try again. */ @@ -11528,6 +13918,13 @@ backtrack: } if (node) { + state->total_fuzzy_counts[RE_FUZZY_SUB] += + state->fuzzy_info.counts[RE_FUZZY_SUB]; + state->total_fuzzy_counts[RE_FUZZY_INS] += + state->fuzzy_info.counts[RE_FUZZY_INS]; + state->total_fuzzy_counts[RE_FUZZY_DEL] += + state->fuzzy_info.counts[RE_FUZZY_DEL]; + node = node->next_1.node; goto advance; } @@ -11566,9 +13963,9 @@ backtrack: return RE_ERROR_FAILURE; /* Can we advance? */ - text_pos = state->match_pos; - end_pos = state->reverse ? slice_start : slice_end; - if (text_pos == end_pos) + state->text_pos = state->match_pos; + end_pos = state->reverse ? 
state->slice_start : state->slice_end; + if (state->text_pos == end_pos) return RE_ERROR_FAILURE; /* Skip over any repeated leading characters. */ @@ -11577,22 +13974,23 @@ backtrack: case RE_OP_LAZY_REPEAT_ONE: { size_t count; + BOOL is_partial; /* How many characters did the repeat actually match? */ count = count_one(state, start_node->nonstring.next_2.node, - text_pos, (size_t)start_node->values[2]); + state->text_pos, start_node->values[2], &is_partial); /* If it's fewer than the maximum then skip over those * characters. */ if (count < start_node->values[2]) - text_pos += (Py_ssize_t)count * pattern_step; + state->text_pos += (Py_ssize_t)count * pattern_step; break; } } /* Advance and try to match again. */ - state->text_pos = text_pos + pattern_step; + state->text_pos += pattern_step; goto start_match; } @@ -11651,12 +14049,12 @@ backtrack: /* Unmatch one character at a time until the tail could match or we * have reached the minimum. */ - text_pos = rp_data->start; + state->text_pos = rp_data->start; count = rp_data->count; step = node->step; - pos = text_pos + (Py_ssize_t)count * step; - limit = text_pos + (Py_ssize_t)node->values[1] * step; + pos = state->text_pos + (Py_ssize_t)count * step; + limit = state->text_pos + (Py_ssize_t)node->values[1] * step; /* The tail failed to match at this position. 
*/ if (!guard_repeat(safe_state, bt_data->repeat.index, pos, @@ -11709,7 +14107,7 @@ backtrack: for (;;) { --pos; - if ((char_at(text, pos) == ch) == m && + if (same_char(char_at(state->text, pos), ch) == m && !is_repeat_guarded(safe_state, index, pos, RE_STATUS_TAIL)) { match = TRUE; @@ -11731,8 +14129,9 @@ backtrack: for (;;) { --pos; - if (same_char_ign(encoding, char_at(text, pos), ch) == - m && !is_repeat_guarded(safe_state, index, pos, + if (same_char_ign(encoding, locale_info, + char_at(state->text, pos), ch) == m && + !is_repeat_guarded(safe_state, index, pos, RE_STATUS_TAIL)) { match = TRUE; break; @@ -11753,8 +14152,9 @@ backtrack: for (;;) { ++pos; - if (same_char_ign(encoding, char_at(text, pos - 1), ch) - == m && !is_repeat_guarded(safe_state, index, pos, + if (same_char_ign(encoding, locale_info, + char_at(state->text, pos - 1), ch) == m && + !is_repeat_guarded(safe_state, index, pos, RE_STATUS_TAIL)) { match = TRUE; break; @@ -11775,8 +14175,8 @@ backtrack: for (;;) { ++pos; - if ((char_at(text, pos - 1) == ch) == m && - !is_repeat_guarded(safe_state, index, pos, + if (same_char(char_at(state->text, pos - 1), ch) == m + && !is_repeat_guarded(safe_state, index, pos, RE_STATUS_TAIL)) { match = TRUE; break; @@ -11792,21 +14192,25 @@ backtrack: { Py_ssize_t length; - length = test->value_count; + length = (Py_ssize_t)test->value_count; /* The tail is a string. We don't want to go off the end of * the slice. 
*/ - pos = RE_MIN(pos, slice_end - length); + pos = min_ssize_t(pos - 1, state->slice_end - length); for (;;) { Py_ssize_t found; + BOOL is_partial; if (pos < limit) break; found = string_search_rev(safe_state, test, pos + - length, limit); + length, limit, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + if (found < 0) break; @@ -11824,8 +14228,9 @@ backtrack: } case RE_OP_STRING_FLD: { - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - size_t folded_length; + int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 + ch, Py_UCS4* folded); + Py_ssize_t folded_length; size_t i; Py_UCS4 folded[RE_MAX_FOLDED]; @@ -11833,23 +14238,28 @@ backtrack: folded_length = 0; for (i = 0; i < test->value_count; i++) - folded_length += full_case_fold(test->values[i], - folded); + folded_length += full_case_fold(locale_info, + test->values[i], folded); /* The tail is a string. We don't want to go off the end of * the slice. */ - pos = RE_MIN(pos, slice_end - (Py_ssize_t)folded_length); + pos = min_ssize_t(pos - 1, state->slice_end - + folded_length); for (;;) { Py_ssize_t found; Py_ssize_t new_pos; + BOOL is_partial; if (pos < limit) break; found = string_search_fld_rev(safe_state, test, pos + - folded_length, limit, &new_pos); + folded_length, limit, &new_pos, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + if (found < 0) break; @@ -11867,8 +14277,9 @@ backtrack: } case RE_OP_STRING_FLD_REV: { - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - size_t folded_length; + int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 + ch, Py_UCS4* folded); + Py_ssize_t folded_length; size_t i; Py_UCS4 folded[RE_MAX_FOLDED]; @@ -11876,23 +14287,28 @@ backtrack: folded_length = 0; for (i = 0; i < test->value_count; i++) - folded_length += full_case_fold(test->values[i], - folded); + folded_length += full_case_fold(locale_info, + test->values[i], folded); /* The tail is a string. We don't want to go off the end of * the slice. 
*/ - pos = RE_MAX(pos, slice_start + (Py_ssize_t)folded_length); + pos = max_ssize_t(pos + 1, state->slice_start + + folded_length); for (;;) { Py_ssize_t found; Py_ssize_t new_pos; + BOOL is_partial; if (pos > limit) break; found = string_search_fld(safe_state, test, pos - - folded_length, limit, &new_pos); + folded_length, limit, &new_pos, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + if (found < 0) break; @@ -11912,21 +14328,25 @@ backtrack: { Py_ssize_t length; - length = test->value_count; + length = (Py_ssize_t)test->value_count; /* The tail is a string. We don't want to go off the end of * the slice. */ - pos = RE_MIN(pos, slice_end - length); + pos = min_ssize_t(pos - 1, state->slice_end - length); for (;;) { Py_ssize_t found; + BOOL is_partial; if (pos < limit) break; found = string_search_ign_rev(safe_state, test, pos + - length, limit); + length, limit, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + if (found < 0) break; @@ -11946,21 +14366,25 @@ backtrack: { Py_ssize_t length; - length = test->value_count; + length = (Py_ssize_t)test->value_count; /* The tail is a string. We don't want to go off the end of * the slice. */ - pos = RE_MAX(pos, slice_start + length); + pos = max_ssize_t(pos + 1, state->slice_start + length); for (;;) { Py_ssize_t found; + BOOL is_partial; if (pos > limit) break; found = string_search_ign(safe_state, test, pos - - length, limit); + length, limit, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + if (found < 0) break; @@ -11980,21 +14404,25 @@ backtrack: { Py_ssize_t length; - length = test->value_count; + length = (Py_ssize_t)test->value_count; /* The tail is a string. We don't want to go off the end of * the slice. 
*/ - pos = RE_MAX(pos, slice_start + length); + pos = max_ssize_t(pos + 1, state->slice_start + length); for (;;) { Py_ssize_t found; + BOOL is_partial; if (pos > limit) break; found = string_search(safe_state, test, pos - length, - limit); + limit, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + if (found < 0) break; @@ -12016,9 +14444,14 @@ backtrack: pos -= step; - if (try_match(state, &node->next_1, pos, - &next_position) && !is_repeat_guarded(safe_state, - index, pos, RE_STATUS_TAIL)) { + status = try_match(state, &node->next_1, pos, + &next_position); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS && + !is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { match = TRUE; break; } @@ -12031,7 +14464,7 @@ backtrack: } if (match) { - count = abs_ssize_t(pos - text_pos); + count = (size_t)abs_ssize_t(pos - state->text_pos); /* The tail could match. */ if (count > node->values[1]) @@ -12053,7 +14486,7 @@ backtrack: } node = node->next_1.node; - text_pos = pos; + state->text_pos = pos; goto advance; } else { /* We've backtracked the repeat as far as we can. */ @@ -12096,7 +14529,7 @@ backtrack: size_t count; Py_ssize_t step; Py_ssize_t pos; - size_t available; + Py_ssize_t available; size_t max_count; Py_ssize_t limit; RE_Node* repeated; @@ -12113,15 +14546,15 @@ backtrack: /* Match one character at a time until the tail could match or we * have reached the maximum. */ - text_pos = rp_data->start; + state->text_pos = rp_data->start; count = rp_data->count; step = node->step; - pos = text_pos + (Py_ssize_t)count * step; - available = step > 0 ? slice_end - text_pos : text_pos - - slice_start; - max_count = RE_MIN(node->values[2], available); - limit = text_pos + (Py_ssize_t)max_count * step; + pos = state->text_pos + (Py_ssize_t)count * step; + available = step > 0 ? 
state->slice_end - state->text_pos : + state->text_pos - state->slice_start; + max_count = min_size_t((size_t)available, node->values[2]); + limit = state->text_pos + (Py_ssize_t)max_count * step; repeated = node->nonstring.next_2.node; @@ -12136,12 +14569,21 @@ backtrack: for (;;) { RE_Position next_position; - if (!match_one(state, repeated, pos)) + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) break; pos += step; - if (try_match(state, &node->next_1, pos, &next_position) && + status = try_match(state, &node->next_1, pos, + &next_position); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS && !is_repeat_guarded(safe_state, index, pos, RE_STATUS_TAIL)) { match = TRUE; @@ -12166,18 +14608,26 @@ backtrack: /* The tail is a character. We don't want to go off the end * of the slice. */ - limit = RE_MIN(limit, slice_end - 1); + limit = min_ssize_t(limit, state->slice_end - 1); for (;;) { + if (pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + if (pos >= limit) break; - if (!match_one(state, repeated, pos)) + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) break; ++pos; - if ((char_at(text, pos) == ch) == m && + if (same_char(char_at(state->text, pos), ch) == m && !is_repeat_guarded(safe_state, index, pos, RE_STATUS_TAIL)) { match = TRUE; @@ -12195,19 +14645,28 @@ backtrack: /* The tail is a character. We don't want to go off the end * of the slice. 
*/ - limit = RE_MIN(limit, slice_end - 1); + limit = min_ssize_t(limit, state->slice_end - 1); for (;;) { + if (pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + if (pos >= limit) break; - if (!match_one(state, repeated, pos)) + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) break; ++pos; - if (same_char_ign(encoding, char_at(text, pos), ch) == - m && !is_repeat_guarded(safe_state, index, pos, + if (same_char_ign(encoding, locale_info, + char_at(state->text, pos), ch) == m && + !is_repeat_guarded(safe_state, index, pos, RE_STATUS_TAIL)) { match = TRUE; break; @@ -12224,19 +14683,27 @@ backtrack: /* The tail is a character. We don't want to go off the end * of the slice. */ - limit = RE_MAX(limit, slice_start + 1); + limit = max_ssize_t(limit, state->slice_start + 1); for (;;) { + if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + if (pos <= limit) break; - if (!match_one(state, repeated, pos)) + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) break; --pos; - if (same_char_ign(encoding, char_at(text, pos - 1), ch) - == m && !is_repeat_guarded(safe_state, index, pos, + if (same_char_ign(encoding, locale_info, + char_at(state->text, pos - 1), ch) == m && + !is_repeat_guarded(safe_state, index, pos, RE_STATUS_TAIL)) { match = TRUE; break; @@ -12253,19 +14720,26 @@ backtrack: /* The tail is a character. We don't want to go off the end * of the slice. 
*/ - limit = RE_MAX(limit, slice_start + 1); + limit = max_ssize_t(limit, state->slice_start + 1); for (;;) { + if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + if (pos <= limit) break; - if (!match_one(state, repeated, pos)) + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) break; --pos; - if ((char_at(text, pos - 1) == ch) == m && - !is_repeat_guarded(safe_state, index, pos, + if (same_char(char_at(state->text, pos - 1), ch) == m + && !is_repeat_guarded(safe_state, index, pos, RE_STATUS_TAIL)) { match = TRUE; break; @@ -12277,22 +14751,30 @@ backtrack: { Py_ssize_t length; - length = test->value_count; + length = (Py_ssize_t)test->value_count; /* The tail is a string. We don't want to go off the end of * the slice. */ - limit = RE_MIN(limit, slice_end - length); + limit = min_ssize_t(limit, state->slice_end - length); for (;;) { Py_ssize_t found; + BOOL is_partial; + + if (pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; if (pos >= limit) break; /* Look for the tail string. */ found = string_search(safe_state, test, pos + 1, limit - + length); + + length, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + if (found < 0) break; @@ -12301,9 +14783,16 @@ backtrack: pos = found; else { /* Check that what precedes the tail will match. */ - while (pos != found && match_one(state, repeated, - pos)) + while (pos != found) { + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) + break; + ++pos; + } if (pos != found) /* Something preceding the tail didn't match. @@ -12324,18 +14813,26 @@ backtrack: /* The tail is a string. We don't want to go off the end of * the slice. 
*/ - limit = RE_MIN(limit, slice_end); + limit = min_ssize_t(limit, state->slice_end); for (;;) { Py_ssize_t found; Py_ssize_t new_pos; + BOOL is_partial; + + if (pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; if (pos >= limit) break; /* Look for the tail string. */ found = string_search_fld(safe_state, test, pos + 1, - limit, &new_pos); + limit, &new_pos, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + if (found < 0) break; @@ -12344,9 +14841,16 @@ backtrack: pos = found; else { /* Check that what precedes the tail will match. */ - while (pos != found && match_one(state, repeated, - pos)) + while (pos != found) { + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) + break; + ++pos; + } if (pos != found) /* Something preceding the tail didn't match. @@ -12367,18 +14871,25 @@ backtrack: /* The tail is a string. We don't want to go off the end of * the slice. */ - limit = RE_MIN(limit, slice_start); + limit = max_ssize_t(limit, state->slice_start); for (;;) { Py_ssize_t found; Py_ssize_t new_pos; + BOOL is_partial; + + if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; if (pos <= limit) break; /* Look for the tail string. */ found = string_search_fld_rev(safe_state, test, pos - - 1, limit, &new_pos); + 1, limit, &new_pos, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + if (found < 0) break; @@ -12387,9 +14898,16 @@ backtrack: pos = found; else { /* Check that what precedes the tail will match. */ - while (pos != found && match_one(state, repeated, - pos)) + while (pos != found) { + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) + break; + --pos; + } if (pos != found) /* Something preceding the tail didn't match. 
@@ -12409,22 +14927,30 @@ backtrack: { Py_ssize_t length; - length = test->value_count; + length = (Py_ssize_t)test->value_count; /* The tail is a string. We don't want to go off the end of * the slice. */ - limit = RE_MIN(limit, slice_end - length); + limit = min_ssize_t(limit, state->slice_end - length); for (;;) { Py_ssize_t found; + BOOL is_partial; + + if (pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; if (pos >= limit) break; /* Look for the tail string. */ found = string_search_ign(safe_state, test, pos + 1, - limit + length); + limit + length, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + if (found < 0) break; @@ -12433,9 +14959,16 @@ backtrack: pos = found; else { /* Check that what precedes the tail will match. */ - while (pos != found && match_one(state, repeated, - pos)) + while (pos != found) { + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) + break; + ++pos; + } if (pos != found) /* Something preceding the tail didn't match. @@ -12455,22 +14988,29 @@ backtrack: { Py_ssize_t length; - length = test->value_count; + length = (Py_ssize_t)test->value_count; /* The tail is a string. We don't want to go off the end of * the slice. */ - limit = RE_MAX(limit, slice_start + length); + limit = max_ssize_t(limit, state->slice_start + length); for (;;) { Py_ssize_t found; + BOOL is_partial; + + if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; if (pos <= limit) break; /* Look for the tail string. */ found = string_search_ign_rev(safe_state, test, pos - - 1, limit - length); + 1, limit - length, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + if (found < 0) break; @@ -12479,9 +15019,16 @@ backtrack: pos = found; else { /* Check that what precedes the tail will match. 
*/ - while (pos != found && match_one(state, repeated, - pos)) + while (pos != found) { + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) + break; + --pos; + } if (pos != found) /* Something preceding the tail didn't match. @@ -12501,22 +15048,29 @@ backtrack: { Py_ssize_t length; - length = test->value_count; + length = (Py_ssize_t)test->value_count; /* The tail is a string. We don't want to go off the end of * the slice. */ - limit = RE_MAX(limit, slice_start + length); + limit = max_ssize_t(limit, state->slice_start + length); for (;;) { Py_ssize_t found; + BOOL is_partial; + + if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; if (pos <= limit) break; /* Look for the tail string. */ found = string_search_rev(safe_state, test, pos - 1, - limit - length); + limit - length, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + if (found < 0) break; @@ -12525,9 +15079,16 @@ backtrack: pos = found; else { /* Check that what precedes the tail will match. */ - while (pos != found && match_one(state, repeated, - pos)) + while (pos != found) { + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) + break; + --pos; + } if (pos != found) /* Something preceding the tail didn't match. 
@@ -12547,14 +15108,23 @@ backtrack: for (;;) { RE_Position next_position; - if (!match_one(state, repeated, pos)) + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) break; pos += step; - if (try_match(state, &node->next_1, pos, - &next_position) && !is_repeat_guarded(safe_state, - index, pos, RE_STATUS_TAIL)) { + status = try_match(state, &node->next_1, pos, + &next_position); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (status == RE_ERROR_SUCCESS && + !is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { match = TRUE; break; } @@ -12568,8 +15138,8 @@ backtrack: if (match) { /* The tail could match. */ - count = abs_ssize_t(pos - text_pos); - text_pos = pos; + count = (size_t)abs_ssize_t(pos - state->text_pos); + state->text_pos = pos; if (count < max_count) { /* The match is shorter than the maximum, so we might need @@ -12626,7 +15196,7 @@ backtrack: /* Advance into the body. */ node = bt_data->repeat.position.node; - text_pos = bt_data->repeat.position.text_pos; + state->text_pos = bt_data->repeat.position.text_pos; goto advance; } case RE_OP_MATCH_TAIL: @@ -12644,42 +15214,51 @@ backtrack: /* Advance into the tail. */ node = bt_data->repeat.position.node; - text_pos = bt_data->repeat.position.text_pos; + state->text_pos = bt_data->repeat.position.text_pos; discard_backtrack(state); goto advance; } case RE_OP_REF_GROUP: /* Reference to a capture group. */ case RE_OP_REF_GROUP_IGN: /* Reference to a capture group, ignoring case. */ - case RE_OP_REF_GROUP_IGN_REV: /* Reference to a capture group backwards, ignoring case. */ - case RE_OP_REF_GROUP_REV: /* Reference to a capture group backwards. */ - case RE_OP_STRING: /* A string literal. */ - case RE_OP_STRING_IGN: /* A string literal, ignoring case. */ - case RE_OP_STRING_IGN_REV: /* A string literal backwards, ignoring case. */ - case RE_OP_STRING_REV: /* A string literal backwards. 
*/ + case RE_OP_REF_GROUP_IGN_REV: /* Reference to a capture group, backwards, ignoring case. */ + case RE_OP_REF_GROUP_REV: /* Reference to a capture group, backwards. */ + case RE_OP_STRING: /* A string. */ + case RE_OP_STRING_IGN: /* A string, ignoring case. */ + case RE_OP_STRING_IGN_REV: /* A string, backwards, ignoring case. */ + case RE_OP_STRING_REV: /* A string, backwards. */ { BOOL matched; TRACE(("%s\n", re_op_text[bt_data->op])) - if (!retry_fuzzy_match_string(safe_state, search, &text_pos, &node, - &string_pos, &matched)) - return RE_ERROR_BACKTRACKING; + status = retry_fuzzy_match_string(safe_state, search, + &state->text_pos, &node, &string_pos, &matched); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (matched) goto advance; + string_pos = -1; break; } case RE_OP_REF_GROUP_FLD: /* Reference to a capture group, ignoring case. */ - case RE_OP_REF_GROUP_FLD_REV: /* Reference to a capture group backwards, ignoring case. */ + case RE_OP_REF_GROUP_FLD_REV: /* Reference to a capture group, backwards, ignoring case. */ { BOOL matched; TRACE(("%s\n", re_op_text[bt_data->op])) - if (!retry_fuzzy_match_string_fld2(safe_state, search, &text_pos, - &node, &folded_pos, &string_pos, &gfolded_pos, &matched)) - return RE_ERROR_BACKTRACKING; + status = retry_fuzzy_match_group_fld(safe_state, search, + &state->text_pos, &node, &folded_pos, &string_pos, &gfolded_pos, + &matched); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (matched) goto advance; + string_pos = -1; break; } @@ -12707,17 +15286,20 @@ backtrack: discard_backtrack(state); break; } - case RE_OP_STRING_FLD: /* A string literal, ignoring case. */ - case RE_OP_STRING_FLD_REV: /* A string literal backwards, ignoring case. */ + case RE_OP_STRING_FLD: /* A string, ignoring case. */ + case RE_OP_STRING_FLD_REV: /* A string, backwards, ignoring case. 
*/ { BOOL matched; TRACE(("%s\n", re_op_text[bt_data->op])) - if (!retry_fuzzy_match_string_fld(safe_state, search, &text_pos, - &node, &string_pos, &folded_pos, &matched)) - return RE_ERROR_BACKTRACKING; + status = retry_fuzzy_match_string_fld(safe_state, search, + &state->text_pos, &node, &string_pos, &folded_pos, &matched); + if (status < 0) + return RE_ERROR_PARTIAL; + if (matched) goto advance; + string_pos = -1; break; } @@ -12843,6 +15425,20 @@ Py_LOCAL_INLINE(void) discard_groups(RE_SafeState* safe_state, RE_GroupData* release_GIL(safe_state); } +/* Saves the fuzzy info. */ +Py_LOCAL_INLINE(void) save_fuzzy_counts(RE_State* state, size_t* fuzzy_counts) + { + Py_MEMCPY(fuzzy_counts, state->total_fuzzy_counts, + sizeof(state->total_fuzzy_counts)); +} + +/* Restores the fuzzy info. */ +Py_LOCAL_INLINE(void) restore_fuzzy_counts(RE_State* state, size_t* + fuzzy_counts) { + Py_MEMCPY(state->total_fuzzy_counts, fuzzy_counts, + sizeof(state->total_fuzzy_counts)); +} + /* Performs a match or search from the current text position. * * The state can sometimes be shared across threads. In such instances there's @@ -12851,7 +15447,7 @@ Py_LOCAL_INLINE(void) discard_groups(RE_SafeState* safe_state, RE_GroupData* Py_LOCAL_INLINE(int) do_match(RE_SafeState* safe_state, BOOL search) { RE_State* state; PatternObject* pattern; - size_t available; + Py_ssize_t available; BOOL get_best; BOOL enhance_match; BOOL must_advance; @@ -12861,6 +15457,7 @@ Py_LOCAL_INLINE(int) do_match(RE_SafeState* safe_state, BOOL search) { int status; Py_ssize_t slice_start; Py_ssize_t slice_end; + size_t best_fuzzy_counts[RE_FUZZY_COUNT]; TRACE(("<>\n")) state = safe_state->re_state; @@ -12890,7 +15487,7 @@ Py_LOCAL_INLINE(int) do_match(RE_SafeState* safe_state, BOOL search) { enhance_match = (pattern->flags & RE_FLAG_ENHANCEMATCH) != 0 && !get_best; /* The maximum permitted cost. */ - state->max_cost = pattern->is_fuzzy ? RE_UNLIMITED : 0; + state->max_cost = pattern->is_fuzzy ? 
PY_SSIZE_T_MAX : 0; best_groups = NULL; @@ -12912,18 +15509,18 @@ Py_LOCAL_INLINE(int) do_match(RE_SafeState* safe_state, BOOL search) { init_match(state); status = RE_ERROR_SUCCESS; - if (state->max_cost == 0) { - /* An exact match. */ + if (state->max_cost == 0 && state->partial_side == RE_PARTIAL_NONE) { + /* An exact match, and partial matches not permitted. */ if (available < state->min_width || (available == 0 && state->must_advance)) status = RE_ERROR_FAILURE; } if (status == RE_ERROR_SUCCESS) - status = basic_match(safe_state, state->pattern->start_node, - search, FALSE); + status = basic_match(safe_state, pattern->start_node, search, + FALSE); - /* Has an error occurred? */ + /* Has an error occurred, or is it a partial match? */ if (status < 0) break; @@ -12934,6 +15531,8 @@ Py_LOCAL_INLINE(int) do_match(RE_SafeState* safe_state, BOOL search) { if (!get_best && !enhance_match) break; + save_fuzzy_counts(state, best_fuzzy_counts); + if (!get_best && state->text_pos == state->match_pos) /* We want the first match. The match is already zero-width, so the * cost can't get any lower (because the fit can't get any better). @@ -12998,12 +15597,12 @@ Py_LOCAL_INLINE(int) do_match(RE_SafeState* safe_state, BOOL search) { state->text_pos = best_text_pos; restore_groups(safe_state, best_groups); + restore_fuzzy_counts(state, best_fuzzy_counts); } } - if (status == RE_ERROR_SUCCESS) { + if (status == RE_ERROR_SUCCESS || status == RE_ERROR_PARTIAL) { Py_ssize_t max_end_index; - PatternObject* pattern; RE_GroupInfo* group_info; size_t g; @@ -13013,8 +15612,8 @@ Py_LOCAL_INLINE(int) do_match(RE_SafeState* safe_state, BOOL search) { max_end_index = -1; /* Store the capture groups. 
*/ - pattern = state->pattern; group_info = pattern->group_info; + for (g = 0; g < pattern->public_group_count; g++) { RE_GroupSpan* span; @@ -13028,9 +15627,9 @@ Py_LOCAL_INLINE(int) do_match(RE_SafeState* safe_state, BOOL search) { if (span->start >= 0 && span->end >= 0 && group_info[g].end_index > max_end_index) { max_end_index = group_info[g].end_index; - state->lastindex = g + 1; + state->lastindex = (Py_ssize_t)g + 1; if (group_info[g].has_name) - state->lastgroup = g + 1; + state->lastgroup = (Py_ssize_t)g + 1; } } } @@ -13038,7 +15637,7 @@ Py_LOCAL_INLINE(int) do_match(RE_SafeState* safe_state, BOOL search) { /* Re-acquire the GIL. */ acquire_GIL(safe_state); - if (status < 0 && !PyErr_Occurred()) + if (status < 0 && status != RE_ERROR_PARTIAL && !PyErr_Occurred()) set_error(status, NULL); return status; @@ -13164,8 +15763,8 @@ Py_LOCAL_INLINE(void) dealloc_groups(RE_GroupData* groups, size_t group_count) /* Initialises a state object. */ Py_LOCAL_INLINE(BOOL) state_init_2(RE_State* state, PatternObject* pattern, PyObject* string, RE_StringInfo* str_info, Py_ssize_t start, Py_ssize_t end, - BOOL overlapped, int concurrent, BOOL use_lock, BOOL visible_captures, BOOL - match_all) { + BOOL overlapped, int concurrent, BOOL partial, BOOL use_lock, BOOL + visible_captures, BOOL match_all) { Py_ssize_t final_pos; int i; @@ -13283,6 +15882,7 @@ Py_LOCAL_INLINE(BOOL) state_init_2(RE_State* state, PatternObject* pattern, } state->encoding = pattern->encoding; + state->locale_info = pattern->locale_info; /* The state object contains a reference to the string and also a pointer * to its contents. @@ -13294,6 +15894,11 @@ Py_LOCAL_INLINE(BOOL) state_init_2(RE_State* state, PatternObject* pattern, state->text_length = end; state->reverse = (pattern->flags & RE_FLAG_REVERSE) != 0; + if (partial) + state->partial_side = state->reverse ? 
RE_PARTIAL_LEFT : + RE_PARTIAL_RIGHT; + else + state->partial_side = RE_PARTIAL_NONE; state->slice_start = start; state->slice_end = state->text_length; @@ -13331,7 +15936,7 @@ Py_LOCAL_INLINE(BOOL) state_init_2(RE_State* state, PatternObject* pattern, /* If the 'new' behaviour is enabled then split correctly on zero-width * matches. */ - state->zero_width = (pattern->flags & RE_FLAG_VERSION1) != 0; + state->version_0 = (pattern->flags & RE_FLAG_VERSION1) == 0; state->must_advance = FALSE; state->pattern = pattern; @@ -13413,7 +16018,8 @@ Py_LOCAL_INLINE(void) release_buffer(RE_StringInfo* str_info) { /* Initialises a state object. */ Py_LOCAL_INLINE(BOOL) state_init(RE_State* state, PatternObject* pattern, PyObject* string, Py_ssize_t start, Py_ssize_t end, BOOL overlapped, int - concurrent, BOOL use_lock, BOOL visible_captures, BOOL match_all) { + concurrent, BOOL partial, BOOL use_lock, BOOL visible_captures, BOOL + match_all) { RE_StringInfo str_info; /* Get the string to search or match. */ @@ -13424,7 +16030,8 @@ Py_LOCAL_INLINE(BOOL) state_init(RE_State* state, PatternObject* pattern, * the string is a buffer object. */ if (!state_init_2(state, pattern, string, &str_info, start, end, - overlapped, concurrent, use_lock, visible_captures, match_all)) { + overlapped, concurrent, partial, use_lock, visible_captures, match_all)) + { #if PY_VERSION_HEX >= 0x02060000 release_buffer(&str_info); @@ -13439,9 +16046,9 @@ Py_LOCAL_INLINE(BOOL) state_init(RE_State* state, PatternObject* pattern, } /* Deallocates repeat data. */ -Py_LOCAL_INLINE(void) dealloc_repeats(RE_RepeatData* repeats, Py_ssize_t +Py_LOCAL_INLINE(void) dealloc_repeats(RE_RepeatData* repeats, size_t repeat_count) { - Py_ssize_t i; + size_t i; if (!repeats) return; @@ -13455,9 +16062,9 @@ Py_LOCAL_INLINE(void) dealloc_repeats(RE_RepeatData* repeats, Py_ssize_t } /* Deallocates fuzzy guards. 
*/ -Py_LOCAL_INLINE(void) dealloc_fuzzy_guards(RE_FuzzyGuards* guards, Py_ssize_t +Py_LOCAL_INLINE(void) dealloc_fuzzy_guards(RE_FuzzyGuards* guards, size_t fuzzy_count) { - Py_ssize_t i; + size_t i; if (!guards) return; @@ -13599,18 +16206,68 @@ static void match_dealloc(PyObject* self_) { PyObject_DEL(self); } -/* Gets a slice from a string. */ +/* Restricts a value to a range. */ +Py_LOCAL_INLINE(Py_ssize_t) limited_range(Py_ssize_t value, Py_ssize_t lower, + Py_ssize_t upper) { + if (value < lower) + return lower; + + if (value > upper) + return upper; + + return value; +} + +/* Gets a slice from a Unicode string. */ +Py_LOCAL_INLINE(PyObject*) unicode_slice(PyObject* string, Py_ssize_t start, + Py_ssize_t end) { + Py_ssize_t length; + Py_UNICODE* buffer; + + length = PyUnicode_GET_SIZE(string); + start = limited_range(start, 0, length); + end = limited_range(end, 0, length); + + buffer = PyUnicode_AsUnicode(string); + + return PyUnicode_FromUnicode(buffer + start, end - start); +} + +/* Gets a slice from a bytestring. */ +Py_LOCAL_INLINE(PyObject*) bytes_slice(PyObject* string, Py_ssize_t start, + Py_ssize_t end) { + Py_ssize_t length; + char* buffer; + + length = PyString_GET_SIZE(string); + start = limited_range(start, 0, length); + end = limited_range(end, 0, length); + + buffer = PyString_AsString(string); + + return PyString_FromStringAndSize(buffer + start, end - start); +} + +/* Gets a slice from a string, returning either a Unicode string or a + * bytestring. + */ Py_LOCAL_INLINE(PyObject*) get_slice(PyObject* string, Py_ssize_t start, Py_ssize_t end) { + if (PyUnicode_Check(string)) + return unicode_slice(string, start, end); + + if (PyString_Check(string)) + return bytes_slice(string, start, end); + return PySequence_GetSlice(string, start, end); } /* Gets a MatchObject's group by integer index. 
*/ -Py_LOCAL_INLINE(PyObject*) match_get_group_by_index(MatchObject* self, - Py_ssize_t index, PyObject* def) { +static PyObject* match_get_group_by_index(MatchObject* self, Py_ssize_t index, + PyObject* def) { RE_GroupSpan* span; - if (index < 0 || index > (Py_ssize_t)self->group_count) { + if (index < 0 || (size_t)index > self->group_count) { /* Raise error if we were given a bad group number. */ set_error(RE_ERROR_NO_SUCH_GROUP, NULL); return NULL; @@ -13640,7 +16297,7 @@ static PyObject* match_get_start_by_index(MatchObject* self, Py_ssize_t index) { RE_GroupSpan* span; - if (index < 0 || index > (Py_ssize_t)self->group_count) { + if (index < 0 || (size_t)index > self->group_count) { /* Raise error if we were given a bad group number. */ set_error(RE_ERROR_NO_SUCH_GROUP, NULL); return NULL; @@ -13664,7 +16321,7 @@ static PyObject* match_get_starts_by_index(MatchObject* self, Py_ssize_t index) PyObject* item; size_t i; - if (index < 0 || index > (Py_ssize_t)self->group_count) { + if (index < 0 || (size_t)index > self->group_count) { /* Raise error if we were given a bad group number. */ set_error(RE_ERROR_NO_SUCH_GROUP, NULL); return NULL; @@ -13678,6 +16335,8 @@ static PyObject* match_get_starts_by_index(MatchObject* self, Py_ssize_t index) item = Py_BuildValue("n", self->match_start); if (!item) goto error; + + /* PyList_SET_ITEM borrows the reference. */ PyList_SET_ITEM(result, 0, item); return result; @@ -13688,7 +16347,7 @@ static PyObject* match_get_starts_by_index(MatchObject* self, Py_ssize_t index) */ group = &self->groups[index - 1]; - result = PyList_New(group->capture_count); + result = PyList_New((Py_ssize_t)group->capture_count); if (!result) return NULL; @@ -13696,6 +16355,8 @@ static PyObject* match_get_starts_by_index(MatchObject* self, Py_ssize_t index) item = Py_BuildValue("n", group->captures[i].start); if (!item) goto error; + + /* PyList_SET_ITEM borrows the reference. 
*/ PyList_SET_ITEM(result, i, item); } @@ -13710,7 +16371,7 @@ error: static PyObject* match_get_end_by_index(MatchObject* self, Py_ssize_t index) { RE_GroupSpan* span; - if (index < 0 || index > (Py_ssize_t)self->group_count) { + if (index < 0 || (size_t)index > self->group_count) { /* Raise error if we were given a bad group number. */ set_error(RE_ERROR_NO_SUCH_GROUP, NULL); return NULL; @@ -13733,7 +16394,7 @@ static PyObject* match_get_ends_by_index(MatchObject* self, Py_ssize_t index) { PyObject* item; size_t i; - if (index < 0 || index > (Py_ssize_t)self->group_count) { + if (index < 0 || (size_t)index > self->group_count) { /* Raise error if we were given a bad group number. */ set_error(RE_ERROR_NO_SUCH_GROUP, NULL); return NULL; @@ -13747,6 +16408,8 @@ static PyObject* match_get_ends_by_index(MatchObject* self, Py_ssize_t index) { item = Py_BuildValue("n", self->match_end); if (!item) goto error; + + /* PyList_SET_ITEM borrows the reference. */ PyList_SET_ITEM(result, 0, item); return result; @@ -13757,7 +16420,7 @@ static PyObject* match_get_ends_by_index(MatchObject* self, Py_ssize_t index) { */ group = &self->groups[index - 1]; - result = PyList_New(group->capture_count); + result = PyList_New((Py_ssize_t)group->capture_count); if (!result) return NULL; @@ -13765,6 +16428,8 @@ static PyObject* match_get_ends_by_index(MatchObject* self, Py_ssize_t index) { item = Py_BuildValue("n", group->captures[i].end); if (!item) goto error; + + /* PyList_SET_ITEM borrows the reference. */ PyList_SET_ITEM(result, i, item); } @@ -13779,7 +16444,7 @@ error: static PyObject* match_get_span_by_index(MatchObject* self, Py_ssize_t index) { RE_GroupSpan* span; - if (index < 0 || index > (Py_ssize_t)self->group_count) { + if (index < 0 || (size_t)index > self->group_count) { /* Raise error if we were given a bad group number. 
*/ set_error(RE_ERROR_NO_SUCH_GROUP, NULL); return NULL; @@ -13803,7 +16468,7 @@ static PyObject* match_get_spans_by_index(MatchObject* self, Py_ssize_t index) PyObject* item; size_t i; - if (index < 0 || index > (Py_ssize_t)self->group_count) { + if (index < 0 || (size_t)index > self->group_count) { /* Raise error if we were given a bad group number. */ set_error(RE_ERROR_NO_SUCH_GROUP, NULL); return NULL; @@ -13817,6 +16482,8 @@ static PyObject* match_get_spans_by_index(MatchObject* self, Py_ssize_t index) item = Py_BuildValue("nn", self->match_start, self->match_end); if (!item) goto error; + + /* PyList_SET_ITEM borrows the reference. */ PyList_SET_ITEM(result, 0, item); return result; @@ -13827,7 +16494,7 @@ static PyObject* match_get_spans_by_index(MatchObject* self, Py_ssize_t index) */ group = &self->groups[index - 1]; - result = PyList_New(group->capture_count); + result = PyList_New((Py_ssize_t)group->capture_count); if (!result) return NULL; @@ -13836,6 +16503,8 @@ static PyObject* match_get_spans_by_index(MatchObject* self, Py_ssize_t index) group->captures[i].end); if (!item) goto error; + + /* PyList_SET_ITEM borrows the reference. */ PyList_SET_ITEM(result, i, item); } @@ -13854,7 +16523,7 @@ static PyObject* match_get_captures_by_index(MatchObject* self, Py_ssize_t PyObject* slice; size_t i; - if (index < 0 || index > (Py_ssize_t)self->group_count) { + if (index < 0 || (size_t)index > self->group_count) { /* Raise error if we were given a bad group number. */ set_error(RE_ERROR_NO_SUCH_GROUP, NULL); return NULL; @@ -13869,6 +16538,8 @@ static PyObject* match_get_captures_by_index(MatchObject* self, Py_ssize_t self->substring_offset, self->match_end - self->substring_offset); if (!slice) goto error; + + /* PyList_SET_ITEM borrows the reference. 
*/ PyList_SET_ITEM(result, 0, slice); return result; @@ -13879,7 +16550,7 @@ static PyObject* match_get_captures_by_index(MatchObject* self, Py_ssize_t */ group = &self->groups[index - 1]; - result = PyList_New(group->capture_count); + result = PyList_New((Py_ssize_t)group->capture_count); if (!result) return NULL; @@ -13889,6 +16560,8 @@ static PyObject* match_get_captures_by_index(MatchObject* self, Py_ssize_t self->substring_offset); if (!slice) goto error; + + /* PyList_SET_ITEM borrows the reference. */ PyList_SET_ITEM(result, i, slice); } @@ -13936,7 +16609,7 @@ Py_LOCAL_INLINE(Py_ssize_t) match_get_group_index(MatchObject* self, PyObject* min_group = 1; } - if (min_group <= group && group <= (Py_ssize_t)self->group_count) + if (min_group <= group && (size_t)group <= self->group_count) return group; return -1; @@ -13975,7 +16648,7 @@ Py_LOCAL_INLINE(PyObject*) match_get_group(MatchObject* self, PyObject* index, } /* Gets info from a MatchObject by object index. */ -static PyObject* get_by_arg(MatchObject* self, PyObject* index, +Py_LOCAL_INLINE(PyObject*) get_by_arg(MatchObject* self, PyObject* index, RE_GetByIndexFunc get_by_index) { /* Check that the index is an integer or a string. */ if (PyInt_Check(index) || PyLong_Check(index) || PyUnicode_Check(index) || @@ -14000,7 +16673,7 @@ static PyObject* match_group(MatchObject* self, PyObject* args) { result = match_get_group_by_index(self, 0, Py_None); break; case 1: - /* group(x) */ + /* group(x). PyTuple_GET_ITEM borrows the reference. */ result = match_get_group(self, PyTuple_GET_ITEM(args, 0), Py_None, FALSE); break; @@ -14010,22 +16683,29 @@ static PyObject* match_group(MatchObject* self, PyObject* args) { result = PyTuple_New(size); if (!result) return NULL; + for (i = 0; i < size; i++) { - PyObject* item = match_get_group(self, PyTuple_GET_ITEM(args, i), - Py_None, FALSE); + PyObject* item; + + /* PyTuple_GET_ITEM borrows the reference. 
*/ + item = match_get_group(self, PyTuple_GET_ITEM(args, i), Py_None, + FALSE); if (!item) { Py_DECREF(result); return NULL; } + + /* PyTuple_SET_ITEM borrows the reference. */ PyTuple_SET_ITEM(result, i, item); } break; } + return result; } /* Generic method for getting info from a MatchObject. */ -static PyObject* get_from_match(MatchObject* self, PyObject* args, +Py_LOCAL_INLINE(PyObject*) get_from_match(MatchObject* self, PyObject* args, RE_GetByIndexFunc get_by_index) { Py_ssize_t size; PyObject* result; @@ -14039,7 +16719,7 @@ static PyObject* get_from_match(MatchObject* self, PyObject* args, result = get_by_index(self, 0); break; case 1: - /* get(x) */ + /* get(x). PyTuple_GET_ITEM borrows the reference. */ result = get_by_arg(self, PyTuple_GET_ITEM(args, 0), get_by_index); break; default: @@ -14048,17 +16728,23 @@ static PyObject* get_from_match(MatchObject* self, PyObject* args, result = PyTuple_New(size); if (!result) return NULL; + for (i = 0; i < size; i++) { - PyObject* item = get_by_arg(self, PyTuple_GET_ITEM(args, i), - get_by_index); + PyObject* item; + + /* PyTuple_GET_ITEM borrows the reference. */ + item = get_by_arg(self, PyTuple_GET_ITEM(args, i), get_by_index); if (!item) { Py_DECREF(result); return NULL; } + + /* PyTuple_SET_ITEM borrows the reference. */ PyTuple_SET_ITEM(result, i, item); } break; } + return result; } @@ -14108,22 +16794,27 @@ static PyObject* match_groups(MatchObject* self, PyObject* args, PyObject* if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O:groups", kwlist, &def)) return NULL; - result = PyTuple_New(self->group_count); + result = PyTuple_New((Py_ssize_t)self->group_count); if (!result) return NULL; /* Group 0 is the entire matched portion of the string. 
*/ for (g = 0; g < self->group_count; g++) { PyObject* item; - item = match_get_group_by_index(self, g + 1, def); - if (!item) { - Py_DECREF(result); - return NULL; - } + + item = match_get_group_by_index(self, (Py_ssize_t)g + 1, def); + if (!item) + goto error; + + /* PyTuple_SET_ITEM borrows the reference. */ PyTuple_SET_ITEM(result, g, item); } return result; + +error: + Py_DECREF(result); + return NULL; } /* MatchObject's 'groupdict' method. */ @@ -14152,14 +16843,15 @@ static PyObject* match_groupdict(MatchObject* self, PyObject* args, PyObject* PyObject* value; int status; + /* PyList_GET_ITEM borrows a reference. */ key = PyList_GET_ITEM(keys, g); if (!key) goto failed; + value = match_get_group(self, key, def, FALSE); - if (!value) { - Py_DECREF(key); + if (!value) goto failed; - } + status = PyDict_SetItem(result, key, value); Py_DECREF(value); if (status < 0) @@ -14196,19 +16888,19 @@ static PyObject* match_capturesdict(MatchObject* self) { PyObject* captures; int status; + /* PyList_GET_ITEM borrows a reference. */ key = PyList_GET_ITEM(keys, g); if (!key) goto failed; + group = match_get_group_index(self, key, FALSE); - if (group < 0) { - Py_DECREF(key); + if (group < 0) goto failed; - } + captures = match_get_captures_by_index(self, group); - if (!captures) { - Py_DECREF(key); + if (!captures) goto failed; - } + status = PyDict_SetItem(result, key, captures); Py_DECREF(captures); if (status < 0) @@ -14265,7 +16957,7 @@ Py_LOCAL_INLINE(PyObject*) call(char* module_name, char* function_name, * The replacement item could be a string literal or a group. */ Py_LOCAL_INLINE(PyObject*) get_match_replacement(MatchObject* self, PyObject* - item, Py_ssize_t group_count) { + item, size_t group_count) { Py_ssize_t index; if (PyUnicode_Check(item) || PyString_Check(item)) { @@ -14286,7 +16978,7 @@ Py_LOCAL_INLINE(PyObject*) get_match_replacement(MatchObject* self, PyObject* /* The entire matched portion of the string. 
*/ return get_slice(self->substring, self->match_start - self->substring_offset, self->match_end - self->substring_offset); - } else if (index >= 1 && index <= group_count) { + } else if (index >= 1 && (size_t)index <= group_count) { /* A group. If it didn't match then return None instead. */ RE_GroupData* group; @@ -14307,8 +16999,17 @@ Py_LOCAL_INLINE(PyObject*) get_match_replacement(MatchObject* self, PyObject* } } -/* Adds an item to be joined. */ -Py_LOCAL_INLINE(int) add_item(JoinInfo* join_info, PyObject* item) { +/* Initialises the join list. */ +Py_LOCAL_INLINE(void) init_join_list(JoinInfo* join_info, BOOL reversed, BOOL + is_unicode) { + join_info->list = NULL; + join_info->item = NULL; + join_info->reversed = reversed; + join_info->is_unicode = is_unicode; +} + +/* Adds an item to the join list. */ +Py_LOCAL_INLINE(int) add_to_join_list(JoinInfo* join_info, PyObject* item) { PyObject* new_item; int status; @@ -14356,9 +17057,11 @@ Py_LOCAL_INLINE(int) add_item(JoinInfo* join_info, PyObject* item) { goto error; } + /* PyList_SET_ITEM borrows the reference. */ PyList_SET_ITEM(join_info->list, 0, join_info->item); join_info->item = NULL; + /* PyList_SET_ITEM borrows the reference. */ PyList_SET_ITEM(join_info->list, 1, new_item); return 0; } @@ -14370,12 +17073,16 @@ Py_LOCAL_INLINE(int) add_item(JoinInfo* join_info, PyObject* item) { error: Py_DECREF(new_item); - Py_XDECREF(join_info->list); - Py_XDECREF(join_info->item); set_error(status, NULL); return status; } +/* Clears the join list. */ +Py_LOCAL_INLINE(void) clear_join_list(JoinInfo* join_info) { + Py_XDECREF(join_info->list); + Py_XDECREF(join_info->item); +} + /* Joins together a list of strings for pattern_subx. */ Py_LOCAL_INLINE(PyObject*) join_list_info(JoinInfo* join_info) { /* If the list already exists then just do the join. */ @@ -14387,12 +17094,11 @@ Py_LOCAL_INLINE(PyObject*) join_list_info(JoinInfo* join_info) { /* The list needs to be reversed before being joined. 
*/ PyList_Reverse(join_info->list); - if (join_info->is_unicode) - { + if (join_info->is_unicode) { /* Concatenate the Unicode strings. */ joiner = PyUnicode_FromUnicode(NULL, 0); if (!joiner) { - Py_DECREF(join_info->list); + clear_join_list(join_info); return NULL; } @@ -14400,7 +17106,7 @@ Py_LOCAL_INLINE(PyObject*) join_list_info(JoinInfo* join_info) { } else { joiner = PyString_FromString(""); if (!joiner) { - Py_DECREF(join_info->list); + clear_join_list(join_info); return NULL; } @@ -14409,7 +17115,7 @@ Py_LOCAL_INLINE(PyObject*) join_list_info(JoinInfo* join_info) { } Py_DECREF(joiner); - Py_DECREF(join_info->list); + clear_join_list(join_info); return result; } @@ -14497,10 +17203,7 @@ static PyObject* match_expand(MatchObject* self, PyObject* str_template) { if (!replacement) return NULL; - join_info.list = NULL; - join_info.item = NULL; - join_info.reversed = FALSE; - join_info.is_unicode = PyUnicode_Check(self->string); + init_join_list(&join_info, FALSE, PyUnicode_Check(self->string)); /* Add each part of the template to the list. */ size = PyList_GET_SIZE(replacement); @@ -14508,6 +17211,7 @@ static PyObject* match_expand(MatchObject* self, PyObject* str_template) { PyObject* item; PyObject* str_item; + /* PyList_GET_ITEM borrows a reference. 
*/ item = PyList_GET_ITEM(replacement, i); str_item = get_match_replacement(self, item, self->group_count); if (!str_item) @@ -14519,7 +17223,7 @@ static PyObject* match_expand(MatchObject* self, PyObject* str_template) { else { int status; - status = add_item(&join_info, str_item); + status = add_to_join_list(&join_info, str_item); Py_DECREF(str_item); if (status < 0) goto error; @@ -14532,15 +17236,14 @@ static PyObject* match_expand(MatchObject* self, PyObject* str_template) { return join_list_info(&join_info); error: - Py_XDECREF(join_info.list); - Py_XDECREF(join_info.item); + clear_join_list(&join_info); Py_DECREF(replacement); return NULL; } #if PY_VERSION_HEX >= 0x02060000 /* Gets a MatchObject's group dictionary. */ -static PyObject* match_get_group_dict(MatchObject* self) { +Py_LOCAL_INLINE(PyObject*) match_get_group_dict(MatchObject* self) { PyObject* result; PyObject* keys; Py_ssize_t g; @@ -14557,14 +17260,16 @@ static PyObject* match_get_group_dict(MatchObject* self) { int status; PyObject* key; PyObject* value; + + /* PyList_GET_ITEM borrows a reference. */ key = PyList_GET_ITEM(keys, g); if (!key) goto failed; + value = match_get_group(self, key, Py_None, FALSE); - if (!value) { - Py_DECREF(key); + if (!value) goto failed; - } + status = PyDict_SetItem(result, key, value); Py_DECREF(value); if (status < 0) @@ -14593,12 +17298,14 @@ static PyObject* match_expandf(MatchObject* self, PyObject* str_template) { if (!format_func) return NULL; - args = PyTuple_New(self->group_count + 1); + args = PyTuple_New((Py_ssize_t)self->group_count + 1); if (!args) goto error; for (g = 0; g < self->group_count + 1; g++) - PyTuple_SetItem(args, g, match_get_group_by_index(self, g, Py_None)); + /* PyTuple_SetItem borrows the reference. 
*/ + PyTuple_SetItem(args, (Py_ssize_t)g, match_get_group_by_index(self, + (Py_ssize_t)g, Py_None)); kwargs = match_get_group_dict(self); if (!kwargs) @@ -14636,15 +17343,15 @@ static PyObject* match_regs(MatchObject* self) { PyObject* item; size_t g; - regs = PyTuple_New(self->group_count + 1); + regs = PyTuple_New((Py_ssize_t)self->group_count + 1); if (!regs) return NULL; item = Py_BuildValue("nn", self->match_start, self->match_end); - if (!item) { - Py_DECREF(regs); - return NULL; - } + if (!item) + goto error; + + /* PyTuple_SET_ITEM borrows the reference. */ PyTuple_SET_ITEM(regs, 0, item); for (g = 0; g < self->group_count; g++) { @@ -14652,10 +17359,10 @@ static PyObject* match_regs(MatchObject* self) { span = &self->groups[g].span; item = Py_BuildValue("nn", span->start, span->end); - if (!item) { - Py_DECREF(regs); - return NULL; - } + if (!item) + goto error; + + /* PyTuple_SET_ITEM borrows the reference. */ PyTuple_SET_ITEM(regs, g + 1, item); } @@ -14663,6 +17370,10 @@ static PyObject* match_regs(MatchObject* self) { self->regs = regs; return regs; + +error: + Py_DECREF(regs); + return NULL; } /* MatchObject's slice method. */ @@ -14673,8 +17384,9 @@ Py_LOCAL_INLINE(PyObject*) match_get_group_slice(MatchObject* self, PyObject* Py_ssize_t step; Py_ssize_t slice_length; - if (PySlice_GetIndicesEx((PySliceObject*)slice, self->group_count + 1, - &start, &end, &step, &slice_length) < 0) + if (PySlice_GetIndicesEx((PySliceObject*)slice, + (Py_ssize_t)self->group_count + 1, &start, &end, &step, &slice_length) < + 0) return NULL; if (slice_length <= 0) @@ -14690,6 +17402,7 @@ Py_LOCAL_INLINE(PyObject*) match_get_group_slice(MatchObject* self, PyObject* cur = start; for (i = 0; i < slice_length; i++) { + /* PyTuple_SetItem borrows the reference. 
*/ PyTuple_SetItem(result, i, match_get_group_by_index(self, cur, Py_None)); cur += step; @@ -14700,8 +17413,8 @@ Py_LOCAL_INLINE(PyObject*) match_get_group_slice(MatchObject* self, PyObject* } /* MatchObject's length method. */ -static Py_ssize_t match_length(MatchObject* self) { - return self->group_count + 1; +Py_LOCAL_INLINE(Py_ssize_t) match_length(MatchObject* self) { + return (Py_ssize_t)self->group_count + 1; } /* MatchObject's '__getitem__' method. */ @@ -14764,7 +17477,7 @@ static PyObject* match_detach_string(MatchObject* self, PyObject* unused) { self->substring = substring; self->substring_offset = start; - Py_XDECREF(self->string); + Py_DECREF(self->string); self->string = NULL; } } @@ -14924,8 +17637,13 @@ static PyObject* match_lastgroup(PyObject* self_) { self = (MatchObject*)self_; if (self->pattern->indexgroup && self->lastgroup >= 0) { - PyObject* index = Py_BuildValue("n", self->lastgroup); - PyObject* result = PyDict_GetItem(self->pattern->indexgroup, index); + PyObject* index; + PyObject* result; + + index = Py_BuildValue("n", self->lastgroup); + + /* PyDict_GetItem returns borrows a reference. */ + result = PyDict_GetItem(self->pattern->indexgroup, index); Py_DECREF(index); if (result) { Py_INCREF(result); @@ -14952,6 +17670,31 @@ static PyObject* match_string(PyObject* self_) { return Py_None; } } +#if PY_VERSION_HEX < 0x02060000 + +/* MatchObject's 'partial' attribute. */ +static PyObject* match_partial(PyObject* self_) { + MatchObject* self; + PyObject* result; + + self = (MatchObject*)self_; + + result = self->partial ? Py_True : Py_False; + Py_INCREF(result); + + return result; +} +#endif + +/* MatchObject's 'fuzzy_counts' attribute. 
*/ +static PyObject* match_fuzzy_counts(PyObject* self_) { + MatchObject* self; + + self = (MatchObject*)self_; + + return Py_BuildValue("nnn", self->fuzzy_counts[RE_FUZZY_SUB], + self->fuzzy_counts[RE_FUZZY_INS], self->fuzzy_counts[RE_FUZZY_DEL]); +} static PyGetSetDef match_getset[] = { {"lastindex", (getter)match_lastindex, (setter)NULL, @@ -14962,6 +17705,12 @@ static PyGetSetDef match_getset[] = { "A tuple of the spans of the capturing groups."}, {"string", (getter)match_string, (setter)NULL, "The string that was searched, or None if it has been detached."}, +#if PY_VERSION_HEX < 0x02060000 + {"partial", (getter)match_partial, (setter)NULL, + "Whether it's a partial match."}, +#endif + {"fuzzy_counts", (getter)match_fuzzy_counts, (setter)NULL, + "A tuple of the number of substitutions, insertions and deletions."}, {NULL} /* Sentinel */ }; @@ -14972,6 +17721,10 @@ static PyMemberDef match_members[] = { "The position at which the regex engine starting searching."}, {"endpos", T_PYSSIZET, offsetof(MatchObject, endpos), READONLY, "The final position beyond which the regex engine won't search."}, +#if PY_VERSION_HEX >= 0x02060000 + {"partial", T_BOOL, offsetof(MatchObject, partial), READONLY, + "Whether it's a partial match."}, +#endif {NULL} /* Sentinel */ }; @@ -14989,13 +17742,13 @@ static PyTypeObject Match_Type = { }; /* Copies the groups. */ -Py_LOCAL_INLINE(RE_GroupData*) copy_groups(RE_GroupData* groups, Py_ssize_t +Py_LOCAL_INLINE(RE_GroupData*) copy_groups(RE_GroupData* groups, size_t group_count) { - Py_ssize_t span_count; - Py_ssize_t g; + size_t span_count; + size_t g; RE_GroupData* groups_copy; RE_GroupSpan* spans_copy; - Py_ssize_t offset; + size_t offset; /* Calculate the total size of the group info. 
*/ span_count = 0; @@ -15054,11 +17807,8 @@ Py_LOCAL_INLINE(PyObject*) make_match_copy(MatchObject* self) { if (!match) return NULL; - match->string = self->string; - match->substring = self->substring; - match->substring_offset = self->substring_offset; - match->pattern = self->pattern; - match->regs = self->regs; + Py_MEMCPY(match, self, sizeof(MatchObject)); + Py_INCREF(match->string); Py_INCREF(match->substring); Py_INCREF(match->pattern); @@ -15070,19 +17820,7 @@ Py_LOCAL_INLINE(PyObject*) make_match_copy(MatchObject* self) { Py_DECREF(match); return NULL; } - } else - match->groups = NULL; - - match->group_count = self->group_count; - - match->pos = self->pos; - match->endpos = self->endpos; - - match->match_start = self->match_start; - match->match_end = self->match_end; - - match->lastindex = match->lastindex; - match->lastgroup = match->lastgroup; + } return (PyObject*)match; } @@ -15091,7 +17829,7 @@ Py_LOCAL_INLINE(PyObject*) make_match_copy(MatchObject* self) { Py_LOCAL_INLINE(PyObject*) pattern_new_match(PatternObject* pattern, RE_State* state, int status) { /* Create MatchObject (from state object). */ - if (status > 0) { + if (status > 0 || status == RE_ERROR_PARTIAL) { MatchObject* match; /* Create a MatchObject. 
*/ @@ -15104,6 +17842,13 @@ Py_LOCAL_INLINE(PyObject*) pattern_new_match(PatternObject* pattern, RE_State* match->substring_offset = 0; match->pattern = pattern; match->regs = NULL; + match->fuzzy_counts[RE_FUZZY_SUB] = + state->total_fuzzy_counts[RE_FUZZY_SUB]; + match->fuzzy_counts[RE_FUZZY_INS] = + state->total_fuzzy_counts[RE_FUZZY_INS]; + match->fuzzy_counts[RE_FUZZY_DEL] = + state->total_fuzzy_counts[RE_FUZZY_DEL]; + match->partial = status == RE_ERROR_PARTIAL; Py_INCREF(match->string); Py_INCREF(match->substring); Py_INCREF(match->pattern); @@ -15156,9 +17901,8 @@ Py_LOCAL_INLINE(PyObject*) state_get_group(RE_State* state, Py_ssize_t index, group = &state->groups[index - 1]; - if (string != Py_None && index >= 1 && index <= - (Py_ssize_t)state->pattern->public_group_count && group->capture_count > - 0) { + if (string != Py_None && index >= 1 && (size_t)index <= + state->pattern->public_group_count && group->capture_count > 0) { start = group->span.start; end = group->span.end; } else { @@ -15216,7 +17960,8 @@ Py_LOCAL_INLINE(void) release_state_lock(PyObject* owner, RE_SafeState* } /* Implements the functionality of ScanObject's search and match methods. */ -static PyObject* scanner_search_or_match(ScannerObject* self, BOOL search) { +Py_LOCAL_INLINE(PyObject*) scanner_search_or_match(ScannerObject* self, BOOL + search) { RE_State* state; RE_SafeState safe_state; PyObject* match; @@ -15232,8 +17977,8 @@ static PyObject* scanner_search_or_match(ScannerObject* self, BOOL search) { */ acquire_state_lock((PyObject*)self, &safe_state); - if (self->status == 0) { - /* No match. */ + if (self->status == RE_ERROR_FAILURE || self->status == RE_ERROR_PARTIAL) { + /* No or partial match. */ release_state_lock((PyObject*)self, &safe_state); Py_INCREF(Py_None); return Py_None; @@ -15246,27 +17991,25 @@ static PyObject* scanner_search_or_match(ScannerObject* self, BOOL search) { /* Look for another match. 
*/ self->status = do_match(&safe_state, search); - if (self->status < 0) { - /* Internal error. */ - release_state_lock((PyObject*)self, &safe_state); - return NULL; - } + if (self->status >= 0 || self->status == RE_ERROR_PARTIAL) { + /* Create the match object. */ + match = pattern_new_match(self->pattern, state, self->status); - /* Create the match object. */ - match = pattern_new_match(self->pattern, state, self->status); + if (search && state->overlapped) { + /* Advance one character. */ + Py_ssize_t step; - if (search && state->overlapped) { - /* Advance one character. */ - Py_ssize_t step; - - step = state->reverse ? -1 : 1; - state->text_pos = state->match_pos + step; - state->must_advance = FALSE; + step = state->reverse ? -1 : 1; + state->text_pos = state->match_pos + step; + state->must_advance = FALSE; + } else + /* Continue from where we left off, but don't allow 2 contiguous + * zero-width matches. + */ + state->must_advance = state->text_pos == state->match_pos; } else - /* Continue from where we left off, but don't allow 2 contiguous - * zero-width matches. - */ - state->must_advance = state->text_pos == state->match_pos; + /* Internal error. */ + match = NULL; /* Release the state lock. */ release_state_lock((PyObject*)self, &safe_state); @@ -15404,6 +18147,25 @@ Py_LOCAL_INLINE(int) decode_concurrent(PyObject* concurrent) { return value ? RE_CONC_YES : RE_CONC_NO; } +/* Decodes a 'partial' argument. */ +Py_LOCAL_INLINE(BOOL) decode_partial(PyObject* partial) { + Py_ssize_t value; + + if (partial == Py_False) + return FALSE; + + if (partial == Py_True) + return TRUE; + + value = PyLong_AsLong(partial); + if (value == -1 && PyErr_Occurred()) { + PyErr_Clear(); + return TRUE; + } + + return value != 0; +} + /* Creates a new ScannerObject. 
*/ static PyObject* pattern_scanner(PatternObject* pattern, PyObject* args, PyObject* kwargs) { @@ -15412,16 +18174,18 @@ static PyObject* pattern_scanner(PatternObject* pattern, PyObject* args, Py_ssize_t start; Py_ssize_t end; int conc; + BOOL part; PyObject* string; PyObject* pos = Py_None; PyObject* endpos = Py_None; Py_ssize_t overlapped = FALSE; PyObject* concurrent = Py_None; + PyObject* partial = Py_False; static char* kwlist[] = { "string", "pos", "endpos", "overlapped", - "concurrent", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOnO:scanner", kwlist, - &string, &pos, &endpos, &overlapped, &concurrent)) + "concurrent", "partial", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOnOO:scanner", kwlist, + &string, &pos, &endpos, &overlapped, &concurrent, &partial)) return NULL; start = as_string_index(pos, 0); @@ -15436,6 +18200,8 @@ static PyObject* pattern_scanner(PatternObject* pattern, PyObject* args, if (conc < 0) return NULL; + part = decode_partial(partial); + /* Create a scanner object. */ self = PyObject_NEW(ScannerObject, &Scanner_Type); if (!self) @@ -15446,18 +18212,18 @@ static PyObject* pattern_scanner(PatternObject* pattern, PyObject* args, /* The MatchObject, and therefore repeated captures, will be visible. */ if (!state_init(&self->state, pattern, string, start, end, overlapped != 0, - conc, TRUE, TRUE, FALSE)) { + conc, part, TRUE, TRUE, FALSE)) { PyObject_DEL(self); return NULL; } - self->status = 1; + self->status = RE_ERROR_SUCCESS; return (PyObject*) self; } /* Performs the split for the SplitterObject. */ -static PyObject* next_split_part(SplitterObject* self) { +Py_LOCAL_INLINE(PyObject*) next_split_part(SplitterObject* self) { RE_State* state; RE_SafeState safe_state; PyObject* result = NULL; /* Initialise to stop compiler warning. 
*/ @@ -15473,7 +18239,7 @@ static PyObject* next_split_part(SplitterObject* self) { */ acquire_state_lock((PyObject*)self, &safe_state); - if (self->status == 0) { + if (self->status == RE_ERROR_FAILURE || self->status == RE_ERROR_PARTIAL) { /* Finished. */ release_state_lock((PyObject*)self, &safe_state); result = Py_False; @@ -15505,23 +18271,22 @@ retry: goto error; if (self->status == RE_ERROR_SUCCESS) { - if (!state->zero_width) { - /* The current behaviour is to advance one character if the + if (state->version_0) { + /* Version 0 behaviour is to advance one character if the * split was zero-width. Unfortunately, this can give an * incorrect result. GvR wants this behaviour to be * retained so as not to break any existing software which - * might rely on it. The correct behaviour is enabled by - * setting the 'new' flag. + * might rely on it. */ - if (state->text_pos == state->match_pos) { - if (self->last_pos == end_pos) - goto no_match; + if (state->text_pos == state->match_pos) { + if (self->last_pos == end_pos) + goto no_match; - /* Advance one character. */ - state->text_pos += step; - state->must_advance = FALSE; - goto retry; - } + /* Advance one character. */ + state->text_pos += step; + state->must_advance = FALSE; + goto retry; + } } ++self->split_count; @@ -15538,31 +18303,28 @@ retry: self->last_pos = state->text_pos; - /* The correct behaviour is to reject a zero-width match just - * after a split point. The current behaviour is to advance one - * character if the match was zero-width. Unfortunately, this - * can give an incorrect result. GvR wants this behaviour to be - * retained so as not to break any existing software which - * might rely on it. The correct behaviour is enabled by - * setting the 'new' flag. + /* Version 0 behaviour is to advance one character if the match + * was zero-width. Unfortunately, this can give an incorrect + * result. 
GvR wants this behaviour to be retained so as not to + * break any existing software which might rely on it. */ - if (state->zero_width) - /* Continue from where we left off, but don't allow a - * contiguous zero-width match. - */ - state->must_advance = TRUE; - else { + if (state->version_0) { if (state->text_pos == state->match_pos) /* Advance one character. */ state->text_pos += step; state->must_advance = FALSE; - } + } else + /* Continue from where we left off, but don't allow a + * contiguous zero-width match. + */ + state->must_advance = TRUE; } } else goto no_match; - if (self->status == RE_ERROR_FAILURE) { + if (self->status == RE_ERROR_FAILURE || self->status == + RE_ERROR_PARTIAL) { no_match: /* Get segment following last match (even if empty). */ if (state->reverse) @@ -15581,7 +18343,7 @@ no_match: } ++self->index; - if (self->index > (Py_ssize_t)state->pattern->public_group_count) + if ((size_t)self->index > state->pattern->public_group_count) self->index = 0; /* Release the state lock. */ @@ -15712,8 +18474,8 @@ static PyTypeObject Splitter_Type = { }; /* Creates a new SplitterObject. */ -static PyObject* pattern_splitter(PatternObject* pattern, PyObject* args, - PyObject* kwargs) { +Py_LOCAL_INLINE(PyObject*) pattern_splitter(PatternObject* pattern, PyObject* + args, PyObject* kwargs) { /* Create split state object. */ int conc; SplitterObject* self; @@ -15747,7 +18509,7 @@ static PyObject* pattern_splitter(PatternObject* pattern, PyObject* args, /* The MatchObject, and therefore repeated captures, will not be visible. */ if (!state_init(state, pattern, string, 0, PY_SSIZE_T_MAX, FALSE, conc, - TRUE, FALSE, FALSE)) { + FALSE, TRUE, FALSE, FALSE)) { PyObject_DEL(self); return NULL; } @@ -15762,11 +18524,13 @@ static PyObject* pattern_splitter(PatternObject* pattern, PyObject* args, } /* Implements the functionality of PatternObject's search and match methods. 
*/ -static PyObject* pattern_search_or_match(PatternObject* self, PyObject* args, - PyObject* kwargs, char* args_desc, BOOL search, BOOL match_all) { +Py_LOCAL_INLINE(PyObject*) pattern_search_or_match(PatternObject* self, + PyObject* args, PyObject* kwargs, char* args_desc, BOOL search, BOOL + match_all) { Py_ssize_t start; Py_ssize_t end; int conc; + BOOL part; RE_State state; RE_SafeState safe_state; int status; @@ -15776,7 +18540,9 @@ static PyObject* pattern_search_or_match(PatternObject* self, PyObject* args, PyObject* pos = Py_None; PyObject* endpos = Py_None; PyObject* concurrent = Py_None; - static char* kwlist[] = { "string", "pos", "endpos", "concurrent", NULL }; + PyObject* partial = Py_False; + static char* kwlist[] = { "string", "pos", "endpos", "concurrent", + "partial", NULL }; /* When working with a short string, such as a line from a file, the * relative cost of PyArg_ParseTupleAndKeywords can be significant, and * it's worth not using it when there are only positional arguments. @@ -15787,7 +18553,8 @@ static PyObject* pattern_search_or_match(PatternObject* self, PyObject* args, else arg_count = -1; - if (1 <= arg_count && arg_count <= 4) { + if (1 <= arg_count && arg_count <= 5) { + /* PyTuple_GET_ITEM borrows the reference. 
*/ string = PyTuple_GET_ITEM(args, 0); if (arg_count >= 2) pos = PyTuple_GET_ITEM(args, 1); @@ -15795,8 +18562,10 @@ static PyObject* pattern_search_or_match(PatternObject* self, PyObject* args, endpos = PyTuple_GET_ITEM(args, 2); if (arg_count >= 4) concurrent = PyTuple_GET_ITEM(args, 3); + if (arg_count >= 5) + partial = PyTuple_GET_ITEM(args, 4); } else if (!PyArg_ParseTupleAndKeywords(args, kwargs, args_desc, kwlist, - &string, &pos, &endpos, &concurrent)) + &string, &pos, &endpos, &concurrent, &partial)) return NULL; start = as_string_index(pos, 0); @@ -15811,9 +18580,11 @@ static PyObject* pattern_search_or_match(PatternObject* self, PyObject* args, if (conc < 0) return NULL; + part = decode_partial(partial); + /* The MatchObject, and therefore repeated captures, will be visible. */ - if (!state_init(&state, self, string, start, end, FALSE, conc, FALSE, TRUE, - match_all)) + if (!state_init(&state, self, string, start, end, FALSE, conc, part, FALSE, + TRUE, match_all)) return NULL; /* Initialise the "safe state" structure. */ @@ -15821,13 +18592,12 @@ static PyObject* pattern_search_or_match(PatternObject* self, PyObject* args, safe_state.thread_state = NULL; status = do_match(&safe_state, search); - if (status < 0) { - state_fini(&state); - return NULL; - } - /* Create the match object. */ - match = pattern_new_match(self, &state, status); + if (status >= 0 || status == RE_ERROR_PARTIAL) + /* Create the match object. */ + match = pattern_new_match(self, &state, status); + else + match = NULL; state_fini(&state); @@ -15837,21 +18607,21 @@ static PyObject* pattern_search_or_match(PatternObject* self, PyObject* args, /* PatternObject's 'match' method. */ static PyObject* pattern_match(PatternObject* self, PyObject* args, PyObject* kwargs) { - return pattern_search_or_match(self, args, kwargs, "O|OOO:match", FALSE, + return pattern_search_or_match(self, args, kwargs, "O|OOOO:match", FALSE, FALSE); } /* PatternObject's 'fullmatch' method. 
*/ static PyObject* pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kwargs) { - return pattern_search_or_match(self, args, kwargs, "O|OOO:fullmatch", + return pattern_search_or_match(self, args, kwargs, "O|OOOO:fullmatch", FALSE, TRUE); } /* PatternObject's 'search' method. */ static PyObject* pattern_search(PatternObject* self, PyObject* args, PyObject* kwargs) { - return pattern_search_or_match(self, args, kwargs, "O|OOO:search", TRUE, + return pattern_search_or_match(self, args, kwargs, "O|OOOO:search", TRUE, FALSE); } @@ -15897,7 +18667,7 @@ Py_LOCAL_INLINE(BOOL) get_limits(PyObject* pos, PyObject* endpos, Py_ssize_t * It can return None to represent an empty string. */ Py_LOCAL_INLINE(PyObject*) get_sub_replacement(PyObject* item, PyObject* - string, RE_State* state, Py_ssize_t group_count) { + string, RE_State* state, size_t group_count) { Py_ssize_t index; if (PyUnicode_CheckExact(item) || PyString_CheckExact(item)) { @@ -15926,7 +18696,7 @@ Py_LOCAL_INLINE(PyObject*) get_sub_replacement(PyObject* item, PyObject* return get_slice(string, state->text_pos, state->match_pos); else return get_slice(string, state->match_pos, state->text_pos); - } else if (1 <= index && index <= group_count) { + } else if (1 <= index && (size_t)index <= group_count) { /* A group. */ RE_GroupData* group; @@ -15968,6 +18738,7 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject* Py_ssize_t last_pos; PyObject* item; Py_ssize_t end_pos; + Py_ssize_t step; /* Get the string. */ if (!get_string(string, &str_info)) @@ -15985,7 +18756,7 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject* /* If the pattern is too long for the string, then take a shortcut, unless * it's a fuzzy pattern. 
*/ - if (!self->is_fuzzy && (Py_ssize_t)self->min_width > end - start) { + if (!self->is_fuzzy && self->min_width > end - start) { PyObject* result; Py_INCREF(string); @@ -16074,7 +18845,7 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject* * if the replacement is callable. */ if (!state_init_2(&state, self, string, &str_info, start, end, FALSE, - concurrent, FALSE, is_callable, FALSE)) { + concurrent, FALSE, FALSE, is_callable, FALSE)) { #if PY_VERSION_HEX >= 0x02060000 release_buffer(&str_info); @@ -16087,13 +18858,11 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject* safe_state.re_state = &state; safe_state.thread_state = NULL; - join_info.item = NULL; - join_info.list = NULL; - join_info.reversed = state.reverse; - join_info.is_unicode = PyUnicode_Check(string); + init_join_list(&join_info, state.reverse, PyUnicode_Check(string)); sub_count = 0; last_pos = state.reverse ? state.text_length : 0; + step = state.reverse ? -1 : 1; while (sub_count < maxsub) { int status; @@ -16114,7 +18883,7 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject* goto error; /* Add to the list. */ - status = add_item(&join_info, item); + status = add_to_join_list(&join_info, item); Py_DECREF(item); if (status < 0) goto error; @@ -16123,7 +18892,7 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject* /* Add this match. */ if (is_literal) { /* The replacement is a literal string. */ - status = add_item(&join_info, replacement); + status = add_to_join_list(&join_info, replacement); if (status < 0) goto error; #if PY_VERSION_HEX >= 0x02060000 @@ -16142,15 +18911,17 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject* goto error; /* The args are a tuple of the capture group matches. 
*/ - args = PyTuple_New(state.pattern->public_group_count + 1); + args = PyTuple_New((Py_ssize_t)state.pattern->public_group_count + + 1); if (!args) { Py_DECREF(match); goto error; } for (g = 0; g < state.pattern->public_group_count + 1; g++) - PyTuple_SetItem(args, g, match_get_group_by_index(match, g, - Py_None)); + /* PyTuple_SetItem borrows the reference. */ + PyTuple_SetItem(args, (Py_ssize_t)g, + match_get_group_by_index(match, (Py_ssize_t)g, Py_None)); /* The kwargs are a dict of the named capture group matches. */ kwargs = match_get_group_dict(match); @@ -16169,7 +18940,7 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject* goto error; /* Add the result to the list. */ - status = add_item(&join_info, item); + status = add_to_join_list(&join_info, item); Py_DECREF(item); if (status < 0) goto error; @@ -16185,6 +18956,7 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject* PyObject* item; PyObject* str_item; + /* PyList_GET_ITEM borrows a reference. */ item = PyList_GET_ITEM(replacement, i); str_item = get_sub_replacement(item, string, &state, self->public_group_count); @@ -16196,7 +18968,7 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject* /* None for "". */ Py_DECREF(str_item); else { - status = add_item(&join_info, str_item); + status = add_to_join_list(&join_info, str_item); Py_DECREF(str_item); if (status < 0) goto error; @@ -16229,7 +19001,7 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject* goto error; /* Add the result to the list. */ - status = add_item(&join_info, item); + status = add_to_join_list(&join_info, item); Py_DECREF(item); if (status < 0) goto error; @@ -16237,11 +19009,20 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject* ++sub_count; - /* Continue from where we left off, but don't allow 2 contiguous - * zero-width matches. 
- */ - state.must_advance = state.match_pos == state.text_pos; last_pos = state.text_pos; + + if (state.version_0) { + /* Always advance after a zero-width match. */ + if (state.match_pos == state.text_pos) { + state.text_pos += step; + state.must_advance = FALSE; + } else + state.must_advance = TRUE; + } else + /* Continue from where we left off, but don't allow a contiguous + * zero-width match. + */ + state.must_advance = state.match_pos == state.text_pos; } /* Get the segment following the last match. We use 'length' instead of @@ -16259,7 +19040,8 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject* item = get_slice(string, last_pos, str_info.length); if (!item) goto error; - status = add_item(&join_info, item); + + status = add_to_join_list(&join_info, item); Py_DECREF(item); if (status < 0) goto error; @@ -16281,8 +19063,7 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject* return item; error: - Py_XDECREF(join_info.list); - Py_XDECREF(join_info.item); + clear_join_list(&join_info); state_fini(&state); Py_XDECREF(replacement); return NULL; @@ -16316,8 +19097,7 @@ static PyObject* pattern_sub(PatternObject* self, PyObject* args, PyObject* #if PY_VERSION_HEX >= 0x02060000 /* PatternObject's 'subf' method. */ static PyObject* pattern_subf(PatternObject* self, PyObject* args, PyObject* - kwargs) - { + kwargs) { int conc; PyObject* format; @@ -16428,7 +19208,7 @@ static PyObject* pattern_split(PatternObject* self, PyObject* args, PyObject* /* The MatchObject, and therefore repeated captures, will not be visible. */ if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX, FALSE, conc, - FALSE, FALSE, FALSE)) + FALSE, FALSE, FALSE, FALSE)) return NULL; /* Initialise the "safe state" structure. */ @@ -16462,12 +19242,11 @@ static PyObject* pattern_split(PatternObject* self, PyObject* args, PyObject* /* No more matches. 
*/ break; - if (!state.zero_width) { - /* The current behaviour is to advance one character if the split - * was zero-width. Unfortunately, this can give an incorrect - * result. GvR wants this behaviour to be retained so as not to - * break any existing software which might rely on it. The correct - * behaviour is enabled by setting the 'new' flag. + if (state.version_0) { + /* Version 0 behaviour is to advance one character if the split was + * zero-width. Unfortunately, this can give an incorrect result. + * GvR wants this behaviour to be retained so as not to break any + * existing software which might rely on it. */ if (state.text_pos == state.match_pos) { if (last_pos == end_pos) @@ -16487,6 +19266,7 @@ static PyObject* pattern_split(PatternObject* self, PyObject* args, PyObject* item = get_slice(string, last_pos, state.match_pos); if (!item) goto error; + status = PyList_Append(list, item); Py_DECREF(item); if (status < 0) @@ -16494,9 +19274,10 @@ static PyObject* pattern_split(PatternObject* self, PyObject* args, PyObject* /* Add groups (if any). */ for (g = 1; g <= self->public_group_count; g++) { - item = state_get_group(&state, g, string, FALSE); + item = state_get_group(&state, (Py_ssize_t)g, string, FALSE); if (!item) goto error; + status = PyList_Append(list, item); Py_DECREF(item); if (status < 0) @@ -16506,25 +19287,22 @@ static PyObject* pattern_split(PatternObject* self, PyObject* args, PyObject* ++split_count; last_pos = state.text_pos; - /* The correct behaviour is to reject a zero-width match just after a - * split point. The current behaviour is to advance one character if - * the match was zero-width. Unfortunately, this can give an incorrect - * result. GvR wants this behaviour to be retained so as not to break - * any existing software which might rely on it. The correct behaviour - * is enabled by setting the 'new' flag. + /* Version 0 behaviour is to advance one character if the match was + * zero-width. 
Unfortunately, this can give an incorrect result. GvR + * wants this behaviour to be retained so as not to break any existing + * software which might rely on it. */ - if (state.zero_width) - /* Continue from where we left off, but don't allow a contiguous - * zero-width match. - */ - state.must_advance = TRUE; - else { + if (state.version_0) { if (state.text_pos == state.match_pos) /* Advance one character. */ state.text_pos += step; state.must_advance = FALSE; - } + } else + /* Continue from where we left off, but don't allow a contiguous + * zero-width match. + */ + state.must_advance = TRUE; } /* Get segment following last match (even if empty). */ @@ -16534,6 +19312,7 @@ static PyObject* pattern_split(PatternObject* self, PyObject* args, PyObject* item = get_slice(string, last_pos, state.text_length); if (!item) goto error; + status = PyList_Append(list, item); Py_DECREF(item); if (status < 0) @@ -16596,7 +19375,7 @@ static PyObject* pattern_findall(PatternObject* self, PyObject* args, PyObject* /* The MatchObject, and therefore repeated captures, will not be visible. */ if (!state_init(&state, self, string, start, end, overlapped != 0, conc, - FALSE, FALSE, FALSE)) + FALSE, FALSE, FALSE, FALSE)) return NULL; /* Initialise the "safe state" structure. */ @@ -16641,15 +19420,20 @@ static PyObject* pattern_findall(PatternObject* self, PyObject* args, PyObject* goto error; break; default: - item = PyTuple_New(self->public_group_count); + item = PyTuple_New((Py_ssize_t)self->public_group_count); if (!item) goto error; + for (g = 0; g < self->public_group_count; g++) { - PyObject* o = state_get_group(&state, g + 1, string, TRUE); + PyObject* o; + + o = state_get_group(&state, (Py_ssize_t)g + 1, string, TRUE); if (!o) { Py_DECREF(item); goto error; } + + /* PyTuple_SET_ITEM borrows the reference. */ PyTuple_SET_ITEM(item, g, o); } break; @@ -16809,6 +19593,7 @@ PyDoc_STRVAR(pattern_doc, "Compiled regex object"); /* Deallocates a PatternObject. 
*/ static void pattern_dealloc(PyObject* self_) { PatternObject* self; + int partial_side; size_t i; self = (PatternObject*)self_; @@ -16845,11 +19630,23 @@ static void pattern_dealloc(PyObject* self_) { Py_XDECREF(self->pattern); Py_XDECREF(self->groupindex); Py_XDECREF(self->indexgroup); + + for (partial_side = 0; partial_side < 2; partial_side++) { + if (self->partial_named_lists[partial_side]) { + for (i = 0; i < self->named_lists_count; i++) + Py_XDECREF(self->partial_named_lists[partial_side][i]); + + re_dealloc(self->partial_named_lists[partial_side]); + } + } + Py_DECREF(self->named_lists); Py_DECREF(self->named_list_indexes); + re_dealloc(self->locale_info); PyObject_DEL(self); } +/* Info about the various flags that can be passed in. */ typedef struct RE_FlagName { char* name; int value; @@ -16959,6 +19756,38 @@ static PyObject* match_repr(PyObject* self_) { if (status < 0) goto error; + if (self->fuzzy_counts[RE_FUZZY_SUB] != 0 || + self->fuzzy_counts[RE_FUZZY_INS] != 0 || self->fuzzy_counts[RE_FUZZY_DEL] + != 0) { + if (! append_string(list, ", fuzzy_counts=(")) + goto error; + + if (!append_integer(list, + (Py_ssize_t)self->fuzzy_counts[RE_FUZZY_SUB])) + goto error; + + if (! append_string(list, ", ")) + goto error; + + if (!append_integer(list, + (Py_ssize_t)self->fuzzy_counts[RE_FUZZY_INS])) + goto error; + + if (! append_string(list, ", ")) + goto error; + if (!append_integer(list, + (Py_ssize_t)self->fuzzy_counts[RE_FUZZY_DEL])) + goto error; + + if (! append_string(list, ")")) + goto error; + } + + if (self->partial) { + if (!append_string(list, ", partial=True")) + goto error; + } + if (! 
append_string(list, ">")) goto error; @@ -16968,7 +19797,6 @@ static PyObject* match_repr(PyObject* self_) { result = PyUnicode_Join(separator, list); Py_DECREF(separator); - Py_DECREF(list); return result; @@ -16985,7 +19813,8 @@ static PyObject* pattern_repr(PyObject* self_) { PyObject* item; int status; int flag_count; - Py_ssize_t i; + unsigned int i; + Py_ssize_t pos; PyObject *key; PyObject *value; PyObject* separator; @@ -17030,8 +19859,9 @@ static PyObject* pattern_repr(PyObject* self_) { } } - i = 0; - while (PyDict_Next(self->named_lists, &i, &key, &value)) { + pos = 0; + /* PyDict_Next borrows references. */ + while (PyDict_Next(self->named_lists, &pos, &key, &value)) { if (!append_string(list, ", ")) goto error; @@ -17039,11 +19869,11 @@ static PyObject* pattern_repr(PyObject* self_) { if (status < 0) goto error; - item = PyObject_Repr(value); - if (!item) + if (!append_string(list, "=")) goto error; - if (!append_string(list, "=")) + item = PyObject_Repr(value); + if (!item) goto error; status = PyList_Append(list, item); @@ -17061,7 +19891,6 @@ static PyObject* pattern_repr(PyObject* self_) { result = PyUnicode_Join(separator, list); Py_DECREF(separator); - Py_DECREF(list); return result; @@ -17176,8 +20005,7 @@ Py_LOCAL_INLINE(RE_STATUS_T) add_repeat_guards(PatternObject* pattern, RE_Node* body_result = add_repeat_guards(pattern, node->nonstring.next_2.node); tail_result = add_repeat_guards(pattern, node->next_1.node); - status = (RE_STATUS_T)RE_MAX(RE_MAX(result, body_result), - tail_result); + status = max_status_3(result, body_result, tail_result); node->status = RE_STATUS_VISITED_AG | status; return status; } @@ -17190,8 +20018,7 @@ Py_LOCAL_INLINE(RE_STATUS_T) add_repeat_guards(PatternObject* pattern, RE_Node* branch_1_result = add_repeat_guards(pattern, node->next_1.node); branch_2_result = add_repeat_guards(pattern, node->nonstring.next_2.node); - status = (RE_STATUS_T)RE_MAX(RE_MAX(result, branch_1_result), - branch_2_result); + status = 
max_status_3(result, branch_1_result, branch_2_result); node->status = RE_STATUS_VISITED_AG | status; return status; } @@ -17208,7 +20035,7 @@ Py_LOCAL_INLINE(RE_STATUS_T) add_repeat_guards(PatternObject* pattern, RE_Node* RE_RepeatInfo* repeat_info; RE_STATUS_T status; - limited = node->values[2] != RE_UNLIMITED; + limited = ~node->values[2] != 0; if (limited) body_result = RE_STATUS_LIMITED; else @@ -17222,11 +20049,10 @@ Py_LOCAL_INLINE(RE_STATUS_T) add_repeat_guards(PatternObject* pattern, RE_Node* if (tail_result != RE_STATUS_REF) repeat_info->status |= RE_STATUS_TAIL; if (limited) - result = (RE_STATUS_T)RE_MAX(result, RE_STATUS_LIMITED); + result = max_status_2(result, RE_STATUS_LIMITED); else - result = (RE_STATUS_T)RE_MAX(result, RE_STATUS_REPEAT); - status = (RE_STATUS_T)RE_MAX(RE_MAX(result, body_result), - tail_result); + result = max_status_2(result, RE_STATUS_REPEAT); + status = max_status_3(result, body_result, tail_result); node->status |= RE_STATUS_VISITED_AG | status; return status; } @@ -17238,7 +20064,7 @@ Py_LOCAL_INLINE(RE_STATUS_T) add_repeat_guards(PatternObject* pattern, RE_Node* RE_RepeatInfo* repeat_info; RE_STATUS_T status; - limited = node->values[2] != RE_UNLIMITED; + limited = ~node->values[2] != 0; tail_result = add_repeat_guards(pattern, node->next_1.node); repeat_info = &pattern->repeat_info[node->values[0]]; @@ -17246,11 +20072,10 @@ Py_LOCAL_INLINE(RE_STATUS_T) add_repeat_guards(PatternObject* pattern, RE_Node* if (tail_result != RE_STATUS_REF) repeat_info->status |= RE_STATUS_TAIL; if (limited) - result = (RE_STATUS_T)RE_MAX(result, RE_STATUS_LIMITED); + result = max_status_2(result, RE_STATUS_LIMITED); else - result = (RE_STATUS_T)RE_MAX(result, RE_STATUS_REPEAT); - status = (RE_STATUS_T)RE_MAX(RE_MAX(result, RE_STATUS_REPEAT), - tail_result); + result = max_status_2(result, RE_STATUS_REPEAT); + status = max_status_3(result, RE_STATUS_REPEAT, tail_result); node->status = RE_STATUS_VISITED_AG | status; return status; } @@ 
-17263,8 +20088,8 @@ Py_LOCAL_INLINE(RE_STATUS_T) add_repeat_guards(PatternObject* pattern, RE_Node* branch_1_result = add_repeat_guards(pattern, node->next_1.node); branch_2_result = add_repeat_guards(pattern, node->nonstring.next_2.node); - status = (RE_STATUS_T)RE_MAX(RE_MAX(RE_MAX(result, - branch_1_result), branch_2_result), RE_STATUS_REF); + status = max_status_4(result, branch_1_result, branch_2_result, + RE_STATUS_REF); node->status = RE_STATUS_VISITED_AG | status; return status; } @@ -17292,7 +20117,7 @@ Py_LOCAL_INLINE(RE_STATUS_T) add_repeat_guards(PatternObject* pattern, RE_Node* * * 'offset' is the offset of the index count within the values. */ -Py_LOCAL_INLINE(BOOL) add_index(RE_Node* node, size_t offset, RE_CODE index) { +Py_LOCAL_INLINE(BOOL) add_index(RE_Node* node, size_t offset, size_t index) { size_t index_count; size_t first_index; size_t i; @@ -17319,7 +20144,7 @@ Py_LOCAL_INLINE(BOOL) add_index(RE_Node* node, size_t offset, RE_CODE index) { ++node->value_count; node->values = new_values; - node->values[first_index + node->values[offset]++] = index; + node->values[first_index + node->values[offset]++] = (RE_CODE)index; return TRUE; } @@ -17356,7 +20181,7 @@ Py_LOCAL_INLINE(BOOL) record_subpattern_repeats_and_fuzzy_sections(RE_Node* return TRUE; case RE_OP_FUZZY: /* Record the fuzzy index. */ - if (!add_index(parent_node, offset, (RE_CODE)repeat_count + + if (!add_index(parent_node, offset, repeat_count + node->values[0])) return FALSE; node = node->next_1.node; @@ -17445,21 +20270,26 @@ Py_LOCAL_INLINE(void) discard_unused_nodes(PatternObject* pattern) { pattern->node_count = new_count; } -/* Marks all the group which are named. */ +/* Marks all the group which are named. Returns FALSE if there's an error. 
*/ Py_LOCAL_INLINE(BOOL) mark_named_groups(PatternObject* pattern) { size_t i; for (i = 0; i < pattern->public_group_count; i++) { RE_GroupInfo* group_info; PyObject* index; + int status; group_info = &pattern->group_info[i]; index = Py_BuildValue("n", i + 1); if (!index) return FALSE; - group_info->has_name = (BOOL)PyDict_Contains(pattern->indexgroup, - index); + + status = PyDict_Contains(pattern->indexgroup, index); Py_DECREF(index); + if (status < 0) + return FALSE; + + group_info->has_name = status == 1; } return TRUE; @@ -17675,7 +20505,7 @@ Py_LOCAL_INLINE(void) add_node(RE_Node* node_1, RE_Node* node_2) { } /* Ensures that the entry for a group's details actually exists. */ -Py_LOCAL_INLINE(BOOL) ensure_group(PatternObject* pattern, RE_CODE group) { +Py_LOCAL_INLINE(BOOL) ensure_group(PatternObject* pattern, size_t group) { size_t old_capacity; size_t new_capacity; RE_GroupInfo* new_group_info; @@ -17710,7 +20540,7 @@ Py_LOCAL_INLINE(BOOL) ensure_group(PatternObject* pattern, RE_CODE group) { } /* Records that there's a reference to a group. */ -Py_LOCAL_INLINE(BOOL) record_ref_group(PatternObject* pattern, RE_CODE group) { +Py_LOCAL_INLINE(BOOL) record_ref_group(PatternObject* pattern, size_t group) { if (!ensure_group(pattern, group)) return FALSE; @@ -17720,7 +20550,7 @@ Py_LOCAL_INLINE(BOOL) record_ref_group(PatternObject* pattern, RE_CODE group) { } /* Records that there's a new group. 
*/ -Py_LOCAL_INLINE(BOOL) record_group(PatternObject* pattern, RE_CODE group, +Py_LOCAL_INLINE(BOOL) record_group(PatternObject* pattern, size_t group, RE_Node* node) { if (!ensure_group(pattern, group)) return FALSE; @@ -17729,7 +20559,7 @@ Py_LOCAL_INLINE(BOOL) record_group(PatternObject* pattern, RE_CODE group, RE_GroupInfo* info; info = &pattern->group_info[group - 1]; - info->end_index = pattern->true_group_count; + info->end_index = (Py_ssize_t)pattern->true_group_count; info->node = node; } @@ -17737,13 +20567,13 @@ Py_LOCAL_INLINE(BOOL) record_group(PatternObject* pattern, RE_CODE group, } /* Records that a group has closed. */ -Py_LOCAL_INLINE(void) record_group_end(PatternObject* pattern, RE_CODE group) { +Py_LOCAL_INLINE(void) record_group_end(PatternObject* pattern, size_t group) { if (group >= 1) pattern->group_info[group - 1].end_index = ++pattern->group_end_index; } /* Ensures that the entry for a call_ref's details actually exists. */ -Py_LOCAL_INLINE(BOOL) ensure_call_ref(PatternObject* pattern, RE_CODE call_ref) +Py_LOCAL_INLINE(BOOL) ensure_call_ref(PatternObject* pattern, size_t call_ref) { size_t old_capacity; size_t new_capacity; @@ -17779,7 +20609,7 @@ Py_LOCAL_INLINE(BOOL) ensure_call_ref(PatternObject* pattern, RE_CODE call_ref) } /* Records that a call_ref is defined. */ -Py_LOCAL_INLINE(BOOL) record_call_ref_defined(PatternObject* pattern, RE_CODE +Py_LOCAL_INLINE(BOOL) record_call_ref_defined(PatternObject* pattern, size_t call_ref, RE_Node* node) { if (!ensure_call_ref(pattern, call_ref)) return FALSE; @@ -17791,7 +20621,7 @@ Py_LOCAL_INLINE(BOOL) record_call_ref_defined(PatternObject* pattern, RE_CODE } /* Records that a call_ref is used. 
*/ -Py_LOCAL_INLINE(BOOL) record_call_ref_used(PatternObject* pattern, RE_CODE +Py_LOCAL_INLINE(BOOL) record_call_ref_used(PatternObject* pattern, size_t call_ref) { if (!ensure_call_ref(pattern, call_ref)) return FALSE; @@ -17809,44 +20639,7 @@ Py_LOCAL_INLINE(BOOL) sequence_matches_one(RE_Node* node) { if (node->next_1.node || (node->status & RE_STATUS_FUZZY)) return FALSE; - switch (node->op) { - case RE_OP_ANY: - case RE_OP_ANY_ALL: - case RE_OP_ANY_ALL_REV: - case RE_OP_ANY_REV: - case RE_OP_ANY_U: - case RE_OP_ANY_U_REV: - case RE_OP_CHARACTER: - case RE_OP_CHARACTER_IGN: - case RE_OP_CHARACTER_IGN_REV: - case RE_OP_CHARACTER_REV: - case RE_OP_PROPERTY: - case RE_OP_PROPERTY_IGN: - case RE_OP_PROPERTY_IGN_REV: - case RE_OP_PROPERTY_REV: - case RE_OP_RANGE: - case RE_OP_RANGE_IGN: - case RE_OP_RANGE_IGN_REV: - case RE_OP_RANGE_REV: - case RE_OP_SET_DIFF: - case RE_OP_SET_DIFF_IGN: - case RE_OP_SET_DIFF_IGN_REV: - case RE_OP_SET_DIFF_REV: - case RE_OP_SET_INTER: - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_INTER_IGN_REV: - case RE_OP_SET_INTER_REV: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_SYM_DIFF_IGN_REV: - case RE_OP_SET_SYM_DIFF_REV: - case RE_OP_SET_UNION: - case RE_OP_SET_UNION_IGN: - case RE_OP_SET_UNION_REV: - return TRUE; - default: - return FALSE; - } + return node_matches_one_character(node); } /* Records a repeat. 
*/ @@ -17908,7 +20701,7 @@ Py_LOCAL_INLINE(Py_ssize_t) get_step(RE_CODE op) { case RE_OP_STRING: case RE_OP_STRING_FLD: case RE_OP_STRING_IGN: - return 1; + return 1; case RE_OP_ANY_ALL_REV: case RE_OP_ANY_REV: case RE_OP_ANY_U_REV: @@ -18134,7 +20927,7 @@ Py_LOCAL_INLINE(int) build_BOUNDARY(RE_CompileArgs* args) { Py_LOCAL_INLINE(int) build_BRANCH(RE_CompileArgs* args) { RE_Node* branch_node; RE_Node* join_node; - size_t smallest_min_width; + Py_ssize_t smallest_min_width; RE_CompileArgs subargs; int status; @@ -18152,7 +20945,7 @@ Py_LOCAL_INLINE(int) build_BRANCH(RE_CompileArgs* args) { add_node(args->end, branch_node); args->end = join_node; - smallest_min_width = ~(size_t)0; + smallest_min_width = PY_SSIZE_T_MAX; subargs = *args; @@ -18173,7 +20966,8 @@ Py_LOCAL_INLINE(int) build_BRANCH(RE_CompileArgs* args) { if (status != RE_ERROR_SUCCESS) return status; - smallest_min_width = RE_MIN(smallest_min_width, subargs.min_width); + smallest_min_width = min_ssize_t(smallest_min_width, + subargs.min_width); args->has_captures |= subargs.has_captures; args->is_fuzzy |= subargs.is_fuzzy; @@ -18352,7 +21146,8 @@ Py_LOCAL_INLINE(int) build_GROUP(RE_CompileArgs* args) { args->code = subargs.code; args->min_width = subargs.min_width; - args->has_captures |= subargs.has_captures || subargs.visible_captures; + if (subargs.has_captures || subargs.visible_captures) + args->has_captures = TRUE; args->is_fuzzy |= subargs.is_fuzzy; ++args->code; @@ -18406,7 +21201,7 @@ Py_LOCAL_INLINE(int) build_GROUP_EXISTS(RE_CompileArgs* args) { RE_Node* start_node; RE_Node* end_node; RE_CompileArgs subargs; - size_t min_width; + Py_ssize_t min_width; int status; /* codes: opcode, sequence, next, sequence, end. 
*/ @@ -18459,7 +21254,7 @@ Py_LOCAL_INLINE(int) build_GROUP_EXISTS(RE_CompileArgs* args) { args->has_captures |= subargs.has_captures; args->is_fuzzy |= subargs.is_fuzzy; - min_width = RE_MIN(min_width, subargs.min_width); + min_width = min_ssize_t(min_width, subargs.min_width); add_node(start_node, subargs.start); add_node(subargs.end, end_node); @@ -18635,7 +21430,7 @@ Py_LOCAL_INLINE(int) build_REPEAT(RE_CompileArgs* args) { greedy = args->code[0] == RE_OP_GREEDY_REPEAT; min_count = args->code[1]; max_count = args->code[2]; - if (min_count > max_count) + if (args->code[1] > args->code[2]) return RE_ERROR_ILLEGAL; args->code += 3; @@ -18708,11 +21503,11 @@ Py_LOCAL_INLINE(int) build_REPEAT(RE_CompileArgs* args) { add_node(args->end, subargs.start); args->end = subargs.end; } else { - RE_CODE index; + size_t index; RE_Node* repeat_node; RE_CompileArgs subargs; - index = (RE_CODE)args->pattern->repeat_count; + index = args->pattern->repeat_count; /* Create the nodes for the repeat. */ repeat_node = create_node(args->pattern, greedy ? RE_OP_GREEDY_REPEAT : @@ -18721,7 +21516,7 @@ Py_LOCAL_INLINE(int) build_REPEAT(RE_CompileArgs* args) { args->repeat_depth)) return RE_ERROR_MEMORY; - repeat_node->values[0] = index; + repeat_node->values[0] = (RE_CODE)index; repeat_node->values[1] = min_count; repeat_node->values[2] = max_count; repeat_node->values[3] = args->forward; @@ -18744,7 +21539,7 @@ Py_LOCAL_INLINE(int) build_REPEAT(RE_CompileArgs* args) { return RE_ERROR_ILLEGAL; args->code = subargs.code; - args->min_width += min_count * subargs.min_width; + args->min_width += (Py_ssize_t)min_count * subargs.min_width; args->has_captures |= subargs.has_captures; args->is_fuzzy |= subargs.is_fuzzy; @@ -18803,7 +21598,7 @@ Py_LOCAL_INLINE(int) build_STRING(RE_CompileArgs* args, BOOL is_charset) { RE_UINT8 op; Py_ssize_t step; RE_Node* node; - RE_CODE i; + size_t i; /* codes: opcode, flags, length, characters. 
*/ flags = args->code[1]; @@ -18836,9 +21631,9 @@ Py_LOCAL_INLINE(int) build_STRING(RE_CompileArgs* args, BOOL is_charset) { * multiple characters in the pattern. */ if (op == RE_OP_STRING_FLD || op == RE_OP_STRING_FLD_REV) - args->min_width += possible_unfolded_length(length); + args->min_width += possible_unfolded_length((Py_ssize_t)length); else - args->min_width += length; + args->min_width += (Py_ssize_t)length; return RE_ERROR_SUCCESS; } @@ -18849,7 +21644,7 @@ Py_LOCAL_INLINE(int) build_SET(RE_CompileArgs* args) { RE_CODE flags; Py_ssize_t step; RE_Node* node; - size_t saved_min_width; + Py_ssize_t saved_min_width; int status; /* codes: opcode, flags, members. */ @@ -19251,7 +22046,7 @@ Py_LOCAL_INLINE(BOOL) compile_to_nodes(RE_CODE* code, RE_CODE* end_code, /* Get the call_ref for the entire pattern, if any. */ if (pattern->start_node->op == RE_OP_CALL_REF) - pattern->pattern_call_ref = pattern->start_node->values[0]; + pattern->pattern_call_ref = (Py_ssize_t)pattern->start_node->values[0]; else pattern->pattern_call_ref = -1; @@ -19264,7 +22059,7 @@ Py_LOCAL_INLINE(BOOL) compile_to_nodes(RE_CODE* code, RE_CODE* end_code, * characters. */ Py_LOCAL_INLINE(void) get_required_chars(PyObject* required_chars, RE_CODE** - req_chars, Py_ssize_t* req_length) { + req_chars, size_t* req_length) { Py_ssize_t len; RE_CODE* chars; Py_ssize_t i; @@ -19278,16 +22073,19 @@ Py_LOCAL_INLINE(void) get_required_chars(PyObject* required_chars, RE_CODE** return; } - chars = (RE_CODE*)re_alloc(len * sizeof(RE_CODE)); + chars = (RE_CODE*)re_alloc((size_t)len * sizeof(RE_CODE)); if (!chars) goto error; for (i = 0; i < len; i++) { - PyObject* o = PyTuple_GET_ITEM(required_chars, i); + PyObject* o; size_t value; + /* PyTuple_SET_ITEM borrows the reference. 
*/ + o = PyTuple_GET_ITEM(required_chars, i); + value = PyLong_AsUnsignedLong(o); - if (value == -1 && PyErr_Occurred()) + if ((Py_ssize_t)value == -1 && PyErr_Occurred()) goto error; chars[i] = (RE_CODE)value; @@ -19296,7 +22094,7 @@ Py_LOCAL_INLINE(void) get_required_chars(PyObject* required_chars, RE_CODE** } *req_chars = chars; - *req_length = len; + *req_length = (size_t)len; return; @@ -19307,15 +22105,15 @@ error: /* Makes a STRING node. */ Py_LOCAL_INLINE(RE_Node*) make_STRING_node(PatternObject* pattern, RE_UINT8 op, - Py_ssize_t length, RE_CODE* chars) { + size_t length, RE_CODE* chars) { Py_ssize_t step; RE_Node* node; - Py_ssize_t i; + size_t i; step = get_step(op); /* Create the node. */ - node = create_node(pattern, op, 0, step * length, length); + node = create_node(pattern, op, 0, step * (Py_ssize_t)length, length); if (!node) return NULL; @@ -19327,6 +22125,40 @@ Py_LOCAL_INLINE(RE_Node*) make_STRING_node(PatternObject* pattern, RE_UINT8 op, return node; } +/* Scans all of the characters in the current locale for their properties. */ +Py_LOCAL_INLINE(void) scan_locale_chars(RE_LocaleInfo* locale_info) { + int c; + + for (c = 0; c < 0x100; c++) { + unsigned short props = 0; + + if (isalnum(c)) + props |= RE_LOCALE_ALNUM; + if (isalpha(c)) + props |= RE_LOCALE_ALPHA; + if (iscntrl(c)) + props |= RE_LOCALE_CNTRL; + if (isdigit(c)) + props |= RE_LOCALE_DIGIT; + if (isgraph(c)) + props |= RE_LOCALE_GRAPH; + if (islower(c)) + props |= RE_LOCALE_LOWER; + if (isprint(c)) + props |= RE_LOCALE_PRINT; + if (ispunct(c)) + props |= RE_LOCALE_PUNCT; + if (isspace(c)) + props |= RE_LOCALE_SPACE; + if (isupper(c)) + props |= RE_LOCALE_UPPER; + + locale_info->properties[c] = props; + locale_info->uppercase[c] = toupper(c); + locale_info->lowercase[c] = tolower(c); + } +} + /* Compiles regular expression code to a PatternObject. 
* * The regular expression code is provided as a list and is then compiled to @@ -19343,10 +22175,10 @@ static PyObject* re_compile(PyObject* self_, PyObject* args) { PyObject* named_list_indexes; Py_ssize_t req_offset; PyObject* required_chars; - Py_ssize_t req_length; + size_t req_length; RE_CODE* req_chars; Py_ssize_t req_flags; - Py_ssize_t public_group_count; + size_t public_group_count; Py_ssize_t code_len; RE_CODE* code; Py_ssize_t i; @@ -19363,16 +22195,19 @@ static PyObject* re_compile(PyObject* self_, PyObject* args) { /* Read the regex code. */ code_len = PyList_GET_SIZE(code_list); - code = (RE_CODE*)re_alloc(code_len * sizeof(RE_CODE)); + code = (RE_CODE*)re_alloc((size_t)code_len * sizeof(RE_CODE)); if (!code) return NULL; for (i = 0; i < code_len; i++) { - PyObject* o = PyList_GET_ITEM(code_list, i); + PyObject* o; size_t value; + /* PyList_GET_ITEM borrows a reference. */ + o = PyList_GET_ITEM(code_list, i); + value = PyLong_AsUnsignedLong(o); - if (value == -1 && PyErr_Occurred()) + if ((Py_ssize_t)value == -1 && PyErr_Occurred()) goto error; code[i] = (RE_CODE)value; @@ -19394,7 +22229,7 @@ static PyObject* re_compile(PyObject* self_, PyObject* args) { /* Initialise the PatternObject. 
*/ self->pattern = pattern; - self->flags = (RE_CODE)flags; + self->flags = flags; self->weakreflist = NULL; self->start_node = NULL; self->repeat_count = 0; @@ -19404,6 +22239,9 @@ static PyObject* re_compile(PyObject* self_, PyObject* args) { self->groupindex = groupindex; self->indexgroup = indexgroup; self->named_lists = named_lists; + self->named_lists_count = (size_t)PyDict_Size(named_lists); + self->partial_named_lists[0] = NULL; + self->partial_named_lists[1] = NULL; self->named_list_indexes = named_list_indexes; self->node_capacity = 0; self->node_count = 0; @@ -19421,6 +22259,7 @@ static PyObject* re_compile(PyObject* self_, PyObject* args) { self->recursive = FALSE; self->req_offset = req_offset; self->req_string = NULL; + self->locale_info = NULL; Py_INCREF(self->pattern); Py_INCREF(self->groupindex); Py_INCREF(self->indexgroup); @@ -19497,6 +22336,19 @@ static PyObject* re_compile(PyObject* self_, PyObject* args) { re_dealloc(req_chars); } + if (locale) { + /* Store info about the characters in the locale for locale-sensitive + * matching. + */ + self->locale_info = re_alloc(sizeof(RE_LocaleInfo)); + if (!self->locale_info) { + Py_DECREF(self); + return NULL; + } + + scan_locale_chars(self->locale_info); + } + return (PyObject*)self; error: @@ -19528,6 +22380,7 @@ static PyObject* fold_case(PyObject* self_, PyObject* args) { void* folded; Py_ssize_t folded_len; PyObject* result; + RE_LocaleInfo locale_info; Py_ssize_t flags; PyObject* string; @@ -19565,9 +22418,10 @@ static PyObject* fold_case(PyObject* self_, PyObject* args) { /* What's the encoding? 
*/ if (flags & RE_FLAG_UNICODE) encoding = &unicode_encoding; - else if (flags & RE_FLAG_LOCALE) + else if (flags & RE_FLAG_LOCALE) { encoding = &locale_encoding; - else if (flags & RE_FLAG_ASCII) + scan_locale_chars(&locale_info); + } else if (flags & RE_FLAG_ASCII) encoding = &ascii_encoding; else encoding = &unicode_encoding; @@ -19597,13 +22451,13 @@ static PyObject* fold_case(PyObject* self_, PyObject* args) { /* Allocate a buffer for the folded string. */ if (flags & RE_FLAG_FULLCASE) /* When using full case-folding with Unicode, some single codepoints - * are transformed to sequences of codepoints. + * are mapped to multiple codepoints. */ buf_size = str_info.length * RE_MAX_FOLDED; else buf_size = str_info.length; - folded = re_alloc(buf_size * folded_charsize); + folded = re_alloc((size_t)(buf_size * folded_charsize)); if (!folded) { #if PY_VERSION_HEX >= 0x02060000 release_buffer(&str_info); @@ -19617,7 +22471,8 @@ static PyObject* fold_case(PyObject* self_, PyObject* args) { if (flags & RE_FLAG_FULLCASE) { /* Full case-folding. */ - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + int (*full_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch, Py_UCS4* + folded); Py_ssize_t i; Py_UCS4 codepoints[RE_MAX_FOLDED]; @@ -19627,8 +22482,8 @@ static PyObject* fold_case(PyObject* self_, PyObject* args) { int count; int j; - count = full_case_fold(char_at(str_info.characters, i), - codepoints); + count = full_case_fold(&locale_info, char_at(str_info.characters, + i), codepoints); for (j = 0; j < count; j++) set_char_at(folded, folded_len + j, codepoints[j]); @@ -19636,7 +22491,7 @@ static PyObject* fold_case(PyObject* self_, PyObject* args) { } } else { /* Simple case-folding. 
*/ - Py_UCS4 (*simple_case_fold)(Py_UCS4 ch); + Py_UCS4 (*simple_case_fold)(RE_LocaleInfo* locale_info, Py_UCS4 ch); Py_ssize_t i; simple_case_fold = encoding->simple_case_fold; @@ -19644,7 +22499,8 @@ static PyObject* fold_case(PyObject* self_, PyObject* args) { for (i = 0; i < str_info.length; i++) { Py_UCS4 ch; - ch = simple_case_fold(char_at(str_info.characters, i)); + ch = simple_case_fold(&locale_info, char_at(str_info.characters, + i)); set_char_at(folded, i, ch); } @@ -19667,7 +22523,7 @@ static PyObject* fold_case(PyObject* self_, PyObject* args) { return result; } -/* Returns a tuple of the Unicode characters which expand on full case-folding. +/* Returns a tuple of the Unicode characters that expand on full case-folding. */ static PyObject* get_expand_on_folding(PyObject* self, PyObject* unused) { int count; @@ -19680,7 +22536,7 @@ static PyObject* get_expand_on_folding(PyObject* self, PyObject* unused) { /* Put all the characters in a tuple. */ result = PyTuple_New(count); if (!result) - goto error; + return NULL; for (i = 0; i < count; i++) { Py_UNICODE codepoint; @@ -19692,14 +22548,14 @@ static PyObject* get_expand_on_folding(PyObject* self, PyObject* unused) { if (!item) goto error; + /* PyTuple_SetItem borrows the reference. */ PyTuple_SetItem(result, i, item); } return result; error: - Py_XDECREF(result); - + Py_DECREF(result); return NULL; } @@ -19719,7 +22575,7 @@ static PyObject* has_property_value(PyObject* self_, PyObject* args) { return Py_BuildValue("n", v); } -/* Returns a list all the simple cases of a character. +/* Returns a list of all the simple cases of a character. * * If full case-folding is turned on and the character also expands on full * case-folding, a None is appended to the list. 
@@ -19731,6 +22587,7 @@ static PyObject* get_all_cases(PyObject* self_, PyObject* args) { Py_UCS4 folded[RE_MAX_FOLDED]; PyObject* result; int i; + RE_LocaleInfo locale_info; Py_ssize_t flags; Py_ssize_t character; @@ -19740,19 +22597,20 @@ static PyObject* get_all_cases(PyObject* self_, PyObject* args) { /* What's the encoding? */ if (flags & RE_FLAG_UNICODE) encoding = &unicode_encoding; - else if (flags & RE_FLAG_LOCALE) + else if (flags & RE_FLAG_LOCALE) { encoding = &locale_encoding; - else if (flags & RE_FLAG_ASCII) + scan_locale_chars(&locale_info); + } else if (flags & RE_FLAG_ASCII) encoding = &ascii_encoding; else encoding = &ascii_encoding; /* Get all the simple cases. */ - count = encoding->all_cases((Py_UCS4)character, cases); + count = encoding->all_cases(&locale_info, (Py_UCS4)character, cases); result = PyList_New(count); if (!result) - goto error; + return NULL; for (i = 0; i < count; i++) { PyObject* item; @@ -19761,12 +22619,14 @@ static PyObject* get_all_cases(PyObject* self_, PyObject* args) { if (!item) goto error; + /* PyList_SetItem borrows the reference. */ PyList_SetItem(result, i, item); } /* If the character also expands on full case-folding, append a None. */ if ((flags & RE_FULL_CASE_FOLDING) == RE_FULL_CASE_FOLDING) { - count = encoding->full_case_fold((Py_UCS4)character, folded); + count = encoding->full_case_fold(&locale_info, (Py_UCS4)character, + folded); if (count > 1) PyList_Append(result, Py_None); } @@ -19774,8 +22634,7 @@ static PyObject* get_all_cases(PyObject* self_, PyObject* args) { return result; error: - Py_XDECREF(result); - + Py_DECREF(result); return NULL; } @@ -19792,9 +22651,9 @@ static PyMethodDef _functions[] = { }; /* Initialises the property dictionary. 
*/ -static BOOL init_property_dict(void) { - int value_set_count; - int i; +Py_LOCAL_INLINE(BOOL) init_property_dict(void) { + size_t value_set_count; + size_t i; PyObject** value_dicts; property_dict = NULL; @@ -19808,7 +22667,7 @@ static BOOL init_property_dict(void) { value = &re_property_values[i]; if (value->value_set >= value_set_count) - value_set_count = value->value_set + 1; + value_set_count = (size_t)value->value_set + 1; } /* Quick references for the value sets. */ @@ -19824,6 +22683,7 @@ static BOOL init_property_dict(void) { i++) { RE_PropertyValue* value; PyObject* v; + int status; value = &re_property_values[i]; if (!value_dicts[value->value_set]) { @@ -19836,8 +22696,11 @@ static BOOL init_property_dict(void) { if (!v) goto error; - PyDict_SetItemString(value_dicts[value->value_set], + status = PyDict_SetItemString(value_dicts[value->value_set], re_strings[value->name], v); + Py_DECREF(v); + if (status < 0) + goto error; } /* Build the property dictionary. */ @@ -19848,6 +22711,7 @@ static BOOL init_property_dict(void) { for (i = 0; i < sizeof(re_properties) / sizeof(re_properties[0]); i++) { RE_Property* property; PyObject* v; + int status; property = &re_properties[i]; v = Py_BuildValue("iO", property->id, @@ -19855,7 +22719,11 @@ static BOOL init_property_dict(void) { if (!v) goto error; - PyDict_SetItemString(property_dict, re_strings[property->name], v); + status = PyDict_SetItemString(property_dict, + re_strings[property->name], v); + Py_DECREF(v); + if (status < 0) + goto error; } /* DECREF the value sets. Any unused ones will be deallocated. */ @@ -19883,11 +22751,12 @@ PyMODINIT_FUNC init_regex(void) { PyObject* m; PyObject* d; PyObject* x; + #if defined(VERBOSE) /* Unbuffered in case it crashes! */ setvbuf(stdout, NULL, _IONBF, 0); -#endif +#endif /* Initialise Pattern_Type. 
*/ Pattern_Type.tp_dealloc = pattern_dealloc; Pattern_Type.tp_repr = pattern_repr; diff --git a/src/regex/_regex.h b/src/regex/_regex.h index 33dc1540b1..9688dd95ef 100644 --- a/src/regex/_regex.h +++ b/src/regex/_regex.h @@ -11,7 +11,7 @@ * 2010-01-16 mrab Re-written */ -/* Supports Unicode version 6.3.0. */ +/* Supports Unicode version 7.0.0. */ #define RE_MAGIC 20100116 diff --git a/src/regex/_regex_core.py b/src/regex/_regex_core.py index 5366d70a76..aa0d63c8b8 100644 --- a/src/regex/_regex_core.py +++ b/src/regex/_regex_core.py @@ -14,7 +14,6 @@ # 2010-01-16 mrab Python front-end re-written and extended import string -import sys import unicodedata from collections import defaultdict @@ -23,6 +22,7 @@ _regex = plugins['_regex'][0] if _regex is None: raise RuntimeError('Failed to load regex module with error: ' + plugins['_regex'][1]) + __all__ = ["A", "ASCII", "B", "BESTMATCH", "D", "DEBUG", "E", "ENHANCEMATCH", "F", "FULLCASE", "I", "IGNORECASE", "L", "LOCALE", "M", "MULTILINE", "R", "REVERSE", "S", "DOTALL", "T", "TEMPLATE", "U", "UNICODE", "V0", "VERSION0", @@ -114,6 +114,7 @@ HEX_ESCAPES = {"x": 2, "u": 4, "U": 8} # A singleton which indicates a comment within a pattern. COMMENT = object() +FLAGS = object() # The names of the opcodes. OPCODES = """ @@ -248,8 +249,8 @@ def _shrink_cache(cache_dict, args_dict, max_length, divisor=5): # Rebuild the arguments dictionary. args_dict.clear() - for pattern, pattern_type, flags, args, default_version in cache_dict: - args_dict[pattern, pattern_type, flags, default_version] = args + for pattern, pattern_type, flags, args, default_version, locale in cache_dict: + args_dict[pattern, pattern_type, flags, default_version, locale] = args def _fold_case(info, string): "Folds the case of a string." @@ -331,81 +332,129 @@ def _parse_pattern(source, info): def parse_sequence(source, info): "Parses a sequence, eg. 'abc'." 
sequence = [] - item = parse_item(source, info) - while item: - sequence.append(item) - item = parse_item(source, info) + applied = False + while True: + # Get literal characters followed by an element. + characters, case_flags, element = parse_literal_and_element(source, + info) + if not element: + # No element, just a literal. We've also reached the end of the + # sequence. + append_literal(characters, case_flags, sequence) + break + + if element is COMMENT or element is FLAGS: + append_literal(characters, case_flags, sequence) + elif type(element) is tuple: + # It looks like we've found a quantifier. + ch, saved_pos = element + + counts = parse_quantifier(source, info, ch) + if counts: + # It _is_ a quantifier. + apply_quantifier(source, info, counts, characters, case_flags, + ch, saved_pos, applied, sequence) + applied = True + else: + # It's not a quantifier. Maybe it's a fuzzy constraint. + constraints = parse_fuzzy(source, ch) + if constraints: + # It _is_ a fuzzy constraint. + apply_constraint(source, info, constraints, characters, + case_flags, saved_pos, applied, sequence) + applied = True + else: + # The element was just a literal. + characters.append(ord(ch)) + append_literal(characters, case_flags, sequence) + applied = False + else: + # We have a literal followed by something else. + append_literal(characters, case_flags, sequence) + sequence.append(element) + applied = False return make_sequence(sequence) +def apply_quantifier(source, info, counts, characters, case_flags, ch, + saved_pos, applied, sequence): + if characters: + # The quantifier applies to the last character. + append_literal(characters[ : -1], case_flags, sequence) + element = Character(characters[-1], case_flags=case_flags) + else: + # The quantifier applies to the last item in the sequence. 
+ if applied or not sequence: + raise error("nothing to repeat at position %d" % saved_pos) + + element = sequence.pop() + + min_count, max_count = counts + saved_pos = source.pos + ch = source.get() + if ch == "?": + # The "?" suffix that means it's a lazy repeat. + repeated = LazyRepeat + elif ch == "+": + # The "+" suffix that means it's a possessive repeat. + repeated = PossessiveRepeat + else: + # No suffix means that it's a greedy repeat. + source.pos = saved_pos + repeated = GreedyRepeat + + # Ignore the quantifier if it applies to a zero-width item or the number of + # repeats is fixed at 1. + if not element.is_empty() and (min_count != 1 or max_count != 1): + element = repeated(element, min_count, max_count) + + sequence.append(element) + +def apply_constraint(source, info, constraints, characters, case_flags, + saved_pos, applied, sequence): + if characters: + # The constraint applies to the last character. + append_literal(characters[ : -1], case_flags, sequence) + element = Character(characters[-1], case_flags=case_flags) + sequence.append(Fuzzy(element, constraints)) + else: + # The constraint applies to the last item in the sequence. + if applied or not sequence: + raise error("nothing for fuzzy constraint at position %d" % saved_pos) + + element = sequence.pop() + + # If a group is marked as fuzzy then put all of the fuzzy part in the + # group. + if isinstance(element, Group): + element.subpattern = Fuzzy(element.subpattern, constraints) + sequence.append(element) + else: + sequence.append(Fuzzy(element, constraints)) + +def append_literal(characters, case_flags, sequence): + if characters: + sequence.append(Literal(characters, case_flags=case_flags)) + def PossessiveRepeat(element, min_count, max_count): "Builds a possessive repeat." return Atomic(GreedyRepeat(element, min_count, max_count)) -def parse_item(source, info): - "Parses an item, which might be repeated. Returns None if there's no item." 
- element = parse_element(source, info) - counts = parse_quantifier(source, info) - if counts: - min_count, max_count = counts - saved_pos = source.pos - ch = source.get() - if ch == "?": - # The "?" suffix that means it's a lazy repeat. - repeated = LazyRepeat - elif ch == "+": - # The "+" suffix that means it's a possessive repeat. - repeated = PossessiveRepeat - else: - # No suffix means that it's a greedy repeat. - source.pos = saved_pos - repeated = GreedyRepeat - - if element.is_empty() or min_count == max_count == 1: - return element - - return repeated(element, min_count, max_count) - - # No quantifier, but maybe there's a fuzzy constraint. - constraints = parse_fuzzy(source) - if not constraints: - # No fuzzy constraint. - return element - - # If a group is marked as fuzzy then put all of the fuzzy part in the - # group. - if isinstance(element, Group): - element.subpattern = Fuzzy(element.subpattern, constraints) - return element - - return Fuzzy(element, constraints) - _QUANTIFIERS = {"?": (0, 1), "*": (0, None), "+": (1, None)} -def parse_quantifier(source, info): +def parse_quantifier(source, info, ch): "Parses a quantifier." - while True: - saved_pos = source.pos - ch = source.get() - q = _QUANTIFIERS.get(ch) - if q: - # It's a quantifier. - return q - if ch == "{": - # Looks like a limited repeated element, eg. 'a{2,3}'. - counts = parse_limited_quantifier(source) - if counts: - return counts - elif ch == "(" and source.match("?#"): - # A comment. - parse_comment(source) - continue + q = _QUANTIFIERS.get(ch) + if q: + # It's a quantifier. + return q - # Neither a quantifier nor a comment. - break + if ch == "{": + # Looks like a limited repeated element, eg. 'a{2,3}'. + counts = parse_limited_quantifier(source) + if counts: + return counts - # Parse it later, perhaps as a literal. 
- source.pos = saved_pos return None def is_above_limit(count): @@ -441,13 +490,13 @@ def parse_limited_quantifier(source): return min_count, max_count -def parse_fuzzy(source): +def parse_fuzzy(source, ch): "Parses a fuzzy setting, if present." - saved_pos = source.pos - if not source.match("{"): - source.pos = saved_pos + if ch != "{": return None + saved_pos = source.pos + constraints = {} try: parse_fuzzy_item(source, constraints) @@ -455,7 +504,6 @@ def parse_fuzzy(source): parse_fuzzy_item(source, constraints) except ParseError: source.pos = saved_pos - return None if not source.match("}"): @@ -597,10 +645,12 @@ def parse_count(source): "Parses a quantifier's count, which can be empty." return source.get_while(DIGITS) -def parse_element(source, info): - """Parses an element. An element might actually be a flag, eg. '(?i)', in - which case it returns None. +def parse_literal_and_element(source, info): + """Parses a literal followed by an element. The element is FLAGS if it's an + inline flag or None if it has reached the end of a sequence. """ + characters = [] + case_flags = info.flags & CASE_FLAGS while True: saved_pos = source.pos ch = source.get() @@ -608,71 +658,69 @@ def parse_element(source, info): if ch in ")|": # The end of a sequence. At the end of the pattern ch is "". source.pos = saved_pos - return None + return characters, case_flags, None elif ch == "\\": # An escape sequence outside a set. - return parse_escape(source, info, False) + element = parse_escape(source, info, False) + return characters, case_flags, element elif ch == "(": # A parenthesised subpattern or a flag. element = parse_paren(source, info) if element and element is not COMMENT: - return element + return characters, case_flags, element elif ch == ".": # Any character. 
if info.flags & DOTALL: - return AnyAll() + element = AnyAll() elif info.flags & WORD: - return AnyU() + element = AnyU() else: - return Any() + element = Any() + + return characters, case_flags, element elif ch == "[": # A character set. - return parse_set(source, info) + element = parse_set(source, info) + return characters, case_flags, element elif ch == "^": # The start of a line or the string. if info.flags & MULTILINE: if info.flags & WORD: - return StartOfLineU() + element = StartOfLineU() else: - return StartOfLine() + element = StartOfLine() else: - return StartOfString() + element = StartOfString() + + return characters, case_flags, element elif ch == "$": # The end of a line or the string. if info.flags & MULTILINE: if info.flags & WORD: - return EndOfLineU() + element = EndOfLineU() else: - return EndOfLine() + element = EndOfLine() else: if info.flags & WORD: - return EndOfStringLineU() + element = EndOfStringLineU() else: - return EndOfStringLine() - elif ch == "{": - # Looks like a limited quantifier. - saved_pos_2 = source.pos - source.pos = saved_pos - counts = parse_quantifier(source, info) - if counts: - # A quantifier where we expected an element. - raise error("nothing to repeat at position %d" % saved_pos_2) + element = EndOfStringLine() - # Not a quantifier, so it's a literal. - source.pos = saved_pos_2 - return make_character(info, ord(ch)) - elif ch in "?*+": - # A quantifier where we expected an element. - raise error("nothing to repeat at position %d" % saved_pos) + return characters, case_flags, element + elif ch in "?*+{": + # Looks like a quantifier. + return characters, case_flags, (ch, saved_pos) else: # A literal. - return make_character(info, ord(ch)) + characters.append(ord(ch)) else: # A literal. - return make_character(info, ord(ch)) + characters.append(ord(ch)) def parse_paren(source, info): - "Parses a parenthesised subpattern or a flag." + """Parses a parenthesised subpattern or a flag. Returns FLAGS if it's an + inline flag. 
+ """ saved_pos = source.pos ch = source.get() if ch == "?": @@ -897,6 +945,10 @@ def parse_flags(source, info): else: flags_off = 0 + if flags_on & LOCALE: + # Remember that this pattern as an inline locale flag. + info.inline_locale = True + return flags_on, flags_off def parse_subpattern(source, info, flags_on, flags_off): @@ -913,30 +965,10 @@ def parse_subpattern(source, info, flags_on, flags_off): return subpattern -def parse_positional_flags(source, info, flags_on, flags_off): - "Parses positional flags." - version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION - if version == VERSION0: - # Positional flags are global and can only be turned on. - if flags_off: - raise error("bad inline flags: can't turn flags off at position %d" % source.pos) - - new_global_flags = flags_on & ~info.global_flags - if new_global_flags: - info.global_flags |= new_global_flags - - # A global has been turned on, so reparse the pattern. - raise _UnscopedFlagSet(info.global_flags) - else: - info.flags = (info.flags | flags_on) & ~flags_off - - source.ignore_space = bool(info.flags & VERBOSE) - - return None - def parse_flags_subpattern(source, info): """Parses a flags subpattern. It could be inline flags or a subpattern - possibly with local flags. + possibly with local flags. If it's a subpattern, then that's returned; + if it's a inline flags, then FLAGS is returned. """ flags_on, flags_off = parse_flags(source, info) @@ -961,10 +993,30 @@ def parse_flags_subpattern(source, info): return parse_subpattern(source, info, flags_on, flags_off) if source.match(")"): - return parse_positional_flags(source, info, flags_on, flags_off) + parse_positional_flags(source, info, flags_on, flags_off) + return FLAGS raise error("unknown extension at position %d" % source.pos) +def parse_positional_flags(source, info, flags_on, flags_off): + "Parses positional flags." 
+ version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION + if version == VERSION0: + # Positional flags are global and can only be turned on. + if flags_off: + raise error("bad inline flags: can't turn flags off at position %d" % source.pos) + + new_global_flags = flags_on & ~info.global_flags + if new_global_flags: + info.global_flags |= new_global_flags + + # A global has been turned on, so reparse the pattern. + raise _UnscopedFlagSet(info.global_flags) + else: + info.flags = (info.flags | flags_on) & ~flags_off + + source.ignore_space = bool(info.flags & VERBOSE) + def parse_name(source, allow_numeric=False): "Parses a name." name = source.get_while(set(")>"), include=False) @@ -1175,12 +1227,12 @@ def parse_property(source, info, positive, in_set): prop_name, name = parse_property_name(source) if source.match("}"): # It's correctly delimited. - prop = lookup_property(prop_name, name, positive != negate) + prop = lookup_property(prop_name, name, positive != negate, source_pos=source.pos) return make_property(info, prop, in_set) elif ch and ch in "CLMNPSZ": # An abbreviated property, eg \pL. prop = lookup_property(None, ch, positive) - return make_property(info, prop, in_set) + return make_property(info, prop, in_set, source_pos=source.pos) # Not a property, so treat as a literal "p" or "P". source.pos = saved_pos @@ -1375,7 +1427,7 @@ def parse_posix_class(source, info): if not source.match(":]"): raise ParseError() - return lookup_property(prop_name, name, positive=not negate) + return lookup_property(prop_name, name, positive=not negate, source_pos=source.pos) def float_to_rational(flt): "Converts a float to a rational pair." @@ -1416,21 +1468,25 @@ def standardise_name(name): except (ValueError, ZeroDivisionError): return "".join(ch for ch in name if ch not in "_- ").upper() -def lookup_property(property, value, positive): +def lookup_property(property, value, positive, source_pos=None): "Looks up a property." 
# Normalise the names (which may still be lists). property = standardise_name(property) if property else None value = standardise_name(value) + + if (property, value) == ("GENERALCATEGORY", "ASSIGNED"): + property, value, positive = "GENERALCATEGORY", "UNASSIGNED", not positive + if property: # Both the property and the value are provided. prop = PROPERTIES.get(property) if not prop: - raise error("unknown property at position %d" % source.pos) + raise error("unknown property at position %d" % source_pos) prop_id, value_dict = prop val_id = value_dict.get(value) if val_id is None: - raise error("unknown property value at position %d" % source.pos) + raise error("unknown property value at position %d" % source_pos) if "YES" in value_dict and val_id == 0: positive, val_id = not positive, 1 @@ -1470,7 +1526,7 @@ def lookup_property(property, value, positive): return Property((prop_id << 16) | val_id, positive) # Unknown property. - raise error("unknown property at position %d" % source.pos) + raise error("unknown property at position %d" % source_pos) def _compile_replacement(source, pattern, is_unicode): "Compiles a replacement template escape sequence." 
@@ -1660,6 +1716,12 @@ class RegexBase(object): def has_simple_start(self): return False + def compile(self, reverse=False, fuzzy=False): + return self._compile(reverse, fuzzy) + + def dump(self, indent, reverse): + self._dump(indent, reverse) + def is_empty(self): return False @@ -1686,7 +1748,7 @@ class ZeroWidthBase(RegexBase): def get_firstset(self, reverse): return set([None]) - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): flags = 0 if self.positive: flags |= POSITIVE_OP @@ -1696,7 +1758,7 @@ class ZeroWidthBase(RegexBase): flags |= REVERSE_OP return [(self._opcode, flags)] - def dump(self, indent=0, reverse=False): + def _dump(self, indent, reverse): print "%s%s %s" % (INDENT * indent, self._op_name, POS_TEXT[self.positive]) @@ -1710,13 +1772,13 @@ class Any(RegexBase): def has_simple_start(self): return True - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): flags = 0 if fuzzy: flags |= FUZZY_OP return [(self._opcode[reverse], flags)] - def dump(self, indent=0, reverse=False): + def _dump(self, indent, reverse): print "%s%s" % (INDENT * indent, self._op_name) def max_width(self): @@ -1765,11 +1827,11 @@ class Atomic(RegexBase): def has_simple_start(self): return self.subpattern.has_simple_start() - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): return ([(OP.ATOMIC, )] + self.subpattern.compile(reverse, fuzzy) + [(OP.END, )]) - def dump(self, indent=0, reverse=False): + def _dump(self, indent, reverse): print "%sATOMIC" % (INDENT * indent) self.subpattern.dump(indent + 1, reverse) @@ -1822,6 +1884,20 @@ class Branch(RegexBase): return make_sequence(sequence) + def optimise(self, info): + # Flatten branches within branches. + branches = Branch._flatten_branches(info, self.branches) + + # Try to reduce adjacent single-character branches to sets. 
+ branches = Branch._reduce_to_set(info, branches) + + if len(branches) > 1: + sequence = [Branch(branches)] + else: + sequence = branches + + return make_sequence(sequence) + def pack_characters(self, info): self.branches = [b.pack_characters(info) for b in self.branches] return self @@ -1846,7 +1922,7 @@ class Branch(RegexBase): return fs or set([None]) - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): code = [(OP.BRANCH, )] for b in self.branches: code.extend(b.compile(reverse, fuzzy)) @@ -1856,7 +1932,7 @@ class Branch(RegexBase): return code - def dump(self, indent=0, reverse=False): + def _dump(self, indent, reverse): print "%sBRANCH" % (INDENT * indent) self.branches[0].dump(indent + 1, reverse) for b in self.branches[1 : ]: @@ -2181,10 +2257,10 @@ class CallGroup(RegexBase): def remove_captures(self): raise error("group reference not allowed at position %d" % self.position) - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): return [(OP.GROUP_CALL, self.call_ref)] - def dump(self, indent=0, reverse=False): + def _dump(self, indent, reverse): print "%sGROUP_CALL %s" % (INDENT * indent, self.group) def __eq__(self, other): @@ -2229,7 +2305,7 @@ class Character(RegexBase): def has_simple_start(self): return True - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): flags = 0 if self.positive: flags |= POSITIVE_OP @@ -2248,7 +2324,7 @@ class Character(RegexBase): return code.compile(reverse, fuzzy) - def dump(self, indent=0, reverse=False): + def _dump(self, indent, reverse): display = repr(unichr(self.value)).lstrip("bu") print "%sCHARACTER %s %s%s" % (INDENT * indent, POS_TEXT[self.positive], display, CASE_TEXT[self.case_flags]) @@ -2319,7 +2395,7 @@ class Conditional(RegexBase): return (self.yes_item.get_firstset(reverse) | self.no_item.get_firstset(reverse)) - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): code = 
[(OP.GROUP_EXISTS, self.group)] code.extend(self.yes_item.compile(reverse, fuzzy)) add_code = self.no_item.compile(reverse, fuzzy) @@ -2331,7 +2407,7 @@ class Conditional(RegexBase): return code - def dump(self, indent=0, reverse=False): + def _dump(self, indent, reverse): print "%sGROUP_EXISTS %s" % (INDENT * indent, self.group) self.yes_item.dump(indent + 1, reverse) if self.no_item: @@ -2437,7 +2513,7 @@ class Fuzzy(RegexBase): def contains_group(self): return self.subpattern.contains_group() - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): # The individual limits. arguments = [] for e in "dise": @@ -2460,7 +2536,7 @@ class Fuzzy(RegexBase): return ([(OP.FUZZY, flags) + tuple(arguments)] + self.subpattern.compile(reverse, True) + [(OP.END,)]) - def dump(self, indent=0, reverse=False): + def _dump(self, indent, reverse): constraints = self._constraints_to_string() if constraints: constraints = " " + constraints @@ -2511,7 +2587,7 @@ class Fuzzy(RegexBase): return ",".join(constraints) class Grapheme(RegexBase): - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): # Match at least 1 character until a grapheme boundary is reached. Note # that this is the same whether matching forwards or backwards. 
character_matcher = LazyRepeat(AnyAll(), 1, None).compile(reverse, @@ -2520,7 +2596,7 @@ class Grapheme(RegexBase): return character_matcher + boundary_matcher - def dump(self, indent=0, reverse=False): + def _dump(self, indent, reverse): print "%sGRAPHEME" % (INDENT * indent) def max_width(self): @@ -2565,7 +2641,7 @@ class GreedyRepeat(RegexBase): return fs - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): repeat = [self._opcode, self.min_count] if self.max_count is None: repeat.append(UNLIMITED) @@ -2578,7 +2654,7 @@ class GreedyRepeat(RegexBase): return ([tuple(repeat)] + subpattern + [(OP.END, )]) - def dump(self, indent=0, reverse=False): + def _dump(self, indent, reverse): if self.max_count is None: limit = "INF" else: @@ -2655,7 +2731,7 @@ class Group(RegexBase): def has_simple_start(self): return self.subpattern.has_simple_start() - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): code = [] key = self.group, reverse, fuzzy @@ -2676,7 +2752,7 @@ class Group(RegexBase): return code - def dump(self, indent=0, reverse=False): + def _dump(self, indent, reverse): group = self.group if group < 0: group = private_groups[group] @@ -2736,11 +2812,11 @@ class LookAround(RegexBase): def contains_group(self): return self.subpattern.contains_group() - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): return ([(OP.LOOKAROUND, int(self.positive), int(not self.behind))] + self.subpattern.compile(self.behind) + [(OP.END, )]) - def dump(self, indent=0, reverse=False): + def _dump(self, indent, reverse): print "%sLOOK%s %s" % (INDENT * indent, self._dir_text[self.behind], POS_TEXT[self.positive]) self.subpattern.dump(indent + 1, self.behind) @@ -2759,7 +2835,7 @@ class PrecompiledCode(RegexBase): def __init__(self, code): self.code = code - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): return [tuple(self.code)] class 
Property(RegexBase): @@ -2792,7 +2868,7 @@ class Property(RegexBase): def has_simple_start(self): return True - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): flags = 0 if self.positive: flags |= POSITIVE_OP @@ -2802,7 +2878,7 @@ class Property(RegexBase): flags |= FUZZY_OP return [(self._opcode[self.case_flags, reverse], flags, self.value)] - def dump(self, indent=0, reverse=False): + def _dump(self, indent, reverse): prop = PROPERTY_NAMES[self.value >> 16] name, value = prop[0], prop[1][self.value & 0xFFFF] print "%sPROPERTY %s %s:%s%s" % (INDENT * indent, @@ -2867,7 +2943,7 @@ class Range(RegexBase): return Branch(items) - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): flags = 0 if self.positive: flags |= POSITIVE_OP @@ -2878,7 +2954,7 @@ class Range(RegexBase): return [(self._opcode[self.case_flags, reverse], flags, self.lower, self.upper)] - def dump(self, indent=0, reverse=False): + def _dump(self, indent, reverse): display_lower = repr(unichr(self.lower)).lstrip("bu") display_upper = repr(unichr(self.upper)).lstrip("bu") print "%sRANGE %s %s %s%s" % (INDENT * indent, POS_TEXT[self.positive], @@ -2923,13 +2999,13 @@ class RefGroup(RegexBase): def remove_captures(self): raise error("group reference not allowed at position %d" % self.position) - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): flags = 0 if fuzzy: flags |= FUZZY_OP return [(self._opcode[self.case_flags, reverse], flags, self.group)] - def dump(self, indent=0, reverse=False): + def _dump(self, indent, reverse): print "%sREF_GROUP %s%s" % (INDENT * indent, self.group, CASE_TEXT[self.case_flags]) @@ -2974,18 +3050,18 @@ class Sequence(RegexBase): if s.case_flags != case_flags: # Different case sensitivity, so flush, unless neither the # previous nor the new character are cased. 
- if case_flags or is_cased(info, s.value): + if s.case_flags or is_cased(info, s.value): Sequence._flush_characters(info, characters, case_flags, items) case_flags = s.case_flags characters.append(s.value) - elif type(s) is String: + elif type(s) is String or type(s) is Literal: if s.case_flags != case_flags: # Different case sensitivity, so flush, unless the neither # the previous nor the new string are cased. - if not s.case_flags or any(is_cased(info, c) for c in + if s.case_flags or any(is_cased(info, c) for c in characters): Sequence._flush_characters(info, characters, case_flags, items) @@ -3031,7 +3107,7 @@ class Sequence(RegexBase): def has_simple_start(self): return self.items and self.items[0].has_simple_start() - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): seq = self.items if reverse: seq = seq[::-1] @@ -3042,7 +3118,7 @@ class Sequence(RegexBase): return code - def dump(self, indent=0, reverse=False): + def _dump(self, indent, reverse): for s in self.items: s.dump(indent, reverse) @@ -3112,7 +3188,7 @@ class SetBase(RegexBase): def has_simple_start(self): return True - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): flags = 0 if self.positive: flags |= POSITIVE_OP @@ -3128,7 +3204,7 @@ class SetBase(RegexBase): return code - def dump(self, indent=0, reverse=False): + def _dump(self, indent, reverse): print "%s%s %s%s" % (INDENT * indent, self._op_name, POS_TEXT[self.positive], CASE_TEXT[self.case_flags]) for i in self.items: @@ -3306,7 +3382,7 @@ class SetUnion(SetBase): return self._handle_case_folding(info, in_set) - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): flags = 0 if self.positive: flags |= POSITIVE_OP @@ -3395,7 +3471,7 @@ class String(RegexBase): def has_simple_start(self): return True - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): flags = 0 if fuzzy: flags |= FUZZY_OP @@ -3404,7 
+3480,7 @@ class String(RegexBase): return [(self._opcode[self.case_flags, reverse], flags, len(self.folded_characters)) + self.folded_characters] - def dump(self, indent=0, reverse=False): + def _dump(self, indent, reverse): display = repr("".join(unichr(c) for c in self.characters)).lstrip("bu") print "%sSTRING %s%s" % (INDENT * indent, display, CASE_TEXT[self.case_flags]) @@ -3415,6 +3491,13 @@ class String(RegexBase): def get_required_string(self, reverse): return 0, self +class Literal(String): + def _dump(self, indent, reverse): + for c in self.characters: + display = ascii("".join(chr(c))).lstrip("bu") + print("{}CHARACTER MATCH {}{}".format(INDENT * indent, + display, CASE_TEXT[self.case_flags])) + class StringSet(RegexBase): _opcode = {(NOCASE, False): OP.STRING_SET, (IGNORECASE, False): OP.STRING_SET_IGN, (FULLCASE, False): OP.STRING_SET, (FULLIGNORECASE, @@ -3433,7 +3516,7 @@ class StringSet(RegexBase): if self.set_key not in info.named_lists_used: info.named_lists_used[self.set_key] = len(info.named_lists_used) - def compile(self, reverse=False, fuzzy=False): + def _compile(self, reverse, fuzzy): index = self.info.named_lists_used[self.set_key] items = self.info.kwargs[self.name] @@ -3469,7 +3552,7 @@ class StringSet(RegexBase): return [(self._opcode[case_flags, reverse], index, min_len, max_len)] - def dump(self, indent=0, reverse=False): + def _dump(self, indent, reverse): print "%sSTRING_SET %s%s" % (INDENT * indent, self.name, CASE_TEXT[self.case_flags]) @@ -3740,6 +3823,7 @@ class Info(object): flags |= DEFAULT_FLAGS[(flags & _ALL_VERSIONS) or DEFAULT_VERSION] self.flags = flags self.global_flags = flags + self.inline_locale = False self.kwargs = kwargs @@ -3799,8 +3883,8 @@ class Info(object): def _check_group_features(info, parsed): """Checks whether the reverse and fuzzy features of the group calls match - the groups which they call.""" - + the groups which they call. 
+ """ call_refs = {} additional_groups = [] for call, reverse, fuzzy in info.group_calls: @@ -3976,12 +4060,12 @@ CHARACTER_ESCAPES = { # Predefined character set escape sequences. CHARSET_ESCAPES = { - "d": lookup_property(None, "DIGIT", True), - "D": lookup_property(None, "DIGIT", False), - "s": lookup_property(None, "SPACE", True), - "S": lookup_property(None, "SPACE", False), - "w": lookup_property(None, "WORD", True), - "W": lookup_property(None, "WORD", False), + "d": lookup_property(None, "Digit", True), + "D": lookup_property(None, "Digit", False), + "s": lookup_property(None, "Space", True), + "S": lookup_property(None, "Space", False), + "w": lookup_property(None, "Word", True), + "W": lookup_property(None, "Word", False), } # Positional escape sequences. diff --git a/src/regex/_regex_unicode.c b/src/regex/_regex_unicode.c index 6b64e0062a..69b1e70962 100644 --- a/src/regex/_regex_unicode.c +++ b/src/regex/_regex_unicode.c @@ -1,3 +1,5 @@ +/* For Unicode version 7.0.0 */ + #include "_regex_unicode.h" #define RE_BLANK_MASK ((1 << RE_PROP_ZL) | (1 << RE_PROP_ZP)) @@ -34,7 +36,9 @@ char* re_strings[] = { "1000", "10000", "100000", + "1000000", "100000000", + "10000000000", "1000000000000", "103", "107", @@ -48,6 +52,7 @@ char* re_strings[] = { "13/2", "130", "132", + "133", "14", "15", "15/2", @@ -160,6 +165,7 @@ char* re_strings[] = { "ABOVELEFT", "ABOVERIGHT", "AEGEANNUMBERS", + "AGHB", "AHEX", "AI", "AIN", @@ -208,6 +214,7 @@ char* re_strings[] = { "ATA", "ATAR", "ATB", + "ATBL", "ATERM", "ATTACHEDABOVE", "ATTACHEDABOVERIGHT", @@ -226,6 +233,8 @@ char* re_strings[] = { "BAMUMSUP", "BAMUMSUPPLEMENT", "BASICLATIN", + "BASS", + "BASSAVAH", "BATAK", "BATK", "BB", @@ -261,6 +270,7 @@ char* re_strings[] = { "BR", "BRAH", "BRAHMI", + "BRAHMIJOININGNUMBER", "BRAI", "BRAILLE", "BRAILLEPATTERNS", @@ -284,12 +294,14 @@ char* re_strings[] = { "CANONICAL", "CANONICALCOMBININGCLASS", "CANS", + "CANTILLATIONMARK", "CARI", "CARIAN", "CARRIAGERETURN", "CASED", 
"CASEDLETTER", "CASEIGNORABLE", + "CAUCASIANALBANIAN", "CB", "CC", "CCC", @@ -376,6 +388,7 @@ char* re_strings[] = { "CO", "COM", "COMBININGDIACRITICALMARKS", + "COMBININGDIACRITICALMARKSEXTENDED", "COMBININGDIACRITICALMARKSFORSYMBOLS", "COMBININGDIACRITICALMARKSSUPPLEMENT", "COMBININGHALFMARKS", @@ -395,13 +408,15 @@ char* re_strings[] = { "CONSONANTHEADLETTER", "CONSONANTMEDIAL", "CONSONANTPLACEHOLDER", - "CONSONANTREPHA", + "CONSONANTPRECEDINGREPHA", "CONSONANTSUBJOINED", + "CONSONANTSUCCEEDINGREPHA", "CONTINGENTBREAK", "CONTROL", "CONTROLPICTURES", "COPT", "COPTIC", + "COPTICEPACTNUMBERS", "COUNTINGROD", "COUNTINGRODNUMERALS", "CP", @@ -452,6 +467,7 @@ char* re_strings[] = { "DIA", "DIACRITIC", "DIACRITICALS", + "DIACRITICALSEXT", "DIACRITICALSFORSYMBOLS", "DIACRITICALSSUP", "DIGIT", @@ -465,11 +481,15 @@ char* re_strings[] = { "DSRT", "DT", "DUALJOINING", + "DUPL", + "DUPLOYAN", "E", "EA", "EASTASIANWIDTH", "EGYP", "EGYPTIANHIEROGLYPHS", + "ELBA", + "ELBASAN", "EMOTICONS", "EN", "ENC", @@ -522,9 +542,12 @@ char* re_strings[] = { "GAMAL", "GC", "GCB", + "GEMINATIONMARK", "GENERALCATEGORY", "GENERALPUNCTUATION", "GEOMETRICSHAPES", + "GEOMETRICSHAPESEXT", + "GEOMETRICSHAPESEXTENDED", "GEOR", "GEORGIAN", "GEORGIANSUP", @@ -535,6 +558,8 @@ char* re_strings[] = { "GLUE", "GOTH", "GOTHIC", + "GRAN", + "GRANTHA", "GRAPH", "GRAPHEMEBASE", "GRAPHEMECLUSTERBREAK", @@ -588,6 +613,8 @@ char* re_strings[] = { "HIRA", "HIRAGANA", "HL", + "HMNG", + "HRKT", "HST", "HY", "HYPHEN", @@ -619,7 +646,7 @@ char* re_strings[] = { "INSCRIPTIONALPARTHIAN", "INSEPARABLE", "INSEPERABLE", - "INVISIBLE", + "INVISIBLESTACKER", "IOTASUBSCRIPT", "IPAEXT", "IPAEXTENSIONS", @@ -637,6 +664,7 @@ char* re_strings[] = { "JOINC", "JOINCAUSING", "JOINCONTROL", + "JOINER", "JOININGGROUP", "JOININGTYPE", "JT", @@ -665,6 +693,9 @@ char* re_strings[] = { "KHMER", "KHMERSYMBOLS", "KHMR", + "KHOJ", + "KHOJKI", + "KHUDAWADI", "KNDA", "KNOTTEDHEH", "KTHI", @@ -685,13 +716,16 @@ char* re_strings[] = { 
"LATINEXTB", "LATINEXTC", "LATINEXTD", + "LATINEXTE", "LATINEXTENDEDA", "LATINEXTENDEDADDITIONAL", "LATINEXTENDEDB", "LATINEXTENDEDC", "LATINEXTENDEDD", + "LATINEXTENDEDE", "LATN", "LB", + "LC", "LE", "LEADINGJAMO", "LEFT", @@ -709,7 +743,9 @@ char* re_strings[] = { "LF", "LIMB", "LIMBU", + "LINA", "LINB", + "LINEARA", "LINEARB", "LINEARBIDEOGRAMS", "LINEARBSYLLABARY", @@ -741,12 +777,43 @@ char* re_strings[] = { "LYDIAN", "M", "M&", + "MAHAJANI", + "MAHJ", "MAHJONG", "MAHJONGTILES", "MALAYALAM", "MAND", "MANDAIC", "MANDATORYBREAK", + "MANI", + "MANICHAEAN", + "MANICHAEANALEPH", + "MANICHAEANAYIN", + "MANICHAEANBETH", + "MANICHAEANDALETH", + "MANICHAEANDHAMEDH", + "MANICHAEANFIVE", + "MANICHAEANGIMEL", + "MANICHAEANHETH", + "MANICHAEANHUNDRED", + "MANICHAEANKAPH", + "MANICHAEANLAMEDH", + "MANICHAEANMEM", + "MANICHAEANNUN", + "MANICHAEANONE", + "MANICHAEANPE", + "MANICHAEANQOPH", + "MANICHAEANRESH", + "MANICHAEANSADHE", + "MANICHAEANSAMEKH", + "MANICHAEANTAW", + "MANICHAEANTEN", + "MANICHAEANTETH", + "MANICHAEANTHAMEDH", + "MANICHAEANTWENTY", + "MANICHAEANWAW", + "MANICHAEANYODH", + "MANICHAEANZAYIN", "MARK", "MATH", "MATHALPHANUM", @@ -763,6 +830,8 @@ char* re_strings[] = { "MEETEIMAYEK", "MEETEIMAYEKEXT", "MEETEIMAYEKEXTENSIONS", + "MEND", + "MENDEKIKAKUI", "MERC", "MERO", "MEROITICCURSIVE", @@ -787,6 +856,7 @@ char* re_strings[] = { "ML", "MLYM", "MN", + "MODI", "MODIFIERLETTER", "MODIFIERLETTERS", "MODIFIERSYMBOL", @@ -794,20 +864,27 @@ char* re_strings[] = { "MODIFYINGLETTER", "MONG", "MONGOLIAN", + "MRO", + "MROO", "MTEI", "MUSIC", "MUSICALSYMBOLS", "MYANMAR", "MYANMAREXTA", + "MYANMAREXTB", "MYANMAREXTENDEDA", + "MYANMAREXTENDEDB", "MYMR", "N", "N&", "NA", + "NABATAEAN", "NAN", "NAR", + "NARB", "NARROW", "NB", + "NBAT", "NCHAR", "ND", "NEUTRAL", @@ -824,6 +901,7 @@ char* re_strings[] = { "NOJOININGGROUP", "NONCHARACTERCODEPOINT", "NONE", + "NONJOINER", "NONJOINING", "NONSPACINGMARK", "NONSTARTER", @@ -838,6 +916,7 @@ char* re_strings[] = { "NUKTA", "NUMBER", 
"NUMBERFORMS", + "NUMBERJOINER", "NUMERIC", "NUMERICTYPE", "NUMERICVALUE", @@ -855,6 +934,8 @@ char* re_strings[] = { "OLCHIKI", "OLCK", "OLDITALIC", + "OLDNORTHARABIAN", + "OLDPERMIC", "OLDPERSIAN", "OLDSOUTHARABIAN", "OLDTURKIC", @@ -867,6 +948,7 @@ char* re_strings[] = { "OPTICALCHARACTERRECOGNITION", "ORIYA", "ORKH", + "ORNAMENTALDINGBATS", "ORYA", "OSMA", "OSMANYA", @@ -890,22 +972,29 @@ char* re_strings[] = { "OVERSTRUCK", "P", "P&", + "PAHAWHHMONG", + "PALM", + "PALMYRENE", "PARAGRAPHSEPARATOR", "PATSYN", "PATTERNSYNTAX", "PATTERNWHITESPACE", "PATWS", + "PAUC", + "PAUCINHAU", "PC", "PD", "PDF", "PDI", "PE", + "PERM", "PF", "PHAG", "PHAGSPA", "PHAISTOS", "PHAISTOSDISC", "PHLI", + "PHLP", "PHNX", "PHOENICIAN", "PHONETICEXT", @@ -919,6 +1008,7 @@ char* re_strings[] = { "POPDIRECTIONALFORMAT", "POPDIRECTIONALISOLATE", "POSTFIXNUMERIC", + "PP", "PR", "PREFIXNUMERIC", "PREPEND", @@ -927,9 +1017,11 @@ char* re_strings[] = { "PRIVATEUSEAREA", "PRTI", "PS", + "PSALTERPAHLAVI", "PUA", "PUNCT", "PUNCTUATION", + "PUREKILLER", "QAAC", "QAAI", "QAF", @@ -988,10 +1080,15 @@ char* re_strings[] = { "SHAVIAN", "SHAW", "SHIN", + "SHORTHANDFORMATCONTROLS", "SHRD", + "SIDD", + "SIDDHAM", + "SIND", "SINGLEQUOTE", "SINH", "SINHALA", + "SINHALAARCHAICNUMBERS", "SK", "SM", "SMALL", @@ -1013,6 +1110,7 @@ char* re_strings[] = { "SQUARE", "ST", "STERM", + "STRAIGHTWAW", "SUB", "SUND", "SUNDANESE", @@ -1021,12 +1119,14 @@ char* re_strings[] = { "SUP", "SUPARROWSA", "SUPARROWSB", + "SUPARROWSC", "SUPER", "SUPERANDSUB", "SUPERSCRIPTSANDSUBSCRIPTS", "SUPMATHOPERATORS", "SUPPLEMENTALARROWSA", "SUPPLEMENTALARROWSB", + "SUPPLEMENTALARROWSC", "SUPPLEMENTALMATHEMATICALOPERATORS", "SUPPLEMENTALPUNCTUATION", "SUPPLEMENTARYPRIVATEUSEAREAA", @@ -1077,6 +1177,8 @@ char* re_strings[] = { "TIBETAN", "TIBT", "TIFINAGH", + "TIRH", + "TIRHUTA", "TITLECASELETTER", "TONELETTER", "TONEMARK", @@ -1128,6 +1230,8 @@ char* re_strings[] = { "VS", "VSSUP", "W", + "WARA", + "WARANGCITI", "WAW", "WB", "WHITESPACE", 
@@ -1173,1255 +1277,1370 @@ char* re_strings[] = { "ZZZZ", }; -/* strings: 10575 bytes. */ +/* strings: 11780 bytes. */ /* properties. */ RE_Property re_properties[] = { - { 506, 0, 0}, - { 504, 0, 0}, - { 231, 1, 1}, - { 230, 1, 1}, - { 958, 2, 2}, - { 956, 2, 2}, - {1118, 3, 3}, - {1113, 3, 3}, - { 521, 4, 4}, - { 505, 4, 4}, - { 964, 5, 5}, - { 955, 5, 5}, - { 732, 6, 6}, - { 155, 7, 6}, - { 154, 7, 6}, - { 707, 8, 6}, - { 706, 8, 6}, - {1088, 9, 6}, - {1087, 9, 6}, - { 271, 10, 6}, - { 273, 11, 6}, - { 324, 11, 6}, - { 319, 12, 6}, - { 399, 12, 6}, - { 321, 13, 6}, - { 401, 13, 6}, - { 320, 14, 6}, - { 400, 14, 6}, - { 317, 15, 6}, - { 397, 15, 6}, - { 318, 16, 6}, - { 398, 16, 6}, - { 585, 17, 6}, - { 581, 17, 6}, - { 577, 18, 6}, - { 576, 18, 6}, - {1126, 19, 6}, - {1125, 19, 6}, - {1124, 20, 6}, - {1123, 20, 6}, - { 424, 21, 6}, - { 432, 21, 6}, - { 522, 22, 6}, - { 530, 22, 6}, - { 520, 23, 6}, - { 524, 23, 6}, - { 523, 24, 6}, - { 531, 24, 6}, - {1114, 25, 6}, - { 987, 25, 6}, - {1121, 25, 6}, - { 223, 26, 6}, - { 221, 26, 6}, - { 620, 27, 6}, - { 618, 27, 6}, - { 417, 28, 6}, - { 574, 29, 6}, - { 921, 30, 6}, - { 918, 30, 6}, - {1051, 31, 6}, - {1050, 31, 6}, - { 862, 32, 6}, - { 844, 32, 6}, - { 565, 33, 6}, - { 564, 33, 6}, - { 186, 34, 6}, - { 144, 34, 6}, - { 855, 35, 6}, - { 828, 35, 6}, - { 579, 36, 6}, - { 578, 36, 6}, - { 434, 37, 6}, - { 433, 37, 6}, - { 483, 38, 6}, - { 481, 38, 6}, - { 861, 39, 6}, - { 843, 39, 6}, - { 867, 40, 6}, - { 868, 40, 6}, - { 806, 41, 6}, - { 792, 41, 6}, - { 857, 42, 6}, - { 833, 42, 6}, - { 583, 43, 6}, - { 582, 43, 6}, - { 586, 44, 6}, - { 584, 44, 6}, - { 923, 45, 6}, - {1084, 46, 6}, - {1080, 46, 6}, - { 856, 47, 6}, - { 830, 47, 6}, - { 426, 48, 6}, - { 425, 48, 6}, - { 983, 49, 6}, - { 959, 49, 6}, - { 705, 50, 6}, - { 704, 50, 6}, - { 859, 51, 6}, - { 835, 51, 6}, - { 858, 52, 6}, - { 834, 52, 6}, - { 996, 53, 6}, - {1093, 54, 6}, - {1109, 54, 6}, - { 877, 55, 6}, - { 878, 55, 6}, - { 876, 56, 6}, - { 875, 56, 
6}, - { 553, 57, 7}, - { 572, 57, 7}, - { 222, 58, 8}, - { 213, 58, 8}, - { 266, 59, 9}, - { 276, 59, 9}, - { 423, 60, 10}, - { 447, 60, 10}, - { 451, 61, 11}, - { 450, 61, 11}, - { 621, 62, 12}, - { 616, 62, 12}, - { 622, 63, 13}, - { 623, 63, 13}, - { 697, 64, 14}, - { 675, 64, 14}, - { 823, 65, 15}, - { 817, 65, 15}, - { 824, 66, 16}, - { 826, 66, 16}, - { 225, 67, 6}, - { 224, 67, 6}, - { 589, 68, 17}, - { 597, 68, 17}, - { 591, 69, 18}, - { 598, 69, 18}, - { 158, 70, 6}, - { 153, 70, 6}, - { 165, 71, 6}, - { 185, 72, 6}, - { 187, 73, 6}, - { 229, 74, 6}, - { 519, 75, 6}, - { 906, 76, 6}, - {1117, 77, 6}, - {1122, 78, 6}, + { 525, 0, 0}, + { 522, 0, 0}, + { 238, 1, 1}, + { 237, 1, 1}, + {1048, 2, 2}, + {1046, 2, 2}, + {1220, 3, 3}, + {1215, 3, 3}, + { 544, 4, 4}, + { 523, 4, 4}, + {1054, 5, 5}, + {1045, 5, 5}, + { 797, 6, 6}, + { 159, 7, 6}, + { 158, 7, 6}, + { 741, 8, 6}, + { 740, 8, 6}, + {1188, 9, 6}, + {1187, 9, 6}, + { 280, 10, 6}, + { 282, 11, 6}, + { 334, 11, 6}, + { 329, 12, 6}, + { 412, 12, 6}, + { 331, 13, 6}, + { 414, 13, 6}, + { 330, 14, 6}, + { 413, 14, 6}, + { 327, 15, 6}, + { 410, 15, 6}, + { 328, 16, 6}, + { 411, 16, 6}, + { 610, 17, 6}, + { 606, 17, 6}, + { 602, 18, 6}, + { 601, 18, 6}, + {1228, 19, 6}, + {1227, 19, 6}, + {1226, 20, 6}, + {1225, 20, 6}, + { 437, 21, 6}, + { 445, 21, 6}, + { 545, 22, 6}, + { 553, 22, 6}, + { 543, 23, 6}, + { 547, 23, 6}, + { 546, 24, 6}, + { 554, 24, 6}, + {1216, 25, 6}, + {1223, 25, 6}, + {1082, 25, 6}, + { 230, 26, 6}, + { 228, 26, 6}, + { 645, 27, 6}, + { 643, 27, 6}, + { 430, 28, 6}, + { 599, 29, 6}, + {1011, 30, 6}, + {1008, 30, 6}, + {1149, 31, 6}, + {1148, 31, 6}, + { 942, 32, 6}, + { 923, 32, 6}, + { 588, 33, 6}, + { 587, 33, 6}, + { 190, 34, 6}, + { 148, 34, 6}, + { 935, 35, 6}, + { 905, 35, 6}, + { 604, 36, 6}, + { 603, 36, 6}, + { 447, 37, 6}, + { 446, 37, 6}, + { 501, 38, 6}, + { 499, 38, 6}, + { 941, 39, 6}, + { 922, 39, 6}, + { 947, 40, 6}, + { 948, 40, 6}, + { 881, 41, 6}, + { 867, 41, 6}, + { 
937, 42, 6}, + { 910, 42, 6}, + { 608, 43, 6}, + { 607, 43, 6}, + { 611, 44, 6}, + { 609, 44, 6}, + {1013, 45, 6}, + {1184, 46, 6}, + {1180, 46, 6}, + { 936, 47, 6}, + { 907, 47, 6}, + { 439, 48, 6}, + { 438, 48, 6}, + {1078, 49, 6}, + {1049, 49, 6}, + { 739, 50, 6}, + { 738, 50, 6}, + { 939, 51, 6}, + { 912, 51, 6}, + { 938, 52, 6}, + { 911, 52, 6}, + {1091, 53, 6}, + {1193, 54, 6}, + {1209, 54, 6}, + { 960, 55, 6}, + { 961, 55, 6}, + { 959, 56, 6}, + { 958, 56, 6}, + { 576, 57, 7}, + { 597, 57, 7}, + { 229, 58, 8}, + { 220, 58, 8}, + { 274, 59, 9}, + { 286, 59, 9}, + { 436, 60, 10}, + { 461, 60, 10}, + { 467, 61, 11}, + { 466, 61, 11}, + { 647, 62, 12}, + { 641, 62, 12}, + { 648, 63, 13}, + { 649, 63, 13}, + { 731, 64, 14}, + { 706, 64, 14}, + { 900, 65, 15}, + { 893, 65, 15}, + { 901, 66, 16}, + { 903, 66, 16}, + { 232, 67, 6}, + { 231, 67, 6}, + { 614, 68, 17}, + { 622, 68, 17}, + { 616, 69, 18}, + { 623, 69, 18}, + { 162, 70, 6}, + { 157, 70, 6}, + { 169, 71, 6}, + { 236, 72, 6}, + { 542, 73, 6}, + { 994, 74, 6}, + {1219, 75, 6}, + {1224, 76, 6}, }; -/* properties: 580 bytes. */ +/* properties: 572 bytes. */ /* property values. 
*/ RE_PropertyValue re_property_values[] = { - {1081, 0, 0}, - { 355, 0, 0}, - {1089, 0, 1}, - { 714, 0, 1}, - { 708, 0, 2}, - { 701, 0, 2}, - {1061, 0, 3}, - { 713, 0, 3}, - { 771, 0, 4}, - { 702, 0, 4}, - { 860, 0, 5}, - { 703, 0, 5}, - { 809, 0, 6}, - { 770, 0, 6}, - { 465, 0, 7}, - { 740, 0, 7}, - { 989, 0, 8}, - { 739, 0, 8}, - { 422, 0, 9}, - { 438, 0, 9}, - { 793, 0, 9}, - { 689, 0, 10}, - { 801, 0, 10}, - { 864, 0, 11}, - { 802, 0, 11}, - { 988, 0, 12}, - {1150, 0, 12}, - { 699, 0, 13}, - {1148, 0, 13}, - { 874, 0, 14}, - {1149, 0, 14}, - { 382, 0, 15}, - { 356, 0, 15}, - { 275, 0, 15}, - { 497, 0, 16}, - { 314, 0, 16}, - { 907, 0, 17}, - { 357, 0, 17}, - {1018, 0, 18}, - { 391, 0, 18}, - { 418, 0, 19}, - { 880, 0, 19}, - { 847, 0, 20}, - { 910, 0, 20}, - { 353, 0, 21}, - { 883, 0, 21}, - { 372, 0, 22}, - { 879, 0, 22}, - { 865, 0, 23}, - { 899, 0, 23}, - { 737, 0, 24}, - { 977, 0, 24}, - { 395, 0, 25}, - { 956, 0, 25}, - { 773, 0, 26}, - { 976, 0, 26}, - { 866, 0, 27}, - { 982, 0, 27}, - { 596, 0, 28}, - { 896, 0, 28}, - { 492, 0, 29}, - { 884, 0, 29}, - { 854, 0, 30}, - { 260, 0, 30}, - { 259, 0, 30}, - { 687, 0, 31}, - { 653, 0, 31}, - { 654, 0, 31}, - { 731, 0, 32}, - { 723, 0, 32}, - { 724, 0, 32}, - { 363, 0, 32}, - { 820, 0, 33}, - { 785, 0, 33}, - { 786, 0, 33}, - { 913, 0, 34}, - { 912, 0, 34}, - { 873, 0, 34}, - { 872, 0, 34}, - {1023, 0, 35}, - { 946, 0, 35}, - { 945, 0, 35}, - { 966, 0, 36}, - {1144, 0, 36}, - {1143, 0, 36}, - { 272, 0, 37}, - { 803, 1, 0}, - { 791, 1, 0}, - { 209, 1, 1}, - { 185, 1, 1}, - { 663, 1, 2}, - { 661, 1, 2}, - { 662, 1, 2}, - { 669, 1, 3}, - { 664, 1, 3}, - { 671, 1, 4}, - { 666, 1, 4}, - { 606, 1, 5}, - { 605, 1, 5}, - { 990, 1, 6}, - { 772, 1, 6}, - { 359, 1, 7}, - { 435, 1, 7}, - { 526, 1, 8}, - { 525, 1, 8}, - { 404, 1, 9}, - { 410, 1, 10}, - { 409, 1, 10}, - { 411, 1, 10}, - { 181, 1, 11}, - { 559, 1, 12}, - { 168, 1, 13}, - {1025, 1, 14}, - { 180, 1, 15}, - { 179, 1, 15}, - {1056, 1, 16}, - { 799, 1, 17}, - { 
950, 1, 18}, - { 729, 1, 19}, - { 170, 1, 20}, - { 169, 1, 20}, - { 429, 1, 21}, - { 219, 1, 22}, - { 534, 1, 23}, - { 532, 1, 24}, - { 849, 1, 25}, - {1042, 1, 26}, - {1049, 1, 27}, - { 636, 1, 28}, - { 727, 1, 29}, - { 975, 1, 30}, - {1057, 1, 31}, - { 658, 1, 32}, - {1058, 1, 33}, - { 781, 1, 34}, - { 510, 1, 35}, - { 549, 1, 36}, - { 611, 1, 36}, - { 469, 1, 37}, - { 475, 1, 38}, - { 474, 1, 38}, - { 323, 1, 39}, - {1082, 1, 40}, - { 264, 1, 40}, - {1076, 1, 40}, - { 832, 1, 41}, - { 943, 1, 42}, - {1028, 1, 43}, - { 556, 1, 44}, - { 255, 1, 45}, - {1030, 1, 46}, - { 646, 1, 47}, - { 777, 1, 48}, - {1083, 1, 49}, - {1077, 1, 49}, - { 692, 1, 50}, - {1033, 1, 51}, - { 796, 1, 52}, - { 647, 1, 53}, - { 253, 1, 54}, - {1034, 1, 55}, - { 204, 1, 56}, - { 999, 1, 57}, - { 210, 1, 58}, - { 686, 1, 59}, - { 836, 1, 60}, - {1001, 1, 61}, - {1000, 1, 61}, + {1181, 0, 0}, + { 365, 0, 0}, + {1189, 0, 1}, + { 748, 0, 1}, + { 742, 0, 2}, + { 735, 0, 2}, + {1161, 0, 3}, + { 747, 0, 3}, + { 839, 0, 4}, + { 736, 0, 4}, + { 940, 0, 5}, + { 737, 0, 5}, + { 885, 0, 6}, + { 837, 0, 6}, + { 483, 0, 7}, + { 805, 0, 7}, + {1084, 0, 8}, + { 804, 0, 8}, + { 435, 0, 9}, + { 868, 0, 9}, + { 452, 0, 9}, + { 721, 0, 10}, + { 876, 0, 10}, + { 944, 0, 11}, + { 877, 0, 11}, + {1083, 0, 12}, + {1252, 0, 12}, + { 733, 0, 13}, + {1250, 0, 13}, + { 957, 0, 14}, + {1251, 0, 14}, + { 394, 0, 15}, + { 285, 0, 15}, + { 366, 0, 15}, + { 515, 0, 16}, + { 324, 0, 16}, + { 995, 0, 17}, + { 367, 0, 17}, + {1116, 0, 18}, + { 404, 0, 18}, + { 431, 0, 19}, + { 965, 0, 19}, + { 926, 0, 20}, + { 998, 0, 20}, + { 363, 0, 21}, + { 968, 0, 21}, + { 383, 0, 22}, + { 964, 0, 22}, + { 945, 0, 23}, + { 986, 0, 23}, + { 802, 0, 24}, + {1072, 0, 24}, + { 408, 0, 25}, + {1046, 0, 25}, + { 841, 0, 26}, + {1071, 0, 26}, + { 946, 0, 27}, + {1077, 0, 27}, + { 621, 0, 28}, + { 983, 0, 28}, + { 510, 0, 29}, + { 970, 0, 29}, + { 934, 0, 30}, + { 267, 0, 30}, + { 268, 0, 30}, + { 719, 0, 31}, + { 682, 0, 31}, + { 683, 0, 31}, + 
{ 796, 0, 32}, + { 757, 0, 32}, + { 374, 0, 32}, + { 758, 0, 32}, + { 896, 0, 33}, + { 857, 0, 33}, + { 858, 0, 33}, + {1002, 0, 34}, + { 952, 0, 34}, + {1001, 0, 34}, + { 953, 0, 34}, + {1121, 0, 35}, + {1035, 0, 35}, + {1036, 0, 35}, + {1056, 0, 36}, + {1245, 0, 36}, + {1246, 0, 36}, + { 281, 0, 37}, + { 707, 0, 37}, + { 191, 0, 38}, + { 878, 1, 0}, + { 865, 1, 0}, + { 214, 1, 1}, + { 189, 1, 1}, + { 692, 1, 2}, + { 691, 1, 2}, + { 690, 1, 2}, + { 699, 1, 3}, + { 693, 1, 3}, + { 701, 1, 4}, + { 695, 1, 4}, + { 631, 1, 5}, + { 630, 1, 5}, + {1085, 1, 6}, + { 840, 1, 6}, + { 369, 1, 7}, + { 448, 1, 7}, + { 549, 1, 8}, + { 548, 1, 8}, + { 417, 1, 9}, + { 423, 1, 10}, + { 422, 1, 10}, + { 424, 1, 10}, + { 185, 1, 11}, + { 582, 1, 12}, + { 172, 1, 13}, + {1123, 1, 14}, + { 184, 1, 15}, + { 183, 1, 15}, + {1154, 1, 16}, + { 874, 1, 17}, + {1040, 1, 18}, + { 765, 1, 19}, + { 174, 1, 20}, + { 173, 1, 20}, + { 442, 1, 21}, + { 226, 1, 22}, + { 557, 1, 23}, + { 555, 1, 24}, + { 928, 1, 25}, + {1140, 1, 26}, + {1147, 1, 27}, + { 662, 1, 28}, + { 763, 1, 29}, + {1069, 1, 30}, + {1155, 1, 31}, + { 687, 1, 32}, + {1156, 1, 33}, + { 851, 1, 34}, + { 531, 1, 35}, + { 572, 1, 36}, + { 636, 1, 36}, + { 487, 1, 37}, + { 493, 1, 38}, + { 492, 1, 38}, + { 333, 1, 39}, + {1182, 1, 40}, + {1176, 1, 40}, + { 272, 1, 40}, + { 909, 1, 41}, + {1033, 1, 42}, + {1126, 1, 43}, + { 579, 1, 44}, + { 263, 1, 45}, + {1128, 1, 46}, + { 672, 1, 47}, + { 845, 1, 48}, + {1183, 1, 49}, + {1177, 1, 49}, + { 724, 1, 50}, + {1131, 1, 51}, + { 871, 1, 52}, + { 673, 1, 53}, + { 261, 1, 54}, + {1132, 1, 55}, + { 370, 1, 56}, + { 449, 1, 56}, + { 209, 1, 57}, + {1095, 1, 58}, + { 217, 1, 59}, + { 718, 1, 60}, + { 913, 1, 61}, {1097, 1, 62}, {1096, 1, 62}, - { 893, 1, 63}, - { 892, 1, 63}, - { 894, 1, 64}, - { 895, 1, 64}, - { 361, 1, 65}, - { 437, 1, 65}, - { 670, 1, 66}, - { 665, 1, 66}, - { 528, 1, 67}, - { 527, 1, 67}, - { 507, 1, 68}, - { 913, 1, 68}, - {1007, 1, 69}, - {1006, 1, 69}, - { 396, 1, 70}, - 
{ 360, 1, 71}, - { 436, 1, 71}, - { 364, 1, 71}, - { 688, 1, 72}, - { 821, 1, 73}, - { 184, 1, 74}, - { 735, 1, 75}, - { 736, 1, 75}, - { 762, 1, 76}, - { 767, 1, 76}, - { 383, 1, 77}, - { 848, 1, 78}, - { 829, 1, 78}, - { 458, 1, 79}, - { 457, 1, 79}, - { 241, 1, 80}, - { 232, 1, 81}, - { 508, 1, 82}, - { 759, 1, 83}, - { 766, 1, 83}, - { 439, 1, 84}, - { 757, 1, 85}, - { 763, 1, 85}, - {1009, 1, 86}, - {1003, 1, 86}, - { 247, 1, 87}, - { 246, 1, 87}, - {1010, 1, 88}, - {1004, 1, 88}, - { 758, 1, 89}, - { 764, 1, 89}, - {1011, 1, 90}, - {1008, 1, 90}, - { 760, 1, 91}, - { 756, 1, 91}, - { 515, 1, 92}, - { 672, 1, 93}, - { 667, 1, 93}, - { 385, 1, 94}, - { 512, 1, 95}, - { 511, 1, 95}, - {1060, 1, 96}, - { 472, 1, 97}, - { 470, 1, 97}, - { 407, 1, 98}, - { 405, 1, 98}, - {1012, 1, 99}, - {1017, 1, 99}, - { 341, 1, 100}, - { 340, 1, 100}, - { 635, 1, 101}, - { 634, 1, 101}, - { 580, 1, 102}, - { 576, 1, 102}, - { 344, 1, 103}, - { 343, 1, 103}, - { 570, 1, 104}, - { 638, 1, 105}, - { 235, 1, 106}, - { 548, 1, 107}, - { 369, 1, 107}, - { 633, 1, 108}, - { 237, 1, 109}, - { 236, 1, 109}, - { 342, 1, 110}, - { 641, 1, 111}, - { 639, 1, 111}, - { 462, 1, 112}, - { 461, 1, 112}, - { 330, 1, 113}, - { 328, 1, 113}, - { 346, 1, 114}, - { 336, 1, 114}, - {1138, 1, 115}, - {1137, 1, 115}, - { 345, 1, 116}, - { 327, 1, 116}, - {1140, 1, 117}, - {1139, 1, 118}, - { 700, 1, 119}, - {1091, 1, 120}, - { 408, 1, 121}, - { 406, 1, 121}, - { 206, 1, 122}, - { 774, 1, 123}, - { 673, 1, 124}, - { 668, 1, 124}, - {1022, 1, 125}, - { 366, 1, 126}, - { 590, 1, 126}, - { 886, 1, 127}, - { 954, 1, 128}, - { 431, 1, 129}, - { 430, 1, 129}, - { 642, 1, 130}, - { 927, 1, 131}, - { 550, 1, 132}, - { 612, 1, 132}, - { 615, 1, 133}, - { 316, 1, 134}, - { 783, 1, 135}, - { 782, 1, 135}, - {1035, 1, 136}, - { 746, 1, 137}, - { 745, 1, 137}, - { 473, 1, 138}, - { 471, 1, 138}, - { 744, 1, 139}, - { 552, 1, 140}, - { 547, 1, 140}, - { 551, 1, 141}, - { 613, 1, 141}, - { 568, 1, 142}, - { 566, 1, 
143}, - { 567, 1, 143}, - { 709, 1, 144}, - { 908, 1, 145}, - { 911, 1, 145}, - { 907, 1, 145}, - { 332, 1, 146}, - { 334, 1, 146}, - { 157, 1, 147}, - { 156, 1, 147}, - { 177, 1, 148}, - { 175, 1, 148}, - {1094, 1, 149}, - {1109, 1, 149}, - {1100, 1, 150}, - { 362, 1, 151}, - { 541, 1, 151}, - { 331, 1, 152}, - { 329, 1, 152}, - { 980, 1, 153}, - { 979, 1, 153}, - { 178, 1, 154}, - { 176, 1, 154}, - { 543, 1, 155}, - { 540, 1, 155}, - { 991, 1, 156}, - { 696, 1, 157}, - { 695, 1, 158}, - { 143, 1, 159}, - { 163, 1, 160}, - { 164, 1, 161}, - { 888, 1, 162}, - { 887, 1, 162}, - { 720, 1, 163}, - { 269, 1, 164}, - { 838, 1, 165}, - { 518, 1, 166}, - {1079, 1, 167}, - { 839, 1, 168}, - { 427, 1, 169}, - { 969, 1, 170}, - { 853, 1, 171}, - { 403, 1, 172}, - { 587, 1, 173}, - { 891, 1, 174}, - { 722, 1, 175}, - { 750, 1, 176}, - { 749, 1, 177}, - { 645, 1, 178}, - { 840, 1, 179}, - { 198, 1, 180}, - { 600, 1, 181}, - { 599, 1, 182}, - { 841, 1, 183}, - { 942, 1, 184}, - { 941, 1, 184}, - { 244, 1, 185}, - { 627, 1, 186}, - { 985, 1, 187}, - { 315, 1, 188}, - { 968, 1, 189}, - {1039, 1, 190}, - { 392, 1, 191}, - { 394, 1, 192}, - { 393, 1, 192}, - { 453, 1, 193}, - { 208, 1, 194}, - { 207, 1, 194}, - { 751, 1, 195}, - { 631, 1, 196}, - { 630, 1, 196}, - { 258, 1, 197}, - { 257, 1, 197}, - { 780, 1, 198}, - { 779, 1, 198}, - { 162, 1, 199}, - { 161, 1, 199}, - {1037, 1, 200}, - {1036, 1, 200}, - { 387, 1, 201}, - { 386, 1, 201}, - { 734, 1, 202}, - { 733, 1, 202}, - { 173, 1, 203}, - { 172, 1, 203}, - { 726, 1, 204}, - { 725, 1, 204}, - { 441, 1, 205}, - { 440, 1, 205}, - { 897, 1, 206}, - { 459, 1, 207}, - { 460, 1, 207}, - { 464, 1, 208}, - { 463, 1, 208}, - { 761, 1, 209}, - { 765, 1, 209}, - { 454, 1, 210}, - {1073, 1, 211}, - {1072, 1, 211}, - { 150, 1, 212}, - { 149, 1, 212}, - { 347, 1, 213}, - { 337, 1, 213}, - { 348, 1, 214}, - { 338, 1, 214}, - { 349, 1, 215}, - { 339, 1, 215}, - { 333, 1, 216}, - { 335, 1, 216}, - {1031, 1, 217}, - {1095, 1, 218}, - {1110, 1, 
218}, - {1013, 1, 219}, - {1015, 1, 219}, - {1014, 1, 220}, - {1016, 1, 220}, - {1085, 2, 0}, - {1154, 2, 0}, - { 365, 2, 1}, - {1153, 2, 1}, - { 660, 2, 2}, - { 674, 2, 2}, - { 525, 2, 3}, - { 529, 2, 3}, - { 404, 2, 4}, - { 412, 2, 4}, - { 181, 2, 5}, - { 183, 2, 5}, - { 559, 2, 6}, - { 558, 2, 6}, - { 168, 2, 7}, - { 167, 2, 7}, - {1025, 2, 8}, - {1024, 2, 8}, - {1056, 2, 9}, - {1055, 2, 9}, - { 429, 2, 10}, - { 428, 2, 10}, - { 219, 2, 11}, - { 218, 2, 11}, - { 534, 2, 12}, - { 535, 2, 12}, - { 532, 2, 13}, - { 533, 2, 13}, - { 849, 2, 14}, - { 851, 2, 14}, - {1042, 2, 15}, - {1043, 2, 15}, - {1049, 2, 16}, - {1048, 2, 16}, - { 636, 2, 17}, - { 649, 2, 17}, - { 727, 2, 18}, - { 769, 2, 18}, - { 975, 2, 19}, - { 974, 2, 19}, - {1057, 2, 20}, - { 658, 2, 21}, - { 659, 2, 21}, - {1058, 2, 22}, - {1059, 2, 22}, - { 781, 2, 23}, - { 784, 2, 23}, - { 510, 2, 24}, - { 509, 2, 24}, - { 547, 2, 25}, - { 546, 2, 25}, - { 469, 2, 26}, - { 468, 2, 26}, - { 323, 2, 27}, - { 322, 2, 27}, - { 263, 2, 28}, - { 267, 2, 28}, - { 832, 2, 29}, - { 831, 2, 29}, - { 943, 2, 30}, - { 944, 2, 30}, - { 646, 2, 31}, - { 648, 2, 31}, - { 777, 2, 32}, - { 776, 2, 32}, - { 570, 2, 33}, - { 569, 2, 33}, - { 638, 2, 34}, - { 629, 2, 34}, - { 235, 2, 35}, - { 234, 2, 35}, - { 545, 2, 36}, - { 554, 2, 36}, - {1135, 2, 37}, - {1136, 2, 37}, - { 838, 2, 38}, - { 610, 2, 38}, - { 518, 2, 39}, - { 517, 2, 39}, - { 427, 2, 40}, - { 446, 2, 40}, - { 593, 2, 41}, - {1147, 2, 41}, - { 915, 2, 41}, - {1028, 2, 42}, - {1054, 2, 42}, - { 556, 2, 43}, - { 555, 2, 43}, - { 255, 2, 44}, - { 254, 2, 44}, - {1030, 2, 45}, - {1029, 2, 45}, - { 692, 2, 46}, - { 691, 2, 46}, - {1033, 2, 47}, - {1040, 2, 47}, - { 694, 2, 48}, - { 693, 2, 48}, - {1079, 2, 49}, - {1078, 2, 49}, - { 969, 2, 50}, - { 970, 2, 50}, - { 853, 2, 51}, - { 852, 2, 51}, + {1197, 1, 63}, + {1196, 1, 63}, + { 980, 1, 64}, + { 979, 1, 64}, + { 981, 1, 65}, + { 982, 1, 65}, + { 372, 1, 66}, + { 451, 1, 66}, + { 700, 1, 67}, + { 694, 1, 67}, + { 
551, 1, 68}, + { 550, 1, 68}, + { 526, 1, 69}, + {1002, 1, 69}, + {1104, 1, 70}, + {1103, 1, 70}, + { 409, 1, 71}, + { 371, 1, 72}, + { 450, 1, 72}, + { 375, 1, 72}, + { 720, 1, 73}, + { 897, 1, 74}, + { 188, 1, 75}, + { 800, 1, 76}, + { 801, 1, 76}, + { 829, 1, 77}, + { 834, 1, 77}, + { 395, 1, 78}, + { 927, 1, 79}, + { 906, 1, 79}, + { 476, 1, 80}, + { 475, 1, 80}, + { 248, 1, 81}, + { 239, 1, 82}, + { 527, 1, 83}, + { 826, 1, 84}, + { 833, 1, 84}, + { 453, 1, 85}, + { 824, 1, 86}, + { 830, 1, 86}, + {1106, 1, 87}, + {1099, 1, 87}, + { 255, 1, 88}, + { 254, 1, 88}, + {1107, 1, 89}, + {1100, 1, 89}, + { 825, 1, 90}, + { 831, 1, 90}, + {1109, 1, 91}, + {1105, 1, 91}, + { 827, 1, 92}, + { 823, 1, 92}, + { 536, 1, 93}, + { 702, 1, 94}, + { 696, 1, 94}, + { 397, 1, 95}, + { 533, 1, 96}, + { 532, 1, 96}, + {1158, 1, 97}, + { 490, 1, 98}, + { 488, 1, 98}, + { 420, 1, 99}, + { 418, 1, 99}, + {1110, 1, 100}, + {1115, 1, 100}, + { 351, 1, 101}, + { 350, 1, 101}, + { 661, 1, 102}, + { 660, 1, 102}, + { 605, 1, 103}, + { 601, 1, 103}, + { 354, 1, 104}, + { 353, 1, 104}, + { 593, 1, 105}, + { 664, 1, 106}, + { 242, 1, 107}, + { 571, 1, 108}, + { 380, 1, 108}, + { 659, 1, 109}, + { 244, 1, 110}, + { 243, 1, 110}, + { 352, 1, 111}, + { 667, 1, 112}, + { 665, 1, 112}, + { 480, 1, 113}, + { 479, 1, 113}, + { 340, 1, 114}, + { 338, 1, 114}, + { 356, 1, 115}, + { 346, 1, 115}, + {1240, 1, 116}, + {1239, 1, 116}, + { 355, 1, 117}, + { 337, 1, 117}, + {1242, 1, 118}, + {1241, 1, 119}, + { 734, 1, 120}, + {1191, 1, 121}, + { 421, 1, 122}, + { 419, 1, 122}, + { 211, 1, 123}, + { 842, 1, 124}, + { 703, 1, 125}, + { 697, 1, 125}, + {1120, 1, 126}, + { 377, 1, 127}, + { 615, 1, 127}, + { 972, 1, 128}, + {1044, 1, 129}, + { 444, 1, 130}, + { 443, 1, 130}, + { 668, 1, 131}, + {1017, 1, 132}, + { 573, 1, 133}, + { 637, 1, 133}, + { 640, 1, 134}, + { 855, 1, 135}, + { 853, 1, 135}, + { 326, 1, 136}, + { 854, 1, 137}, + { 852, 1, 137}, + {1133, 1, 138}, + { 811, 1, 139}, + { 810, 1, 139}, + { 
491, 1, 140}, + { 489, 1, 140}, + { 704, 1, 141}, + { 698, 1, 141}, + { 809, 1, 142}, + { 575, 1, 143}, + { 570, 1, 143}, + { 574, 1, 144}, + { 638, 1, 144}, + { 591, 1, 145}, + { 589, 1, 146}, + { 590, 1, 146}, + { 743, 1, 147}, + { 996, 1, 148}, + {1000, 1, 148}, + { 995, 1, 148}, + { 342, 1, 149}, + { 344, 1, 149}, + { 161, 1, 150}, + { 160, 1, 150}, + { 181, 1, 151}, + { 179, 1, 151}, + {1194, 1, 152}, + {1209, 1, 152}, + {1200, 1, 153}, + { 373, 1, 154}, + { 564, 1, 154}, + { 341, 1, 155}, + { 339, 1, 155}, + {1075, 1, 156}, + {1074, 1, 156}, + { 182, 1, 157}, + { 180, 1, 157}, + { 566, 1, 158}, + { 563, 1, 158}, + {1086, 1, 159}, + { 730, 1, 160}, + { 729, 1, 161}, + { 146, 1, 162}, + { 167, 1, 163}, + { 168, 1, 164}, + { 974, 1, 165}, + { 973, 1, 165}, + { 754, 1, 166}, + { 278, 1, 167}, + { 398, 1, 168}, + { 915, 1, 169}, + { 539, 1, 170}, + { 917, 1, 171}, + {1179, 1, 172}, + { 918, 1, 173}, + { 440, 1, 174}, + {1059, 1, 175}, + { 933, 1, 176}, + { 471, 1, 177}, + { 283, 1, 178}, + { 727, 1, 179}, + { 416, 1, 180}, + { 612, 1, 181}, + { 956, 1, 182}, + { 860, 1, 183}, + { 978, 1, 184}, + { 756, 1, 185}, + { 817, 1, 186}, + { 816, 1, 187}, + { 671, 1, 188}, + { 919, 1, 189}, + { 916, 1, 190}, + { 768, 1, 191}, + { 203, 1, 192}, + { 625, 1, 193}, + { 624, 1, 194}, + { 999, 1, 195}, + { 920, 1, 196}, + {1032, 1, 197}, + {1031, 1, 197}, + { 251, 1, 198}, + { 653, 1, 199}, + {1080, 1, 200}, + { 325, 1, 201}, + { 759, 1, 202}, + {1058, 1, 203}, + {1070, 1, 204}, + { 676, 1, 205}, + { 677, 1, 206}, + { 541, 1, 207}, + {1160, 1, 208}, + {1065, 1, 209}, + { 838, 1, 210}, + {1137, 1, 211}, + {1213, 1, 212}, + { 963, 1, 213}, + { 405, 1, 214}, + { 407, 1, 215}, + { 406, 1, 215}, + { 469, 1, 216}, + { 213, 1, 217}, + { 212, 1, 217}, + { 846, 1, 218}, + { 216, 1, 219}, + { 954, 1, 220}, + { 818, 1, 221}, + { 657, 1, 222}, + { 656, 1, 222}, + { 464, 1, 223}, + {1062, 1, 224}, + { 266, 1, 225}, + { 265, 1, 225}, + { 850, 1, 226}, + { 849, 1, 226}, + { 166, 1, 227}, + { 
165, 1, 227}, + {1135, 1, 228}, + {1134, 1, 228}, + { 400, 1, 229}, + { 399, 1, 229}, + { 799, 1, 230}, + { 798, 1, 230}, + { 813, 1, 231}, + { 177, 1, 232}, + { 176, 1, 232}, + { 762, 1, 233}, + { 761, 1, 233}, + { 455, 1, 234}, + { 454, 1, 234}, + { 984, 1, 235}, + { 477, 1, 236}, + { 478, 1, 236}, + { 482, 1, 237}, + { 481, 1, 237}, + { 828, 1, 238}, + { 832, 1, 238}, + { 472, 1, 239}, + { 930, 1, 240}, + {1173, 1, 241}, + {1172, 1, 241}, + { 154, 1, 242}, + { 153, 1, 242}, + { 529, 1, 243}, + { 528, 1, 243}, + {1108, 1, 244}, + {1101, 1, 244}, + { 357, 1, 245}, + { 347, 1, 245}, + { 358, 1, 246}, + { 348, 1, 246}, + { 359, 1, 247}, + { 349, 1, 247}, + { 343, 1, 248}, + { 345, 1, 248}, + {1129, 1, 249}, + {1195, 1, 250}, + {1210, 1, 250}, + {1111, 1, 251}, + {1113, 1, 251}, + {1112, 1, 252}, + {1114, 1, 252}, + {1185, 2, 0}, + {1256, 2, 0}, + { 376, 2, 1}, + {1255, 2, 1}, + { 689, 2, 2}, + { 705, 2, 2}, + { 548, 2, 3}, + { 552, 2, 3}, + { 417, 2, 4}, + { 425, 2, 4}, + { 185, 2, 5}, + { 187, 2, 5}, + { 582, 2, 6}, + { 581, 2, 6}, + { 172, 2, 7}, + { 171, 2, 7}, + {1123, 2, 8}, + {1122, 2, 8}, + {1154, 2, 9}, + {1153, 2, 9}, + { 442, 2, 10}, + { 441, 2, 10}, + { 226, 2, 11}, + { 225, 2, 11}, + { 557, 2, 12}, + { 558, 2, 12}, + { 555, 2, 13}, + { 556, 2, 13}, + { 928, 2, 14}, + { 931, 2, 14}, + {1140, 2, 15}, + {1141, 2, 15}, + {1147, 2, 16}, + {1146, 2, 16}, + { 662, 2, 17}, + { 678, 2, 17}, + { 763, 2, 18}, + { 836, 2, 18}, + {1069, 2, 19}, + {1068, 2, 19}, + {1155, 2, 20}, + { 687, 2, 21}, + { 688, 2, 21}, + {1156, 2, 22}, + {1157, 2, 22}, + { 851, 2, 23}, + { 856, 2, 23}, + { 531, 2, 24}, + { 530, 2, 24}, + { 570, 2, 25}, + { 569, 2, 25}, + { 487, 2, 26}, + { 486, 2, 26}, + { 333, 2, 27}, + { 332, 2, 27}, + { 271, 2, 28}, + { 275, 2, 28}, + { 909, 2, 29}, + { 908, 2, 29}, + {1033, 2, 30}, + {1034, 2, 30}, + { 672, 2, 31}, + { 674, 2, 31}, + { 845, 2, 32}, + { 844, 2, 32}, + { 593, 2, 33}, + { 592, 2, 33}, + { 664, 2, 34}, + { 655, 2, 34}, + { 242, 2, 35}, + { 
241, 2, 35}, + { 568, 2, 36}, + { 577, 2, 36}, + {1237, 2, 37}, + {1238, 2, 37}, + { 915, 2, 38}, + { 635, 2, 38}, + { 539, 2, 39}, + { 538, 2, 39}, + { 440, 2, 40}, + { 460, 2, 40}, + { 618, 2, 41}, + {1249, 2, 41}, + {1005, 2, 41}, + {1126, 2, 42}, + {1152, 2, 42}, + { 579, 2, 43}, + { 578, 2, 43}, + { 263, 2, 44}, + { 262, 2, 44}, + {1128, 2, 45}, + {1127, 2, 45}, + { 724, 2, 46}, + { 723, 2, 46}, + {1131, 2, 47}, + {1138, 2, 47}, + { 728, 2, 48}, + { 726, 2, 48}, + {1179, 2, 49}, + {1178, 2, 49}, + {1059, 2, 50}, + {1060, 2, 50}, + { 933, 2, 51}, + { 932, 2, 51}, + { 415, 2, 52}, { 402, 2, 52}, - { 389, 2, 52}, - { 246, 2, 53}, - { 245, 2, 53}, - { 253, 2, 54}, - { 252, 2, 54}, - { 385, 2, 55}, - { 384, 2, 55}, - { 914, 2, 55}, - { 796, 2, 56}, - {1041, 2, 56}, - { 515, 2, 57}, - { 514, 2, 57}, - {1060, 2, 58}, - {1053, 2, 58}, - {1022, 2, 59}, - {1021, 2, 59}, - { 839, 2, 60}, - {1127, 2, 60}, - { 645, 2, 61}, - { 644, 2, 61}, - { 204, 2, 62}, - { 203, 2, 62}, - { 392, 2, 63}, - {1128, 2, 63}, - { 891, 2, 64}, - { 890, 2, 64}, - { 886, 2, 65}, - { 885, 2, 65}, - { 799, 2, 66}, - { 800, 2, 66}, - { 999, 2, 67}, - { 998, 2, 67}, - { 686, 2, 68}, - { 685, 2, 68}, - { 836, 2, 69}, - { 837, 2, 69}, - {1091, 2, 70}, - {1092, 2, 70}, - { 954, 2, 71}, - { 953, 2, 71}, - { 642, 2, 72}, - { 628, 2, 72}, - { 927, 2, 73}, - { 936, 2, 73}, - { 720, 2, 74}, - { 719, 2, 74}, - { 269, 2, 75}, - { 268, 2, 75}, - { 722, 2, 76}, - { 721, 2, 76}, - { 316, 2, 77}, - {1034, 2, 78}, - { 657, 2, 78}, - {1035, 2, 79}, - {1044, 2, 79}, - { 198, 2, 80}, - { 199, 2, 80}, - { 453, 2, 81}, - { 452, 2, 81}, - { 950, 2, 82}, - { 951, 2, 82}, - { 700, 2, 83}, - { 206, 2, 84}, - { 205, 2, 84}, - { 615, 2, 85}, - { 614, 2, 85}, - { 744, 2, 86}, - { 778, 2, 86}, - { 587, 2, 87}, - { 182, 2, 87}, - { 840, 2, 88}, - { 952, 2, 88}, - { 600, 2, 89}, - { 909, 2, 89}, - { 599, 2, 90}, - { 889, 2, 90}, - { 841, 2, 91}, - { 850, 2, 91}, - { 627, 2, 92}, - { 651, 2, 92}, - { 210, 2, 93}, - { 211, 2, 93}, 
- { 244, 2, 94}, - { 243, 2, 94}, - { 729, 2, 95}, - { 728, 2, 95}, - { 315, 2, 96}, - { 261, 2, 96}, - { 749, 2, 97}, - { 747, 2, 97}, - { 750, 2, 98}, - { 748, 2, 98}, - { 751, 2, 99}, - { 898, 2, 99}, - { 968, 2, 100}, - { 972, 2, 100}, - { 985, 2, 101}, - { 984, 2, 101}, - {1039, 2, 102}, - {1038, 2, 102}, - { 640, 2, 103}, - { 854, 3, 0}, - {1129, 3, 0}, - { 444, 3, 1}, - { 445, 3, 1}, - { 973, 3, 2}, - { 992, 3, 2}, - { 560, 3, 3}, - { 571, 3, 3}, - { 390, 3, 4}, - { 690, 3, 5}, - { 795, 3, 6}, - { 801, 3, 6}, - { 482, 3, 7}, - { 924, 3, 8}, - { 929, 3, 8}, - { 497, 3, 9}, - { 495, 3, 9}, - { 638, 3, 10}, - { 625, 3, 10}, - { 152, 3, 11}, - { 676, 3, 11}, - { 752, 3, 12}, - { 768, 3, 12}, - { 753, 3, 13}, - { 770, 3, 13}, - { 754, 3, 14}, - { 738, 3, 14}, - { 822, 3, 15}, - { 818, 3, 15}, - { 484, 3, 16}, - { 479, 3, 16}, - { 854, 4, 0}, - {1129, 4, 0}, - { 390, 4, 1}, - { 690, 4, 2}, - { 382, 4, 3}, - { 355, 4, 3}, - { 482, 4, 4}, - { 479, 4, 4}, - { 924, 4, 5}, - { 929, 4, 5}, - { 989, 4, 6}, - { 977, 4, 6}, - { 653, 4, 7}, - {1090, 4, 8}, - {1027, 4, 9}, - { 715, 4, 10}, - { 717, 4, 11}, - { 905, 4, 12}, - { 854, 5, 0}, - {1129, 5, 0}, - { 390, 5, 1}, - { 690, 5, 2}, - { 482, 5, 3}, - { 479, 5, 3}, - { 965, 5, 4}, - { 960, 5, 4}, - { 497, 5, 5}, - { 495, 5, 5}, - { 986, 5, 6}, - { 706, 5, 7}, - { 703, 5, 7}, - {1087, 5, 8}, - {1086, 5, 8}, - { 842, 5, 9}, - { 676, 5, 9}, - { 822, 5, 10}, - { 818, 5, 10}, + { 254, 2, 53}, + { 253, 2, 53}, + { 261, 2, 54}, + { 260, 2, 54}, + { 397, 2, 55}, + { 396, 2, 55}, + {1004, 2, 55}, + { 871, 2, 56}, + {1139, 2, 56}, + { 536, 2, 57}, + { 535, 2, 57}, + {1158, 2, 58}, + {1151, 2, 58}, + {1120, 2, 59}, + {1119, 2, 59}, + { 918, 2, 60}, + {1229, 2, 60}, + { 671, 2, 61}, + { 670, 2, 61}, + { 209, 2, 62}, + { 208, 2, 62}, + { 405, 2, 63}, + {1230, 2, 63}, + { 978, 2, 64}, + { 977, 2, 64}, + { 972, 2, 65}, + { 971, 2, 65}, + { 874, 2, 66}, + { 875, 2, 66}, + {1095, 2, 67}, + {1094, 2, 67}, + { 718, 2, 68}, + { 717, 2, 68}, + 
{ 913, 2, 69}, + { 914, 2, 69}, + {1191, 2, 70}, + {1192, 2, 70}, + {1044, 2, 71}, + {1043, 2, 71}, + { 668, 2, 72}, + { 654, 2, 72}, + {1017, 2, 73}, + {1026, 2, 73}, + { 754, 2, 74}, + { 753, 2, 74}, + { 278, 2, 75}, + { 277, 2, 75}, + { 756, 2, 76}, + { 755, 2, 76}, + { 326, 2, 77}, + {1132, 2, 78}, + { 686, 2, 78}, + {1133, 2, 79}, + {1142, 2, 79}, + { 203, 2, 80}, + { 204, 2, 80}, + { 469, 2, 81}, + { 468, 2, 81}, + {1040, 2, 82}, + {1041, 2, 82}, + { 734, 2, 83}, + { 211, 2, 84}, + { 210, 2, 84}, + { 640, 2, 85}, + { 639, 2, 85}, + { 809, 2, 86}, + { 848, 2, 86}, + { 612, 2, 87}, + { 186, 2, 87}, + { 919, 2, 88}, + {1042, 2, 88}, + { 625, 2, 89}, + { 997, 2, 89}, + { 624, 2, 90}, + { 975, 2, 90}, + { 920, 2, 91}, + { 929, 2, 91}, + { 653, 2, 92}, + { 680, 2, 92}, + { 217, 2, 93}, + { 218, 2, 93}, + { 251, 2, 94}, + { 250, 2, 94}, + { 765, 2, 95}, + { 764, 2, 95}, + { 325, 2, 96}, + { 269, 2, 96}, + { 816, 2, 97}, + { 814, 2, 97}, + { 817, 2, 98}, + { 815, 2, 98}, + { 818, 2, 99}, + { 985, 2, 99}, + {1058, 2, 100}, + {1063, 2, 100}, + {1080, 2, 101}, + {1079, 2, 101}, + {1137, 2, 102}, + {1136, 2, 102}, + { 283, 2, 103}, + { 147, 2, 103}, + { 216, 2, 104}, + { 215, 2, 104}, + { 464, 2, 105}, + { 463, 2, 105}, + { 471, 2, 106}, + { 470, 2, 106}, + { 541, 2, 107}, + { 540, 2, 107}, + { 954, 2, 108}, + { 595, 2, 108}, + { 676, 2, 109}, + { 675, 2, 109}, + { 727, 2, 110}, + { 725, 2, 110}, + { 759, 2, 111}, + { 760, 2, 111}, + { 768, 2, 112}, + { 767, 2, 112}, + { 813, 2, 113}, + { 812, 2, 113}, + { 838, 2, 114}, + { 846, 2, 115}, + { 847, 2, 115}, + { 916, 2, 116}, + { 863, 2, 116}, + { 860, 2, 117}, + { 866, 2, 117}, + { 956, 2, 118}, + { 955, 2, 118}, + { 963, 2, 119}, + { 962, 2, 119}, + { 917, 2, 120}, + { 969, 2, 120}, + { 999, 2, 121}, + { 976, 2, 121}, + {1065, 2, 122}, + {1064, 2, 122}, + { 677, 2, 123}, + {1066, 2, 123}, + {1160, 2, 124}, + {1159, 2, 124}, + {1213, 2, 125}, + {1212, 2, 125}, + { 666, 2, 126}, + { 596, 2, 126}, + { 934, 3, 0}, + {1231, 3, 
0}, + { 458, 3, 1}, + { 459, 3, 1}, + {1067, 3, 2}, + {1087, 3, 2}, + { 583, 3, 3}, + { 594, 3, 3}, + { 403, 3, 4}, + { 722, 3, 5}, + { 870, 3, 6}, + { 876, 3, 6}, + { 500, 3, 7}, + {1014, 3, 8}, + {1019, 3, 8}, + { 515, 3, 9}, + { 513, 3, 9}, + { 664, 3, 10}, + { 651, 3, 10}, + { 156, 3, 11}, + { 708, 3, 11}, + { 819, 3, 12}, + { 835, 3, 12}, + { 820, 3, 13}, + { 837, 3, 13}, + { 821, 3, 14}, + { 803, 3, 14}, + { 899, 3, 15}, + { 894, 3, 15}, + { 502, 3, 16}, + { 497, 3, 16}, + { 934, 4, 0}, + {1231, 4, 0}, + { 403, 4, 1}, + { 722, 4, 2}, + { 394, 4, 3}, + { 365, 4, 3}, + { 500, 4, 4}, + { 497, 4, 4}, + {1014, 4, 5}, + {1019, 4, 5}, + {1084, 4, 6}, + {1072, 4, 6}, + { 682, 4, 7}, + {1190, 4, 8}, + {1125, 4, 9}, + { 749, 4, 10}, + { 751, 4, 11}, + { 993, 4, 12}, + { 990, 4, 12}, + { 934, 5, 0}, + {1231, 5, 0}, + { 403, 5, 1}, + { 722, 5, 2}, + { 500, 5, 3}, + { 497, 5, 3}, + {1055, 5, 4}, + {1050, 5, 4}, + { 515, 5, 5}, + { 513, 5, 5}, + {1081, 5, 6}, + { 740, 5, 7}, + { 737, 5, 7}, + {1187, 5, 8}, + {1186, 5, 8}, + { 921, 5, 9}, + { 708, 5, 9}, + { 899, 5, 10}, + { 894, 5, 10}, + { 197, 5, 11}, { 192, 5, 11}, - { 188, 5, 11}, - { 996, 5, 12}, - { 995, 5, 12}, - { 351, 5, 13}, - { 350, 5, 13}, - { 957, 5, 14}, - { 956, 5, 14}, - { 802, 6, 0}, - { 785, 6, 0}, - { 485, 6, 0}, - { 486, 6, 0}, - {1134, 6, 1}, - {1130, 6, 1}, - {1027, 6, 1}, - {1074, 6, 1}, - { 812, 7, 0}, - { 787, 7, 0}, - { 677, 7, 1}, - { 653, 7, 1}, - {1107, 7, 2}, - {1090, 7, 2}, - {1070, 7, 3}, - {1027, 7, 3}, - { 716, 7, 4}, - { 715, 7, 4}, - { 718, 7, 5}, - { 717, 7, 5}, - { 681, 8, 0}, - { 653, 8, 0}, - { 932, 8, 1}, - { 922, 8, 1}, - { 476, 8, 2}, - { 455, 8, 2}, - { 477, 8, 3}, - { 466, 8, 3}, - { 478, 8, 4}, - { 467, 8, 4}, - { 174, 8, 5}, - { 160, 8, 5}, - { 367, 8, 6}, - { 391, 8, 6}, - { 874, 8, 7}, - { 200, 8, 7}, - { 962, 8, 8}, - { 945, 8, 8}, - {1114, 8, 9}, - {1120, 8, 9}, - { 863, 8, 10}, - { 845, 8, 10}, + {1091, 5, 12}, + {1090, 5, 12}, + { 361, 5, 13}, + { 360, 5, 13}, + {1047, 
5, 14}, + {1046, 5, 14}, + { 877, 6, 0}, + { 857, 6, 0}, + { 503, 6, 0}, + { 504, 6, 0}, + {1236, 6, 1}, + {1232, 6, 1}, + {1125, 6, 1}, + {1174, 6, 1}, + { 888, 7, 0}, + { 859, 7, 0}, + { 709, 7, 1}, + { 682, 7, 1}, + {1207, 7, 2}, + {1190, 7, 2}, + {1170, 7, 3}, + {1125, 7, 3}, + { 750, 7, 4}, + { 749, 7, 4}, + { 752, 7, 5}, + { 751, 7, 5}, + { 713, 8, 0}, + { 682, 8, 0}, + {1022, 8, 1}, + {1012, 8, 1}, + { 494, 8, 2}, + { 473, 8, 2}, + { 495, 8, 3}, + { 484, 8, 3}, + { 496, 8, 4}, + { 485, 8, 4}, + { 178, 8, 5}, + { 164, 8, 5}, + { 378, 8, 6}, + { 404, 8, 6}, + { 957, 8, 7}, + { 205, 8, 7}, + {1052, 8, 8}, + {1035, 8, 8}, + {1216, 8, 9}, + {1222, 8, 9}, + { 943, 8, 10}, + { 924, 8, 10}, + { 247, 8, 11}, { 240, 8, 11}, - { 233, 8, 11}, - { 809, 8, 12}, - { 816, 8, 12}, - { 171, 8, 13}, - { 147, 8, 13}, - { 684, 8, 14}, - { 712, 8, 14}, - { 935, 8, 15}, - { 939, 8, 15}, - { 682, 8, 16}, - { 710, 8, 16}, - { 933, 8, 17}, - { 937, 8, 17}, - { 900, 8, 18}, - { 881, 8, 18}, - { 683, 8, 19}, - { 711, 8, 19}, - { 934, 8, 20}, - { 938, 8, 20}, - { 494, 8, 21}, - { 500, 8, 21}, - { 901, 8, 22}, - { 882, 8, 22}, - { 813, 9, 0}, - { 814, 9, 0}, + { 885, 8, 12}, + { 892, 8, 12}, + { 175, 8, 13}, + { 151, 8, 13}, + { 716, 8, 14}, + { 746, 8, 14}, + {1025, 8, 15}, + {1029, 8, 15}, + { 714, 8, 16}, + { 744, 8, 16}, + {1023, 8, 17}, + {1027, 8, 17}, + { 987, 8, 18}, + { 966, 8, 18}, + { 715, 8, 19}, + { 745, 8, 19}, + {1024, 8, 20}, + {1028, 8, 20}, + { 512, 8, 21}, + { 518, 8, 21}, + { 988, 8, 22}, + { 967, 8, 22}, + { 889, 9, 0}, { 1, 9, 0}, - { 870, 9, 1}, + { 890, 9, 0}, + { 950, 9, 1}, { 2, 9, 1}, - { 869, 9, 1}, - { 819, 9, 2}, - { 798, 9, 2}, - { 119, 9, 2}, - { 632, 9, 3}, - { 652, 9, 3}, - { 126, 9, 3}, - {1101, 9, 4}, - {1108, 9, 4}, - { 132, 9, 4}, - { 277, 9, 5}, + { 949, 9, 1}, + { 895, 9, 2}, + { 122, 9, 2}, + { 873, 9, 2}, + { 658, 9, 3}, + { 129, 9, 3}, + { 681, 9, 3}, + {1201, 9, 4}, + { 135, 9, 4}, + {1208, 9, 4}, + { 287, 9, 5}, { 13, 9, 5}, - { 280, 9, 6}, - 
{ 22, 9, 6}, - { 282, 9, 7}, - { 25, 9, 7}, - { 285, 9, 8}, - { 28, 9, 8}, - { 289, 9, 9}, - { 32, 9, 9}, - { 290, 9, 10}, - { 33, 9, 10}, - { 291, 9, 11}, - { 35, 9, 11}, - { 292, 9, 12}, - { 36, 9, 12}, - { 293, 9, 13}, - { 38, 9, 13}, - { 294, 9, 14}, - { 39, 9, 14}, - { 295, 9, 15}, - { 43, 9, 15}, - { 296, 9, 16}, - { 48, 9, 16}, - { 297, 9, 17}, - { 53, 9, 17}, - { 298, 9, 18}, - { 59, 9, 18}, - { 299, 9, 19}, - { 64, 9, 19}, - { 300, 9, 20}, - { 66, 9, 20}, - { 301, 9, 21}, - { 67, 9, 21}, - { 302, 9, 22}, - { 68, 9, 22}, - { 303, 9, 23}, - { 69, 9, 23}, - { 304, 9, 24}, - { 70, 9, 24}, - { 305, 9, 25}, - { 77, 9, 25}, - { 306, 9, 26}, - { 81, 9, 26}, - { 307, 9, 27}, - { 82, 9, 27}, - { 308, 9, 28}, - { 83, 9, 28}, - { 309, 9, 29}, - { 84, 9, 29}, - { 310, 9, 30}, - { 85, 9, 30}, - { 311, 9, 31}, - { 86, 9, 31}, - { 312, 9, 32}, - { 131, 9, 32}, - { 313, 9, 33}, - { 138, 9, 33}, - { 278, 9, 34}, - { 20, 9, 34}, - { 279, 9, 35}, - { 21, 9, 35}, - { 281, 9, 36}, - { 24, 9, 36}, - { 283, 9, 37}, - { 26, 9, 37}, - { 284, 9, 38}, - { 27, 9, 38}, - { 286, 9, 39}, - { 30, 9, 39}, - { 287, 9, 40}, - { 31, 9, 40}, + { 290, 9, 6}, + { 24, 9, 6}, + { 292, 9, 7}, + { 27, 9, 7}, + { 295, 9, 8}, + { 30, 9, 8}, + { 299, 9, 9}, + { 35, 9, 9}, + { 300, 9, 10}, + { 36, 9, 10}, + { 301, 9, 11}, + { 38, 9, 11}, + { 302, 9, 12}, + { 39, 9, 12}, + { 303, 9, 13}, + { 41, 9, 13}, + { 304, 9, 14}, + { 42, 9, 14}, + { 305, 9, 15}, + { 46, 9, 15}, + { 306, 9, 16}, + { 51, 9, 16}, + { 307, 9, 17}, + { 56, 9, 17}, + { 308, 9, 18}, + { 62, 9, 18}, + { 309, 9, 19}, + { 67, 9, 19}, + { 310, 9, 20}, + { 69, 9, 20}, + { 311, 9, 21}, + { 70, 9, 21}, + { 312, 9, 22}, + { 71, 9, 22}, + { 313, 9, 23}, + { 72, 9, 23}, + { 314, 9, 24}, + { 73, 9, 24}, + { 315, 9, 25}, + { 80, 9, 25}, + { 316, 9, 26}, + { 84, 9, 26}, + { 317, 9, 27}, + { 85, 9, 27}, + { 318, 9, 28}, + { 86, 9, 28}, + { 319, 9, 29}, + { 87, 9, 29}, + { 320, 9, 30}, + { 88, 9, 30}, + { 321, 9, 31}, + { 89, 9, 31}, + { 322, 9, 32}, + 
{ 134, 9, 32}, + { 323, 9, 33}, + { 141, 9, 33}, + { 288, 9, 34}, + { 22, 9, 34}, + { 289, 9, 35}, + { 23, 9, 35}, + { 291, 9, 36}, + { 26, 9, 36}, + { 293, 9, 37}, + { 28, 9, 37}, + { 294, 9, 38}, + { 29, 9, 38}, + { 296, 9, 39}, + { 32, 9, 39}, + { 297, 9, 40}, + { 33, 9, 40}, + { 200, 9, 41}, + { 50, 9, 41}, { 195, 9, 41}, - { 191, 9, 41}, - { 47, 9, 41}, + { 198, 9, 42}, + { 52, 9, 42}, { 193, 9, 42}, - { 189, 9, 42}, - { 49, 9, 42}, + { 199, 9, 43}, + { 53, 9, 43}, { 194, 9, 43}, - { 190, 9, 43}, - { 50, 9, 43}, - { 216, 9, 44}, - { 52, 9, 44}, - { 228, 9, 44}, - { 215, 9, 45}, - { 200, 9, 45}, - { 54, 9, 45}, - { 217, 9, 46}, - { 242, 9, 46}, - { 55, 9, 46}, - { 678, 9, 47}, - { 653, 9, 47}, - { 56, 9, 47}, - { 930, 9, 48}, - { 57, 9, 48}, - { 922, 9, 48}, - { 141, 9, 49}, - { 147, 9, 49}, - { 58, 9, 49}, - { 140, 9, 50}, - { 139, 9, 50}, - { 60, 9, 50}, - { 142, 9, 51}, - { 166, 9, 51}, - { 61, 9, 51}, - { 443, 9, 52}, - { 419, 9, 52}, - { 62, 9, 52}, - { 442, 9, 53}, - { 63, 9, 53}, - { 414, 9, 53}, - { 604, 9, 54}, - { 607, 9, 54}, - { 65, 9, 54}, - { 288, 9, 55}, + { 223, 9, 44}, + { 55, 9, 44}, + { 235, 9, 44}, + { 222, 9, 45}, + { 57, 9, 45}, + { 205, 9, 45}, + { 224, 9, 46}, + { 58, 9, 46}, + { 249, 9, 46}, + { 710, 9, 47}, + { 59, 9, 47}, + { 682, 9, 47}, + {1020, 9, 48}, + { 60, 9, 48}, + {1012, 9, 48}, + { 144, 9, 49}, + { 61, 9, 49}, + { 151, 9, 49}, + { 143, 9, 50}, + { 63, 9, 50}, + { 142, 9, 50}, + { 145, 9, 51}, + { 64, 9, 51}, + { 170, 9, 51}, + { 457, 9, 52}, + { 65, 9, 52}, + { 432, 9, 52}, + { 456, 9, 53}, + { 66, 9, 53}, + { 427, 9, 53}, + { 629, 9, 54}, + { 68, 9, 54}, + { 632, 9, 54}, + { 298, 9, 55}, + { 34, 9, 55}, + { 201, 9, 56}, + { 47, 9, 56}, { 196, 9, 56}, - { 807, 10, 0}, - { 265, 10, 1}, - { 262, 10, 1}, + { 882, 10, 0}, + { 273, 10, 1}, + { 270, 10, 1}, + { 379, 10, 2}, { 368, 10, 2}, - { 358, 10, 2}, - { 496, 10, 3}, - { 804, 10, 4}, - { 791, 10, 4}, - { 595, 10, 5}, - { 594, 10, 5}, - { 742, 10, 6}, - { 741, 10, 6}, - { 491, 
10, 7}, - { 490, 10, 7}, - { 609, 10, 8}, - { 608, 10, 8}, - { 325, 10, 9}, - { 456, 10, 9}, - {1005, 10, 10}, - {1002, 10, 10}, - { 997, 10, 11}, - {1099, 10, 12}, - {1098, 10, 12}, - {1115, 10, 13}, - { 790, 10, 14}, - { 789, 10, 14}, - { 978, 10, 15}, - { 981, 10, 15}, - { 994, 10, 16}, - { 993, 10, 16}, - { 499, 10, 17}, - { 498, 10, 17}, - { 794, 11, 0}, - { 785, 11, 0}, - { 159, 11, 1}, - { 139, 11, 1}, - { 542, 11, 2}, - { 536, 11, 2}, - {1115, 11, 3}, - {1111, 11, 3}, - { 501, 11, 4}, - { 485, 11, 4}, - { 790, 11, 5}, - { 787, 11, 5}, - { 805, 12, 0}, - { 146, 12, 1}, - { 148, 12, 2}, - { 151, 12, 3}, - { 214, 12, 4}, - { 220, 12, 5}, - { 415, 12, 6}, - { 416, 12, 7}, - { 449, 12, 8}, - { 489, 12, 9}, - { 493, 12, 10}, - { 502, 12, 11}, - { 503, 12, 12}, - { 539, 12, 13}, - { 544, 12, 14}, - {1047, 12, 14}, - { 557, 12, 15}, - { 561, 12, 16}, - { 562, 12, 17}, - { 563, 12, 18}, - { 626, 12, 19}, - { 637, 12, 20}, - { 650, 12, 21}, - { 655, 12, 22}, - { 656, 12, 23}, - { 743, 12, 24}, - { 755, 12, 25}, - { 811, 12, 26}, - { 825, 12, 27}, - { 883, 12, 28}, - { 916, 12, 29}, - { 917, 12, 30}, - { 926, 12, 31}, - { 928, 12, 32}, - { 948, 12, 33}, - { 949, 12, 34}, - { 961, 12, 35}, - { 963, 12, 36}, - { 971, 12, 37}, - {1019, 12, 38}, - {1032, 12, 39}, - {1045, 12, 40}, - {1046, 12, 41}, - {1052, 12, 42}, - {1112, 12, 43}, - {1026, 12, 44}, - {1131, 12, 45}, - {1132, 12, 46}, - {1133, 12, 47}, - {1141, 12, 48}, - {1142, 12, 49}, - {1145, 12, 50}, - {1146, 12, 51}, - { 643, 12, 52}, - { 488, 12, 53}, - { 256, 12, 54}, - { 487, 12, 55}, - { 827, 12, 56}, - { 940, 12, 57}, - { 808, 13, 0}, - {1075, 13, 0}, - { 619, 13, 1}, - { 259, 13, 1}, - { 448, 13, 2}, - { 413, 13, 2}, - { 931, 13, 3}, - { 922, 13, 3}, - { 680, 13, 4}, - { 653, 13, 4}, - {1071, 13, 5}, - {1027, 13, 5}, - {1085, 14, 0}, - {1129, 14, 0}, - { 847, 14, 1}, - { 846, 14, 1}, - { 353, 14, 2}, - { 350, 14, 2}, - { 920, 14, 3}, - { 919, 14, 3}, - { 516, 14, 4}, - { 513, 14, 4}, - { 810, 14, 5}, - { 
815, 14, 5}, - { 480, 14, 6}, - { 479, 14, 6}, - { 251, 14, 7}, - {1020, 14, 7}, - { 592, 14, 8}, - { 607, 14, 8}, - { 904, 14, 9}, - { 903, 14, 9}, - { 902, 14, 10}, - { 899, 14, 10}, - { 822, 14, 11}, - { 818, 14, 11}, - { 155, 14, 12}, - { 147, 14, 12}, - { 579, 14, 13}, - { 575, 14, 13}, - { 601, 14, 14}, - { 602, 14, 14}, - { 588, 14, 14}, - { 574, 14, 15}, - { 573, 14, 15}, - { 363, 14, 16}, - { 354, 14, 16}, - { 249, 14, 17}, - { 212, 14, 17}, - { 248, 14, 18}, - { 202, 14, 18}, - { 987, 14, 19}, - { 986, 14, 19}, - { 730, 14, 20}, - { 227, 14, 20}, - { 270, 14, 21}, - { 390, 14, 21}, - { 698, 14, 22}, - { 690, 14, 22}, - { 381, 14, 23}, - { 274, 14, 23}, - { 370, 14, 24}, - { 947, 14, 24}, - { 159, 14, 25}, - { 145, 14, 25}, - { 250, 14, 26}, - { 201, 14, 26}, - {1018, 14, 27}, - { 967, 14, 27}, - {1152, 14, 28}, - {1151, 14, 28}, - { 797, 14, 29}, - { 801, 14, 29}, - {1119, 14, 30}, - {1116, 14, 30}, - { 617, 14, 31}, - { 624, 14, 32}, - { 623, 14, 33}, - { 537, 14, 34}, - { 538, 14, 35}, - { 352, 14, 36}, - { 388, 14, 36}, - { 560, 14, 37}, - { 571, 14, 37}, - { 371, 14, 38}, - { 326, 14, 38}, - { 924, 14, 39}, - { 929, 14, 39}, - { 807, 15, 0}, - { 822, 15, 1}, - { 818, 15, 1}, - { 438, 15, 2}, - { 432, 15, 2}, - { 421, 15, 3}, - { 420, 15, 3}, - { 788, 16, 0}, + { 514, 10, 3}, + { 879, 10, 4}, + { 865, 10, 4}, + { 620, 10, 5}, + { 619, 10, 5}, + { 807, 10, 6}, + { 806, 10, 6}, + { 509, 10, 7}, + { 508, 10, 7}, + { 634, 10, 8}, + { 633, 10, 8}, + { 335, 10, 9}, + { 474, 10, 9}, + {1102, 10, 10}, + {1098, 10, 10}, + {1093, 10, 11}, + {1199, 10, 12}, + {1198, 10, 12}, + {1217, 10, 13}, + { 864, 10, 14}, + { 862, 10, 14}, + {1073, 10, 15}, + {1076, 10, 15}, + {1089, 10, 16}, + {1088, 10, 16}, + { 517, 10, 17}, + { 516, 10, 17}, + { 869, 11, 0}, + { 857, 11, 0}, + { 163, 11, 1}, + { 142, 11, 1}, + { 565, 11, 2}, + { 559, 11, 2}, + {1217, 11, 3}, + {1211, 11, 3}, + { 519, 11, 4}, + { 503, 11, 4}, + { 864, 11, 5}, + { 859, 11, 5}, + { 880, 12, 0}, + { 150, 12, 
1}, + { 152, 12, 2}, + { 155, 12, 3}, + { 221, 12, 4}, + { 227, 12, 5}, + { 428, 12, 6}, + { 429, 12, 7}, + { 465, 12, 8}, + { 507, 12, 9}, + { 511, 12, 10}, + { 520, 12, 11}, + { 521, 12, 12}, + { 562, 12, 13}, + { 567, 12, 14}, + {1145, 12, 14}, + { 580, 12, 15}, + { 584, 12, 16}, + { 585, 12, 17}, + { 586, 12, 18}, + { 652, 12, 19}, + { 663, 12, 20}, + { 679, 12, 21}, + { 684, 12, 22}, + { 685, 12, 23}, + { 808, 12, 24}, + { 822, 12, 25}, + { 887, 12, 26}, + { 902, 12, 27}, + { 968, 12, 28}, + {1006, 12, 29}, + {1007, 12, 30}, + {1016, 12, 31}, + {1018, 12, 32}, + {1038, 12, 33}, + {1039, 12, 34}, + {1051, 12, 35}, + {1053, 12, 36}, + {1061, 12, 37}, + {1117, 12, 38}, + {1130, 12, 39}, + {1143, 12, 40}, + {1144, 12, 41}, + {1150, 12, 42}, + {1214, 12, 43}, + {1124, 12, 44}, + {1233, 12, 45}, + {1234, 12, 46}, + {1235, 12, 47}, + {1243, 12, 48}, + {1244, 12, 49}, + {1247, 12, 50}, + {1248, 12, 51}, + { 669, 12, 52}, + { 506, 12, 53}, + { 264, 12, 54}, + { 505, 12, 55}, + { 904, 12, 56}, + {1030, 12, 57}, + {1092, 12, 58}, + { 769, 12, 59}, + { 770, 12, 60}, + { 771, 12, 61}, + { 772, 12, 62}, + { 773, 12, 63}, + { 774, 12, 64}, + { 775, 12, 65}, + { 776, 12, 66}, + { 777, 12, 67}, + { 778, 12, 68}, + { 779, 12, 69}, + { 780, 12, 70}, + { 781, 12, 71}, + { 782, 12, 72}, + { 783, 12, 73}, + { 784, 12, 74}, + { 785, 12, 75}, + { 786, 12, 76}, + { 787, 12, 77}, + { 788, 12, 78}, + { 789, 12, 79}, + { 790, 12, 80}, + { 791, 12, 81}, + { 792, 12, 82}, + { 793, 12, 83}, + { 794, 12, 84}, + { 795, 12, 85}, + { 884, 13, 0}, + {1175, 13, 0}, + { 644, 13, 1}, + { 267, 13, 1}, + { 462, 13, 2}, + { 426, 13, 2}, + {1021, 13, 3}, + {1012, 13, 3}, + { 712, 13, 4}, + { 682, 13, 4}, + {1171, 13, 5}, + {1125, 13, 5}, + {1185, 14, 0}, + {1231, 14, 0}, + { 926, 14, 1}, + { 925, 14, 1}, + { 363, 14, 2}, + { 360, 14, 2}, + {1010, 14, 3}, + {1009, 14, 3}, + { 537, 14, 4}, + { 534, 14, 4}, + { 886, 14, 5}, + { 891, 14, 5}, + { 498, 14, 6}, + { 497, 14, 6}, + { 259, 14, 7}, + {1118, 14, 
7}, + { 617, 14, 8}, + { 632, 14, 8}, + { 992, 14, 9}, + { 991, 14, 9}, + { 989, 14, 10}, + { 986, 14, 10}, + { 899, 14, 11}, + { 894, 14, 11}, + { 159, 14, 12}, + { 151, 14, 12}, + { 604, 14, 13}, + { 600, 14, 13}, + { 626, 14, 14}, + { 613, 14, 14}, + { 627, 14, 14}, + { 599, 14, 15}, + { 598, 14, 15}, + { 374, 14, 16}, + { 364, 14, 16}, + { 257, 14, 17}, + { 219, 14, 17}, + { 256, 14, 18}, + { 207, 14, 18}, + {1082, 14, 19}, + {1081, 14, 19}, + { 766, 14, 20}, + { 234, 14, 20}, + { 279, 14, 21}, + { 403, 14, 21}, + { 732, 14, 22}, + { 722, 14, 22}, + { 393, 14, 23}, + { 284, 14, 23}, + { 381, 14, 24}, + {1037, 14, 24}, + { 163, 14, 25}, + { 149, 14, 25}, + { 258, 14, 26}, + { 206, 14, 26}, + {1116, 14, 27}, + {1057, 14, 27}, + {1254, 14, 28}, + {1253, 14, 28}, + { 872, 14, 29}, + { 876, 14, 29}, + {1221, 14, 30}, + {1218, 14, 30}, + { 642, 14, 31}, + { 650, 14, 32}, + { 649, 14, 33}, + { 560, 14, 34}, + { 561, 14, 35}, + { 362, 14, 36}, + { 401, 14, 36}, + { 583, 14, 37}, + { 594, 14, 37}, + { 382, 14, 38}, + { 336, 14, 38}, + {1014, 14, 39}, + {1019, 14, 39}, + { 882, 15, 0}, + { 899, 15, 1}, + { 894, 15, 1}, + { 452, 15, 2}, + { 445, 15, 2}, + { 434, 15, 3}, + { 433, 15, 3}, + { 861, 16, 0}, { 0, 16, 1}, { 1, 16, 2}, { 4, 16, 3}, @@ -2430,153 +2649,164 @@ RE_PropertyValue re_property_values[] = { { 11, 16, 6}, { 10, 16, 7}, { 9, 16, 8}, - { 72, 16, 9}, + { 75, 16, 9}, { 8, 16, 10}, { 7, 16, 11}, { 6, 16, 12}, - { 76, 16, 13}, - { 42, 16, 14}, + { 79, 16, 13}, + { 45, 16, 14}, { 5, 16, 15}, - { 75, 16, 16}, - { 109, 16, 17}, - { 41, 16, 18}, - { 74, 16, 19}, - { 91, 16, 20}, - { 108, 16, 21}, - { 121, 16, 22}, + { 78, 16, 16}, + { 112, 16, 17}, + { 44, 16, 18}, + { 77, 16, 19}, + { 94, 16, 20}, + { 111, 16, 21}, + { 124, 16, 22}, { 2, 16, 23}, - { 73, 16, 24}, - { 40, 16, 25}, - { 107, 16, 26}, - { 71, 16, 27}, - { 120, 16, 28}, - { 90, 16, 29}, - { 133, 16, 30}, - { 106, 16, 31}, - { 23, 16, 32}, - { 114, 16, 33}, - { 29, 16, 34}, - { 119, 16, 35}, - { 34, 16, 
36}, - { 126, 16, 37}, - { 37, 16, 38}, - { 132, 16, 39}, + { 76, 16, 24}, + { 43, 16, 25}, + { 110, 16, 26}, + { 74, 16, 27}, + { 123, 16, 28}, + { 93, 16, 29}, + { 136, 16, 30}, + { 109, 16, 31}, + { 25, 16, 32}, + { 117, 16, 33}, + { 31, 16, 34}, + { 122, 16, 35}, + { 37, 16, 36}, + { 129, 16, 37}, + { 40, 16, 38}, + { 135, 16, 39}, { 13, 16, 40}, - { 22, 16, 41}, - { 25, 16, 42}, - { 28, 16, 43}, - { 32, 16, 44}, - { 33, 16, 45}, - { 35, 16, 46}, - { 36, 16, 47}, - { 38, 16, 48}, - { 39, 16, 49}, - { 43, 16, 50}, - { 48, 16, 51}, - { 53, 16, 52}, - { 59, 16, 53}, - { 64, 16, 54}, - { 66, 16, 55}, - { 67, 16, 56}, - { 68, 16, 57}, - { 69, 16, 58}, - { 70, 16, 59}, - { 77, 16, 60}, - { 81, 16, 61}, - { 82, 16, 62}, - { 83, 16, 63}, - { 84, 16, 64}, - { 85, 16, 65}, - { 86, 16, 66}, - { 87, 16, 67}, - { 88, 16, 68}, - { 89, 16, 69}, - { 92, 16, 70}, - { 96, 16, 71}, - { 97, 16, 72}, - { 98, 16, 73}, - { 100, 16, 74}, - { 101, 16, 75}, - { 102, 16, 76}, - { 103, 16, 77}, - { 104, 16, 78}, - { 105, 16, 79}, - { 110, 16, 80}, - { 115, 16, 81}, - { 122, 16, 82}, - { 127, 16, 83}, - { 134, 16, 84}, + { 24, 16, 41}, + { 27, 16, 42}, + { 30, 16, 43}, + { 35, 16, 44}, + { 36, 16, 45}, + { 38, 16, 46}, + { 39, 16, 47}, + { 41, 16, 48}, + { 42, 16, 49}, + { 46, 16, 50}, + { 51, 16, 51}, + { 56, 16, 52}, + { 62, 16, 53}, + { 67, 16, 54}, + { 69, 16, 55}, + { 70, 16, 56}, + { 71, 16, 57}, + { 72, 16, 58}, + { 73, 16, 59}, + { 80, 16, 60}, + { 84, 16, 61}, + { 85, 16, 62}, + { 86, 16, 63}, + { 87, 16, 64}, + { 88, 16, 65}, + { 89, 16, 66}, + { 90, 16, 67}, + { 91, 16, 68}, + { 92, 16, 69}, + { 95, 16, 70}, + { 99, 16, 71}, + { 100, 16, 72}, + { 101, 16, 73}, + { 103, 16, 74}, + { 104, 16, 75}, + { 105, 16, 76}, + { 106, 16, 77}, + { 107, 16, 78}, + { 108, 16, 79}, + { 113, 16, 80}, + { 118, 16, 81}, + { 125, 16, 82}, + { 130, 16, 83}, + { 137, 16, 84}, { 14, 16, 85}, - { 44, 16, 86}, - { 78, 16, 87}, - { 93, 16, 88}, - { 111, 16, 89}, - { 116, 16, 90}, - { 123, 16, 91}, - { 
128, 16, 92}, - { 135, 16, 93}, + { 47, 16, 86}, + { 81, 16, 87}, + { 96, 16, 88}, + { 114, 16, 89}, + { 119, 16, 90}, + { 126, 16, 91}, + { 131, 16, 92}, + { 138, 16, 93}, { 15, 16, 94}, - { 45, 16, 95}, - { 79, 16, 96}, - { 94, 16, 97}, - { 112, 16, 98}, - { 117, 16, 99}, - { 124, 16, 100}, - { 129, 16, 101}, - { 136, 16, 102}, + { 48, 16, 95}, + { 82, 16, 96}, + { 97, 16, 97}, + { 115, 16, 98}, + { 120, 16, 99}, + { 127, 16, 100}, + { 132, 16, 101}, + { 139, 16, 102}, { 16, 16, 103}, - { 46, 16, 104}, - { 80, 16, 105}, - { 95, 16, 106}, - { 113, 16, 107}, - { 118, 16, 108}, - { 125, 16, 109}, - { 130, 16, 110}, - { 137, 16, 111}, + { 49, 16, 104}, + { 83, 16, 105}, + { 98, 16, 106}, + { 116, 16, 107}, + { 121, 16, 108}, + { 128, 16, 109}, + { 133, 16, 110}, + { 140, 16, 111}, { 17, 16, 112}, - { 51, 16, 113}, - { 99, 16, 114}, + { 54, 16, 113}, + { 102, 16, 114}, { 18, 16, 115}, { 19, 16, 116}, - { 787, 17, 0}, - { 930, 17, 1}, - { 678, 17, 2}, - {1103, 17, 3}, - { 679, 17, 4}, - {1064, 17, 5}, - { 238, 17, 6}, - {1065, 17, 7}, - {1069, 17, 8}, - {1067, 17, 9}, - {1068, 17, 10}, - { 239, 17, 11}, - {1066, 17, 12}, - { 871, 17, 13}, - { 603, 17, 14}, - { 854, 18, 0}, - { 226, 18, 1}, - {1102, 18, 2}, - { 197, 18, 3}, - { 819, 18, 4}, - {1101, 18, 5}, - {1106, 18, 6}, - {1105, 18, 7}, - {1104, 18, 8}, - { 378, 18, 9}, - { 373, 18, 10}, - { 374, 18, 11}, - { 379, 18, 12}, - { 380, 18, 13}, - { 377, 18, 14}, - { 375, 18, 15}, - { 376, 18, 16}, - { 775, 18, 17}, - {1062, 18, 18}, - {1063, 18, 19}, - { 925, 18, 20}, + { 20, 16, 117}, + { 21, 16, 118}, + { 859, 17, 0}, + {1020, 17, 1}, + { 710, 17, 2}, + {1203, 17, 3}, + { 711, 17, 4}, + {1164, 17, 5}, + { 245, 17, 6}, + {1165, 17, 7}, + {1169, 17, 8}, + {1167, 17, 9}, + {1168, 17, 10}, + { 246, 17, 11}, + {1166, 17, 12}, + { 951, 17, 13}, + { 934, 18, 0}, + { 233, 18, 1}, + {1202, 18, 2}, + { 202, 18, 3}, + { 895, 18, 4}, + {1201, 18, 5}, + {1003, 18, 6}, + { 628, 18, 7}, + {1206, 18, 8}, + {1205, 18, 9}, + {1204, 18, 
10}, + { 389, 18, 11}, + { 384, 18, 12}, + { 385, 18, 13}, + { 390, 18, 14}, + { 392, 18, 15}, + { 391, 18, 16}, + { 388, 18, 17}, + { 386, 18, 18}, + { 387, 18, 19}, + { 843, 18, 20}, + {1162, 18, 21}, + {1163, 18, 22}, + { 524, 18, 23}, + { 276, 18, 24}, + {1015, 18, 25}, + { 883, 18, 26}, + { 646, 18, 27}, + { 898, 18, 28}, + { 896, 18, 29}, + { 252, 18, 30}, }; -/* property values: 4976 bytes. */ +/* property values: 5488 bytes. */ /* Codepoints which expand on full case-folding. */ @@ -2601,40 +2831,23 @@ RE_UINT16 re_expand_on_folding[] = { /* General_Category. */ static RE_UINT8 re_general_category_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 14, 14, 14, 15, - 16, 17, 18, 19, 20, 19, 21, 19, 19, 19, 19, 19, 19, 22, 19, 19, - 19, 19, 19, 19, 19, 19, 23, 19, 19, 19, 24, 19, 19, 25, 26, 19, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 27, 7, 28, 29, 19, 19, 19, 19, 19, 19, 19, 30, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 
19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 31, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 32, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 32, + 0, 1, 2, 3, 4, 5, 5, 5, 5, 6, 7, 5, 5, 8, 9, 10, + 11, 12, 13, 14, 15, 15, 16, 15, 15, 15, 15, 17, 15, 18, 19, 20, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 21, 22, 15, 15, 15, 23, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 24, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 25, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 25, }; static RE_UINT8 re_general_category_stage_2[] = { @@ -2646,31 +2859,50 @@ static RE_UINT8 
re_general_category_stage_2[] = { 69, 69, 64, 74, 64, 64, 75, 76, 77, 78, 79, 80, 81, 82, 69, 83, 84, 85, 86, 87, 88, 89, 69, 69, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 90, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 91, 92, 34, 34, 34, 34, 34, 34, 34, 34, 93, 34, 34, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 106, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 34, 34, 109, 110, 111, 112, 34, 34, 113, 114, 115, 116, 117, 118, - 119, 120, 121, 122, 76, 123, 124, 125, 126, 127, 76, 76, 76, 76, 76, 76, - 128, 76, 129, 130, 131, 76, 132, 76, 133, 76, 76, 76, 134, 76, 76, 76, - 135, 136, 137, 138, 76, 76, 76, 76, 76, 76, 76, 76, 76, 139, 76, 76, - 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, - 34, 34, 34, 34, 34, 34, 140, 76, 141, 76, 76, 76, 76, 76, 76, 76, - 34, 34, 34, 34, 34, 34, 34, 34, 142, 76, 76, 76, 76, 76, 76, 76, - 34, 34, 34, 34, 143, 76, 76, 76, 76, 76, 76, 76, 76, 76, 144, 145, - 146, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, - 69, 147, 148, 149, 150, 76, 151, 76, 152, 153, 154, 155, 156, 157, 158, 159, - 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 160, 161, 76, 76, - 162, 163, 164, 165, 166, 76, 167, 168, 169, 170, 171, 172, 173, 174, 175, 76, 
- 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 176, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 177, 34, - 178, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, - 34, 34, 34, 34, 178, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, - 179, 76, 180, 181, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, - 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 182, + 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 123, 34, 34, 130, 123, + 131, 132, 133, 134, 135, 136, 137, 138, 139, 123, 123, 123, 140, 123, 123, 123, + 141, 142, 143, 144, 145, 146, 147, 123, 123, 148, 123, 149, 150, 151, 123, 123, + 123, 152, 123, 123, 123, 153, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, + 34, 34, 34, 34, 34, 34, 34, 154, 155, 123, 123, 123, 123, 123, 123, 123, + 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, + 34, 34, 34, 34, 34, 34, 34, 34, 156, 123, 123, 123, 123, 123, 123, 123, + 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, + 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, + 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, + 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, + 34, 34, 34, 34, 157, 158, 159, 160, 123, 123, 123, 123, 123, 123, 161, 162, + 163, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, + 123, 123, 123, 123, 123, 123, 123, 123, 164, 165, 123, 123, 123, 123, 123, 123, + 69, 166, 167, 168, 169, 123, 170, 123, 171, 172, 173, 174, 175, 176, 177, 178, + 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, + 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, + 34, 179, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 180, 181, 123, 123, + 182, 183, 184, 185, 186, 123, 187, 188, 69, 189, 190, 191, 192, 193, 194, 195, + 196, 197, 123, 123, 123, 123, 123, 123, 123, 
123, 123, 123, 123, 123, 123, 123, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 198, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 199, 34, + 200, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, + 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, + 34, 34, 34, 34, 200, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, + 201, 123, 202, 203, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, + 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, + 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 204, }; static RE_UINT16 re_general_category_stage_3[] = { @@ -2679,93 +2911,104 @@ static RE_UINT16 re_general_category_stage_3[] = { 13, 13, 13, 24, 25, 11, 11, 11, 11, 26, 11, 27, 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 33, 34, 35, 36, 11, 37, 38, 13, 39, 9, 9, 9, 11, 11, 11, 13, 13, 40, 13, 13, 13, 41, 13, 13, 13, - 13, 13, 42, 43, 9, 44, 45, 11, 46, 47, 32, 48, 49, 50, 51, 52, - 53, 54, 50, 50, 55, 32, 56, 57, 50, 50, 50, 50, 50, 58, 59, 60, - 61, 62, 50, 32, 63, 50, 50, 50, 50, 50, 64, 65, 66, 50, 67, 68, - 50, 69, 70, 71, 50, 72, 73, 73, 73, 73, 74, 73, 73, 73, 75, 76, - 77, 50, 50, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, - 91, 84, 85, 92, 93, 94, 95, 96, 97, 98, 85, 99, 100, 101, 89, 102, - 83, 84, 85, 103, 104, 105, 89, 106, 107, 108, 109, 110, 111, 112, 95, 113, - 114, 115, 85, 116, 117, 118, 89, 119, 120, 115, 85, 121, 122, 123, 89, 124, - 120, 115, 50, 125, 126, 127, 89, 128, 129, 130, 50, 131, 132, 133, 73, 134, - 135, 50, 50, 136, 137, 138, 73, 73, 139, 140, 141, 142, 143, 144, 73, 73, - 145, 146, 147, 148, 149, 50, 150, 151, 152, 153, 32, 154, 155, 156, 73, 73, - 50, 50, 157, 158, 159, 160, 161, 162, 163, 164, 9, 9, 165, 50, 50, 166, - 50, 50, 50, 
50, 50, 50, 50, 50, 50, 50, 50, 50, 167, 168, 50, 50, - 167, 50, 50, 169, 170, 171, 50, 50, 50, 170, 50, 50, 50, 172, 173, 174, - 50, 175, 50, 50, 50, 50, 50, 176, 177, 50, 50, 50, 50, 50, 50, 50, - 50, 50, 50, 50, 50, 50, 178, 50, 179, 180, 50, 50, 50, 50, 181, 182, - 183, 184, 50, 185, 50, 186, 183, 187, 50, 50, 50, 188, 189, 190, 191, 192, - 193, 191, 50, 50, 194, 50, 50, 195, 50, 50, 196, 50, 50, 50, 50, 197, - 50, 150, 198, 199, 200, 50, 201, 176, 50, 50, 202, 203, 204, 205, 206, 206, - 50, 207, 50, 50, 50, 208, 209, 210, 191, 191, 211, 73, 73, 73, 73, 73, - 212, 50, 50, 213, 214, 159, 215, 216, 217, 50, 218, 66, 50, 50, 219, 220, - 50, 50, 221, 222, 223, 66, 50, 224, 73, 73, 73, 73, 225, 226, 227, 228, - 11, 11, 229, 27, 27, 27, 230, 231, 11, 232, 27, 27, 32, 32, 233, 234, + 13, 13, 13, 42, 9, 43, 44, 11, 45, 46, 32, 47, 48, 49, 50, 51, + 52, 53, 49, 49, 54, 32, 55, 56, 49, 49, 49, 49, 49, 57, 58, 59, + 60, 61, 49, 32, 62, 49, 49, 49, 49, 49, 63, 64, 65, 49, 66, 67, + 49, 68, 69, 70, 49, 71, 72, 72, 72, 72, 49, 73, 72, 72, 74, 32, + 75, 49, 49, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, + 89, 82, 83, 90, 91, 92, 93, 94, 95, 96, 83, 97, 98, 99, 87, 100, + 101, 82, 83, 102, 103, 104, 87, 105, 106, 107, 108, 109, 110, 111, 93, 112, + 113, 114, 83, 115, 116, 117, 87, 118, 119, 114, 83, 120, 121, 122, 87, 123, + 119, 114, 49, 124, 125, 126, 87, 127, 128, 129, 49, 130, 131, 132, 93, 133, + 134, 49, 49, 135, 136, 137, 72, 72, 138, 139, 140, 141, 142, 143, 72, 72, + 144, 145, 146, 147, 148, 49, 149, 150, 151, 152, 32, 153, 154, 155, 72, 72, + 49, 49, 156, 157, 158, 159, 160, 161, 162, 163, 9, 9, 164, 49, 49, 165, + 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 166, 167, 49, 49, + 166, 49, 49, 168, 169, 170, 49, 49, 49, 169, 49, 49, 49, 171, 172, 173, + 49, 174, 49, 49, 49, 49, 49, 175, 176, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 177, 49, 178, 179, 49, 49, 49, 49, 180, 181, + 182, 183, 49, 184, 49, 185, 182, 186, 49, 49, 49, 187, 188, 189, 
190, 191, + 192, 190, 49, 49, 193, 49, 49, 194, 49, 49, 195, 49, 49, 49, 49, 196, + 49, 197, 198, 199, 200, 49, 201, 175, 49, 49, 202, 203, 204, 205, 206, 206, + 49, 207, 49, 49, 49, 208, 209, 210, 190, 190, 211, 212, 72, 72, 72, 72, + 213, 49, 49, 214, 215, 158, 216, 217, 218, 49, 219, 65, 49, 49, 220, 221, + 49, 49, 222, 223, 224, 65, 49, 225, 72, 72, 72, 72, 226, 227, 228, 229, + 11, 11, 230, 27, 27, 27, 231, 232, 11, 233, 27, 27, 32, 32, 32, 234, 13, 13, 13, 13, 13, 13, 13, 13, 13, 235, 13, 13, 13, 13, 13, 13, 236, 237, 236, 236, 237, 238, 236, 239, 240, 240, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 73, 258, 259, 260, + 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 72, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 267, 268, 269, 270, 206, 271, 272, 206, 273, 274, 274, 274, 274, 274, 274, 274, 274, 275, 206, 276, 206, 206, 206, 206, 277, - 206, 278, 274, 279, 206, 280, 281, 282, 206, 206, 283, 73, 284, 73, 266, 266, - 266, 285, 206, 206, 206, 206, 286, 266, 206, 206, 206, 206, 206, 206, 206, 206, - 206, 206, 206, 287, 288, 206, 206, 289, 206, 206, 206, 206, 206, 206, 290, 206, - 291, 206, 206, 206, 206, 206, 292, 293, 266, 294, 206, 206, 295, 274, 296, 274, - 297, 298, 274, 274, 274, 299, 274, 300, 206, 206, 206, 274, 301, 175, 73, 73, - 73, 73, 73, 73, 73, 73, 73, 73, 9, 9, 302, 11, 11, 303, 304, 305, - 13, 13, 13, 13, 13, 13, 306, 307, 11, 11, 308, 50, 50, 50, 309, 310, - 50, 311, 312, 312, 312, 312, 32, 32, 313, 314, 315, 316, 73, 73, 73, 73, - 206, 317, 206, 206, 206, 206, 206, 282, 206, 206, 206, 206, 206, 318, 73, 319, - 320, 321, 322, 323, 135, 50, 50, 50, 50, 324, 177, 50, 50, 50, 50, 325, - 326, 50, 201, 135, 50, 50, 50, 50, 327, 328, 50, 51, 206, 206, 282, 50, - 206, 329, 330, 206, 331, 332, 206, 206, 330, 206, 206, 332, 206, 206, 206, 329, - 50, 50, 50, 197, 206, 206, 206, 206, 50, 50, 50, 50, 150, 73, 73, 73, - 50, 333, 50, 50, 50, 50, 50, 50, 150, 206, 206, 206, 283, 50, 50, 224, - 
334, 50, 335, 73, 13, 13, 336, 337, 13, 338, 50, 50, 50, 50, 339, 340, - 31, 341, 342, 343, 13, 13, 13, 344, 345, 346, 347, 73, 73, 73, 73, 348, - 349, 50, 350, 351, 50, 50, 50, 352, 353, 50, 50, 354, 355, 191, 32, 356, - 66, 50, 357, 50, 358, 359, 50, 150, 77, 50, 50, 360, 361, 362, 73, 73, - 50, 50, 363, 364, 365, 366, 50, 367, 50, 50, 50, 368, 369, 370, 371, 372, - 373, 374, 312, 73, 73, 73, 73, 73, 73, 73, 73, 73, 50, 50, 375, 191, - 50, 50, 376, 50, 377, 50, 50, 202, 378, 378, 378, 378, 378, 378, 378, 378, - 379, 379, 379, 379, 379, 379, 379, 379, 50, 50, 50, 50, 50, 50, 201, 50, - 50, 50, 50, 50, 50, 380, 73, 73, 381, 382, 383, 384, 385, 50, 50, 50, - 50, 50, 50, 386, 387, 388, 50, 50, 50, 50, 50, 389, 73, 50, 50, 50, - 50, 390, 50, 50, 195, 73, 73, 391, 32, 392, 233, 393, 394, 395, 396, 397, - 50, 50, 50, 50, 50, 50, 50, 398, 399, 2, 3, 4, 5, 400, 401, 402, - 50, 403, 50, 327, 404, 405, 406, 407, 408, 50, 171, 409, 201, 201, 73, 73, - 50, 50, 50, 50, 50, 50, 50, 51, 410, 266, 266, 411, 267, 267, 267, 412, - 413, 319, 73, 73, 73, 206, 206, 414, 50, 150, 50, 50, 50, 101, 73, 73, - 50, 327, 415, 50, 416, 73, 73, 73, 50, 417, 50, 50, 418, 419, 73, 73, - 9, 9, 420, 11, 11, 50, 50, 50, 50, 201, 191, 73, 73, 73, 73, 73, - 421, 50, 50, 422, 50, 423, 73, 73, 50, 424, 50, 425, 73, 73, 73, 73, - 50, 50, 50, 426, 73, 73, 73, 73, 427, 428, 50, 429, 430, 431, 50, 432, - 50, 50, 50, 433, 50, 434, 50, 435, 50, 50, 50, 50, 436, 73, 73, 73, - 73, 73, 73, 73, 73, 73, 266, 437, 438, 50, 50, 439, 440, 441, 442, 73, - 217, 50, 50, 443, 444, 50, 436, 191, 445, 50, 446, 447, 448, 73, 73, 73, - 217, 50, 50, 449, 450, 191, 73, 73, 50, 50, 451, 452, 191, 73, 73, 73, - 50, 50, 50, 50, 50, 50, 327, 73, 267, 267, 267, 267, 267, 267, 453, 448, - 50, 50, 327, 73, 73, 73, 73, 73, 50, 50, 50, 436, 73, 73, 73, 73, - 50, 50, 50, 50, 176, 454, 203, 455, 456, 457, 73, 73, 73, 73, 73, 73, - 458, 73, 73, 73, 73, 73, 73, 73, 206, 206, 206, 206, 206, 206, 206, 318, - 206, 206, 459, 206, 206, 206, 
460, 461, 462, 206, 463, 206, 206, 464, 73, 73, - 206, 206, 206, 206, 465, 73, 73, 73, 206, 206, 206, 206, 206, 283, 266, 466, - 9, 467, 11, 468, 469, 470, 236, 9, 471, 472, 473, 474, 475, 9, 467, 11, - 476, 477, 11, 478, 479, 480, 481, 9, 482, 11, 9, 467, 11, 468, 469, 11, - 236, 9, 471, 481, 9, 482, 11, 9, 467, 11, 483, 9, 484, 485, 486, 487, - 11, 488, 9, 489, 490, 491, 492, 11, 493, 9, 494, 11, 495, 496, 496, 496, - 497, 50, 498, 499, 500, 501, 502, 503, 504, 202, 505, 202, 73, 73, 73, 506, - 206, 206, 319, 206, 206, 206, 206, 206, 206, 282, 329, 507, 291, 291, 73, 73, - 508, 206, 329, 206, 206, 206, 319, 206, 206, 284, 73, 73, 73, 73, 509, 206, - 510, 206, 206, 284, 511, 512, 73, 73, 206, 206, 513, 514, 206, 206, 206, 515, - 206, 282, 206, 206, 516, 73, 206, 513, 206, 206, 206, 329, 517, 206, 206, 206, - 206, 206, 206, 206, 206, 206, 206, 518, 206, 206, 206, 464, 282, 206, 519, 73, - 73, 73, 73, 73, 73, 73, 73, 520, 206, 206, 206, 206, 521, 73, 73, 73, - 206, 206, 206, 206, 318, 73, 73, 73, 206, 206, 206, 206, 206, 206, 206, 282, - 50, 50, 50, 50, 50, 311, 73, 73, 50, 50, 50, 176, 50, 50, 50, 50, - 50, 201, 73, 73, 73, 73, 73, 73, 522, 73, 523, 523, 523, 523, 523, 523, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 73, - 379, 379, 379, 379, 379, 379, 379, 524, + 206, 278, 274, 279, 206, 280, 281, 282, 206, 206, 283, 72, 282, 72, 266, 266, + 266, 284, 206, 206, 206, 206, 285, 266, 206, 206, 206, 206, 206, 206, 206, 206, + 206, 206, 206, 286, 287, 206, 206, 288, 206, 206, 206, 206, 206, 206, 289, 206, + 206, 206, 206, 206, 206, 206, 290, 291, 266, 292, 206, 206, 293, 274, 294, 274, + 295, 296, 274, 274, 274, 297, 274, 298, 206, 206, 206, 274, 299, 206, 206, 300, + 206, 301, 206, 302, 303, 304, 72, 72, 9, 9, 305, 11, 11, 306, 307, 308, + 13, 13, 13, 13, 13, 13, 309, 310, 11, 11, 311, 49, 49, 49, 312, 313, + 49, 314, 315, 315, 315, 315, 32, 32, 316, 317, 318, 319, 320, 72, 72, 72, + 206, 321, 206, 206, 206, 206, 206, 322, 206, 206, 206, 206, 206, 
323, 72, 324, + 325, 326, 327, 328, 134, 49, 49, 49, 49, 329, 176, 49, 49, 49, 49, 330, + 331, 49, 201, 134, 49, 49, 49, 49, 197, 332, 49, 50, 206, 206, 322, 49, + 206, 333, 334, 206, 335, 336, 206, 206, 334, 206, 206, 336, 206, 206, 206, 333, + 49, 49, 49, 196, 206, 206, 206, 206, 49, 49, 49, 49, 149, 72, 72, 72, + 49, 337, 49, 49, 49, 49, 49, 49, 149, 206, 206, 206, 283, 49, 49, 225, + 338, 49, 339, 72, 13, 13, 340, 341, 13, 342, 49, 49, 49, 49, 343, 344, + 31, 345, 346, 347, 13, 13, 13, 348, 349, 350, 351, 352, 72, 72, 72, 353, + 354, 49, 355, 356, 49, 49, 49, 357, 358, 49, 49, 359, 360, 190, 32, 361, + 65, 49, 362, 49, 363, 364, 49, 149, 75, 49, 49, 365, 366, 367, 368, 369, + 49, 49, 370, 371, 372, 373, 49, 374, 49, 49, 49, 375, 376, 377, 378, 379, + 380, 381, 315, 11, 11, 382, 383, 72, 72, 72, 72, 72, 49, 49, 384, 190, + 49, 49, 385, 49, 386, 49, 49, 202, 387, 387, 387, 387, 387, 387, 387, 387, + 388, 388, 388, 388, 388, 388, 388, 388, 49, 49, 49, 49, 49, 49, 201, 49, + 49, 49, 49, 49, 49, 389, 72, 72, 390, 391, 392, 393, 394, 49, 49, 49, + 49, 49, 49, 395, 396, 397, 49, 49, 49, 49, 49, 398, 72, 49, 49, 49, + 49, 399, 49, 49, 194, 72, 72, 400, 32, 401, 402, 403, 404, 405, 406, 407, + 49, 49, 49, 49, 49, 49, 49, 408, 409, 2, 3, 4, 5, 410, 411, 412, + 49, 413, 49, 197, 414, 415, 416, 417, 418, 49, 170, 419, 201, 201, 72, 72, + 49, 49, 49, 49, 49, 49, 49, 50, 420, 266, 266, 421, 267, 267, 267, 422, + 423, 324, 424, 72, 72, 206, 206, 425, 72, 72, 72, 72, 72, 72, 72, 72, + 49, 149, 49, 49, 49, 99, 426, 427, 49, 49, 428, 49, 429, 49, 49, 430, + 49, 431, 49, 49, 432, 433, 72, 72, 9, 9, 434, 11, 11, 49, 49, 49, + 49, 201, 190, 72, 72, 72, 72, 72, 49, 49, 194, 49, 49, 49, 435, 72, + 49, 49, 49, 314, 49, 196, 194, 72, 436, 49, 49, 437, 49, 438, 49, 439, + 49, 197, 440, 72, 72, 72, 72, 72, 49, 441, 49, 442, 72, 72, 72, 72, + 49, 49, 49, 443, 72, 72, 72, 72, 444, 445, 49, 446, 447, 448, 49, 449, + 49, 450, 72, 72, 451, 49, 452, 453, 49, 49, 49, 454, 49, 455, 49, 456, + 
49, 457, 458, 72, 72, 72, 72, 72, 49, 49, 49, 49, 459, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 266, 460, 461, 49, 49, 462, 463, 464, 465, 466, + 218, 49, 49, 467, 468, 49, 459, 190, 469, 49, 470, 471, 472, 49, 49, 473, + 218, 49, 49, 474, 475, 476, 477, 478, 49, 96, 479, 480, 72, 72, 72, 72, + 72, 72, 72, 49, 49, 481, 482, 190, 101, 82, 83, 97, 483, 484, 485, 486, + 49, 49, 49, 487, 488, 190, 72, 72, 49, 49, 489, 490, 491, 72, 72, 72, + 49, 49, 49, 492, 493, 190, 72, 72, 49, 49, 494, 495, 190, 72, 72, 72, + 72, 72, 9, 9, 11, 11, 146, 496, 72, 72, 72, 72, 49, 49, 49, 459, + 49, 459, 72, 72, 72, 72, 72, 72, 267, 267, 267, 267, 267, 267, 497, 498, + 49, 49, 197, 72, 72, 72, 72, 72, 49, 49, 49, 459, 49, 197, 367, 72, + 72, 72, 72, 72, 72, 49, 201, 499, 49, 49, 49, 500, 501, 502, 503, 504, + 49, 72, 72, 72, 72, 72, 72, 72, 49, 49, 49, 49, 175, 505, 203, 506, + 466, 507, 72, 72, 72, 72, 72, 72, 508, 72, 72, 72, 72, 72, 72, 72, + 49, 49, 49, 49, 49, 49, 50, 149, 459, 509, 510, 72, 72, 72, 72, 72, + 206, 206, 206, 206, 206, 206, 206, 323, 206, 206, 511, 206, 206, 206, 512, 513, + 514, 206, 515, 206, 206, 516, 72, 72, 206, 206, 206, 206, 517, 72, 72, 72, + 206, 206, 206, 206, 206, 283, 266, 518, 9, 519, 11, 520, 521, 522, 236, 9, + 523, 524, 525, 526, 527, 9, 519, 11, 528, 529, 11, 530, 531, 532, 533, 9, + 534, 11, 9, 519, 11, 520, 521, 11, 236, 9, 523, 533, 9, 534, 11, 9, + 519, 11, 535, 9, 536, 537, 538, 539, 11, 540, 9, 541, 542, 543, 544, 11, + 545, 9, 546, 11, 547, 548, 548, 548, 49, 49, 49, 49, 549, 550, 72, 72, + 551, 49, 552, 553, 554, 555, 556, 557, 558, 202, 559, 202, 72, 72, 72, 560, + 206, 206, 324, 206, 206, 206, 206, 206, 206, 322, 333, 561, 561, 561, 206, 323, + 173, 206, 333, 206, 206, 206, 324, 206, 206, 282, 72, 72, 72, 72, 562, 206, + 563, 206, 206, 282, 564, 304, 72, 72, 206, 206, 565, 206, 206, 206, 206, 516, + 206, 206, 206, 206, 333, 566, 206, 567, 206, 206, 206, 206, 206, 206, 206, 333, + 206, 206, 206, 206, 282, 206, 206, 321, 206, 206, 568, 206, 206, 
206, 206, 206, + 206, 206, 206, 206, 569, 206, 206, 206, 206, 206, 206, 206, 206, 72, 565, 322, + 206, 206, 206, 206, 206, 206, 206, 322, 206, 206, 206, 206, 206, 570, 72, 72, + 324, 206, 206, 206, 567, 174, 206, 206, 567, 206, 516, 72, 72, 72, 72, 72, + 49, 49, 49, 49, 49, 314, 72, 72, 49, 49, 49, 175, 49, 49, 49, 49, + 49, 201, 72, 72, 72, 72, 72, 72, 571, 72, 572, 572, 572, 572, 572, 572, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 72, + 388, 388, 388, 388, 388, 388, 388, 573, }; static RE_UINT8 re_general_category_stage_4[] = { @@ -2785,123 +3028,123 @@ static RE_UINT8 re_general_category_stage_4[] = { 16, 16, 39, 16, 16, 16, 16, 16, 40, 40, 40, 40, 40, 40, 40, 40, 40, 41, 41, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, 40, 40, 42, 41, 41, 41, 42, 42, 41, 41, 41, 41, 41, 41, 41, 41, - 43, 43, 43, 43, 43, 43, 43, 43, 32, 32, 42, 32, 44, 45, 16, 46, - 44, 44, 41, 47, 11, 48, 48, 11, 34, 11, 11, 11, 11, 11, 11, 11, - 11, 49, 11, 11, 11, 11, 16, 16, 16, 16, 16, 16, 16, 16, 16, 34, - 16, 11, 32, 16, 32, 32, 32, 32, 16, 16, 32, 50, 34, 32, 34, 11, - 32, 51, 43, 43, 52, 32, 32, 32, 11, 34, 34, 34, 34, 34, 34, 16, - 32, 32, 32, 32, 44, 44, 44, 44, 49, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 48, 53, 2, 2, 2, 54, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 55, 56, 44, 57, 58, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 59, 60, 61, 43, 60, 44, 44, 44, 44, - 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 62, 44, 44, - 36, 63, 46, 44, 44, 44, 44, 44, 64, 64, 65, 8, 9, 66, 2, 67, - 43, 43, 43, 43, 43, 61, 65, 2, 68, 36, 36, 36, 36, 69, 43, 43, - 7, 7, 7, 7, 7, 2, 2, 36, 70, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 71, 43, 43, 43, 72, 51, 43, 43, 73, 74, 75, 43, 43, 36, - 7, 7, 7, 7, 7, 36, 76, 77, 2, 2, 2, 2, 2, 2, 2, 78, - 69, 36, 36, 36, 36, 36, 36, 36, 43, 43, 43, 43, 43, 79, 80, 36, - 36, 36, 36, 43, 43, 43, 43, 43, 70, 44, 44, 44, 44, 44, 44, 44, - 7, 7, 7, 7, 7, 36, 36, 36, 36, 36, 36, 36, 36, 69, 43, 43, - 43, 43, 40, 21, 
2, 81, 44, 44, 36, 36, 36, 43, 43, 74, 43, 43, - 43, 43, 74, 43, 74, 43, 43, 44, 2, 2, 2, 2, 2, 2, 2, 46, - 36, 36, 36, 36, 69, 43, 44, 46, 44, 44, 44, 44, 44, 44, 44, 44, - 62, 36, 36, 36, 36, 36, 62, 44, 44, 44, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 79, 43, 82, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 82, 70, 83, 84, 43, 43, 43, 82, 83, 84, 83, - 69, 43, 43, 43, 36, 36, 36, 36, 36, 43, 2, 7, 7, 7, 7, 7, - 85, 36, 36, 36, 80, 36, 36, 36, 58, 83, 80, 36, 36, 36, 62, 80, - 62, 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 62, 36, 36, 36, - 62, 62, 44, 36, 36, 44, 70, 83, 84, 43, 79, 86, 87, 86, 84, 62, - 44, 44, 44, 86, 44, 44, 36, 80, 36, 43, 44, 7, 7, 7, 7, 7, - 36, 20, 27, 27, 27, 88, 44, 44, 58, 82, 80, 36, 36, 62, 44, 80, - 62, 36, 80, 62, 36, 44, 79, 83, 84, 79, 44, 58, 79, 58, 43, 44, - 58, 44, 44, 44, 80, 36, 62, 62, 44, 44, 44, 7, 7, 7, 7, 7, - 43, 36, 69, 44, 44, 44, 44, 44, 58, 82, 80, 36, 36, 36, 36, 80, - 36, 80, 36, 36, 36, 36, 36, 36, 62, 36, 80, 36, 36, 44, 70, 83, - 84, 43, 43, 58, 82, 86, 84, 44, 62, 44, 44, 44, 44, 44, 44, 44, - 66, 44, 44, 44, 44, 44, 44, 44, 62, 36, 80, 36, 36, 44, 70, 84, - 84, 43, 79, 86, 87, 86, 84, 44, 44, 44, 44, 82, 44, 44, 36, 80, - 77, 27, 27, 27, 44, 44, 44, 44, 44, 70, 80, 36, 36, 62, 44, 36, - 62, 36, 36, 44, 80, 62, 62, 36, 44, 80, 62, 44, 36, 62, 44, 36, - 36, 36, 36, 36, 36, 44, 44, 83, 82, 87, 44, 83, 87, 83, 84, 44, - 62, 44, 44, 86, 44, 44, 44, 44, 27, 89, 67, 67, 88, 90, 44, 44, - 86, 83, 80, 36, 36, 36, 62, 36, 62, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 80, 36, 36, 44, 80, 43, 82, 83, 87, 43, 79, 43, 43, 44, - 44, 44, 58, 79, 36, 44, 44, 44, 44, 44, 44, 44, 27, 27, 27, 89, - 44, 83, 80, 36, 36, 36, 62, 36, 36, 36, 80, 36, 36, 44, 70, 84, - 83, 83, 87, 82, 87, 83, 43, 44, 44, 44, 86, 87, 44, 44, 44, 62, - 80, 62, 44, 44, 44, 44, 44, 44, 36, 36, 36, 36, 36, 62, 80, 83, - 84, 43, 79, 83, 87, 83, 84, 62, 44, 44, 44, 86, 44, 44, 44, 44, - 27, 27, 27, 44, 91, 36, 36, 36, 44, 83, 80, 36, 36, 36, 36, 
36, - 36, 36, 36, 62, 44, 36, 36, 36, 36, 80, 36, 36, 36, 36, 80, 44, - 36, 36, 36, 62, 44, 79, 44, 86, 83, 43, 79, 79, 83, 83, 83, 83, - 44, 83, 46, 44, 44, 44, 44, 44, 80, 36, 36, 36, 36, 36, 36, 36, - 69, 36, 43, 43, 43, 79, 44, 57, 36, 36, 36, 74, 43, 43, 43, 61, - 7, 7, 7, 7, 7, 2, 44, 44, 80, 62, 62, 80, 62, 62, 80, 44, - 44, 44, 36, 36, 80, 36, 36, 36, 80, 36, 80, 80, 44, 36, 80, 36, - 69, 36, 43, 43, 43, 58, 70, 44, 36, 36, 62, 81, 43, 43, 43, 44, - 7, 7, 7, 7, 7, 44, 36, 36, 76, 67, 2, 2, 2, 2, 2, 2, - 2, 92, 92, 67, 43, 67, 67, 67, 7, 7, 7, 7, 7, 27, 27, 27, - 27, 27, 51, 51, 51, 4, 4, 83, 36, 36, 36, 36, 80, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 62, 44, 58, 43, 43, 43, 43, 43, 43, 82, - 43, 43, 61, 43, 36, 36, 69, 43, 43, 43, 43, 43, 58, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 79, 67, 67, 67, 67, 75, 67, 67, 90, 67, - 2, 2, 92, 67, 21, 46, 44, 44, 36, 36, 36, 36, 36, 93, 84, 43, - 82, 43, 43, 43, 84, 82, 84, 70, 7, 7, 7, 7, 7, 2, 2, 2, - 36, 36, 36, 83, 43, 36, 36, 43, 70, 83, 94, 93, 83, 83, 83, 36, - 69, 43, 70, 36, 36, 36, 36, 36, 36, 82, 84, 82, 83, 83, 84, 93, - 7, 7, 7, 7, 7, 83, 84, 67, 11, 11, 11, 49, 44, 44, 49, 44, - 36, 36, 36, 36, 36, 63, 68, 36, 36, 36, 36, 36, 62, 36, 36, 44, - 36, 36, 36, 62, 62, 36, 36, 44, 62, 36, 36, 44, 36, 36, 36, 62, - 62, 36, 36, 44, 36, 36, 36, 36, 36, 36, 36, 62, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 62, 58, 43, 2, 2, 2, 2, 95, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 96, 44, 67, 67, 67, 67, 67, 44, 44, 44, - 36, 36, 62, 44, 44, 44, 44, 44, 97, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 63, 71, 98, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 99, 100, 44, 36, 36, 36, 36, 36, 63, 2, 101, - 102, 44, 44, 44, 44, 44, 44, 44, 36, 36, 36, 36, 36, 36, 62, 36, - 36, 43, 79, 44, 44, 44, 44, 44, 36, 43, 61, 46, 44, 44, 44, 44, - 36, 43, 44, 44, 44, 44, 44, 44, 62, 43, 44, 44, 44, 44, 44, 44, - 36, 36, 43, 84, 43, 43, 43, 83, 83, 83, 83, 82, 84, 43, 43, 43, - 43, 43, 2, 85, 2, 66, 69, 44, 7, 7, 7, 7, 7, 44, 44, 
44, - 27, 27, 27, 27, 27, 44, 44, 44, 2, 2, 2, 103, 2, 60, 43, 65, - 36, 104, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 44, 44, 44, 44, - 36, 36, 36, 36, 69, 62, 44, 44, 36, 36, 36, 44, 44, 44, 44, 44, - 43, 82, 83, 84, 82, 83, 44, 44, 83, 82, 83, 83, 84, 43, 44, 44, + 43, 43, 43, 43, 43, 43, 43, 43, 32, 32, 42, 32, 44, 45, 16, 10, + 44, 44, 41, 46, 11, 47, 47, 11, 34, 11, 11, 11, 11, 11, 11, 11, + 11, 48, 11, 11, 11, 11, 16, 16, 16, 16, 16, 16, 16, 16, 16, 34, + 16, 11, 32, 16, 32, 32, 32, 32, 16, 16, 32, 49, 34, 32, 34, 11, + 32, 50, 43, 43, 51, 32, 32, 32, 11, 34, 34, 34, 34, 34, 34, 16, + 48, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 47, 52, 2, 2, 2, + 53, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 54, 55, 56, 57, + 58, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 59, + 60, 61, 43, 60, 44, 44, 44, 44, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 62, 44, 44, 36, 63, 64, 44, 44, 44, 44, 44, + 65, 65, 65, 8, 9, 66, 2, 67, 43, 43, 43, 43, 43, 61, 68, 2, + 69, 36, 36, 36, 36, 70, 43, 43, 7, 7, 7, 7, 7, 2, 2, 36, + 71, 36, 36, 36, 36, 36, 36, 36, 36, 36, 72, 43, 43, 43, 73, 50, + 43, 43, 74, 75, 76, 43, 43, 36, 7, 7, 7, 7, 7, 36, 77, 78, + 2, 2, 2, 2, 2, 2, 2, 79, 70, 36, 36, 36, 36, 36, 36, 36, + 43, 43, 43, 43, 43, 80, 81, 36, 36, 36, 36, 43, 43, 43, 43, 43, + 71, 44, 44, 44, 44, 44, 44, 44, 7, 7, 7, 7, 7, 36, 36, 36, + 36, 36, 36, 36, 36, 70, 43, 43, 43, 43, 40, 21, 2, 82, 44, 44, + 36, 36, 36, 43, 43, 75, 43, 43, 43, 43, 75, 43, 75, 43, 43, 44, + 2, 2, 2, 2, 2, 2, 2, 64, 36, 36, 36, 36, 70, 43, 44, 64, + 44, 44, 44, 44, 44, 44, 44, 44, 36, 62, 44, 44, 44, 44, 44, 44, + 44, 44, 43, 43, 43, 43, 43, 43, 43, 83, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 83, 71, 84, 85, 43, 43, 43, 83, 84, 85, 84, + 70, 43, 43, 43, 36, 36, 36, 36, 36, 43, 2, 7, 7, 7, 7, 7, + 86, 36, 36, 36, 36, 36, 36, 36, 70, 84, 81, 36, 36, 36, 62, 81, + 62, 81, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 62, 36, 36, 36, + 62, 62, 44, 36, 36, 44, 71, 84, 85, 43, 80, 87, 88, 87, 85, 
62, + 44, 44, 44, 87, 44, 44, 36, 81, 36, 43, 44, 7, 7, 7, 7, 7, + 36, 20, 27, 27, 27, 57, 44, 44, 58, 83, 81, 36, 36, 62, 44, 81, + 62, 36, 81, 62, 36, 44, 80, 84, 85, 80, 44, 58, 80, 58, 43, 44, + 58, 44, 44, 44, 81, 36, 62, 62, 44, 44, 44, 7, 7, 7, 7, 7, + 43, 36, 70, 44, 44, 44, 44, 44, 58, 83, 81, 36, 36, 36, 36, 81, + 36, 81, 36, 36, 36, 36, 36, 36, 62, 36, 81, 36, 36, 44, 71, 84, + 85, 43, 43, 58, 83, 87, 85, 44, 62, 44, 44, 44, 44, 44, 44, 44, + 66, 44, 44, 44, 44, 44, 44, 44, 58, 84, 81, 36, 36, 36, 62, 81, + 62, 36, 81, 36, 36, 44, 71, 85, 85, 43, 80, 87, 88, 87, 85, 44, + 44, 44, 44, 83, 44, 44, 36, 81, 78, 27, 27, 27, 44, 44, 44, 44, + 44, 71, 81, 36, 36, 62, 44, 36, 62, 36, 36, 44, 81, 62, 62, 36, + 44, 81, 62, 44, 36, 62, 44, 36, 36, 36, 36, 36, 36, 44, 44, 84, + 83, 88, 44, 84, 88, 84, 85, 44, 62, 44, 44, 87, 44, 44, 44, 44, + 27, 89, 67, 67, 57, 90, 44, 44, 83, 84, 81, 36, 36, 36, 62, 36, + 62, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 44, 81, 43, + 83, 84, 88, 43, 80, 43, 43, 44, 44, 44, 58, 80, 36, 44, 44, 44, + 44, 44, 44, 44, 27, 27, 27, 89, 58, 84, 81, 36, 36, 36, 62, 36, + 36, 36, 81, 36, 36, 44, 71, 85, 84, 84, 88, 83, 88, 84, 43, 44, + 44, 44, 87, 88, 44, 44, 44, 62, 81, 62, 44, 44, 44, 44, 44, 44, + 36, 36, 36, 36, 36, 62, 81, 84, 85, 43, 80, 84, 88, 84, 85, 62, + 44, 44, 44, 87, 44, 44, 44, 44, 27, 27, 27, 44, 56, 36, 36, 36, + 44, 84, 81, 36, 36, 36, 36, 36, 36, 36, 36, 62, 44, 36, 36, 36, + 36, 81, 36, 36, 36, 36, 81, 44, 36, 36, 36, 62, 44, 80, 44, 87, + 84, 43, 80, 80, 84, 84, 84, 84, 44, 84, 64, 44, 44, 44, 44, 44, + 81, 36, 36, 36, 36, 36, 36, 36, 70, 36, 43, 43, 43, 80, 44, 91, + 36, 36, 36, 75, 43, 43, 43, 61, 7, 7, 7, 7, 7, 2, 44, 44, + 81, 62, 62, 81, 62, 62, 81, 44, 44, 44, 36, 36, 81, 36, 36, 36, + 81, 36, 81, 81, 44, 36, 81, 36, 70, 36, 43, 43, 43, 58, 71, 44, + 36, 36, 62, 82, 43, 43, 43, 44, 7, 7, 7, 7, 7, 44, 36, 36, + 77, 67, 2, 2, 2, 2, 2, 2, 2, 92, 92, 67, 43, 67, 67, 67, + 7, 7, 7, 7, 7, 27, 27, 27, 27, 27, 50, 
50, 50, 4, 4, 84, + 36, 36, 36, 36, 81, 36, 36, 36, 36, 36, 36, 36, 36, 36, 62, 44, + 58, 43, 43, 43, 43, 43, 43, 83, 43, 43, 61, 43, 36, 36, 70, 43, + 43, 43, 43, 43, 58, 43, 43, 43, 43, 43, 43, 43, 43, 43, 80, 67, + 67, 67, 67, 76, 67, 67, 90, 67, 2, 2, 92, 67, 21, 64, 44, 44, + 36, 36, 36, 36, 36, 93, 85, 43, 83, 43, 43, 43, 85, 83, 85, 71, + 7, 7, 7, 7, 7, 2, 2, 2, 36, 36, 36, 84, 43, 36, 36, 43, + 71, 84, 94, 93, 84, 84, 84, 36, 70, 43, 71, 36, 36, 36, 36, 36, + 36, 83, 85, 83, 84, 84, 85, 93, 7, 7, 7, 7, 7, 84, 85, 67, + 11, 11, 11, 48, 44, 44, 48, 44, 36, 36, 36, 36, 36, 63, 69, 36, + 36, 36, 36, 36, 62, 36, 36, 44, 36, 36, 36, 62, 62, 36, 36, 44, + 62, 36, 36, 44, 36, 36, 36, 62, 62, 36, 36, 44, 36, 36, 36, 36, + 36, 36, 36, 62, 36, 36, 36, 36, 36, 36, 36, 36, 36, 62, 58, 43, + 2, 2, 2, 2, 95, 27, 27, 27, 27, 27, 27, 27, 27, 27, 96, 44, + 67, 67, 67, 67, 67, 44, 44, 44, 36, 36, 62, 44, 44, 44, 44, 44, + 97, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 63, 72, + 98, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 99, 100, 44, + 36, 36, 36, 36, 36, 63, 2, 101, 102, 36, 36, 36, 62, 44, 44, 44, + 36, 36, 36, 36, 36, 36, 62, 36, 36, 43, 80, 44, 44, 44, 44, 44, + 36, 43, 61, 64, 44, 44, 44, 44, 36, 43, 44, 44, 44, 44, 44, 44, + 62, 43, 44, 44, 44, 44, 44, 44, 36, 36, 43, 85, 43, 43, 43, 84, + 84, 84, 84, 83, 85, 43, 43, 43, 43, 43, 2, 86, 2, 66, 70, 44, + 7, 7, 7, 7, 7, 44, 44, 44, 27, 27, 27, 27, 27, 44, 44, 44, + 2, 2, 2, 103, 2, 60, 43, 68, 36, 104, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 44, 44, 44, 44, 36, 36, 36, 36, 70, 62, 44, 44, + 36, 36, 36, 44, 44, 44, 44, 44, 36, 36, 36, 36, 36, 36, 36, 62, + 43, 83, 84, 85, 83, 84, 44, 44, 84, 83, 84, 84, 85, 43, 44, 44, 90, 44, 2, 7, 7, 7, 7, 7, 36, 36, 36, 36, 36, 36, 36, 44, - 36, 36, 36, 36, 36, 36, 44, 44, 83, 83, 83, 83, 83, 83, 83, 83, - 94, 36, 36, 36, 83, 44, 44, 44, 7, 7, 7, 7, 7, 96, 44, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 36, 36, 36, 69, 82, 84, 44, 2, - 36, 36, 93, 82, 43, 43, 43, 79, 82, 82, 
84, 43, 43, 43, 82, 83, - 83, 84, 43, 43, 43, 43, 79, 58, 2, 2, 2, 85, 2, 2, 2, 44, - 43, 43, 94, 36, 36, 36, 36, 36, 36, 36, 82, 43, 43, 82, 82, 83, - 83, 82, 94, 36, 36, 36, 44, 44, 92, 67, 67, 67, 67, 51, 43, 43, - 43, 43, 67, 67, 67, 67, 90, 44, 43, 94, 36, 36, 36, 36, 36, 36, - 93, 43, 43, 83, 43, 84, 83, 36, 36, 36, 36, 82, 43, 83, 84, 84, - 43, 83, 44, 44, 44, 44, 2, 2, 36, 36, 83, 83, 83, 83, 43, 43, - 43, 43, 83, 43, 44, 55, 2, 2, 7, 7, 7, 7, 7, 44, 80, 36, - 36, 36, 36, 36, 40, 40, 40, 2, 2, 2, 2, 2, 44, 44, 44, 44, - 43, 61, 43, 43, 43, 43, 43, 43, 82, 43, 43, 43, 70, 36, 69, 36, - 36, 83, 70, 62, 44, 44, 44, 44, 16, 16, 16, 16, 16, 16, 40, 40, - 40, 40, 40, 40, 40, 45, 16, 16, 16, 16, 16, 16, 45, 16, 16, 16, - 16, 16, 16, 16, 16, 105, 40, 40, 43, 43, 43, 79, 44, 44, 44, 44, - 44, 44, 44, 44, 44, 44, 43, 43, 32, 32, 32, 16, 16, 16, 16, 32, + 36, 36, 36, 36, 36, 36, 44, 44, 84, 84, 84, 84, 84, 84, 84, 84, + 94, 36, 36, 36, 84, 44, 44, 44, 7, 7, 7, 7, 7, 96, 44, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 36, 36, 36, 70, 83, 85, 44, 2, + 36, 36, 93, 83, 43, 43, 43, 80, 83, 83, 85, 43, 43, 43, 83, 84, + 84, 85, 43, 43, 43, 43, 80, 58, 2, 2, 2, 86, 2, 2, 2, 44, + 43, 43, 43, 43, 43, 43, 43, 105, 43, 43, 94, 36, 36, 36, 36, 36, + 36, 36, 83, 43, 43, 83, 83, 84, 84, 83, 94, 36, 36, 36, 44, 44, + 92, 67, 67, 67, 67, 50, 43, 43, 43, 43, 67, 67, 67, 67, 90, 44, + 43, 94, 36, 36, 36, 36, 36, 36, 93, 43, 43, 84, 43, 85, 43, 36, + 36, 36, 36, 83, 43, 84, 85, 85, 43, 84, 44, 44, 44, 44, 2, 2, + 36, 36, 84, 84, 84, 84, 43, 43, 43, 43, 84, 43, 44, 54, 2, 2, + 7, 7, 7, 7, 7, 44, 81, 36, 36, 36, 36, 36, 40, 40, 40, 2, + 2, 2, 2, 2, 44, 44, 44, 44, 43, 61, 43, 43, 43, 43, 43, 43, + 83, 43, 43, 43, 71, 36, 70, 36, 36, 84, 71, 62, 43, 44, 44, 44, + 16, 16, 16, 16, 16, 16, 40, 40, 40, 40, 40, 40, 40, 45, 16, 16, + 16, 16, 16, 16, 45, 16, 16, 16, 16, 16, 16, 16, 16, 106, 40, 40, + 43, 43, 43, 44, 44, 44, 43, 43, 32, 32, 32, 16, 16, 16, 16, 32, 16, 16, 16, 16, 11, 11, 11, 11, 16, 
16, 16, 44, 11, 11, 11, 44, - 16, 16, 16, 16, 49, 49, 49, 49, 16, 16, 16, 16, 16, 16, 16, 44, - 16, 16, 16, 16, 106, 106, 106, 106, 16, 16, 107, 16, 11, 11, 108, 109, - 41, 16, 107, 16, 11, 11, 108, 41, 16, 16, 44, 16, 11, 11, 110, 41, - 16, 16, 16, 16, 11, 11, 111, 41, 44, 16, 107, 16, 11, 11, 108, 112, - 113, 113, 113, 113, 113, 114, 64, 64, 115, 115, 115, 2, 116, 117, 116, 117, - 2, 2, 2, 2, 118, 64, 64, 119, 2, 2, 2, 2, 120, 121, 2, 122, - 123, 2, 124, 125, 2, 2, 2, 2, 2, 9, 123, 2, 2, 2, 2, 126, - 64, 64, 65, 64, 64, 64, 64, 64, 127, 44, 27, 27, 27, 8, 124, 128, - 27, 27, 27, 27, 27, 8, 124, 100, 40, 40, 40, 40, 40, 40, 81, 44, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 129, 44, 44, - 43, 43, 43, 43, 43, 43, 130, 52, 131, 52, 131, 43, 43, 43, 43, 43, - 79, 44, 44, 44, 44, 44, 44, 44, 67, 132, 67, 133, 67, 34, 11, 16, - 11, 32, 133, 67, 50, 11, 11, 67, 67, 67, 132, 132, 132, 11, 11, 134, - 11, 11, 35, 36, 39, 67, 16, 11, 8, 8, 50, 16, 16, 26, 67, 135, + 16, 16, 16, 16, 48, 48, 48, 48, 16, 16, 16, 16, 16, 16, 16, 44, + 16, 16, 16, 16, 107, 107, 107, 107, 16, 16, 108, 16, 11, 11, 109, 110, + 41, 16, 108, 16, 11, 11, 109, 41, 16, 16, 44, 16, 11, 11, 111, 41, + 16, 16, 16, 16, 11, 11, 112, 41, 44, 16, 108, 16, 11, 11, 109, 113, + 114, 114, 114, 114, 114, 115, 65, 65, 116, 116, 116, 2, 117, 118, 117, 118, + 2, 2, 2, 2, 119, 65, 65, 120, 2, 2, 2, 2, 121, 122, 2, 123, + 124, 2, 125, 126, 2, 2, 2, 2, 2, 9, 124, 2, 2, 2, 2, 127, + 65, 65, 68, 65, 65, 65, 65, 65, 128, 44, 27, 27, 27, 8, 125, 129, + 27, 27, 27, 27, 27, 8, 125, 100, 40, 40, 40, 40, 40, 40, 82, 44, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 44, + 43, 43, 43, 43, 43, 43, 130, 51, 131, 51, 131, 43, 43, 43, 43, 43, + 80, 44, 44, 44, 44, 44, 44, 44, 67, 132, 67, 133, 67, 34, 11, 16, + 11, 32, 133, 67, 49, 11, 11, 67, 67, 67, 132, 132, 132, 11, 11, 134, + 11, 11, 35, 36, 39, 67, 16, 11, 8, 8, 49, 16, 16, 26, 67, 135, 27, 27, 27, 27, 27, 27, 27, 27, 101, 101, 101, 101, 101, 101, 101, 
101, 101, 136, 137, 101, 138, 44, 44, 44, 8, 8, 139, 67, 67, 8, 67, 67, 139, 26, 67, 139, 67, 67, 67, 139, 67, 67, 67, 67, 67, 67, 67, 8, @@ -2910,106 +3153,132 @@ static RE_UINT8 re_general_category_stage_4[] = { 8, 67, 67, 67, 140, 141, 67, 67, 67, 67, 67, 67, 67, 67, 139, 67, 67, 67, 67, 67, 67, 26, 8, 8, 8, 8, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 8, 8, 8, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 44, 44, 44, 44, 44, 44, 67, 67, 67, 90, 44, 44, 44, 44, - 67, 67, 67, 67, 67, 90, 44, 44, 27, 27, 27, 27, 27, 27, 67, 67, - 67, 67, 67, 67, 67, 27, 27, 27, 67, 67, 67, 26, 67, 67, 67, 67, - 26, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 8, 8, 8, 8, - 67, 67, 67, 67, 67, 67, 67, 26, 91, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 90, 44, 44, 67, 67, 67, 90, 44, 44, 44, 44, + 27, 27, 27, 27, 27, 27, 67, 67, 67, 67, 67, 67, 67, 27, 27, 27, + 67, 67, 67, 26, 67, 67, 67, 67, 26, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 8, 8, 8, 8, 67, 67, 67, 67, 67, 67, 67, 26, 67, 67, 67, 67, 4, 4, 4, 4, 4, 4, 4, 27, 27, 27, 27, 27, - 27, 27, 67, 67, 67, 67, 67, 67, 8, 8, 124, 142, 8, 8, 8, 8, - 8, 8, 8, 4, 4, 4, 4, 4, 8, 124, 143, 143, 143, 143, 143, 143, + 27, 27, 67, 67, 67, 67, 67, 67, 8, 8, 125, 142, 8, 8, 8, 8, + 8, 8, 8, 4, 4, 4, 4, 4, 8, 125, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 142, 8, 8, 8, 8, 8, 8, 8, 4, 4, 8, 8, - 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 139, 26, 8, 8, 144, 44, - 11, 11, 11, 11, 11, 11, 11, 48, 16, 16, 16, 16, 16, 16, 16, 107, - 32, 11, 32, 34, 34, 34, 34, 11, 32, 32, 34, 16, 16, 16, 40, 11, - 32, 32, 135, 67, 67, 133, 34, 145, 43, 32, 44, 44, 55, 2, 95, 2, - 16, 16, 16, 54, 44, 44, 54, 44, 36, 36, 36, 36, 44, 44, 44, 53, - 46, 44, 44, 44, 44, 44, 44, 58, 36, 36, 36, 62, 44, 44, 44, 44, - 36, 36, 36, 62, 36, 36, 36, 62, 2, 116, 116, 2, 120, 121, 116, 2, - 2, 2, 2, 6, 2, 103, 116, 2, 116, 4, 4, 4, 4, 2, 2, 85, - 2, 2, 2, 2, 2, 115, 44, 44, 67, 67, 67, 67, 67, 91, 67, 67, - 67, 67, 67, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, 67, 44, 44, - 1, 
2, 146, 147, 4, 4, 4, 4, 4, 67, 4, 4, 4, 4, 148, 149, - 150, 101, 101, 101, 101, 43, 43, 83, 151, 40, 40, 67, 101, 152, 63, 67, - 36, 36, 36, 62, 58, 153, 154, 68, 36, 36, 36, 36, 36, 63, 40, 68, - 44, 44, 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 62, + 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 139, 26, 8, 8, 139, 67, + 67, 67, 44, 67, 67, 67, 67, 67, 67, 67, 67, 44, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 44, 56, 67, 67, 67, 67, 67, 90, 67, 67, 67, + 67, 44, 44, 44, 44, 44, 44, 44, 11, 11, 11, 11, 11, 11, 11, 47, + 16, 16, 16, 16, 16, 16, 16, 108, 32, 11, 32, 34, 34, 34, 34, 11, + 32, 32, 34, 16, 16, 16, 40, 11, 32, 32, 135, 67, 67, 133, 34, 144, + 43, 32, 44, 44, 54, 2, 95, 2, 16, 16, 16, 53, 44, 44, 53, 44, + 36, 36, 36, 36, 44, 44, 44, 52, 64, 44, 44, 44, 44, 44, 44, 58, + 36, 36, 36, 62, 44, 44, 44, 44, 36, 36, 36, 62, 36, 36, 36, 62, + 2, 117, 117, 2, 121, 122, 117, 2, 2, 2, 2, 6, 2, 103, 117, 2, + 117, 4, 4, 4, 4, 2, 2, 86, 2, 2, 2, 2, 2, 116, 2, 2, + 103, 145, 44, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, 56, 67, 67, + 67, 67, 44, 44, 44, 44, 44, 44, 67, 67, 67, 44, 44, 44, 44, 44, + 67, 67, 67, 67, 67, 67, 44, 44, 1, 2, 146, 147, 4, 4, 4, 4, + 4, 67, 4, 4, 4, 4, 148, 149, 150, 101, 101, 101, 101, 43, 43, 84, + 151, 40, 40, 67, 101, 152, 63, 67, 36, 36, 36, 62, 58, 153, 154, 69, + 36, 36, 36, 36, 36, 63, 40, 69, 44, 44, 81, 36, 36, 36, 36, 36, 67, 27, 27, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 90, 27, 27, 27, 27, 27, 67, 67, 67, 67, 67, 67, 67, 27, 27, 27, 27, 155, 27, 27, 27, 27, 27, 27, 27, 36, 36, 104, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 156, 2, 7, 7, 7, 7, 7, 36, 44, 44, - 32, 32, 32, 32, 32, 32, 32, 69, 52, 157, 43, 43, 43, 43, 43, 85, - 32, 32, 32, 32, 44, 44, 44, 58, 36, 36, 36, 101, 101, 101, 101, 101, + 32, 32, 32, 32, 32, 32, 32, 70, 51, 157, 43, 43, 43, 43, 43, 86, + 32, 32, 32, 32, 32, 32, 40, 58, 36, 36, 36, 101, 101, 101, 101, 101, 43, 2, 2, 2, 44, 44, 44, 44, 41, 41, 41, 154, 40, 40, 40, 40, 41, 32, 32, 32, 32, 32, 32, 32, 16, 32, 
32, 32, 32, 32, 32, 32, - 45, 16, 16, 16, 34, 34, 34, 32, 32, 32, 32, 32, 42, 158, 34, 107, - 32, 32, 44, 44, 44, 44, 44, 44, 32, 32, 32, 32, 32, 48, 44, 44, - 44, 44, 44, 44, 40, 35, 36, 36, 36, 70, 36, 70, 36, 69, 36, 36, - 36, 93, 84, 82, 67, 67, 44, 44, 27, 27, 27, 67, 159, 44, 44, 44, - 36, 36, 2, 2, 44, 44, 44, 44, 83, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 83, 83, 83, 83, 83, 83, 83, 83, 79, 44, 44, 44, 44, 2, - 43, 36, 36, 36, 2, 71, 44, 44, 36, 36, 36, 43, 43, 43, 43, 2, - 36, 36, 36, 69, 43, 43, 43, 43, 43, 83, 44, 44, 44, 44, 44, 55, - 36, 69, 83, 43, 43, 83, 82, 83, 160, 2, 2, 2, 2, 2, 2, 53, - 7, 7, 7, 7, 7, 44, 44, 2, 36, 36, 36, 36, 69, 43, 43, 82, - 84, 82, 84, 79, 44, 44, 44, 44, 36, 69, 36, 36, 36, 36, 82, 44, - 7, 7, 7, 7, 7, 44, 2, 2, 68, 36, 36, 76, 67, 93, 44, 44, - 70, 43, 70, 69, 70, 36, 36, 43, 69, 62, 44, 44, 44, 44, 44, 44, - 44, 44, 44, 44, 44, 80, 104, 2, 36, 36, 36, 36, 36, 93, 43, 83, - 2, 104, 161, 79, 44, 44, 44, 44, 80, 36, 36, 62, 80, 36, 36, 62, - 80, 36, 36, 62, 44, 44, 44, 44, 36, 93, 84, 83, 82, 160, 84, 44, - 36, 36, 44, 44, 44, 44, 44, 44, 36, 36, 36, 62, 44, 80, 36, 36, - 162, 162, 162, 162, 162, 162, 162, 162, 163, 163, 163, 163, 163, 163, 163, 163, - 36, 36, 36, 36, 36, 44, 44, 44, 16, 16, 16, 107, 44, 44, 44, 44, - 44, 54, 16, 16, 44, 44, 80, 70, 36, 36, 36, 36, 164, 36, 36, 36, - 36, 36, 36, 62, 36, 36, 62, 62, 36, 80, 62, 36, 36, 36, 36, 36, - 36, 41, 41, 41, 41, 41, 41, 41, 41, 44, 44, 44, 44, 44, 44, 44, - 44, 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 4, - 44, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 159, 44, - 2, 2, 2, 165, 125, 44, 44, 44, 6, 166, 167, 143, 143, 143, 143, 143, - 143, 143, 125, 165, 125, 2, 122, 168, 2, 46, 2, 2, 148, 143, 143, 125, - 2, 169, 8, 144, 66, 2, 44, 44, 36, 36, 62, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 62, 78, 55, 2, 3, 2, 4, 5, 6, 2, - 16, 16, 16, 16, 16, 17, 18, 124, 125, 4, 2, 36, 36, 36, 36, 36, - 68, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 
36, 40, + 45, 16, 16, 16, 34, 34, 34, 32, 32, 32, 32, 32, 42, 158, 34, 108, + 32, 32, 16, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 11, 11, 44, + 11, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 81, 40, 35, 36, 36, + 36, 71, 36, 71, 36, 70, 36, 36, 36, 93, 85, 83, 67, 67, 44, 44, + 27, 27, 27, 67, 159, 44, 44, 44, 36, 36, 2, 2, 44, 44, 44, 44, + 84, 36, 36, 36, 36, 36, 36, 36, 36, 36, 84, 84, 84, 84, 84, 84, + 84, 84, 80, 44, 44, 44, 44, 2, 43, 36, 36, 36, 2, 72, 44, 44, + 36, 36, 36, 43, 43, 43, 43, 2, 36, 36, 36, 70, 43, 43, 43, 43, + 43, 84, 44, 44, 44, 44, 44, 54, 36, 70, 84, 43, 43, 84, 83, 84, + 160, 2, 2, 2, 2, 2, 2, 52, 7, 7, 7, 7, 7, 44, 44, 2, + 36, 36, 70, 69, 36, 36, 36, 36, 7, 7, 7, 7, 7, 36, 36, 62, + 36, 36, 36, 36, 70, 43, 43, 83, 85, 83, 85, 80, 44, 44, 44, 44, + 36, 70, 36, 36, 36, 36, 83, 44, 7, 7, 7, 7, 7, 44, 2, 2, + 69, 36, 36, 77, 67, 93, 83, 36, 71, 43, 71, 70, 71, 36, 36, 43, + 70, 62, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 81, 104, 2, + 36, 36, 36, 36, 36, 93, 43, 84, 2, 104, 161, 80, 44, 44, 44, 44, + 81, 36, 36, 62, 81, 36, 36, 62, 81, 36, 36, 62, 44, 44, 44, 44, + 16, 16, 16, 16, 16, 110, 40, 40, 44, 44, 16, 44, 44, 44, 44, 44, + 36, 93, 85, 84, 83, 160, 85, 44, 36, 36, 44, 44, 44, 44, 44, 44, + 36, 36, 36, 62, 44, 81, 36, 36, 162, 162, 162, 162, 162, 162, 162, 162, + 163, 163, 163, 163, 163, 163, 163, 163, 36, 36, 36, 36, 36, 44, 44, 44, + 16, 16, 16, 108, 44, 44, 44, 44, 44, 53, 16, 16, 44, 44, 81, 71, + 36, 36, 36, 36, 164, 36, 36, 36, 36, 36, 36, 62, 36, 36, 62, 62, + 36, 81, 62, 36, 36, 36, 36, 36, 36, 41, 41, 41, 41, 41, 41, 41, + 41, 44, 44, 44, 44, 44, 44, 44, 44, 81, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 143, 44, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 159, 44, 2, 2, 2, 165, 126, 44, 44, 44, + 43, 43, 43, 43, 43, 43, 43, 44, 6, 166, 167, 143, 143, 143, 143, 143, + 143, 143, 126, 165, 126, 2, 123, 168, 2, 64, 2, 2, 148, 143, 143, 126, + 2, 169, 8, 170, 66, 2, 44, 44, 36, 36, 62, 36, 36, 36, 36, 36, + 
36, 36, 36, 36, 36, 36, 62, 79, 54, 2, 3, 2, 4, 5, 6, 2, + 16, 16, 16, 16, 16, 17, 18, 125, 126, 4, 2, 36, 36, 36, 36, 36, + 69, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 40, 44, 36, 36, 36, 44, 36, 36, 36, 44, 36, 36, 36, 44, 36, 62, 44, - 20, 170, 88, 129, 26, 8, 139, 90, 44, 44, 44, 44, 78, 64, 67, 44, - 36, 36, 36, 36, 36, 36, 80, 36, 36, 36, 36, 36, 36, 62, 36, 80, - 2, 46, 44, 171, 27, 27, 27, 27, 27, 27, 44, 91, 67, 67, 67, 67, - 101, 101, 138, 27, 89, 67, 67, 67, 67, 67, 67, 67, 67, 96, 44, 44, - 67, 67, 67, 67, 67, 67, 51, 44, 27, 27, 44, 44, 44, 44, 44, 44, - 147, 36, 36, 36, 36, 102, 44, 44, 36, 36, 36, 36, 36, 36, 36, 55, - 36, 36, 44, 44, 36, 36, 36, 36, 172, 101, 101, 44, 44, 44, 44, 44, - 11, 11, 11, 11, 16, 16, 16, 16, 36, 36, 36, 44, 62, 36, 36, 36, - 36, 36, 36, 80, 62, 44, 62, 80, 36, 36, 36, 55, 27, 27, 27, 27, - 36, 36, 36, 27, 27, 27, 44, 55, 36, 36, 36, 36, 36, 44, 44, 55, - 36, 36, 36, 36, 44, 44, 44, 36, 69, 43, 58, 79, 44, 44, 43, 43, - 36, 36, 80, 36, 80, 36, 36, 36, 36, 36, 44, 44, 43, 79, 44, 58, - 27, 27, 27, 27, 44, 44, 44, 44, 2, 2, 2, 2, 46, 44, 44, 44, - 36, 36, 36, 36, 36, 36, 173, 30, 36, 36, 36, 44, 55, 2, 2, 2, - 36, 36, 36, 44, 27, 27, 27, 27, 36, 62, 44, 44, 27, 27, 27, 27, - 36, 36, 36, 36, 62, 44, 44, 44, 27, 27, 27, 27, 27, 27, 27, 96, - 84, 94, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 43, 43, 43, 43, - 43, 43, 43, 61, 2, 2, 2, 44, 44, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 7, 7, 7, 7, 7, 83, 84, 43, 82, 84, 61, 174, 2, - 2, 44, 44, 44, 44, 44, 44, 44, 43, 70, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 69, 43, 43, 84, 43, 43, 43, 79, 7, 7, 7, 7, 7, - 2, 2, 44, 44, 44, 44, 44, 44, 36, 93, 83, 43, 43, 43, 43, 82, - 94, 36, 63, 2, 46, 44, 44, 44, 36, 36, 36, 36, 36, 69, 84, 83, - 43, 43, 43, 84, 44, 44, 44, 44, 101, 102, 44, 44, 44, 44, 44, 44, - 93, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 87, - 44, 44, 44, 44, 44, 44, 44, 58, 43, 73, 40, 40, 40, 40, 40, 40, - 36, 44, 44, 44, 44, 44, 44, 44, 67, 67, 
67, 90, 91, 67, 67, 67, - 67, 67, 175, 84, 43, 67, 175, 83, 83, 176, 64, 64, 64, 177, 43, 43, - 43, 75, 51, 43, 43, 43, 67, 67, 67, 67, 67, 67, 67, 43, 43, 67, - 67, 67, 67, 67, 67, 67, 67, 44, 67, 43, 75, 44, 44, 44, 44, 44, + 20, 171, 57, 172, 26, 8, 139, 90, 44, 44, 44, 44, 79, 65, 67, 44, + 36, 36, 36, 36, 36, 36, 81, 36, 36, 36, 36, 36, 36, 62, 36, 81, + 2, 64, 44, 173, 27, 27, 27, 27, 27, 27, 44, 56, 67, 67, 67, 67, + 101, 101, 138, 27, 89, 67, 67, 67, 67, 67, 67, 67, 67, 27, 90, 44, + 90, 44, 44, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, 67, 50, 44, + 174, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 44, 44, + 27, 27, 44, 44, 44, 44, 44, 44, 147, 36, 36, 36, 36, 175, 44, 44, + 36, 36, 36, 43, 43, 80, 44, 44, 36, 36, 36, 36, 36, 36, 36, 54, + 36, 36, 44, 44, 36, 36, 36, 36, 176, 101, 101, 44, 44, 44, 44, 44, + 11, 11, 11, 11, 16, 16, 16, 16, 36, 36, 44, 44, 44, 44, 44, 54, + 36, 36, 36, 44, 62, 36, 36, 36, 36, 36, 36, 81, 62, 44, 62, 81, + 36, 36, 36, 54, 27, 27, 27, 27, 36, 36, 36, 77, 155, 27, 27, 27, + 44, 44, 44, 173, 27, 27, 27, 27, 36, 36, 36, 27, 27, 27, 44, 54, + 36, 36, 36, 36, 36, 44, 44, 54, 36, 36, 36, 36, 44, 44, 44, 36, + 70, 43, 58, 80, 44, 44, 43, 43, 36, 36, 81, 36, 81, 36, 36, 36, + 36, 36, 44, 44, 43, 80, 44, 58, 27, 27, 27, 27, 44, 44, 44, 44, + 2, 2, 2, 2, 64, 44, 44, 44, 36, 36, 36, 36, 36, 36, 177, 30, + 36, 36, 36, 36, 36, 36, 177, 27, 36, 36, 36, 36, 78, 36, 36, 36, + 36, 36, 70, 80, 44, 173, 27, 27, 2, 2, 2, 64, 44, 44, 44, 44, + 36, 36, 36, 44, 54, 2, 2, 2, 36, 36, 36, 44, 27, 27, 27, 27, + 36, 62, 44, 44, 27, 27, 27, 27, 36, 44, 44, 44, 54, 2, 64, 44, + 44, 44, 44, 44, 173, 27, 27, 27, 36, 36, 36, 36, 62, 44, 44, 44, + 27, 27, 27, 27, 27, 27, 27, 96, 85, 94, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 43, 43, 43, 43, 43, 43, 43, 61, 2, 2, 2, 44, + 44, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 7, 7, 7, 7, 7, + 44, 44, 44, 44, 44, 44, 44, 58, 84, 85, 43, 83, 85, 61, 178, 2, + 2, 44, 44, 44, 44, 44, 44, 44, 43, 71, 36, 36, 36, 36, 
36, 36, + 36, 36, 36, 70, 43, 43, 85, 43, 43, 43, 80, 7, 7, 7, 7, 7, + 2, 2, 44, 44, 44, 44, 44, 44, 36, 70, 2, 62, 44, 44, 44, 44, + 36, 93, 84, 43, 43, 43, 43, 83, 94, 36, 63, 2, 64, 44, 54, 44, + 7, 7, 7, 7, 7, 62, 44, 44, 173, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 96, 44, 44, 44, 44, 44, 36, 36, 36, 36, 36, 36, 84, 85, + 43, 84, 83, 43, 2, 2, 2, 44, 36, 36, 36, 36, 36, 36, 36, 70, + 84, 85, 43, 43, 43, 80, 44, 44, 83, 84, 88, 87, 88, 87, 84, 44, + 44, 44, 44, 87, 44, 44, 81, 36, 36, 84, 44, 43, 43, 43, 80, 44, + 43, 43, 80, 44, 44, 44, 44, 44, 84, 85, 43, 43, 83, 83, 84, 85, + 83, 43, 36, 72, 44, 44, 44, 44, 36, 36, 36, 36, 36, 36, 36, 93, + 84, 43, 43, 44, 84, 84, 43, 85, 61, 2, 2, 2, 2, 44, 44, 44, + 84, 85, 43, 43, 43, 83, 85, 85, 61, 2, 62, 44, 44, 44, 44, 44, + 36, 36, 36, 36, 36, 70, 85, 84, 43, 43, 43, 85, 44, 44, 44, 44, + 27, 96, 44, 44, 44, 44, 44, 81, 101, 101, 101, 101, 101, 101, 101, 175, + 2, 2, 64, 44, 44, 44, 44, 44, 43, 43, 61, 44, 44, 44, 44, 44, + 43, 43, 43, 61, 2, 2, 67, 67, 40, 40, 92, 44, 44, 44, 44, 44, + 7, 7, 7, 7, 7, 173, 27, 27, 27, 81, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 44, 44, 81, 36, 93, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 88, 43, 74, 40, 40, 40, 40, 40, 40, + 36, 44, 44, 44, 44, 44, 44, 44, 36, 36, 36, 36, 36, 44, 50, 61, + 65, 65, 44, 44, 44, 44, 44, 44, 67, 67, 67, 90, 56, 67, 67, 67, + 67, 67, 179, 85, 43, 67, 179, 84, 84, 180, 65, 65, 65, 181, 43, 43, + 43, 76, 50, 43, 43, 43, 67, 67, 67, 67, 67, 67, 67, 43, 43, 67, + 67, 67, 67, 67, 67, 67, 67, 44, 67, 43, 76, 44, 44, 44, 44, 44, 27, 44, 44, 44, 44, 44, 44, 44, 11, 11, 11, 11, 11, 16, 16, 16, 16, 16, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 16, - 16, 16, 107, 16, 16, 16, 16, 16, 11, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 48, 11, 44, 48, 49, 48, 49, 11, 48, 11, - 11, 11, 11, 16, 16, 54, 54, 16, 16, 16, 54, 16, 16, 16, 16, 16, - 16, 16, 11, 49, 11, 48, 49, 11, 11, 11, 48, 11, 11, 11, 48, 16, - 16, 16, 16, 16, 11, 49, 11, 
48, 11, 11, 48, 48, 44, 11, 11, 11, - 48, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 11, 11, + 16, 16, 108, 16, 16, 16, 16, 16, 11, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 47, 11, 44, 47, 48, 47, 48, 11, 47, 11, + 11, 11, 11, 16, 16, 53, 53, 16, 16, 16, 53, 16, 16, 16, 16, 16, + 16, 16, 11, 48, 11, 47, 48, 11, 11, 11, 47, 11, 11, 11, 47, 16, + 16, 16, 16, 16, 11, 48, 11, 47, 11, 11, 47, 47, 44, 11, 11, 11, + 47, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 11, 11, 11, 11, 11, 16, 16, 16, 16, 16, 16, 16, 16, 44, 11, 11, 11, 11, 31, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 33, 16, 16, 16, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 31, 16, 16, @@ -3017,21 +3286,19 @@ static RE_UINT8 re_general_category_stage_4[] = { 16, 16, 16, 16, 16, 16, 16, 33, 16, 16, 16, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 31, 16, 16, 16, 16, 33, 16, 16, 16, 11, 11, 11, 11, 31, 16, 16, 16, 16, 33, 16, 16, 16, 32, 44, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 36, 36, 80, 36, 36, 36, 36, 36, - 80, 62, 62, 80, 80, 36, 36, 36, 36, 62, 36, 36, 80, 80, 44, 44, - 44, 62, 44, 80, 80, 80, 80, 36, 80, 62, 62, 80, 80, 80, 80, 80, - 80, 62, 62, 80, 36, 62, 36, 36, 36, 62, 36, 36, 80, 36, 62, 62, - 36, 36, 36, 36, 36, 80, 36, 36, 80, 36, 80, 36, 36, 80, 36, 36, - 8, 44, 44, 44, 44, 44, 44, 44, 91, 67, 67, 67, 67, 67, 67, 90, - 27, 27, 27, 27, 27, 96, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, - 67, 90, 44, 44, 44, 44, 44, 44, 67, 67, 67, 67, 90, 44, 44, 44, - 67, 44, 44, 44, 44, 44, 44, 44, 90, 44, 44, 44, 44, 44, 44, 44, - 67, 67, 67, 91, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 90, 44, - 67, 67, 90, 67, 67, 90, 44, 44, 90, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 91, 67, 90, 44, 67, 67, 67, 67, 44, 44, 44, 44, - 44, 44, 44, 44, 44, 91, 67, 67, 90, 44, 91, 67, 67, 67, 67, 67, - 78, 44, 44, 44, 44, 44, 44, 44, 64, 64, 64, 64, 64, 64, 64, 64, - 163, 163, 163, 163, 163, 163, 163, 44, + 7, 7, 7, 7, 7, 7, 7, 7, 36, 36, 62, 173, 27, 27, 27, 27, + 43, 43, 43, 80, 44, 44, 
44, 44, 36, 36, 81, 36, 36, 36, 36, 36, + 81, 62, 62, 81, 81, 36, 36, 36, 36, 62, 36, 36, 81, 81, 44, 44, + 44, 62, 44, 81, 81, 81, 81, 36, 81, 62, 62, 81, 81, 81, 81, 81, + 81, 62, 62, 81, 36, 62, 36, 36, 36, 62, 36, 36, 81, 36, 62, 62, + 36, 36, 36, 36, 36, 81, 36, 36, 81, 36, 81, 36, 36, 81, 36, 36, + 8, 44, 44, 44, 44, 44, 44, 44, 56, 67, 67, 67, 67, 67, 67, 67, + 44, 44, 44, 67, 67, 67, 67, 67, 67, 90, 44, 44, 44, 44, 44, 44, + 67, 67, 67, 67, 90, 44, 44, 44, 67, 67, 67, 67, 67, 67, 90, 44, + 44, 44, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 44, 44, 44, 44, + 67, 67, 56, 67, 67, 67, 67, 67, 67, 90, 56, 67, 67, 67, 67, 67, + 67, 67, 90, 44, 44, 44, 44, 44, 79, 44, 44, 44, 44, 44, 44, 44, + 65, 65, 65, 65, 65, 65, 65, 65, 163, 163, 163, 163, 163, 163, 163, 44, }; static RE_UINT8 re_general_category_stage_5[] = { @@ -3040,27 +3307,27 @@ static RE_UINT8 re_general_category_stage_5[] = { 2, 2, 2, 20, 24, 21, 24, 15, 25, 25, 27, 23, 26, 27, 5, 28, 24, 16, 27, 26, 27, 24, 11, 11, 26, 11, 5, 29, 11, 23, 1, 24, 1, 2, 2, 24, 2, 1, 2, 5, 5, 5, 1, 3, 3, 2, 5, 2, - 4, 4, 26, 26, 4, 26, 6, 6, 0, 0, 4, 2, 23, 0, 1, 23, - 1, 0, 0, 1, 24, 1, 27, 6, 7, 7, 0, 4, 0, 2, 0, 23, - 19, 0, 0, 25, 0, 6, 19, 6, 23, 6, 6, 23, 5, 0, 5, 23, - 16, 16, 16, 0, 23, 25, 27, 27, 4, 5, 5, 6, 6, 5, 23, 5, - 6, 16, 6, 4, 4, 6, 6, 27, 5, 27, 27, 5, 0, 16, 6, 0, - 0, 5, 4, 0, 6, 8, 8, 8, 8, 6, 23, 4, 0, 8, 8, 0, - 27, 25, 11, 27, 27, 0, 0, 27, 23, 27, 5, 8, 8, 5, 23, 11, - 11, 0, 19, 5, 12, 5, 5, 20, 21, 0, 10, 10, 10, 0, 19, 23, - 5, 4, 2, 4, 3, 3, 2, 0, 3, 26, 2, 26, 0, 26, 1, 26, - 26, 0, 12, 12, 12, 16, 19, 19, 28, 29, 20, 28, 13, 14, 16, 12, - 23, 28, 29, 23, 23, 22, 22, 23, 24, 20, 21, 23, 23, 12, 11, 4, - 21, 4, 25, 0, 6, 7, 7, 6, 1, 27, 27, 1, 27, 2, 2, 27, + 4, 4, 26, 26, 4, 26, 6, 6, 0, 0, 4, 2, 1, 23, 1, 0, + 0, 1, 24, 1, 27, 6, 7, 7, 0, 4, 0, 2, 0, 23, 19, 0, + 0, 27, 27, 25, 0, 6, 19, 6, 23, 6, 6, 23, 5, 0, 5, 23, + 23, 0, 16, 16, 23, 25, 27, 27, 16, 0, 4, 5, 5, 6, 6, 5, + 23, 5, 6, 16, 6, 
4, 4, 6, 6, 27, 5, 27, 27, 5, 0, 16, + 6, 0, 0, 5, 4, 0, 6, 8, 8, 8, 8, 6, 23, 4, 0, 8, + 8, 0, 11, 27, 27, 0, 0, 25, 23, 27, 5, 8, 8, 5, 23, 11, + 11, 0, 19, 5, 12, 5, 5, 20, 21, 0, 10, 10, 10, 5, 19, 23, + 5, 4, 7, 0, 2, 4, 3, 3, 2, 0, 3, 26, 2, 26, 0, 26, + 1, 26, 26, 0, 12, 12, 12, 16, 19, 19, 28, 29, 20, 28, 13, 14, + 16, 12, 23, 28, 29, 23, 23, 22, 22, 23, 24, 20, 21, 23, 23, 12, + 11, 4, 21, 4, 6, 7, 7, 6, 1, 27, 27, 1, 27, 2, 2, 27, 10, 1, 2, 10, 10, 11, 24, 27, 27, 20, 21, 27, 21, 24, 21, 20, - 24, 0, 2, 6, 27, 4, 5, 10, 19, 20, 21, 21, 27, 10, 19, 4, + 2, 6, 20, 0, 27, 4, 5, 10, 19, 20, 21, 21, 27, 10, 19, 4, 10, 4, 6, 26, 26, 4, 27, 11, 4, 23, 7, 23, 26, 1, 25, 27, 8, 23, 4, 8, 18, 18, 17, 17, 5, 24, 23, 20, 19, 22, 22, 20, - 22, 22, 24, 19, 24, 26, 0, 11, 23, 10, 5, 11, 23, 16, 27, 8, - 8, 16, 16, 6, + 22, 22, 24, 19, 24, 0, 24, 26, 25, 0, 0, 11, 6, 11, 10, 0, + 23, 10, 5, 11, 23, 16, 27, 8, 8, 16, 16, 6, }; -/* General_Category: 8556 bytes. */ +/* General_Category: 9340 bytes. */ RE_UINT32 re_get_general_category(RE_UINT32 ch) { RE_UINT32 code; @@ -3068,9 +3335,9 @@ RE_UINT32 re_get_general_category(RE_UINT32 ch) { RE_UINT32 pos; RE_UINT32 value; - f = ch >> 11; - code = ch ^ (f << 11); - pos = (RE_UINT32)re_general_category_stage_1[f] << 4; + f = ch >> 12; + code = ch ^ (f << 12); + pos = (RE_UINT32)re_general_category_stage_1[f] << 5; f = code >> 7; code ^= f << 7; pos = (RE_UINT32)re_general_category_stage_2[pos + f] << 3; @@ -3088,40 +3355,23 @@ RE_UINT32 re_get_general_category(RE_UINT32 ch) { /* Block. 
*/ static RE_UINT8 re_block_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 10, 11, 12, 12, 12, 12, 13, 14, 15, 15, 15, 16, - 17, 18, 19, 20, 21, 20, 22, 20, 20, 20, 20, 20, 20, 23, 20, 20, - 20, 20, 20, 20, 20, 20, 24, 20, 20, 20, 25, 20, 20, 26, 27, 20, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 29, 30, 31, 32, 20, 20, 20, 20, 20, 20, 20, 33, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 34, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 
20, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 7, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 15, 16, 15, 15, 15, 15, 17, 15, 18, 19, 20, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 23, 15, 15, 15, 24, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 25, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, }; static RE_UINT8 re_block_stage_2[] = { @@ -3133,35 +3383,54 @@ static RE_UINT8 re_block_stage_2[] = { 65, 65, 66, 67, 68, 68, 69, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 82, 83, 83, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, + 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 87, 87, 87, 87, 87, 87, 87, 87, 87, 88, 89, 89, 90, 91, 92, 93, 94, 
95, 96, 97, 98, 99, 100, 101, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 103, 104, 104, 104, 104, 104, 104, 104, 105, 106, 106, 106, 106, 106, 106, 106, 106, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, + 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, + 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 108, 108, 108, 108, 109, 110, 110, 110, 110, 110, 111, 112, 113, 114, - 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 119, 119, 119, 119, 119, 119, - 125, 119, 126, 127, 128, 119, 129, 119, 130, 119, 119, 119, 131, 119, 119, 119, - 132, 133, 134, 135, 119, 119, 119, 119, 119, 119, 119, 119, 119, 136, 119, 119, + 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 119, 126, 126, 126, 119, + 127, 128, 129, 130, 131, 132, 133, 134, 135, 119, 119, 119, 136, 119, 119, 119, + 137, 138, 139, 140, 141, 142, 143, 119, 119, 144, 119, 145, 146, 147, 119, 119, + 119, 148, 119, 119, 119, 149, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 150, 150, 150, 150, 150, 150, 150, 150, 151, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 137, 137, 137, 137, 137, 137, 137, 137, 138, 119, 119, 119, 119, 119, 119, 119, - 139, 139, 139, 139, 139, 139, 139, 139, 140, 119, 119, 119, 119, 119, 119, 119, - 141, 141, 141, 141, 142, 119, 119, 119, 119, 119, 119, 119, 119, 119, 143, 144, - 145, 145, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 146, 146, 147, 147, 148, 119, 149, 119, 150, 150, 150, 150, 150, 150, 150, 150, - 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 151, 151, 119, 119, - 152, 153, 154, 154, 155, 155, 156, 156, 156, 156, 
156, 156, 157, 158, 159, 119, - 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 161, 162, 162, - 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, - 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 163, 164, - 165, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 166, 166, 166, 166, 167, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 168, 119, 169, 170, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, - 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, + 152, 152, 152, 152, 152, 152, 152, 152, 153, 119, 119, 119, 119, 119, 119, 119, + 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 154, 154, 154, 154, 155, 156, 157, 158, 119, 119, 119, 119, 119, 119, 159, 160, + 161, 161, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 119, 119, 119, 119, 119, 119, 119, 119, 162, 163, 119, 119, 119, 119, 119, 119, + 164, 164, 165, 165, 166, 119, 167, 119, 168, 168, 168, 168, 168, 168, 168, 168, + 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 169, 170, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 171, 171, 119, 119, + 172, 173, 174, 174, 175, 175, 176, 176, 176, 176, 176, 176, 177, 178, 179, 180, + 181, 181, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, + 
182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, + 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 183, 184, 184, + 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, + 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 185, 186, + 187, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 188, 188, 188, 188, 189, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 190, 119, 191, 192, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, + 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, + 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, + 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, }; static RE_UINT8 re_block_stage_3[] = { @@ -3186,72 +3455,83 @@ static RE_UINT8 re_block_stage_3[] = { 43, 43, 44, 44, 45, 45, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 49, 49, 49, 49, 49, 50, 50, 50, 50, 50, 51, 51, 51, 52, 52, 52, 52, 52, 52, 53, 53, - 54, 54, 55, 55, 55, 55, 55, 55, 55, 55, 55, 19, 19, 19, 19, 19, - 56, 56, 56, 56, 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, - 59, 59, 59, 59, 59, 60, 60, 60, 19, 19, 19, 19, 61, 62, 62, 62, - 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 65, 65, 65, 65, - 66, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, - 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, - 72, 72, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, - 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76, - 77, 77, 77, 77, 78, 78, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, - 80, 80, 80, 80, 80, 80, 80, 80, 81, 81, 
82, 82, 82, 82, 82, 82, - 83, 83, 83, 83, 83, 83, 83, 83, 84, 84, 84, 84, 84, 84, 84, 84, - 84, 84, 84, 84, 85, 85, 85, 86, 87, 87, 87, 87, 87, 87, 87, 87, - 88, 88, 88, 88, 88, 88, 88, 88, 89, 89, 89, 89, 89, 89, 89, 89, - 90, 90, 90, 90, 90, 90, 90, 90, 91, 91, 91, 91, 91, 91, 91, 91, - 92, 92, 92, 92, 92, 92, 93, 93, 94, 94, 94, 94, 94, 94, 94, 94, - 95, 95, 95, 96, 96, 96, 96, 96, 97, 97, 97, 97, 97, 97, 98, 98, - 99, 99, 99, 99, 99, 99, 99, 99, 100, 100, 100, 100, 100, 100, 100, 100, - 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 19, 102, - 103, 103, 103, 103, 104, 104, 104, 104, 104, 104, 105, 105, 105, 105, 105, 105, - 106, 106, 106, 107, 107, 107, 107, 107, 107, 108, 109, 109, 110, 110, 110, 111, - 112, 112, 112, 112, 112, 112, 112, 112, 113, 113, 113, 113, 113, 113, 113, 113, - 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 115, 115, 115, 115, - 116, 116, 116, 116, 116, 116, 116, 116, 117, 117, 117, 117, 117, 117, 117, 117, - 117, 118, 118, 118, 118, 119, 119, 119, 120, 120, 120, 120, 120, 120, 120, 120, - 120, 120, 120, 120, 121, 121, 121, 121, 121, 121, 122, 122, 122, 122, 122, 122, - 123, 123, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, - 125, 125, 125, 126, 127, 127, 127, 127, 128, 128, 128, 128, 128, 128, 129, 129, - 130, 130, 130, 131, 131, 131, 132, 132, 133, 133, 133, 133, 133, 133, 19, 19, - 134, 134, 134, 134, 134, 134, 135, 135, 136, 136, 136, 136, 136, 136, 137, 137, - 138, 138, 138, 19, 19, 19, 19, 19, 19, 19, 19, 19, 139, 139, 139, 139, - 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 141, 141, 141, 141, 141, - 142, 142, 142, 142, 142, 142, 142, 142, 143, 143, 143, 143, 143, 143, 143, 143, - 144, 144, 144, 144, 144, 144, 144, 144, 145, 145, 145, 145, 145, 145, 145, 145, - 146, 146, 146, 146, 146, 146, 146, 146, 147, 147, 147, 147, 147, 148, 148, 148, - 148, 148, 148, 148, 148, 148, 148, 148, 149, 150, 151, 152, 152, 153, 153, 154, - 154, 154, 154, 154, 154, 154, 154, 154, 
155, 155, 155, 155, 155, 155, 155, 155, - 155, 155, 155, 155, 155, 155, 155, 156, 157, 157, 157, 157, 157, 157, 157, 157, - 158, 158, 158, 158, 158, 158, 158, 158, 159, 159, 159, 159, 160, 160, 160, 160, - 160, 161, 161, 161, 161, 162, 162, 162, 19, 19, 19, 19, 19, 19, 19, 19, - 163, 163, 164, 164, 164, 164, 19, 19, 165, 165, 165, 166, 166, 19, 19, 19, - 167, 167, 168, 168, 168, 168, 19, 19, 169, 169, 169, 169, 169, 170, 170, 170, - 171, 171, 171, 19, 19, 19, 19, 19, 172, 172, 172, 172, 173, 173, 19, 19, - 174, 174, 175, 175, 19, 19, 19, 19, 176, 176, 177, 177, 177, 177, 177, 177, - 178, 178, 178, 178, 178, 178, 179, 179, 180, 180, 180, 180, 181, 181, 182, 182, - 183, 183, 183, 183, 183, 19, 19, 19, 19, 19, 19, 19, 19, 19, 184, 184, - 185, 185, 185, 185, 185, 185, 185, 185, 186, 186, 186, 186, 186, 187, 187, 187, - 188, 188, 188, 188, 188, 19, 19, 19, 189, 189, 189, 189, 189, 189, 19, 19, - 190, 190, 190, 190, 190, 19, 19, 19, 191, 191, 191, 191, 191, 191, 191, 191, - 192, 192, 192, 192, 192, 192, 192, 192, 193, 193, 193, 193, 193, 193, 193, 193, - 193, 193, 193, 19, 19, 19, 19, 19, 194, 194, 194, 194, 194, 194, 194, 194, - 194, 194, 194, 194, 19, 19, 19, 19, 195, 195, 195, 195, 195, 195, 195, 195, - 195, 195, 19, 19, 19, 19, 19, 19, 196, 196, 196, 196, 196, 196, 196, 196, - 197, 197, 197, 197, 197, 197, 197, 197, 198, 198, 198, 198, 198, 198, 198, 198, - 199, 199, 199, 199, 199, 19, 19, 19, 200, 200, 200, 200, 200, 200, 201, 201, - 202, 202, 202, 202, 202, 202, 202, 202, 203, 203, 203, 203, 203, 203, 203, 203, - 204, 204, 204, 205, 205, 205, 205, 205, 205, 205, 206, 206, 206, 206, 206, 206, - 207, 207, 207, 207, 207, 207, 207, 207, 208, 208, 208, 208, 208, 208, 208, 208, - 209, 209, 209, 209, 209, 209, 209, 209, 210, 210, 210, 210, 210, 19, 19, 19, - 211, 211, 211, 211, 211, 211, 211, 211, 212, 212, 212, 212, 212, 212, 212, 212, - 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 19, 19, - 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 
214, 215, 215, 215, 215, - 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 19, 19, 19, 19, 19, 19, - 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 19, 19, 19, 19, 19, 19, - 217, 217, 217, 217, 217, 217, 217, 217, 218, 218, 218, 218, 218, 218, 218, 218, - 218, 218, 218, 218, 218, 218, 218, 19, 219, 219, 219, 219, 219, 219, 219, 219, - 220, 220, 220, 220, 220, 220, 220, 220, + 54, 54, 55, 55, 55, 55, 55, 55, 55, 55, 55, 56, 56, 56, 56, 56, + 57, 57, 57, 57, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, + 60, 60, 60, 60, 60, 61, 61, 61, 19, 19, 19, 19, 62, 63, 63, 63, + 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, + 67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, + 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 71, 71, 71, 72, 72, 72, + 73, 73, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, + 76, 76, 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77, 77, 77, 77, + 78, 78, 78, 78, 79, 79, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, + 81, 81, 81, 81, 81, 81, 81, 81, 82, 82, 83, 83, 83, 83, 83, 83, + 84, 84, 84, 84, 84, 84, 84, 84, 85, 85, 85, 85, 85, 85, 85, 85, + 85, 85, 85, 85, 86, 86, 86, 87, 88, 88, 88, 88, 88, 88, 88, 88, + 89, 89, 89, 89, 89, 89, 89, 89, 90, 90, 90, 90, 90, 90, 90, 90, + 91, 91, 91, 91, 91, 91, 91, 91, 92, 92, 92, 92, 92, 92, 92, 92, + 93, 93, 93, 93, 93, 93, 94, 94, 95, 95, 95, 95, 95, 95, 95, 95, + 96, 96, 96, 97, 97, 97, 97, 97, 98, 98, 98, 98, 98, 98, 99, 99, + 100, 100, 100, 100, 100, 100, 100, 100, 101, 101, 101, 101, 101, 101, 101, 101, + 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 19, 103, + 104, 104, 104, 104, 105, 105, 105, 105, 105, 105, 106, 106, 106, 106, 106, 106, + 107, 107, 107, 108, 108, 108, 108, 108, 108, 109, 110, 110, 111, 111, 111, 112, + 113, 113, 113, 113, 113, 113, 113, 113, 114, 114, 114, 114, 114, 114, 114, 114, + 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 116, 116, 116, 116, + 117, 117, 117, 117, 117, 117, 117, 117, 118, 118, 118, 118, 118, 
118, 118, 118, + 118, 119, 119, 119, 119, 120, 120, 120, 121, 121, 121, 121, 121, 121, 121, 121, + 121, 121, 121, 121, 122, 122, 122, 122, 122, 122, 123, 123, 123, 123, 123, 123, + 124, 124, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, + 126, 126, 126, 127, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 130, 130, + 131, 131, 131, 132, 132, 132, 133, 133, 134, 134, 134, 134, 134, 134, 135, 135, + 136, 136, 136, 136, 136, 136, 137, 137, 138, 138, 138, 138, 138, 138, 139, 139, + 140, 140, 140, 141, 141, 141, 141, 19, 19, 19, 19, 19, 142, 142, 142, 142, + 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 144, 144, 144, 144, 144, + 145, 145, 145, 145, 145, 145, 145, 145, 146, 146, 146, 146, 146, 146, 146, 146, + 147, 147, 147, 147, 147, 147, 147, 147, 148, 148, 148, 148, 148, 148, 148, 148, + 149, 149, 149, 149, 149, 149, 149, 149, 150, 150, 150, 150, 150, 151, 151, 151, + 151, 151, 151, 151, 151, 151, 151, 151, 152, 153, 154, 155, 155, 156, 156, 157, + 157, 157, 157, 157, 157, 157, 157, 157, 158, 158, 158, 158, 158, 158, 158, 158, + 158, 158, 158, 158, 158, 158, 158, 159, 160, 160, 160, 160, 160, 160, 160, 160, + 161, 161, 161, 161, 161, 161, 161, 161, 162, 162, 162, 162, 163, 163, 163, 163, + 163, 164, 164, 164, 164, 165, 165, 165, 19, 19, 19, 19, 19, 19, 19, 19, + 166, 166, 167, 167, 167, 167, 168, 168, 169, 169, 169, 170, 170, 171, 171, 171, + 172, 172, 173, 173, 173, 173, 19, 19, 174, 174, 174, 174, 174, 175, 175, 175, + 176, 176, 176, 19, 19, 19, 19, 19, 177, 177, 177, 178, 178, 178, 178, 19, + 179, 179, 179, 179, 179, 179, 179, 179, 180, 180, 180, 180, 181, 181, 182, 182, + 183, 183, 183, 19, 19, 19, 19, 19, 184, 184, 185, 185, 19, 19, 19, 19, + 186, 186, 187, 187, 187, 187, 187, 187, 188, 188, 188, 188, 188, 188, 189, 189, + 190, 190, 19, 19, 191, 191, 191, 191, 192, 192, 192, 192, 193, 193, 194, 194, + 195, 195, 195, 19, 19, 19, 19, 19, 196, 196, 196, 196, 196, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 197, 197, 198, 198, 198, 198, 
198, 198, 198, 198, + 199, 199, 199, 199, 199, 200, 200, 200, 201, 201, 201, 201, 201, 202, 202, 202, + 203, 203, 203, 203, 203, 203, 204, 204, 205, 205, 205, 205, 205, 19, 19, 19, + 19, 19, 19, 206, 206, 206, 206, 206, 207, 207, 207, 207, 207, 207, 207, 207, + 208, 208, 208, 208, 208, 208, 19, 19, 209, 209, 209, 209, 209, 209, 209, 209, + 210, 210, 210, 210, 210, 210, 19, 19, 211, 211, 211, 211, 211, 19, 19, 19, + 19, 19, 212, 212, 212, 212, 212, 212, 19, 19, 19, 19, 213, 213, 213, 213, + 214, 214, 214, 214, 214, 214, 214, 214, 215, 215, 215, 215, 215, 215, 215, 215, + 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 19, 19, 19, 19, 19, + 217, 217, 217, 217, 217, 217, 217, 217, 217, 217, 217, 217, 218, 218, 218, 19, + 19, 19, 19, 19, 19, 219, 219, 219, 220, 220, 220, 220, 220, 220, 220, 220, + 220, 19, 19, 19, 19, 19, 19, 19, 221, 221, 221, 221, 221, 221, 221, 221, + 221, 221, 19, 19, 19, 19, 19, 19, 222, 222, 222, 222, 222, 222, 222, 222, + 223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 224, 19, 19, 19, 19, 19, + 225, 225, 225, 225, 225, 225, 225, 225, 226, 226, 226, 226, 226, 226, 226, 226, + 227, 227, 227, 227, 227, 19, 19, 19, 228, 228, 228, 228, 228, 228, 229, 229, + 230, 230, 230, 230, 230, 230, 230, 230, 231, 231, 231, 231, 231, 231, 231, 231, + 231, 231, 231, 231, 231, 231, 19, 19, 232, 232, 232, 232, 232, 232, 232, 232, + 233, 233, 233, 234, 234, 234, 234, 234, 234, 234, 235, 235, 235, 235, 235, 235, + 236, 236, 236, 236, 236, 236, 236, 236, 237, 237, 237, 237, 237, 237, 237, 237, + 238, 238, 238, 238, 238, 238, 238, 238, 239, 239, 239, 239, 239, 240, 240, 240, + 241, 241, 241, 241, 241, 241, 241, 241, 242, 242, 242, 242, 242, 242, 242, 242, + 243, 243, 243, 243, 243, 243, 243, 243, 244, 244, 244, 244, 244, 244, 244, 244, + 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 19, 19, + 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 247, 247, 247, 247, + 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 19, 19, 19, 
19, 19, 19, + 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 19, 19, 19, 19, 19, 19, + 249, 249, 249, 249, 249, 249, 249, 249, 250, 250, 250, 250, 250, 250, 250, 250, + 250, 250, 250, 250, 250, 250, 250, 19, 251, 251, 251, 251, 251, 251, 251, 251, + 252, 252, 252, 252, 252, 252, 252, 252, }; static RE_UINT8 re_block_stage_4[] = { @@ -3310,7 +3590,15 @@ static RE_UINT8 re_block_stage_4[] = { 208, 208, 208, 208, 209, 209, 209, 209, 210, 210, 210, 210, 211, 211, 211, 211, 212, 212, 212, 212, 213, 213, 213, 213, 214, 214, 214, 214, 215, 215, 215, 215, 216, 216, 216, 216, 217, 217, 217, 217, 218, 218, 218, 218, 219, 219, 219, 219, - 220, 220, 220, 220, + 220, 220, 220, 220, 221, 221, 221, 221, 222, 222, 222, 222, 223, 223, 223, 223, + 224, 224, 224, 224, 225, 225, 225, 225, 226, 226, 226, 226, 227, 227, 227, 227, + 228, 228, 228, 228, 229, 229, 229, 229, 230, 230, 230, 230, 231, 231, 231, 231, + 232, 232, 232, 232, 233, 233, 233, 233, 234, 234, 234, 234, 235, 235, 235, 235, + 236, 236, 236, 236, 237, 237, 237, 237, 238, 238, 238, 238, 239, 239, 239, 239, + 240, 240, 240, 240, 241, 241, 241, 241, 242, 242, 242, 242, 243, 243, 243, 243, + 244, 244, 244, 244, 245, 245, 245, 245, 246, 246, 246, 246, 247, 247, 247, 247, + 248, 248, 248, 248, 249, 249, 249, 249, 250, 250, 250, 250, 251, 251, 251, 251, + 252, 252, 252, 252, }; static RE_UINT8 re_block_stage_5[] = { @@ -3369,10 +3657,18 @@ static RE_UINT8 re_block_stage_5[] = { 208, 208, 208, 208, 209, 209, 209, 209, 210, 210, 210, 210, 211, 211, 211, 211, 212, 212, 212, 212, 213, 213, 213, 213, 214, 214, 214, 214, 215, 215, 215, 215, 216, 216, 216, 216, 217, 217, 217, 217, 218, 218, 218, 218, 219, 219, 219, 219, - 220, 220, 220, 220, + 220, 220, 220, 220, 221, 221, 221, 221, 222, 222, 222, 222, 223, 223, 223, 223, + 224, 224, 224, 224, 225, 225, 225, 225, 226, 226, 226, 226, 227, 227, 227, 227, + 228, 228, 228, 228, 229, 229, 229, 229, 230, 230, 230, 230, 231, 231, 231, 231, + 232, 232, 232, 232, 233, 233, 233, 233, 234, 
234, 234, 234, 235, 235, 235, 235, + 236, 236, 236, 236, 237, 237, 237, 237, 238, 238, 238, 238, 239, 239, 239, 239, + 240, 240, 240, 240, 241, 241, 241, 241, 242, 242, 242, 242, 243, 243, 243, 243, + 244, 244, 244, 244, 245, 245, 245, 245, 246, 246, 246, 246, 247, 247, 247, 247, + 248, 248, 248, 248, 249, 249, 249, 249, 250, 250, 250, 250, 251, 251, 251, 251, + 252, 252, 252, 252, }; -/* Block: 4288 bytes. */ +/* Block: 4752 bytes. */ RE_UINT32 re_get_block(RE_UINT32 ch) { RE_UINT32 code; @@ -3380,9 +3676,9 @@ RE_UINT32 re_get_block(RE_UINT32 ch) { RE_UINT32 pos; RE_UINT32 value; - f = ch >> 11; - code = ch ^ (f << 11); - pos = (RE_UINT32)re_block_stage_1[f] << 4; + f = ch >> 12; + code = ch ^ (f << 12); + pos = (RE_UINT32)re_block_stage_1[f] << 5; f = code >> 7; code ^= f << 7; pos = (RE_UINT32)re_block_stage_2[pos + f] << 3; @@ -3400,40 +3696,23 @@ RE_UINT32 re_get_block(RE_UINT32 ch) { /* Script. */ static RE_UINT8 re_script_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 12, 12, 12, 12, 13, 14, 14, 14, 14, 15, - 16, 17, 18, 14, 19, 14, 20, 14, 14, 14, 14, 14, 14, 21, 14, 14, - 14, 14, 14, 14, 14, 14, 22, 14, 14, 14, 23, 14, 14, 24, 25, 14, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 26, 7, 27, 28, 14, 14, 14, 14, 14, 14, 14, 29, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 30, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 0, 1, 2, 3, 4, 5, 5, 5, 5, 6, 7, 8, 8, 9, 10, 11, + 12, 13, 14, 15, 10, 10, 16, 10, 10, 10, 10, 17, 10, 18, 19, 20, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 21, 22, 10, 10, 10, 23, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 24, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, }; static RE_UINT8 re_script_stage_2[] = { @@ -3441,33 +3720,52 @@ static RE_UINT8 re_script_stage_2[] = { 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 32, 33, 34, 35, 36, 37, 37, 37, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 2, 2, 53, 54, - 55, 56, 57, 58, 59, 59, 59, 60, 61, 59, 59, 59, 59, 59, 62, 59, - 63, 63, 59, 59, 59, 59, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, - 74, 75, 76, 77, 78, 79, 80, 59, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 81, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 82, - 83, 83, 83, 83, 83, 83, 83, 83, 83, 84, 85, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 32, 32, 32, 32, 32, 32, 32, 32, + 55, 56, 57, 58, 59, 59, 59, 60, 61, 59, 59, 59, 59, 59, 59, 59, + 62, 62, 59, 59, 59, 59, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, + 73, 74, 75, 76, 77, 78, 79, 59, 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 80, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 81, + 82, 82, 82, 82, 82, 82, 82, 82, 82, 83, 84, 84, 85, 86, 87, 88, + 89, 90, 91, 92, 93, 94, 95, 96, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 98, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 72, 72, 99, 100, 101, 102, 103, 103, 104, 105, 106, 107, 108, 109, - 110, 111, 112, 113, 65, 114, 115, 116, 117, 118, 65, 65, 65, 65, 65, 65, - 119, 65, 120, 121, 122, 65, 123, 65, 124, 65, 65, 65, 125, 65, 65, 65, - 126, 127, 128, 129, 65, 65, 65, 65, 65, 65, 65, 65, 65, 130, 65, 65, - 131, 131, 131, 131, 131, 131, 132, 65, 133, 65, 65, 65, 65, 65, 65, 65, - 134, 134, 134, 134, 134, 134, 134, 134, 135, 65, 65, 65, 65, 65, 65, 65, - 136, 136, 136, 136, 137, 65, 65, 65, 65, 65, 65, 65, 65, 65, 138, 139, - 140, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 59, 141, 142, 143, 144, 65, 145, 65, 146, 147, 148, 59, 59, 149, 59, 150, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 151, 152, 65, 65, - 153, 154, 155, 156, 157, 65, 158, 159, 160, 161, 162, 163, 164, 165, 60, 65, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 166, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 167, 72, - 168, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 72, 72, 72, 72, 168, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 169, 65, 170, 171, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 97, + 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 71, 71, 99, 100, 101, 102, 103, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 98, 114, 115, 116, 117, 118, 119, 98, 120, 120, 121, 98, + 122, 123, 124, 125, 126, 127, 128, 129, 130, 98, 98, 98, 131, 98, 98, 98, + 132, 133, 134, 135, 136, 137, 138, 98, 98, 139, 98, 140, 141, 142, 98, 98, + 98, 143, 98, 98, 98, 144, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, + 
145, 145, 145, 145, 145, 145, 145, 146, 147, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, + 148, 148, 148, 148, 148, 148, 148, 148, 149, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, + 150, 150, 150, 150, 151, 152, 153, 154, 98, 98, 98, 98, 98, 98, 155, 156, + 157, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, 158, 159, 98, 98, 98, 98, 98, 98, + 59, 160, 161, 162, 163, 98, 164, 98, 165, 166, 167, 59, 59, 168, 59, 169, + 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, + 170, 171, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 172, 173, 98, 98, + 174, 175, 176, 177, 178, 98, 179, 180, 59, 181, 182, 183, 184, 185, 186, 187, + 188, 189, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, + 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 190, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 191, 71, + 192, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, + 71, 71, 71, 71, 192, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, + 193, 98, 194, 195, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, }; static RE_UINT16 re_script_stage_3[] = { @@ -3475,363 +3773,369 @@ static RE_UINT16 re_script_stage_3[] = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 0, 0, 7, 0, 8, 8, 8, 8, 8, 8, 8, 9, 10, 11, 12, 11, 11, 11, 13, 11, 14, 14, 14, 14, 14, 14, 14, 14, 15, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 16, 17, 18, 19, 17, 18, 20, 21, 22, 22, 23, 22, 24, 25, - 26, 27, 28, 28, 29, 30, 31, 32, 28, 28, 28, 28, 28, 33, 28, 28, - 34, 35, 35, 35, 36, 28, 28, 28, 37, 37, 37, 38, 39, 39, 39, 40, - 
41, 41, 42, 43, 44, 45, 46, 46, 46, 46, 47, 46, 46, 46, 48, 49, - 50, 50, 50, 50, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, - 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, - 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, - 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, - 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 46, 124, - 125, 126, 126, 127, 126, 128, 46, 46, 129, 130, 131, 132, 133, 134, 46, 46, - 135, 135, 135, 135, 136, 135, 137, 138, 135, 136, 135, 139, 139, 140, 46, 46, - 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 142, 142, 143, 142, 142, 144, - 145, 145, 145, 145, 145, 145, 145, 145, 146, 146, 146, 146, 147, 148, 146, 146, - 147, 146, 146, 149, 150, 151, 146, 146, 146, 150, 146, 146, 146, 152, 146, 153, - 146, 154, 155, 155, 155, 155, 155, 156, 157, 157, 157, 157, 157, 157, 157, 157, - 158, 159, 160, 160, 160, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, - 171, 171, 171, 171, 171, 172, 173, 173, 174, 175, 176, 176, 176, 176, 176, 177, - 176, 176, 178, 157, 157, 157, 157, 179, 180, 181, 182, 182, 183, 184, 185, 186, - 187, 187, 188, 187, 189, 190, 171, 171, 191, 192, 193, 193, 193, 194, 193, 195, - 196, 196, 197, 46, 46, 46, 46, 46, 198, 198, 198, 198, 199, 198, 198, 200, - 201, 201, 201, 201, 202, 202, 202, 203, 204, 204, 204, 205, 206, 207, 207, 207, - 46, 46, 46, 46, 208, 209, 210, 211, 4, 4, 212, 4, 4, 213, 214, 215, - 4, 4, 4, 216, 8, 8, 217, 218, 11, 219, 11, 11, 219, 220, 11, 221, - 11, 11, 11, 222, 222, 223, 11, 224, 225, 0, 0, 0, 0, 0, 226, 227, - 228, 229, 0, 230, 46, 8, 8, 231, 0, 0, 232, 233, 234, 0, 4, 4, - 235, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 236, 0, 0, 237, 46, 230, 46, 0, 0, - 238, 0, 0, 0, 0, 0, 0, 0, 239, 239, 239, 239, 239, 239, 239, 239, - 0, 0, 0, 0, 240, 241, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, - 242, 242, 243, 242, 242, 243, 4, 4, 244, 244, 244, 244, 244, 244, 244, 245, - 142, 142, 143, 
246, 246, 246, 247, 248, 146, 249, 250, 250, 250, 250, 14, 14, - 0, 0, 0, 251, 46, 46, 46, 46, 252, 253, 252, 252, 252, 252, 252, 254, - 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 255, 46, 251, - 256, 0, 257, 258, 259, 260, 260, 260, 260, 261, 262, 263, 263, 263, 263, 264, - 265, 266, 267, 268, 145, 145, 145, 145, 269, 0, 266, 270, 0, 0, 236, 263, - 145, 269, 0, 0, 0, 0, 145, 271, 0, 0, 0, 0, 0, 263, 263, 272, - 263, 263, 263, 263, 263, 273, 0, 0, 252, 252, 252, 255, 0, 0, 0, 0, - 252, 252, 252, 252, 274, 46, 46, 46, 275, 275, 275, 275, 275, 275, 275, 275, - 276, 275, 275, 275, 277, 278, 278, 278, 279, 279, 279, 279, 279, 279, 279, 279, - 279, 279, 280, 46, 14, 14, 14, 14, 14, 281, 282, 282, 282, 282, 282, 283, - 0, 0, 284, 4, 4, 4, 4, 4, 285, 286, 287, 46, 46, 46, 46, 288, - 289, 289, 290, 241, 291, 291, 291, 292, 293, 293, 293, 293, 294, 295, 50, 296, - 297, 297, 297, 298, 298, 299, 145, 300, 301, 301, 301, 301, 302, 303, 46, 46, - 304, 304, 304, 305, 306, 307, 141, 308, 309, 309, 309, 309, 310, 311, 312, 313, - 314, 315, 250, 46, 46, 46, 46, 46, 46, 46, 46, 46, 312, 312, 316, 317, - 145, 145, 318, 145, 319, 145, 145, 320, 252, 252, 252, 252, 252, 252, 321, 252, - 252, 252, 252, 252, 252, 322, 46, 46, 323, 324, 22, 325, 326, 28, 28, 28, - 28, 28, 28, 28, 327, 328, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 329, 46, 28, 28, 28, 28, 330, 28, 28, 331, 46, 46, 332, - 8, 241, 217, 0, 0, 333, 334, 335, 28, 28, 28, 28, 28, 28, 28, 336, - 238, 0, 1, 2, 1, 2, 337, 262, 263, 338, 145, 269, 339, 340, 341, 342, - 343, 344, 345, 346, 347, 347, 46, 46, 344, 344, 344, 344, 344, 344, 344, 348, - 349, 0, 0, 350, 11, 11, 11, 11, 351, 251, 46, 46, 46, 0, 0, 352, - 353, 354, 355, 355, 355, 356, 46, 46, 357, 358, 359, 360, 361, 46, 46, 46, - 362, 363, 364, 364, 365, 366, 46, 46, 367, 367, 367, 367, 367, 368, 368, 368, - 369, 370, 371, 46, 46, 46, 46, 46, 372, 373, 373, 374, 375, 376, 46, 46, - 377, 378, 379, 380, 46, 46, 46, 46, 381, 381, 382, 383, 
46, 46, 46, 46, - 384, 385, 386, 387, 388, 389, 390, 390, 391, 391, 391, 392, 393, 394, 395, 396, - 397, 397, 397, 397, 398, 46, 46, 46, 46, 46, 46, 46, 46, 46, 28, 49, - 399, 399, 399, 399, 400, 401, 399, 46, 402, 402, 402, 402, 403, 404, 405, 406, - 407, 407, 407, 408, 409, 46, 46, 46, 410, 410, 410, 410, 411, 412, 46, 46, - 413, 413, 413, 414, 415, 46, 46, 46, 416, 416, 416, 416, 416, 416, 416, 416, - 416, 416, 416, 416, 416, 416, 417, 46, 416, 416, 416, 416, 416, 416, 418, 419, - 420, 420, 420, 420, 420, 420, 420, 420, 420, 420, 421, 46, 46, 46, 46, 46, - 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 422, 46, 46, 46, 46, - 423, 423, 423, 423, 424, 423, 423, 425, 426, 423, 46, 46, 46, 46, 46, 46, - 427, 46, 46, 46, 46, 46, 46, 46, 0, 0, 0, 0, 0, 0, 0, 428, - 0, 0, 429, 0, 0, 0, 430, 431, 432, 0, 433, 0, 0, 434, 46, 46, - 11, 11, 11, 11, 435, 46, 46, 46, 0, 0, 0, 0, 0, 237, 0, 436, - 0, 0, 0, 0, 0, 226, 0, 0, 0, 437, 438, 439, 440, 0, 0, 0, - 441, 442, 0, 443, 444, 445, 0, 0, 0, 0, 446, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 447, 0, 0, 0, 448, 28, 449, 450, 451, 452, 453, 454, - 455, 456, 457, 456, 46, 46, 46, 327, 0, 0, 251, 0, 0, 0, 0, 0, - 0, 236, 228, 458, 238, 238, 46, 46, 230, 0, 228, 0, 0, 0, 251, 0, - 0, 230, 46, 46, 46, 46, 459, 0, 460, 0, 0, 230, 461, 436, 46, 46, - 0, 0, 462, 463, 0, 0, 0, 240, 0, 236, 0, 0, 464, 46, 0, 462, - 0, 0, 0, 228, 445, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 465, - 0, 0, 0, 434, 236, 0, 466, 46, 46, 46, 46, 46, 46, 46, 46, 467, - 0, 0, 0, 0, 468, 46, 46, 46, 0, 0, 0, 0, 428, 46, 46, 46, - 252, 252, 252, 252, 252, 469, 46, 46, 252, 252, 252, 470, 252, 252, 252, 252, - 252, 321, 46, 46, 46, 46, 46, 46, 471, 46, 0, 0, 0, 0, 0, 0, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 46, + 14, 14, 14, 16, 17, 18, 16, 17, 19, 20, 21, 21, 22, 21, 23, 24, + 25, 26, 27, 27, 28, 29, 30, 31, 27, 27, 27, 27, 27, 32, 27, 27, + 33, 34, 34, 34, 35, 27, 27, 27, 36, 36, 36, 37, 38, 38, 38, 39, + 40, 40, 41, 42, 43, 44, 45, 45, 45, 45, 27, 46, 45, 45, 47, 27, 
+ 48, 48, 48, 48, 48, 49, 50, 48, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, + 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, + 123, 124, 124, 125, 124, 126, 45, 45, 127, 128, 129, 130, 131, 132, 45, 45, + 133, 133, 133, 133, 134, 133, 135, 136, 133, 134, 133, 137, 137, 138, 45, 45, + 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 140, 140, 141, 140, 140, 142, + 143, 143, 143, 143, 143, 143, 143, 143, 144, 144, 144, 144, 145, 146, 144, 144, + 145, 144, 144, 147, 148, 149, 144, 144, 144, 148, 144, 144, 144, 150, 144, 151, + 144, 152, 153, 153, 153, 153, 153, 154, 155, 155, 155, 155, 155, 155, 155, 155, + 156, 157, 158, 158, 158, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, + 169, 169, 169, 169, 169, 170, 171, 171, 172, 173, 174, 174, 174, 174, 174, 175, + 174, 174, 176, 155, 155, 155, 155, 177, 178, 179, 180, 180, 181, 182, 183, 184, + 185, 185, 186, 185, 187, 188, 169, 169, 189, 190, 191, 191, 191, 192, 191, 193, + 194, 194, 195, 196, 45, 45, 45, 45, 197, 197, 197, 197, 198, 197, 197, 199, + 200, 200, 200, 200, 201, 201, 201, 202, 203, 203, 203, 204, 205, 206, 206, 206, + 45, 45, 45, 45, 207, 208, 209, 210, 4, 4, 211, 4, 4, 212, 213, 214, + 4, 4, 4, 215, 8, 8, 8, 216, 11, 217, 11, 11, 217, 218, 11, 219, + 11, 11, 11, 220, 220, 221, 11, 222, 223, 0, 0, 0, 0, 0, 224, 225, + 226, 227, 0, 228, 45, 8, 8, 229, 0, 0, 230, 231, 232, 0, 4, 4, + 233, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 234, 0, 0, 235, 45, 234, 45, 0, 0, + 236, 236, 236, 236, 236, 236, 236, 236, 0, 0, 0, 0, 0, 0, 0, 237, + 0, 238, 0, 239, 240, 241, 45, 45, 242, 242, 243, 242, 242, 243, 4, 4, + 244, 244, 244, 244, 244, 244, 244, 245, 140, 140, 141, 246, 246, 246, 247, 248, + 144, 249, 250, 250, 250, 250, 14, 14, 0, 0, 0, 0, 251, 45, 45, 45, + 
252, 253, 252, 252, 252, 252, 252, 254, 252, 252, 252, 252, 252, 252, 252, 252, + 252, 252, 252, 252, 252, 255, 45, 256, 257, 0, 258, 259, 260, 261, 261, 261, + 261, 262, 263, 264, 264, 264, 264, 265, 266, 267, 268, 269, 143, 143, 143, 143, + 270, 0, 267, 271, 0, 0, 272, 264, 143, 270, 0, 0, 0, 0, 143, 273, + 0, 0, 0, 0, 0, 264, 264, 274, 264, 264, 264, 264, 264, 275, 0, 0, + 252, 252, 252, 255, 0, 0, 0, 0, 252, 252, 252, 252, 276, 45, 45, 45, + 277, 277, 277, 277, 277, 277, 277, 277, 278, 277, 277, 277, 279, 280, 280, 280, + 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 282, 45, 14, 14, 14, 14, + 14, 283, 284, 284, 284, 284, 284, 285, 0, 0, 286, 4, 4, 4, 4, 4, + 287, 4, 288, 289, 45, 45, 45, 290, 291, 291, 292, 293, 294, 294, 294, 295, + 296, 296, 296, 296, 297, 298, 48, 299, 300, 300, 301, 302, 302, 303, 143, 304, + 305, 305, 305, 305, 306, 307, 139, 308, 309, 309, 309, 310, 311, 312, 139, 139, + 313, 313, 313, 313, 314, 315, 316, 317, 318, 319, 250, 4, 4, 320, 321, 45, + 45, 45, 45, 45, 316, 316, 322, 323, 143, 143, 324, 143, 325, 143, 143, 326, + 45, 45, 45, 45, 45, 45, 45, 45, 252, 252, 252, 252, 252, 252, 327, 252, + 252, 252, 252, 252, 252, 328, 45, 45, 329, 330, 21, 331, 332, 27, 27, 27, + 27, 27, 27, 27, 333, 334, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 335, 45, 27, 27, 27, 27, 336, 27, 27, 337, 45, 45, 338, + 8, 293, 339, 0, 0, 340, 341, 342, 27, 27, 27, 27, 27, 27, 27, 343, + 344, 0, 1, 2, 1, 2, 345, 263, 264, 346, 143, 270, 347, 348, 349, 350, + 351, 352, 353, 354, 355, 355, 45, 45, 352, 352, 352, 352, 352, 352, 352, 356, + 357, 0, 0, 358, 11, 11, 11, 11, 359, 256, 360, 45, 45, 0, 0, 361, + 362, 363, 364, 364, 364, 365, 366, 256, 367, 367, 368, 369, 370, 371, 371, 372, + 373, 374, 375, 375, 376, 377, 45, 45, 378, 378, 378, 378, 378, 379, 379, 379, + 380, 381, 382, 45, 45, 45, 45, 45, 383, 383, 384, 385, 385, 385, 386, 45, + 387, 387, 387, 387, 387, 387, 387, 387, 387, 387, 387, 388, 387, 389, 390, 45, + 391, 392, 392, 393, 394, 395, 
396, 396, 397, 398, 399, 45, 45, 45, 45, 45, + 400, 401, 402, 403, 45, 45, 45, 45, 404, 404, 405, 406, 45, 45, 45, 45, + 407, 408, 409, 410, 411, 412, 413, 413, 414, 414, 45, 45, 415, 415, 416, 417, + 418, 418, 418, 419, 420, 421, 422, 423, 424, 425, 426, 45, 45, 45, 45, 45, + 427, 427, 427, 427, 428, 45, 45, 45, 45, 45, 45, 45, 45, 45, 27, 429, + 430, 430, 430, 430, 431, 432, 430, 433, 434, 434, 434, 434, 435, 436, 437, 438, + 439, 439, 439, 440, 441, 442, 442, 443, 444, 444, 444, 444, 445, 446, 447, 448, + 449, 450, 449, 451, 45, 45, 45, 45, 45, 45, 45, 452, 452, 452, 453, 454, + 455, 456, 457, 458, 459, 460, 461, 462, 463, 463, 463, 463, 464, 465, 45, 45, + 466, 466, 466, 467, 468, 45, 45, 45, 469, 469, 469, 469, 470, 471, 45, 45, + 472, 472, 472, 473, 474, 45, 45, 45, 45, 45, 475, 475, 475, 475, 475, 476, + 45, 45, 45, 45, 477, 477, 477, 478, 479, 479, 479, 479, 479, 479, 479, 479, + 479, 480, 45, 45, 45, 45, 45, 45, 479, 479, 479, 479, 479, 479, 481, 482, + 483, 483, 483, 483, 483, 483, 483, 483, 483, 483, 484, 45, 45, 45, 45, 45, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 485, 486, 487, 488, 45, + 45, 45, 45, 45, 45, 489, 490, 491, 492, 492, 492, 492, 493, 494, 495, 496, + 492, 45, 45, 45, 45, 45, 45, 45, 497, 497, 497, 497, 498, 497, 497, 499, + 500, 497, 45, 45, 45, 45, 45, 45, 501, 45, 45, 45, 45, 45, 45, 45, + 502, 502, 502, 502, 502, 502, 503, 504, 505, 506, 272, 45, 45, 45, 45, 45, + 0, 0, 0, 0, 0, 0, 0, 507, 0, 0, 508, 0, 0, 0, 509, 510, + 511, 0, 512, 0, 0, 228, 45, 45, 11, 11, 11, 11, 513, 45, 45, 45, + 0, 0, 0, 0, 0, 235, 0, 241, 0, 0, 0, 0, 0, 224, 0, 0, + 0, 514, 515, 516, 517, 0, 0, 0, 518, 519, 0, 520, 521, 522, 0, 0, + 0, 0, 238, 0, 0, 0, 0, 0, 0, 0, 0, 0, 523, 0, 0, 0, + 524, 524, 524, 524, 524, 524, 524, 524, 524, 524, 524, 524, 525, 526, 45, 45, + 527, 27, 528, 529, 530, 531, 532, 533, 534, 535, 536, 535, 45, 45, 45, 333, + 0, 0, 256, 0, 0, 0, 0, 0, 0, 272, 226, 344, 344, 344, 0, 507, + 537, 0, 226, 0, 0, 0, 256, 0, 0, 234, 45, 
45, 45, 45, 538, 0, + 539, 0, 0, 234, 540, 241, 45, 45, 0, 0, 537, 0, 0, 0, 0, 228, + 0, 0, 0, 0, 226, 541, 0, 542, 0, 0, 0, 0, 0, 0, 0, 226, + 0, 0, 0, 0, 234, 0, 0, 543, 0, 0, 517, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 544, 0, 0, 0, 0, 0, 0, 0, 0, 45, 537, 272, + 0, 0, 0, 0, 0, 0, 0, 272, 0, 0, 0, 0, 0, 545, 45, 45, + 256, 0, 0, 0, 542, 293, 0, 0, 542, 0, 228, 45, 45, 45, 45, 45, + 252, 252, 252, 252, 252, 546, 45, 45, 252, 252, 252, 547, 252, 252, 252, 252, + 252, 327, 45, 45, 45, 45, 45, 45, 548, 45, 0, 0, 0, 0, 0, 0, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 45, }; -static RE_UINT8 re_script_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, - 2, 2, 2, 2, 3, 0, 0, 0, 2, 2, 3, 0, 0, 4, 0, 0, - 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 7, 6, 8, 6, 6, 9, - 8, 8, 10, 10, 6, 11, 11, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 12, 6, 6, 6, 6, 6, 6, 6, 13, 13, 13, 13, 13, 13, 13, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 16, 14, 14, 14, 14, - 14, 14, 14, 14, 8, 8, 8, 8, 17, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19, 17, 18, 18, 18, - 18, 18, 18, 18, 20, 19, 8, 17, 21, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 8, 8, 8, 8, - 22, 22, 22, 22, 22, 23, 8, 8, 22, 22, 23, 8, 8, 8, 8, 8, - 24, 24, 25, 24, 24, 24, 26, 24, 24, 24, 24, 24, 24, 27, 25, 27, - 24, 24, 24, 24, 24, 24, 24, 24, 26, 24, 24, 24, 24, 28, 5, 5, - 5, 5, 5, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0, 24, 24, 24, - 29, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 27, 24, - 30, 30, 30, 30, 30, 30, 30, 31, 30, 30, 30, 30, 30, 30, 30, 30, - 30, 30, 30, 30, 30, 32, 31, 30, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 8, 8, 8, 8, 8, 8, 8, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 35, 8, 8, 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 8, 36, 36, 36, 36, 36, 36, 36, 37, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 8, 39, - 8, 
8, 8, 8, 8, 8, 8, 8, 25, 24, 24, 24, 24, 24, 25, 8, - 8, 8, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, - 40, 40, 40, 40, 40, 40, 40, 40, 41, 42, 40, 40, 40, 40, 40, 40, - 40, 40, 0, 40, 40, 40, 40, 40, 40, 40, 40, 40, 43, 40, 40, 40, - 44, 45, 44, 45, 45, 45, 46, 44, 46, 44, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 46, 45, 45, 45, 46, 46, 8, 45, 45, 8, 45, 45, - 45, 45, 46, 44, 46, 44, 45, 46, 8, 8, 8, 44, 8, 8, 45, 44, - 45, 45, 8, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 8, 8, - 47, 48, 47, 48, 48, 49, 8, 47, 49, 47, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 49, 48, 48, 48, 49, 48, 47, 49, 48, 8, 49, 48, - 48, 49, 8, 47, 49, 47, 48, 8, 47, 8, 8, 8, 47, 48, 49, 49, - 8, 8, 8, 48, 48, 48, 48, 48, 48, 48, 48, 8, 8, 8, 8, 8, - 50, 51, 50, 51, 51, 51, 51, 50, 51, 50, 51, 51, 51, 51, 51, 51, - 51, 51, 51, 51, 52, 51, 51, 51, 52, 51, 50, 51, 51, 8, 51, 51, - 51, 51, 51, 50, 51, 50, 51, 8, 52, 8, 8, 8, 8, 8, 8, 8, - 51, 51, 8, 51, 51, 51, 51, 51, 51, 8, 8, 8, 8, 8, 8, 8, - 53, 54, 53, 54, 54, 54, 55, 53, 55, 53, 54, 54, 54, 54, 54, 54, - 54, 54, 54, 54, 55, 54, 54, 54, 55, 54, 53, 54, 54, 8, 54, 54, - 54, 54, 55, 53, 55, 53, 54, 8, 8, 8, 8, 54, 8, 8, 54, 53, - 54, 54, 8, 54, 54, 54, 54, 54, 54, 54, 54, 54, 8, 8, 8, 8, - 8, 56, 57, 56, 56, 58, 8, 56, 58, 56, 56, 8, 57, 58, 58, 56, - 8, 57, 58, 8, 56, 58, 8, 56, 56, 56, 56, 56, 56, 8, 8, 56, - 56, 58, 8, 56, 58, 56, 56, 8, 58, 8, 8, 57, 8, 8, 8, 8, - 8, 8, 8, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 58, 8, 8, - 59, 60, 59, 60, 60, 60, 61, 60, 61, 60, 60, 60, 60, 60, 60, 60, - 60, 60, 60, 60, 61, 60, 60, 60, 60, 60, 59, 60, 60, 8, 59, 60, - 60, 60, 61, 60, 61, 60, 60, 8, 8, 8, 59, 61, 60, 8, 8, 8, - 60, 60, 8, 60, 60, 60, 60, 60, 8, 8, 8, 8, 60, 60, 60, 60, - 8, 62, 63, 62, 62, 62, 64, 62, 64, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 64, 62, 62, 62, 62, 62, 63, 62, 62, 8, 62, 62, - 62, 62, 64, 62, 64, 62, 62, 8, 8, 8, 63, 64, 8, 8, 8, 64, - 62, 62, 8, 62, 62, 62, 62, 62, 63, 64, 8, 8, 8, 8, 8, 8, - 8, 
65, 66, 65, 65, 65, 67, 65, 67, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 67, 66, 65, - 65, 65, 67, 65, 67, 65, 65, 67, 8, 8, 8, 66, 8, 8, 8, 8, - 65, 65, 8, 65, 65, 65, 65, 65, 65, 65, 65, 8, 66, 65, 65, 65, - 8, 68, 69, 68, 68, 68, 68, 68, 68, 68, 68, 70, 8, 68, 68, 68, - 68, 68, 68, 68, 68, 68, 68, 68, 68, 69, 68, 68, 68, 68, 69, 8, - 68, 68, 68, 70, 8, 70, 8, 69, 68, 68, 70, 70, 68, 68, 68, 68, - 8, 68, 70, 8, 8, 8, 8, 8, 71, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 73, 8, 20, - 72, 72, 72, 72, 72, 72, 8, 8, 74, 75, 75, 74, 75, 75, 74, 8, - 8, 8, 76, 76, 74, 76, 76, 76, 74, 76, 74, 74, 8, 76, 74, 76, - 76, 76, 76, 76, 76, 74, 76, 8, 76, 76, 75, 75, 76, 76, 76, 8, - 76, 76, 76, 76, 76, 8, 76, 76, 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 78, 77, 77, 77, 77, 77, 77, 77, 77, 77, 79, 8, - 78, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 79, 77, - 77, 77, 80, 0, 81, 79, 8, 8, 82, 82, 82, 82, 82, 82, 82, 82, - 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 84, 8, 8, 84, 8, - 83, 83, 83, 83, 83, 85, 83, 83, 86, 86, 86, 86, 86, 86, 86, 86, - 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 88, 87, 87, 8, - 87, 87, 87, 88, 88, 87, 87, 8, 88, 87, 87, 8, 87, 87, 87, 88, - 88, 87, 87, 8, 87, 87, 87, 87, 87, 87, 87, 88, 87, 87, 87, 87, - 87, 87, 87, 87, 87, 88, 89, 87, 87, 87, 87, 87, 87, 87, 88, 8, - 87, 87, 87, 87, 87, 8, 8, 8, 90, 90, 90, 90, 90, 90, 90, 90, - 90, 90, 91, 8, 8, 8, 8, 8, 92, 92, 92, 92, 92, 92, 92, 92, - 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 94, 8, - 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 96, 0, 95, - 97, 8, 8, 8, 8, 8, 8, 8, 98, 98, 98, 98, 98, 98, 99, 98, - 98, 98, 99, 8, 8, 8, 8, 8, 100, 100, 100, 100, 100, 100, 100, 100, - 100, 100, 101, 9, 8, 8, 8, 8, 102, 102, 102, 102, 102, 102, 102, 102, - 102, 102, 8, 8, 8, 8, 8, 8, 103, 103, 103, 103, 103, 103, 104, 103, - 104, 103, 8, 8, 8, 8, 8, 8, 105, 105, 105, 105, 105, 105, 105, 105, - 
105, 105, 105, 105, 105, 105, 105, 8, 105, 105, 105, 105, 105, 8, 8, 8, - 106, 0, 107, 106, 106, 106, 106, 108, 106, 106, 106, 106, 106, 8, 8, 8, - 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 8, 8, 8, 8, - 106, 106, 106, 106, 106, 108, 8, 8, 92, 92, 92, 8, 8, 8, 8, 8, - 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 110, 8, - 109, 109, 109, 109, 109, 109, 8, 8, 110, 8, 109, 109, 109, 109, 109, 109, - 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 8, - 111, 111, 112, 8, 8, 8, 8, 8, 113, 113, 113, 113, 113, 113, 113, 113, - 113, 113, 113, 113, 113, 113, 8, 8, 113, 113, 113, 113, 113, 8, 8, 8, - 113, 113, 113, 113, 113, 114, 8, 113, 115, 115, 115, 115, 115, 115, 115, 115, - 115, 115, 115, 115, 115, 115, 8, 115, 116, 116, 116, 116, 116, 116, 116, 116, - 116, 116, 116, 116, 116, 116, 116, 117, 116, 116, 116, 116, 116, 116, 117, 118, - 116, 116, 116, 116, 116, 8, 8, 8, 116, 116, 116, 116, 116, 116, 116, 8, - 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 8, 8, - 119, 119, 119, 119, 119, 119, 120, 8, 121, 121, 121, 121, 121, 121, 121, 121, - 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 8, 8, 8, 8, 122, 122, - 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 8, 124, 123, 123, - 123, 123, 123, 123, 123, 8, 124, 123, 125, 125, 125, 125, 125, 125, 125, 125, - 121, 121, 121, 121, 8, 8, 8, 8, 5, 126, 5, 5, 5, 5, 5, 5, - 126, 5, 5, 5, 126, 0, 127, 0, 0, 0, 126, 9, 8, 8, 8, 8, - 2, 2, 2, 6, 6, 128, 2, 2, 2, 2, 2, 2, 2, 2, 129, 6, - 6, 2, 2, 6, 6, 130, 2, 2, 2, 2, 2, 2, 131, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 129, 5, 5, 5, 132, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 5, 5, 6, 6, 6, 8, 6, 6, 6, 8, - 6, 6, 6, 6, 12, 12, 12, 12, 6, 6, 6, 6, 6, 6, 6, 8, - 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 8, 6, 6, 6, 12, 6, - 8, 6, 11, 6, 6, 6, 6, 11, 0, 0, 0, 0, 0, 0, 5, 0, - 0, 0, 9, 0, 0, 0, 0, 0, 1, 8, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 9, 2, 2, 2, 2, 2, 2, 133, 8, - 0, 0, 0, 0, 0, 9, 8, 8, 132, 8, 8, 
8, 8, 8, 8, 8, - 0, 0, 0, 10, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 3, 2, 2, 2, 2, 3, 8, 8, 8, - 0, 0, 8, 8, 8, 8, 8, 8, 0, 0, 0, 9, 8, 8, 8, 8, - 20, 0, 0, 0, 0, 0, 0, 0, 134, 134, 134, 134, 134, 134, 134, 134, - 0, 0, 0, 0, 0, 0, 9, 8, 0, 0, 0, 0, 0, 8, 8, 8, - 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 136, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 8, 8, 137, 13, 13, 13, - 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 8, 8, 8, 139, - 140, 8, 8, 8, 8, 8, 8, 139, 87, 87, 87, 88, 8, 8, 8, 8, - 87, 87, 87, 88, 87, 87, 87, 88, 0, 0, 0, 0, 0, 0, 8, 8, - 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 142, 141, 141, - 141, 141, 8, 8, 8, 8, 8, 8, 141, 141, 141, 8, 8, 8, 8, 8, - 0, 0, 143, 143, 0, 0, 0, 0, 143, 141, 141, 141, 141, 5, 5, 86, - 0, 0, 0, 0, 141, 141, 0, 0, 144, 145, 145, 145, 145, 145, 145, 145, - 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 146, 147, 126, 148, 145, - 149, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, - 150, 150, 150, 150, 150, 151, 149, 150, 8, 8, 152, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, - 153, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 154, - 4, 4, 4, 4, 4, 155, 8, 8, 86, 86, 86, 86, 86, 86, 86, 156, - 150, 150, 150, 150, 150, 150, 150, 157, 150, 150, 150, 150, 0, 0, 0, 0, - 141, 141, 141, 141, 141, 141, 158, 8, 159, 159, 159, 159, 159, 159, 159, 159, - 159, 159, 159, 159, 159, 159, 160, 8, 159, 159, 159, 160, 8, 8, 8, 8, - 161, 161, 161, 161, 161, 161, 161, 161, 162, 162, 162, 162, 162, 162, 162, 162, - 162, 162, 162, 162, 162, 162, 8, 8, 14, 14, 14, 14, 8, 8, 8, 163, - 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 8, 8, 8, 8, - 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 2, 133, - 2, 2, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 133, 8, 8, - 8, 8, 8, 8, 2, 2, 2, 2, 165, 165, 165, 165, 165, 165, 165, 165, - 165, 165, 165, 165, 165, 165, 8, 8, 166, 166, 166, 166, 166, 166, 
166, 166, - 166, 166, 166, 166, 8, 8, 8, 8, 167, 167, 167, 167, 167, 167, 167, 167, - 167, 167, 168, 8, 8, 8, 8, 167, 167, 167, 167, 167, 167, 8, 8, 8, - 40, 40, 40, 40, 40, 40, 8, 8, 169, 169, 169, 169, 169, 169, 169, 169, - 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 8, 8, 8, 8, 8, 171, - 86, 86, 86, 86, 86, 86, 154, 8, 172, 172, 172, 172, 172, 172, 172, 172, - 172, 172, 172, 172, 172, 172, 172, 20, 172, 172, 172, 172, 172, 8, 8, 172, - 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 174, 8, 8, 8, 8, - 173, 173, 173, 173, 173, 173, 173, 8, 173, 173, 173, 173, 173, 8, 173, 173, - 82, 82, 82, 82, 82, 82, 8, 8, 175, 175, 175, 175, 175, 175, 175, 175, - 175, 176, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 177, 175, 175, - 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 179, 8, 8, 8, 8, - 89, 87, 87, 88, 89, 87, 87, 88, 89, 87, 87, 88, 8, 8, 8, 8, - 178, 178, 178, 178, 178, 178, 178, 8, 178, 178, 178, 178, 178, 8, 8, 8, - 86, 86, 8, 8, 8, 8, 8, 8, 86, 86, 86, 154, 8, 153, 86, 86, - 86, 86, 86, 86, 86, 86, 8, 8, 141, 141, 141, 141, 141, 141, 141, 8, - 141, 141, 141, 141, 141, 8, 8, 8, 2, 2, 2, 133, 8, 8, 8, 8, - 8, 17, 18, 18, 8, 8, 21, 22, 22, 22, 22, 23, 22, 22, 23, 23, - 22, 21, 23, 22, 22, 22, 22, 22, 24, 8, 8, 8, 8, 8, 8, 8, - 8, 180, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, - 8, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 8, 8, 8, 8, - 24, 24, 24, 24, 24, 24, 27, 8, 0, 9, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 9, 0, 0, 8, 8, 24, 24, 25, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 25, 20, 0, 0, 0, 150, 150, 150, 150, 150, - 150, 150, 150, 150, 150, 150, 150, 0, 8, 86, 86, 86, 8, 86, 86, 86, - 8, 86, 86, 86, 8, 86, 154, 8, 0, 0, 0, 9, 0, 0, 0, 9, - 8, 8, 8, 8, 20, 0, 0, 8, 181, 181, 181, 181, 181, 181, 182, 181, - 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 183, 181, 181, 181, 181, - 181, 181, 181, 181, 181, 183, 181, 182, 181, 181, 181, 181, 181, 181, 181, 8, - 181, 181, 181, 181, 181, 183, 8, 8, 0, 9, 8, 20, 0, 0, 0, 0, - 0, 0, 8, 20, 
0, 0, 0, 0, 6, 6, 6, 6, 6, 11, 8, 8, - 0, 0, 0, 0, 0, 0, 127, 8, 184, 184, 184, 184, 184, 184, 184, 184, - 184, 184, 184, 184, 184, 184, 185, 8, 186, 186, 186, 186, 186, 186, 186, 186, - 187, 8, 8, 8, 8, 8, 8, 8, 188, 188, 188, 188, 188, 188, 188, 188, - 188, 188, 188, 188, 188, 188, 188, 189, 188, 188, 8, 8, 8, 8, 8, 8, - 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 191, 8, 8, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 193, - 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 8, 8, 194, 194, 194, 194, - 194, 194, 194, 8, 8, 8, 8, 8, 195, 195, 195, 195, 195, 195, 195, 195, - 196, 196, 196, 196, 196, 196, 196, 196, 197, 197, 197, 197, 197, 197, 197, 197, - 197, 197, 197, 197, 197, 197, 197, 8, 197, 197, 197, 197, 197, 8, 8, 8, - 198, 198, 198, 8, 199, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, - 198, 198, 198, 200, 199, 8, 199, 200, 201, 201, 201, 201, 201, 201, 201, 201, - 201, 201, 201, 202, 201, 201, 201, 201, 203, 203, 203, 203, 203, 203, 203, 203, - 203, 203, 203, 203, 203, 203, 8, 204, 205, 205, 205, 205, 205, 205, 205, 205, - 205, 205, 205, 205, 205, 8, 8, 206, 207, 207, 207, 207, 207, 207, 207, 207, - 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 8, 8, 8, 208, - 209, 209, 210, 211, 8, 8, 209, 209, 209, 209, 210, 209, 210, 209, 209, 209, - 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 8, 8, 209, 211, 8, 210, - 209, 209, 209, 209, 8, 8, 8, 8, 209, 209, 209, 209, 211, 8, 8, 8, - 212, 212, 212, 212, 212, 212, 212, 212, 213, 213, 213, 213, 213, 213, 213, 213, - 213, 213, 213, 8, 214, 213, 213, 213, 215, 215, 215, 215, 215, 215, 215, 215, - 215, 215, 215, 8, 215, 215, 215, 215, 216, 216, 216, 216, 216, 216, 216, 216, - 216, 217, 8, 8, 216, 216, 216, 216, 218, 218, 218, 218, 218, 218, 218, 218, - 218, 218, 218, 218, 219, 8, 8, 8, 220, 220, 220, 220, 220, 220, 220, 220, - 220, 220, 220, 220, 220, 220, 220, 8, 8, 220, 220, 220, 220, 220, 220, 220, - 221, 221, 221, 221, 221, 221, 221, 
221, 221, 8, 8, 8, 8, 8, 8, 8, - 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 223, 8, 8, 8, - 222, 222, 222, 222, 222, 8, 8, 8, 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 225, 224, 224, 224, 224, 224, 224, 224, 8, 8, 8, 8, 8, 8, - 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 227, 8, 8, 8, - 226, 226, 226, 226, 226, 8, 8, 8, 228, 228, 228, 228, 228, 228, 228, 228, - 228, 228, 228, 228, 8, 8, 8, 8, 228, 228, 228, 228, 228, 8, 8, 8, - 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 230, - 229, 230, 8, 8, 8, 8, 8, 8, 229, 229, 8, 8, 8, 8, 8, 8, - 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 232, - 164, 164, 164, 164, 233, 8, 8, 8, 234, 234, 234, 234, 234, 234, 234, 234, - 234, 234, 235, 8, 8, 8, 8, 8, 234, 234, 234, 234, 234, 234, 234, 235, - 8, 8, 8, 8, 8, 8, 8, 236, 237, 8, 8, 8, 8, 8, 8, 8, - 0, 0, 0, 8, 8, 8, 8, 8, 0, 0, 0, 9, 20, 0, 0, 0, - 0, 0, 0, 127, 5, 0, 0, 0, 0, 0, 0, 0, 0, 127, 5, 5, - 5, 126, 127, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, - 0, 0, 0, 0, 0, 0, 0, 8, 6, 6, 6, 8, 8, 8, 8, 8, - 0, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 9, 0, - 8, 9, 20, 9, 20, 0, 9, 0, 0, 0, 0, 0, 0, 20, 20, 0, - 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, 9, 20, 0, - 0, 0, 9, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 20, 0, 9, - 0, 0, 9, 9, 8, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, - 24, 24, 180, 24, 24, 24, 24, 24, 180, 25, 25, 180, 180, 24, 24, 24, - 24, 25, 24, 24, 180, 180, 8, 8, 8, 25, 8, 180, 180, 180, 180, 24, - 180, 25, 25, 180, 180, 180, 180, 180, 180, 25, 25, 180, 24, 25, 24, 24, - 24, 25, 24, 24, 180, 24, 25, 25, 24, 24, 24, 24, 24, 180, 24, 24, - 24, 24, 24, 24, 24, 24, 8, 8, 180, 24, 180, 24, 24, 180, 24, 24, - 20, 0, 0, 0, 0, 0, 0, 9, 8, 8, 8, 0, 0, 0, 0, 0, - 238, 9, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 9, 8, 8, 8, - 9, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 20, 0, 0, 0, 0, - 0, 0, 9, 0, 0, 9, 8, 8, 0, 0, 0, 0, 20, 0, 9, 8, - 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 
8, 8, 8, 20, 0, 0, - 9, 8, 20, 0, 0, 0, 0, 0, 141, 141, 141, 158, 8, 8, 8, 8, - 141, 141, 158, 8, 8, 8, 8, 8, 20, 8, 8, 8, 8, 8, 8, 8, +static RE_UINT16 re_script_stage_4[] = { + 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 3, 0, 0, 0, 4, 0, + 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 5, 0, 2, 5, 6, 0, + 7, 7, 7, 7, 8, 9, 10, 11, 12, 13, 14, 15, 8, 8, 8, 8, + 16, 8, 8, 8, 17, 18, 18, 18, 19, 19, 19, 19, 19, 20, 19, 19, + 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 21, 22, 22, 22, 24, 21, + 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 12, 12, 26, 26, 27, 12, + 26, 28, 12, 12, 29, 30, 29, 31, 29, 29, 32, 33, 29, 29, 29, 29, + 31, 29, 34, 7, 7, 35, 29, 29, 0, 0, 36, 29, 37, 29, 29, 29, + 29, 29, 29, 30, 38, 38, 38, 39, 38, 38, 38, 38, 38, 38, 40, 41, + 42, 42, 42, 42, 43, 12, 12, 12, 44, 44, 44, 44, 44, 44, 45, 12, + 46, 46, 46, 46, 46, 46, 46, 47, 46, 46, 46, 48, 49, 49, 49, 49, + 49, 49, 49, 50, 12, 12, 12, 12, 51, 12, 12, 12, 12, 29, 29, 29, + 52, 52, 52, 52, 53, 52, 52, 52, 52, 54, 52, 52, 55, 56, 55, 57, + 57, 55, 55, 55, 55, 55, 58, 55, 59, 60, 61, 55, 55, 57, 57, 62, + 12, 63, 12, 64, 55, 60, 55, 55, 55, 55, 55, 12, 65, 65, 66, 67, + 68, 69, 69, 69, 69, 69, 70, 69, 70, 71, 72, 70, 66, 67, 68, 72, + 73, 12, 65, 74, 12, 75, 69, 69, 69, 72, 12, 12, 76, 76, 77, 78, + 78, 77, 77, 77, 77, 77, 79, 77, 79, 76, 80, 77, 77, 78, 78, 80, + 81, 12, 12, 12, 77, 82, 77, 77, 80, 12, 12, 12, 83, 83, 84, 85, + 85, 84, 84, 84, 84, 84, 86, 84, 86, 83, 87, 84, 84, 85, 85, 87, + 12, 88, 12, 89, 84, 88, 84, 84, 84, 84, 12, 12, 90, 91, 92, 90, + 93, 94, 95, 93, 96, 97, 92, 90, 98, 98, 94, 90, 92, 90, 93, 94, + 97, 96, 12, 12, 12, 90, 98, 98, 98, 98, 92, 12, 99, 100, 99, 101, + 101, 99, 99, 99, 99, 99, 101, 99, 99, 99, 102, 100, 99, 101, 101, 102, + 12, 103, 102, 12, 99, 104, 99, 99, 12, 12, 99, 99, 105, 105, 106, 107, + 107, 106, 106, 106, 106, 106, 107, 106, 106, 105, 108, 106, 106, 107, 107, 108, + 12, 109, 12, 110, 106, 111, 106, 106, 109, 12, 12, 12, 112, 112, 113, 114, + 114, 113, 113, 113, 113, 113, 113, 113, 113, 
113, 115, 112, 113, 114, 114, 115, + 12, 116, 12, 12, 113, 117, 113, 113, 113, 118, 112, 113, 119, 120, 121, 121, + 121, 122, 119, 121, 121, 121, 121, 121, 123, 121, 121, 124, 121, 122, 125, 126, + 121, 127, 121, 121, 12, 119, 121, 121, 119, 128, 12, 12, 129, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 131, 132, 130, 130, 130, 12, 133, 134, 135, 136, + 12, 137, 138, 137, 138, 139, 140, 138, 137, 137, 141, 142, 137, 135, 137, 142, + 137, 137, 142, 137, 143, 143, 143, 143, 143, 143, 144, 143, 143, 143, 143, 145, + 144, 143, 143, 143, 143, 143, 143, 146, 143, 147, 148, 12, 149, 149, 149, 149, + 150, 150, 150, 150, 150, 151, 12, 152, 150, 150, 153, 150, 154, 154, 154, 154, + 155, 155, 155, 155, 155, 155, 156, 157, 155, 158, 156, 157, 156, 157, 155, 158, + 156, 157, 155, 155, 155, 158, 155, 155, 155, 155, 158, 159, 155, 155, 155, 160, + 155, 155, 157, 12, 161, 161, 161, 161, 161, 162, 12, 12, 163, 163, 163, 163, + 164, 164, 164, 164, 164, 164, 164, 165, 166, 166, 166, 166, 166, 166, 167, 168, + 166, 166, 169, 12, 170, 170, 170, 171, 170, 172, 12, 12, 173, 173, 173, 173, + 173, 174, 12, 12, 175, 175, 175, 175, 175, 12, 12, 12, 176, 176, 176, 177, + 177, 12, 12, 12, 178, 178, 178, 178, 178, 178, 178, 179, 178, 178, 179, 12, + 180, 181, 182, 183, 182, 182, 184, 12, 182, 182, 182, 182, 182, 182, 12, 12, + 182, 182, 183, 12, 163, 185, 12, 12, 186, 186, 186, 186, 186, 186, 186, 187, + 186, 186, 186, 12, 188, 186, 186, 186, 189, 189, 189, 189, 189, 189, 189, 190, + 189, 191, 12, 12, 192, 192, 192, 192, 192, 192, 192, 12, 192, 192, 193, 12, + 192, 192, 194, 195, 196, 196, 196, 196, 196, 196, 196, 197, 198, 198, 198, 198, + 198, 198, 198, 199, 198, 198, 198, 200, 198, 198, 201, 12, 198, 198, 198, 201, + 7, 7, 7, 202, 203, 203, 203, 203, 203, 203, 203, 12, 203, 203, 203, 204, + 205, 205, 205, 205, 206, 206, 206, 206, 206, 12, 12, 206, 207, 207, 207, 207, + 207, 207, 208, 207, 207, 207, 209, 210, 211, 211, 211, 211, 205, 205, 12, 12, + 212, 7, 7, 7, 213, 7, 214, 215, 0, 216, 
217, 12, 2, 218, 219, 2, + 2, 2, 2, 220, 221, 218, 222, 2, 2, 2, 223, 2, 2, 2, 2, 224, + 7, 217, 12, 7, 8, 225, 8, 225, 8, 8, 226, 226, 8, 8, 8, 225, + 8, 15, 8, 8, 8, 10, 8, 227, 10, 15, 8, 14, 0, 0, 0, 228, + 0, 229, 0, 0, 230, 0, 0, 231, 0, 0, 0, 232, 2, 2, 2, 233, + 0, 0, 0, 234, 235, 12, 12, 12, 0, 236, 237, 0, 4, 0, 0, 0, + 0, 0, 0, 4, 2, 2, 238, 12, 0, 0, 232, 12, 0, 232, 12, 12, + 239, 239, 239, 239, 0, 240, 0, 0, 0, 234, 0, 0, 0, 0, 234, 241, + 0, 0, 229, 0, 234, 12, 12, 12, 242, 242, 242, 242, 242, 242, 242, 243, + 18, 18, 18, 18, 18, 12, 244, 18, 245, 245, 245, 245, 245, 245, 12, 246, + 247, 12, 12, 246, 155, 158, 12, 12, 155, 158, 155, 158, 232, 12, 12, 12, + 248, 248, 248, 248, 248, 248, 249, 248, 248, 12, 12, 12, 248, 250, 12, 12, + 0, 0, 0, 12, 0, 251, 0, 0, 252, 248, 253, 254, 0, 0, 248, 0, + 255, 256, 256, 256, 256, 256, 256, 256, 256, 257, 258, 259, 260, 261, 261, 261, + 261, 261, 261, 261, 261, 261, 262, 260, 12, 263, 264, 264, 264, 264, 264, 264, + 264, 264, 264, 265, 266, 154, 154, 154, 154, 154, 154, 267, 264, 264, 268, 12, + 0, 12, 12, 12, 154, 154, 154, 269, 261, 261, 261, 270, 261, 261, 0, 0, + 248, 248, 248, 271, 272, 272, 272, 272, 272, 272, 272, 273, 272, 274, 12, 12, + 275, 275, 275, 275, 276, 276, 276, 276, 276, 276, 276, 12, 19, 19, 19, 277, + 278, 278, 278, 278, 278, 278, 12, 12, 237, 2, 2, 2, 2, 2, 231, 279, + 2, 2, 2, 280, 280, 12, 12, 12, 12, 281, 2, 2, 282, 282, 282, 282, + 282, 282, 282, 12, 0, 0, 234, 12, 283, 283, 283, 283, 283, 283, 12, 12, + 284, 284, 284, 284, 284, 285, 12, 286, 284, 284, 287, 12, 52, 52, 52, 12, + 288, 288, 288, 288, 288, 288, 288, 289, 290, 290, 290, 290, 290, 12, 12, 291, + 154, 154, 154, 292, 293, 293, 293, 293, 293, 293, 293, 294, 293, 293, 295, 296, + 149, 149, 149, 297, 298, 298, 298, 298, 298, 299, 12, 12, 298, 298, 298, 300, + 298, 298, 300, 298, 301, 301, 301, 301, 302, 12, 12, 12, 12, 12, 303, 301, + 304, 304, 304, 304, 304, 305, 12, 12, 159, 158, 159, 158, 159, 158, 12, 12, + 2, 2, 3, 2, 12, 
306, 12, 12, 304, 304, 304, 307, 304, 304, 307, 12, + 154, 12, 12, 12, 154, 267, 308, 154, 154, 154, 154, 12, 248, 248, 248, 250, + 248, 248, 250, 12, 2, 279, 12, 12, 309, 22, 12, 25, 26, 27, 26, 310, + 311, 312, 26, 26, 313, 12, 12, 12, 314, 29, 29, 29, 29, 29, 29, 315, + 316, 29, 29, 29, 29, 29, 12, 12, 29, 29, 29, 313, 7, 7, 7, 217, + 232, 0, 0, 0, 0, 232, 0, 12, 29, 317, 29, 29, 29, 29, 29, 318, + 241, 0, 0, 0, 0, 319, 261, 261, 261, 261, 261, 320, 321, 154, 321, 154, + 321, 154, 321, 292, 0, 232, 0, 232, 12, 12, 241, 234, 322, 322, 322, 323, + 322, 322, 322, 322, 322, 324, 322, 322, 322, 322, 324, 325, 322, 322, 322, 326, + 322, 322, 324, 12, 232, 132, 0, 0, 0, 132, 0, 0, 8, 8, 8, 327, + 327, 12, 12, 12, 0, 0, 0, 328, 329, 329, 329, 329, 329, 329, 329, 330, + 331, 331, 331, 331, 332, 12, 12, 12, 214, 0, 0, 0, 333, 333, 333, 333, + 333, 12, 12, 12, 334, 334, 334, 334, 334, 334, 335, 12, 336, 336, 336, 336, + 336, 336, 337, 12, 338, 338, 338, 338, 338, 338, 338, 339, 340, 340, 340, 340, + 340, 12, 340, 340, 340, 341, 12, 12, 342, 342, 342, 342, 343, 343, 343, 343, + 344, 344, 344, 344, 344, 344, 344, 345, 344, 344, 345, 12, 346, 346, 346, 346, + 346, 346, 12, 12, 347, 347, 347, 347, 347, 12, 12, 348, 349, 349, 349, 349, + 349, 350, 12, 12, 349, 351, 12, 12, 349, 349, 12, 12, 352, 353, 354, 352, + 352, 352, 352, 352, 352, 355, 356, 357, 358, 358, 358, 358, 358, 359, 358, 358, + 360, 360, 360, 360, 361, 361, 361, 361, 361, 361, 361, 362, 12, 363, 361, 361, + 364, 364, 364, 364, 364, 364, 364, 365, 366, 366, 366, 366, 366, 366, 367, 368, + 369, 369, 369, 369, 370, 370, 370, 370, 370, 370, 12, 371, 372, 373, 12, 372, + 372, 374, 374, 372, 372, 372, 372, 372, 372, 12, 375, 376, 372, 372, 12, 12, + 372, 372, 377, 12, 378, 378, 378, 378, 379, 379, 379, 379, 380, 380, 380, 380, + 380, 381, 382, 380, 380, 381, 12, 12, 383, 383, 383, 383, 383, 384, 385, 383, + 386, 386, 386, 386, 386, 387, 386, 386, 388, 388, 388, 388, 389, 12, 388, 388, + 390, 390, 390, 390, 391, 12, 
392, 393, 12, 12, 392, 390, 394, 394, 394, 394, + 394, 394, 395, 12, 29, 29, 29, 51, 396, 396, 396, 396, 396, 396, 396, 397, + 398, 396, 396, 396, 12, 12, 12, 399, 400, 400, 400, 400, 401, 12, 12, 12, + 402, 402, 402, 402, 402, 402, 403, 12, 402, 402, 404, 12, 405, 405, 405, 405, + 405, 406, 405, 405, 405, 12, 12, 12, 407, 407, 407, 407, 407, 408, 12, 12, + 409, 409, 409, 409, 409, 409, 410, 411, 409, 409, 412, 12, 120, 121, 121, 121, + 121, 128, 12, 12, 413, 413, 413, 413, 414, 413, 413, 413, 413, 413, 413, 415, + 416, 416, 416, 416, 416, 416, 417, 12, 416, 416, 418, 12, 419, 419, 420, 421, + 421, 420, 420, 420, 420, 420, 422, 420, 422, 419, 423, 420, 420, 421, 421, 423, + 12, 424, 12, 419, 420, 425, 420, 426, 420, 426, 12, 12, 427, 427, 427, 427, + 427, 427, 12, 12, 427, 427, 428, 12, 429, 429, 429, 429, 429, 430, 429, 429, + 429, 429, 430, 12, 431, 431, 431, 431, 431, 432, 12, 12, 431, 431, 433, 12, + 434, 434, 434, 434, 434, 434, 12, 12, 434, 434, 435, 12, 436, 436, 436, 436, + 437, 12, 12, 438, 439, 439, 439, 439, 439, 439, 440, 12, 441, 441, 441, 441, + 441, 441, 442, 12, 441, 441, 441, 443, 441, 442, 12, 12, 444, 444, 444, 444, + 444, 444, 444, 445, 278, 278, 446, 12, 447, 447, 447, 447, 447, 447, 447, 448, + 447, 447, 449, 450, 451, 451, 451, 451, 451, 451, 451, 452, 451, 452, 12, 12, + 453, 453, 453, 453, 453, 454, 12, 12, 453, 453, 455, 453, 455, 453, 453, 453, + 453, 453, 12, 456, 457, 457, 457, 457, 457, 458, 12, 12, 457, 457, 457, 459, + 12, 12, 12, 460, 461, 12, 12, 12, 462, 462, 462, 462, 462, 462, 463, 12, + 462, 462, 462, 464, 462, 462, 464, 12, 462, 462, 465, 462, 0, 234, 12, 12, + 0, 232, 241, 0, 0, 466, 228, 0, 0, 0, 466, 7, 212, 467, 7, 0, + 0, 0, 468, 228, 8, 225, 12, 12, 0, 0, 0, 229, 469, 470, 241, 229, + 0, 0, 471, 241, 0, 241, 0, 0, 0, 471, 232, 241, 0, 229, 0, 229, + 0, 0, 471, 232, 0, 472, 240, 0, 229, 0, 0, 0, 0, 0, 0, 240, + 473, 473, 473, 473, 473, 474, 473, 473, 473, 475, 12, 12, 29, 476, 29, 29, + 477, 478, 476, 29, 51, 29, 479, 12, 
480, 314, 479, 476, 477, 478, 479, 479, + 477, 478, 51, 29, 51, 29, 476, 481, 29, 29, 482, 29, 29, 29, 29, 12, + 476, 476, 482, 29, 0, 0, 0, 483, 12, 240, 0, 0, 484, 12, 12, 12, + 0, 0, 483, 12, 12, 0, 0, 0, 0, 0, 12, 12, 0, 0, 471, 0, + 232, 241, 0, 0, 0, 483, 12, 12, 248, 485, 12, 12, 248, 271, 12, 12, + 486, 12, 12, 12, }; static RE_UINT8 re_script_stage_5[] = { - 1, 1, 1, 2, 2, 2, 2, 1, 35, 35, 41, 41, 3, 3, 1, 3, - 0, 0, 1, 0, 3, 1, 3, 0, 0, 3, 55, 55, 4, 4, 4, 41, - 41, 4, 0, 5, 5, 5, 5, 0, 0, 1, 0, 6, 6, 6, 6, 0, - 7, 7, 7, 0, 1, 7, 7, 1, 7, 41, 41, 7, 8, 8, 0, 8, - 8, 0, 9, 9, 66, 66, 66, 0, 82, 82, 82, 0, 95, 95, 95, 0, - 10, 10, 10, 41, 41, 10, 0, 10, 0, 11, 11, 11, 11, 0, 0, 12, - 12, 12, 12, 0, 0, 13, 13, 13, 13, 0, 0, 14, 14, 14, 14, 0, - 15, 15, 0, 15, 15, 0, 0, 16, 16, 16, 16, 0, 17, 17, 0, 17, - 17, 0, 18, 18, 0, 18, 18, 0, 19, 19, 0, 19, 19, 0, 0, 20, - 20, 20, 20, 0, 0, 21, 21, 0, 21, 21, 22, 22, 0, 22, 22, 0, - 22, 1, 1, 22, 23, 23, 24, 24, 0, 24, 24, 1, 25, 25, 26, 26, - 26, 0, 0, 26, 27, 27, 27, 0, 28, 28, 29, 29, 29, 0, 30, 30, - 30, 1, 30, 0, 42, 42, 42, 0, 43, 43, 43, 1, 44, 44, 45, 45, - 45, 0, 31, 31, 32, 32, 32, 1, 32, 0, 46, 46, 46, 0, 47, 47, - 47, 0, 56, 56, 56, 0, 54, 54, 78, 78, 78, 0, 0, 78, 62, 62, - 62, 0, 67, 67, 93, 93, 68, 68, 0, 68, 69, 69, 41, 1, 1, 41, - 3, 4, 2, 3, 3, 2, 4, 2, 41, 0, 2, 0, 53, 53, 57, 57, - 57, 0, 0, 55, 58, 58, 0, 58, 58, 0, 36, 36, 0, 36, 1, 36, - 0, 33, 33, 33, 33, 0, 0, 41, 1, 33, 1, 34, 34, 34, 34, 1, - 0, 35, 0, 25, 25, 0, 35, 0, 25, 1, 34, 0, 36, 0, 37, 37, - 37, 0, 83, 83, 70, 70, 0, 4, 84, 84, 59, 59, 65, 65, 71, 71, - 71, 0, 72, 72, 73, 73, 0, 73, 85, 85, 77, 77, 77, 0, 79, 79, - 79, 0, 0, 79, 86, 86, 86, 0, 0, 7, 48, 48, 0, 48, 48, 0, - 74, 74, 74, 0, 75, 75, 75, 0, 38, 38, 38, 0, 39, 39, 39, 0, - 49, 49, 0, 49, 60, 60, 40, 40, 50, 50, 51, 51, 52, 52, 52, 0, - 0, 52, 87, 87, 0, 87, 64, 64, 0, 64, 76, 76, 0, 76, 98, 98, - 97, 97, 61, 61, 0, 61, 61, 0, 88, 88, 80, 80, 0, 80, 89, 89, - 90, 90, 90, 
0, 91, 91, 91, 0, 94, 94, 92, 92, 101, 101, 101, 0, - 96, 96, 96, 0, 100, 100, 100, 0, 102, 102, 63, 63, 63, 0, 81, 81, - 81, 0, 84, 0, 99, 99, 99, 0, 0, 99, 34, 33, 33, 1, + 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, + 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 35, 35, 41, 41, 41, 41, + 3, 3, 3, 3, 1, 3, 3, 3, 0, 0, 3, 3, 3, 3, 1, 3, + 0, 0, 0, 0, 3, 1, 3, 1, 3, 3, 3, 0, 3, 0, 3, 3, + 3, 3, 0, 3, 3, 3, 55, 55, 55, 55, 55, 55, 4, 4, 4, 4, + 4, 41, 41, 4, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, + 0, 1, 5, 0, 0, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, + 6, 0, 0, 0, 7, 7, 7, 7, 7, 1, 7, 7, 1, 7, 7, 7, + 7, 7, 7, 1, 1, 0, 7, 1, 7, 7, 7, 41, 41, 41, 7, 7, + 1, 1, 7, 7, 41, 7, 7, 7, 8, 8, 8, 8, 8, 8, 0, 8, + 8, 8, 8, 0, 0, 8, 8, 8, 9, 9, 9, 9, 9, 9, 0, 0, + 66, 66, 66, 66, 66, 66, 66, 0, 82, 82, 82, 82, 82, 82, 0, 0, + 82, 82, 82, 0, 95, 95, 95, 95, 0, 0, 95, 0, 7, 7, 7, 0, + 10, 10, 10, 10, 10, 41, 41, 10, 1, 1, 10, 10, 11, 11, 11, 11, + 0, 11, 11, 11, 11, 0, 0, 11, 11, 0, 11, 11, 11, 0, 11, 0, + 0, 0, 11, 11, 11, 11, 0, 0, 11, 11, 11, 0, 0, 0, 0, 11, + 11, 11, 0, 11, 0, 12, 12, 12, 12, 12, 12, 0, 0, 0, 0, 12, + 12, 0, 0, 12, 12, 12, 12, 12, 12, 0, 12, 12, 0, 12, 12, 0, + 12, 12, 0, 0, 0, 12, 0, 0, 12, 0, 12, 0, 0, 0, 12, 12, + 0, 13, 13, 13, 13, 13, 13, 13, 13, 13, 0, 13, 13, 0, 13, 13, + 13, 13, 0, 0, 13, 0, 0, 0, 0, 0, 13, 13, 0, 14, 14, 14, + 14, 14, 14, 14, 14, 0, 0, 14, 14, 0, 14, 14, 14, 14, 0, 0, + 0, 0, 14, 14, 14, 14, 0, 14, 0, 0, 15, 15, 0, 15, 15, 15, + 15, 15, 15, 0, 15, 0, 15, 15, 15, 15, 0, 0, 0, 15, 15, 0, + 0, 0, 0, 15, 15, 0, 0, 0, 15, 15, 15, 15, 16, 16, 16, 16, + 0, 16, 16, 16, 16, 0, 16, 16, 16, 16, 0, 0, 0, 16, 16, 0, + 0, 0, 16, 16, 0, 17, 17, 17, 17, 17, 17, 17, 17, 0, 17, 17, + 17, 17, 0, 0, 0, 17, 17, 0, 0, 0, 17, 0, 0, 0, 17, 17, + 0, 18, 18, 18, 18, 18, 18, 18, 18, 0, 18, 18, 18, 18, 18, 0, + 0, 0, 0, 18, 0, 0, 18, 18, 18, 18, 0, 0, 0, 0, 19, 19, + 0, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, 19, 19, 0, 19, + 0, 19, 0, 0, 0, 0, 19, 0, 0, 0, 0, 19, 19, 0, 19, 
0, + 19, 0, 0, 0, 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, + 0, 0, 0, 1, 0, 21, 21, 0, 21, 0, 0, 21, 21, 0, 21, 0, + 0, 21, 0, 0, 21, 21, 21, 21, 0, 21, 21, 21, 0, 21, 0, 21, + 0, 0, 21, 21, 21, 21, 0, 21, 21, 21, 0, 0, 22, 22, 22, 22, + 0, 22, 22, 22, 22, 0, 0, 0, 22, 0, 22, 22, 22, 1, 1, 1, + 1, 22, 22, 0, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 0, 24, + 0, 24, 0, 0, 24, 24, 24, 1, 25, 25, 25, 25, 26, 26, 26, 26, + 26, 0, 26, 26, 26, 26, 0, 0, 26, 26, 26, 0, 0, 26, 26, 26, + 26, 0, 0, 0, 27, 27, 27, 27, 27, 0, 0, 0, 28, 28, 28, 28, + 29, 29, 29, 29, 29, 0, 0, 0, 30, 30, 30, 30, 30, 30, 30, 1, + 1, 1, 30, 30, 30, 0, 0, 0, 42, 42, 42, 42, 42, 0, 42, 42, + 42, 0, 0, 0, 43, 43, 43, 43, 43, 1, 1, 0, 44, 44, 44, 44, + 45, 45, 45, 45, 45, 0, 45, 45, 31, 31, 31, 31, 31, 31, 0, 0, + 32, 32, 1, 1, 32, 1, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, + 32, 32, 0, 0, 28, 28, 0, 0, 46, 46, 46, 46, 46, 46, 46, 0, + 46, 0, 0, 0, 47, 47, 47, 47, 47, 47, 0, 0, 47, 0, 0, 0, + 56, 56, 56, 56, 56, 56, 0, 0, 56, 56, 56, 0, 0, 0, 56, 56, + 54, 54, 54, 54, 0, 0, 54, 54, 78, 78, 78, 78, 78, 78, 78, 0, + 78, 0, 0, 78, 78, 78, 0, 0, 41, 41, 41, 0, 62, 62, 62, 62, + 62, 0, 0, 0, 67, 67, 67, 67, 93, 93, 93, 93, 68, 68, 68, 68, + 0, 0, 0, 68, 68, 68, 0, 0, 0, 68, 68, 68, 69, 69, 69, 69, + 41, 41, 41, 1, 41, 1, 41, 41, 41, 1, 1, 1, 1, 41, 1, 1, + 41, 1, 1, 0, 41, 41, 0, 0, 2, 2, 3, 3, 3, 3, 3, 4, + 2, 3, 3, 3, 3, 3, 2, 2, 3, 3, 3, 2, 4, 2, 2, 2, + 2, 2, 2, 3, 3, 3, 0, 0, 0, 3, 0, 3, 0, 3, 3, 3, + 41, 41, 1, 1, 1, 0, 1, 1, 1, 2, 0, 0, 1, 1, 1, 2, + 1, 1, 1, 0, 2, 0, 0, 0, 1, 1, 0, 0, 41, 0, 0, 0, + 1, 1, 3, 1, 1, 1, 2, 2, 2, 1, 0, 0, 53, 53, 53, 53, + 0, 0, 1, 1, 0, 1, 1, 1, 57, 57, 57, 57, 57, 57, 57, 0, + 0, 55, 55, 55, 58, 58, 58, 58, 0, 0, 0, 58, 58, 0, 0, 0, + 36, 36, 36, 36, 36, 36, 0, 36, 36, 36, 0, 0, 1, 36, 1, 36, + 1, 36, 36, 36, 36, 36, 41, 41, 41, 41, 25, 25, 0, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 0, 0, 41, 41, 1, 1, 33, 33, 33, + 1, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 1, 
0, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 0, 0, 0, 25, 25, 25, 25, 25, 25, 0, + 35, 35, 35, 0, 25, 25, 25, 1, 34, 34, 34, 0, 36, 0, 0, 0, + 37, 37, 37, 37, 37, 0, 0, 0, 37, 37, 37, 0, 83, 83, 83, 83, + 70, 70, 70, 70, 4, 4, 0, 4, 84, 84, 84, 84, 2, 2, 2, 0, + 2, 2, 0, 0, 0, 0, 0, 2, 59, 59, 59, 59, 65, 65, 65, 65, + 71, 71, 71, 71, 71, 0, 0, 0, 0, 0, 71, 71, 71, 71, 0, 0, + 72, 72, 72, 72, 72, 72, 1, 72, 73, 73, 73, 73, 0, 0, 0, 73, + 25, 0, 0, 0, 85, 85, 85, 85, 85, 85, 0, 1, 85, 85, 0, 0, + 0, 0, 85, 85, 23, 23, 23, 0, 77, 77, 77, 77, 77, 77, 77, 0, + 77, 77, 0, 0, 79, 79, 79, 79, 79, 79, 79, 0, 0, 0, 0, 79, + 86, 86, 86, 86, 86, 86, 86, 0, 2, 3, 0, 0, 86, 86, 0, 0, + 0, 0, 0, 25, 0, 0, 0, 5, 6, 0, 6, 0, 6, 6, 0, 6, + 6, 0, 6, 6, 7, 7, 0, 0, 0, 0, 0, 7, 7, 7, 1, 1, + 0, 0, 7, 7, 7, 0, 7, 7, 7, 0, 0, 1, 1, 1, 34, 34, + 34, 34, 1, 1, 0, 0, 25, 25, 48, 48, 48, 48, 0, 48, 48, 48, + 48, 48, 48, 0, 48, 48, 0, 48, 48, 48, 0, 0, 3, 0, 0, 0, + 1, 41, 0, 0, 74, 74, 74, 74, 74, 0, 0, 0, 75, 75, 75, 75, + 75, 0, 0, 0, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 0, + 120, 120, 120, 120, 120, 120, 120, 0, 49, 49, 49, 49, 49, 49, 0, 49, + 60, 60, 60, 60, 60, 60, 0, 0, 40, 40, 40, 40, 50, 50, 50, 50, + 51, 51, 51, 51, 51, 51, 0, 0, 106, 106, 106, 106, 103, 103, 103, 103, + 0, 0, 0, 103, 110, 110, 110, 110, 110, 110, 110, 0, 110, 110, 0, 0, + 52, 52, 52, 52, 52, 52, 0, 0, 52, 0, 52, 52, 52, 52, 0, 52, + 52, 0, 0, 0, 52, 0, 0, 52, 87, 87, 87, 87, 87, 87, 0, 87, + 118, 118, 118, 118, 117, 117, 117, 117, 117, 117, 117, 0, 0, 0, 0, 117, + 64, 64, 64, 64, 0, 0, 0, 64, 76, 76, 76, 76, 76, 76, 0, 0, + 0, 0, 0, 76, 98, 98, 98, 98, 97, 97, 97, 97, 0, 0, 97, 97, + 61, 61, 61, 61, 0, 61, 61, 0, 0, 61, 61, 61, 61, 61, 61, 0, + 0, 0, 0, 61, 61, 0, 0, 0, 88, 88, 88, 88, 116, 116, 116, 116, + 112, 112, 112, 112, 112, 112, 112, 0, 0, 0, 0, 112, 80, 80, 80, 80, + 80, 80, 0, 0, 0, 80, 80, 80, 89, 89, 89, 89, 89, 89, 0, 0, + 90, 90, 90, 90, 90, 90, 90, 0, 121, 121, 121, 121, 121, 121, 0, 0, + 0, 
121, 121, 121, 121, 0, 0, 0, 91, 91, 91, 91, 91, 0, 0, 0, + 94, 94, 94, 94, 94, 94, 0, 0, 0, 0, 94, 94, 0, 0, 0, 94, + 92, 92, 92, 92, 92, 92, 0, 0, 101, 101, 101, 101, 101, 0, 0, 0, + 101, 101, 0, 0, 96, 96, 96, 96, 96, 0, 96, 96, 111, 111, 111, 111, + 111, 111, 111, 0, 100, 100, 100, 100, 100, 0, 0, 0, 0, 100, 0, 0, + 100, 100, 100, 0, 109, 109, 109, 109, 109, 109, 0, 109, 109, 109, 0, 0, + 123, 123, 123, 123, 123, 123, 123, 0, 123, 123, 0, 0, 0, 107, 107, 107, + 107, 107, 107, 107, 107, 0, 0, 107, 107, 0, 107, 107, 107, 107, 0, 0, + 0, 0, 0, 107, 0, 0, 107, 107, 107, 0, 0, 0, 124, 124, 124, 124, + 124, 124, 0, 0, 122, 122, 122, 122, 122, 122, 0, 0, 114, 114, 114, 114, + 114, 0, 0, 0, 114, 114, 0, 0, 102, 102, 102, 102, 102, 102, 0, 0, + 125, 125, 125, 125, 125, 125, 125, 0, 0, 0, 0, 125, 119, 119, 119, 119, + 119, 0, 0, 0, 63, 63, 63, 63, 63, 0, 0, 0, 63, 63, 63, 0, + 81, 81, 81, 81, 81, 81, 81, 0, 84, 0, 0, 0, 115, 115, 115, 115, + 115, 115, 115, 0, 115, 115, 0, 0, 0, 0, 115, 115, 104, 104, 104, 104, + 104, 104, 0, 0, 108, 108, 108, 108, 108, 108, 0, 0, 108, 108, 0, 108, + 0, 108, 108, 108, 99, 99, 99, 99, 99, 0, 0, 0, 99, 99, 99, 0, + 0, 0, 0, 99, 34, 33, 0, 0, 105, 105, 105, 105, 105, 105, 105, 0, + 105, 0, 0, 0, 105, 105, 0, 0, 1, 1, 1, 41, 1, 41, 41, 41, + 1, 1, 41, 41, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 0, 113, 113, 113, 113, 113, 0, 0, 113, 113, 113, 113, 0, + 0, 7, 7, 7, 0, 7, 7, 0, 7, 0, 0, 7, 0, 7, 0, 7, + 0, 0, 7, 0, 7, 0, 7, 0, 7, 7, 0, 7, 1, 0, 0, 0, + 33, 1, 1, 0, 36, 36, 36, 0, 0, 1, 0, 0, }; -/* Script: 8046 bytes. */ +/* Script: 10548 bytes. 
*/ RE_UINT32 re_get_script(RE_UINT32 ch) { RE_UINT32 code; @@ -3839,18 +4143,18 @@ RE_UINT32 re_get_script(RE_UINT32 ch) { RE_UINT32 pos; RE_UINT32 value; - f = ch >> 11; - code = ch ^ (f << 11); - pos = (RE_UINT32)re_script_stage_1[f] << 4; + f = ch >> 12; + code = ch ^ (f << 12); + pos = (RE_UINT32)re_script_stage_1[f] << 5; f = code >> 7; code ^= f << 7; pos = (RE_UINT32)re_script_stage_2[pos + f] << 3; f = code >> 4; code ^= f << 4; - pos = (RE_UINT32)re_script_stage_3[pos + f] << 3; - f = code >> 1; - code ^= f << 1; - pos = (RE_UINT32)re_script_stage_4[pos + f] << 1; + pos = (RE_UINT32)re_script_stage_3[pos + f] << 2; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_script_stage_4[pos + f] << 2; value = re_script_stage_5[pos + code]; return value; @@ -3897,25 +4201,25 @@ static RE_UINT8 re_word_break_stage_2[] = { 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 84, 85, 2, 2, 86, 87, 88, 89, 90, 91, - 92, 93, 94, 95, 57, 96, 97, 98, 2, 99, 57, 57, 57, 57, 57, 57, - 100, 57, 101, 102, 103, 57, 104, 57, 105, 57, 57, 57, 57, 57, 57, 57, - 106, 107, 108, 109, 57, 57, 57, 57, 57, 57, 57, 57, 57, 110, 57, 57, + 92, 93, 94, 95, 57, 96, 97, 98, 2, 99, 100, 57, 2, 2, 101, 57, + 102, 103, 104, 105, 106, 107, 108, 109, 110, 57, 57, 57, 57, 57, 57, 57, + 111, 112, 113, 114, 115, 116, 117, 57, 57, 118, 57, 119, 120, 121, 57, 57, + 57, 122, 57, 57, 57, 123, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 2, 2, 2, 2, 2, 2, 2, 124, 125, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 2, 2, 2, 2, 2, 2, 111, 57, 112, 57, 57, 57, 57, 57, 57, 57, - 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 2, 2, 2, 2, 2, 2, 2, 2, 113, 57, 57, 57, 57, 57, 57, 57, + 2, 2, 2, 2, 2, 2, 2, 2, 126, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 
57, 57, 57, 57, 57, 57, - 2, 2, 2, 2, 114, 57, 57, 57, 57, 57, 57, 57, 57, 57, 115, 116, - 117, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 57, 57, 118, 119, 120, 57, 57, 57, 121, 122, 123, 2, 2, 124, 125, 126, + 2, 2, 2, 2, 127, 128, 129, 130, 57, 57, 57, 57, 57, 57, 131, 132, + 133, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 134, 135, 57, 57, 57, 57, 57, 57, + 57, 57, 136, 137, 138, 57, 57, 57, 139, 140, 141, 2, 2, 142, 143, 144, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 127, 128, 57, 57, - 57, 57, 57, 129, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 2, 145, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 146, 147, 57, 57, + 57, 57, 148, 149, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 130, 57, 131, 132, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 150, 57, 151, 152, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, }; @@ -3927,130 +4231,150 @@ static RE_UINT8 re_word_break_stage_3[] = { 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 21, 22, 23, 7, 7, 24, 7, 7, 7, 7, 7, 7, 7, 7, 7, 25, 7, 26, 27, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 0, 6, 7, 7, 7, 14, 28, 6, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 14, 28, 6, 7, 7, 7, 7, 29, 30, 19, 19, 19, 19, 31, 32, 0, 33, 33, 33, 34, 35, 0, 36, 37, 19, 38, 7, 7, 7, 7, 7, 39, 19, 19, 4, 40, 41, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 42, 43, 44, 45, 4, 46, 0, 47, 48, 7, 7, 7, 19, 19, 19, 49, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 50, 19, 51, 0, 4, 52, 7, 7, 7, 39, 53, 54, 7, 7, 50, 55, 56, 57, 0, 0, 7, 7, 7, 58, 0, 0, 0, 0, - 0, 0, 0, 0, 59, 17, 0, 0, 0, 0, 0, 0, 60, 19, 19, 61, - 62, 7, 7, 7, 7, 7, 7, 63, 19, 
19, 64, 7, 65, 4, 6, 6, - 66, 67, 68, 7, 7, 59, 69, 70, 71, 72, 73, 74, 65, 4, 75, 0, - 66, 76, 68, 7, 7, 59, 77, 78, 79, 80, 81, 82, 83, 4, 84, 0, - 66, 25, 24, 7, 7, 59, 85, 70, 31, 86, 87, 0, 65, 4, 0, 0, - 66, 67, 68, 7, 7, 59, 85, 70, 71, 80, 88, 74, 65, 4, 28, 0, + 0, 0, 0, 0, 7, 7, 9, 0, 0, 0, 0, 0, 59, 19, 19, 19, + 60, 7, 7, 7, 7, 7, 7, 61, 19, 19, 62, 7, 63, 4, 6, 7, + 64, 65, 66, 7, 7, 67, 68, 69, 70, 71, 72, 73, 63, 4, 74, 0, + 75, 76, 66, 7, 7, 67, 77, 78, 79, 80, 81, 82, 83, 4, 84, 0, + 75, 25, 24, 7, 7, 67, 85, 69, 31, 86, 87, 0, 63, 4, 0, 0, + 75, 65, 66, 7, 7, 67, 85, 69, 70, 80, 88, 73, 63, 4, 28, 0, 89, 90, 91, 92, 93, 90, 7, 94, 95, 96, 97, 0, 83, 4, 0, 0, - 66, 20, 59, 7, 7, 59, 98, 99, 100, 96, 101, 75, 65, 4, 0, 0, - 102, 20, 59, 7, 7, 59, 98, 70, 100, 96, 101, 103, 65, 4, 104, 0, - 102, 20, 59, 7, 7, 7, 7, 105, 100, 106, 73, 0, 65, 4, 0, 107, - 102, 7, 14, 107, 7, 7, 24, 108, 14, 109, 110, 19, 0, 0, 111, 0, - 0, 0, 0, 0, 0, 0, 112, 113, 73, 61, 4, 114, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 112, 115, 0, 116, 4, 114, 0, 0, 0, 0, - 87, 0, 0, 117, 4, 114, 118, 119, 7, 6, 7, 7, 7, 17, 30, 19, - 100, 120, 19, 30, 19, 19, 19, 121, 122, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 123, 19, 61, 4, 114, 88, 124, 125, 116, 126, 0, - 127, 31, 4, 128, 7, 7, 7, 7, 25, 129, 7, 7, 7, 7, 7, 130, + 98, 20, 67, 7, 7, 67, 7, 99, 100, 96, 101, 74, 63, 4, 0, 0, + 75, 20, 67, 7, 7, 67, 102, 69, 100, 96, 101, 103, 63, 4, 104, 0, + 75, 20, 67, 7, 7, 7, 7, 105, 100, 106, 72, 0, 63, 4, 0, 107, + 108, 7, 14, 107, 7, 7, 24, 109, 14, 110, 111, 19, 83, 4, 112, 0, + 0, 0, 0, 0, 0, 0, 113, 114, 72, 115, 4, 116, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 113, 117, 0, 118, 4, 116, 0, 0, 0, 0, + 87, 0, 0, 119, 4, 116, 120, 121, 7, 6, 7, 7, 7, 17, 30, 19, + 100, 122, 19, 30, 19, 19, 19, 123, 124, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 125, 19, 115, 4, 116, 88, 126, 127, 118, 128, 0, + 129, 31, 4, 130, 7, 7, 7, 7, 25, 131, 7, 7, 7, 7, 7, 132, 7, 7, 7, 7, 7, 7, 7, 7, 7, 91, 14, 91, 7, 7, 7, 7, 7, 91, 7, 
7, 7, 7, 91, 14, 91, 7, 14, 7, 7, 7, 7, 7, - 7, 7, 91, 7, 7, 7, 7, 7, 7, 7, 7, 131, 0, 0, 0, 0, + 7, 7, 91, 7, 7, 7, 7, 7, 7, 7, 7, 133, 0, 0, 0, 0, 7, 7, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 17, 0, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 67, 7, 7, - 6, 7, 7, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 90, 87, 0, - 7, 20, 132, 0, 7, 7, 132, 0, 7, 7, 133, 0, 7, 20, 134, 0, - 0, 0, 0, 0, 0, 0, 60, 19, 19, 19, 135, 136, 4, 114, 0, 0, - 0, 137, 4, 114, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, - 7, 7, 7, 7, 7, 138, 7, 7, 7, 7, 7, 7, 7, 7, 139, 0, - 7, 7, 7, 17, 19, 135, 19, 135, 83, 4, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 19, 19, 140, 117, 4, 114, 0, 0, 0, 0, - 7, 7, 141, 135, 0, 0, 0, 0, 0, 0, 142, 61, 19, 19, 19, 71, - 4, 114, 4, 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 143, 7, 7, 7, 7, 7, 144, 19, 143, 145, 4, 114, 0, 123, 135, 0, - 146, 7, 7, 7, 64, 147, 4, 52, 7, 7, 7, 7, 50, 19, 135, 0, - 7, 7, 7, 7, 144, 19, 19, 0, 4, 148, 4, 52, 7, 7, 7, 139, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 149, 19, 19, 150, 151, 0, - 7, 7, 7, 7, 7, 7, 7, 7, 19, 19, 19, 19, 61, 0, 0, 60, - 7, 7, 139, 139, 7, 7, 7, 7, 139, 139, 7, 152, 7, 7, 7, 139, - 7, 7, 7, 7, 7, 7, 20, 153, 154, 17, 155, 145, 7, 17, 154, 17, - 0, 156, 0, 157, 158, 159, 0, 160, 161, 0, 162, 0, 163, 164, 28, 165, - 0, 0, 7, 17, 0, 0, 0, 0, 0, 0, 19, 19, 19, 19, 140, 0, - 166, 107, 108, 167, 18, 168, 7, 169, 170, 171, 0, 0, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 65, 7, 7, + 6, 7, 7, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 90, 7, 87, + 7, 20, 134, 0, 7, 7, 134, 0, 7, 7, 135, 0, 7, 20, 136, 0, + 0, 0, 0, 0, 0, 0, 59, 19, 19, 19, 137, 138, 4, 116, 0, 0, + 0, 139, 4, 116, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, + 7, 7, 7, 7, 7, 140, 7, 7, 7, 7, 7, 7, 7, 7, 141, 0, + 7, 7, 7, 14, 19, 137, 19, 137, 83, 4, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 19, 19, 142, 119, 4, 116, 0, 0, 0, 0, + 7, 7, 143, 137, 0, 0, 0, 0, 0, 0, 144, 115, 19, 19, 19, 70, + 4, 116, 4, 116, 0, 0, 19, 115, 0, 0, 0, 0, 0, 0, 0, 0, + 145, 
7, 7, 7, 7, 7, 146, 19, 145, 147, 4, 116, 0, 125, 137, 0, + 148, 7, 7, 7, 62, 149, 4, 52, 7, 7, 7, 7, 50, 19, 137, 0, + 7, 7, 7, 7, 146, 19, 19, 0, 4, 150, 4, 52, 7, 7, 7, 141, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 151, 19, 19, 152, 153, 119, + 7, 7, 7, 7, 7, 7, 7, 7, 19, 19, 19, 19, 19, 19, 118, 59, + 7, 7, 141, 141, 7, 7, 7, 7, 141, 141, 7, 154, 7, 7, 7, 141, + 7, 7, 7, 7, 7, 7, 20, 155, 156, 17, 157, 147, 7, 17, 156, 17, + 0, 158, 0, 159, 160, 161, 0, 162, 163, 0, 164, 0, 165, 166, 28, 167, + 0, 0, 7, 17, 0, 0, 0, 0, 0, 0, 19, 19, 19, 19, 142, 0, + 168, 107, 109, 169, 18, 170, 7, 171, 172, 173, 0, 0, 7, 7, 7, 7, 7, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 172, 7, 7, 7, 7, 7, 7, 75, 0, 0, + 0, 0, 0, 0, 0, 0, 174, 7, 7, 7, 7, 7, 7, 74, 0, 0, 7, 7, 7, 7, 7, 14, 7, 7, 7, 7, 7, 14, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 17, 173, 174, 0, - 7, 7, 7, 7, 25, 129, 7, 7, 7, 7, 7, 7, 7, 165, 0, 73, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 17, 175, 176, 0, + 7, 7, 7, 7, 25, 131, 7, 7, 7, 7, 7, 7, 7, 167, 0, 72, 7, 7, 14, 0, 14, 14, 14, 14, 14, 14, 14, 14, 19, 19, 19, 19, - 0, 0, 0, 0, 0, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 129, 0, 0, 0, 0, 127, 175, 93, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 176, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 178, - 170, 7, 7, 7, 7, 139, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 14, 0, 0, 7, 7, 7, 9, 0, 0, 0, 0, 0, 0, 177, 177, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 177, 177, 177, 177, 177, 179, - 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 0, 0, 0, 0, 0, - 7, 17, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 139, - 7, 17, 7, 7, 4, 180, 0, 0, 7, 7, 7, 7, 7, 141, 149, 181, - 7, 7, 7, 73, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 117, 0, - 0, 0, 165, 7, 107, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 182, 145, 0, 7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, - 183, 184, 7, 7, 39, 0, 0, 0, 7, 7, 7, 7, 7, 7, 145, 0, - 27, 7, 7, 7, 7, 7, 144, 19, 121, 0, 4, 114, 19, 19, 27, 185, - 4, 52, 7, 7, 50, 116, 
7, 7, 141, 19, 135, 0, 7, 7, 7, 17, - 62, 7, 7, 7, 7, 7, 39, 19, 140, 165, 4, 114, 0, 0, 0, 0, - 7, 7, 7, 7, 7, 64, 61, 0, 184, 186, 4, 114, 0, 0, 0, 187, - 0, 0, 0, 0, 0, 0, 125, 188, 81, 0, 0, 0, 7, 39, 189, 0, - 190, 190, 190, 0, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 39, 191, 4, 114, - 7, 7, 7, 7, 145, 0, 7, 7, 14, 192, 7, 7, 7, 7, 7, 145, - 14, 0, 192, 193, 33, 194, 195, 196, 197, 33, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 75, 0, 0, 0, 192, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 139, 0, 0, 7, 7, 7, 7, 7, 7, - 7, 7, 107, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 7, 145, - 19, 19, 198, 0, 61, 0, 199, 0, 0, 200, 201, 0, 0, 0, 20, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 202, - 203, 3, 0, 204, 6, 7, 7, 8, 6, 7, 7, 9, 205, 177, 177, 177, - 177, 177, 177, 206, 7, 7, 7, 14, 107, 107, 107, 207, 0, 0, 0, 208, - 7, 98, 7, 7, 14, 7, 7, 209, 7, 139, 7, 139, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 167, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 131, 0, 0, 0, 0, 129, 177, 93, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 178, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 180, + 172, 7, 7, 7, 7, 141, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 14, 0, 0, 7, 7, 7, 9, 0, 0, 0, 0, 0, 0, 179, 179, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 179, 179, 179, 179, 179, 181, + 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 0, 0, 0, 0, 0, + 7, 17, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 141, + 7, 17, 7, 7, 4, 182, 0, 0, 7, 7, 7, 7, 7, 143, 151, 183, + 7, 7, 7, 184, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 119, 0, + 0, 0, 167, 7, 107, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 185, 7, 7, 7, 141, 74, 0, 0, 0, 0, 0, 0, 0, 167, 7, + 186, 187, 7, 7, 39, 0, 0, 0, 7, 7, 7, 7, 7, 7, 147, 0, + 27, 7, 7, 7, 7, 7, 146, 19, 123, 0, 4, 116, 19, 19, 27, 188, + 4, 52, 7, 7, 50, 118, 7, 7, 143, 19, 137, 0, 7, 7, 7, 17, + 60, 7, 7, 7, 7, 7, 39, 19, 142, 167, 4, 116, 138, 0, 4, 116, + 7, 7, 7, 7, 7, 62, 115, 0, 187, 189, 4, 116, 0, 0, 0, 190, + 0, 0, 0, 0, 0, 0, 127, 191, 81, 0, 0, 0, 7, 39, 192, 0, + 193, 193, 193, 0, 
14, 14, 7, 7, 7, 7, 7, 132, 194, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 39, 195, 4, 116, + 7, 7, 7, 7, 147, 0, 7, 7, 14, 196, 7, 7, 7, 7, 7, 147, + 14, 0, 196, 197, 33, 198, 199, 200, 201, 33, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 74, 0, 0, 0, 196, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 141, 0, 0, 7, 7, 7, 7, 7, 7, + 7, 7, 107, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 7, 147, + 19, 19, 202, 0, 19, 118, 203, 0, 0, 204, 205, 0, 0, 0, 20, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 206, + 207, 3, 0, 208, 6, 7, 7, 8, 6, 7, 7, 9, 209, 179, 179, 179, + 179, 179, 179, 210, 7, 7, 7, 14, 107, 107, 107, 211, 0, 0, 0, 212, + 7, 102, 7, 7, 14, 7, 7, 213, 7, 141, 7, 141, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 17, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 136, - 7, 7, 7, 17, 7, 7, 7, 7, 7, 7, 87, 0, 0, 0, 0, 0, - 7, 7, 7, 14, 0, 0, 7, 7, 7, 9, 0, 0, 0, 0, 0, 0, - 7, 7, 7, 139, 7, 7, 7, 7, 145, 7, 167, 0, 0, 0, 0, 0, - 7, 7, 7, 139, 4, 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 139, 59, 7, 7, 7, 7, 25, 210, 7, 7, 139, 0, 0, 0, 0, 0, - 7, 7, 139, 0, 7, 7, 7, 75, 0, 0, 0, 0, 0, 0, 0, 0, - 7, 7, 7, 7, 7, 7, 7, 172, 0, 0, 0, 0, 0, 0, 0, 0, - 211, 60, 98, 6, 7, 7, 145, 79, 0, 0, 0, 0, 7, 7, 7, 17, - 7, 7, 7, 7, 7, 7, 139, 0, 7, 7, 139, 0, 7, 7, 9, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 138, + 7, 7, 7, 17, 7, 7, 7, 7, 7, 7, 87, 0, 142, 0, 0, 0, + 7, 7, 7, 7, 0, 0, 7, 7, 7, 9, 7, 7, 7, 7, 50, 114, + 7, 7, 7, 141, 7, 7, 7, 7, 147, 7, 169, 0, 0, 0, 0, 0, + 7, 7, 7, 141, 4, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 0, 7, 7, 7, 7, 7, 7, 147, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 14, 0, 7, 7, 141, 0, 7, 0, 0, 0, + 141, 67, 7, 7, 7, 7, 25, 214, 7, 7, 141, 0, 7, 7, 14, 0, + 7, 7, 7, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 7, 7, 141, 0, 7, 7, 7, 74, 0, 0, 0, 0, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 7, 174, 0, 0, 0, 0, 0, 0, 0, 0, + 215, 59, 102, 6, 7, 7, 147, 79, 0, 0, 0, 0, 7, 7, 7, 17, + 7, 7, 7, 17, 0, 0, 0, 0, 7, 
6, 7, 7, 216, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 141, 0, 7, 7, 141, 0, 7, 7, 9, 0, + 7, 7, 74, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 87, 0, 0, 0, 0, 0, 0, - 146, 7, 7, 7, 7, 7, 7, 19, 61, 0, 0, 0, 83, 4, 0, 0, - 146, 7, 7, 7, 7, 7, 19, 212, 0, 0, 7, 7, 7, 87, 4, 114, - 146, 7, 7, 7, 141, 19, 213, 4, 0, 0, 0, 0, 0, 0, 0, 0, - 146, 7, 7, 7, 7, 7, 39, 19, 214, 0, 4, 114, 0, 0, 0, 0, - 7, 7, 7, 7, 7, 39, 19, 0, 4, 114, 0, 0, 0, 0, 0, 0, + 148, 7, 7, 7, 7, 7, 7, 19, 115, 0, 0, 0, 83, 4, 0, 72, + 148, 7, 7, 7, 7, 7, 19, 217, 0, 0, 7, 7, 7, 87, 4, 116, + 148, 7, 7, 7, 143, 19, 218, 4, 0, 0, 7, 7, 7, 7, 219, 0, + 148, 7, 7, 7, 7, 7, 39, 19, 220, 0, 4, 221, 0, 0, 0, 0, + 7, 7, 24, 7, 7, 146, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 143, 19, 114, 4, 116, + 75, 65, 66, 7, 7, 67, 85, 69, 70, 80, 72, 172, 222, 123, 123, 0, + 7, 7, 7, 7, 7, 7, 19, 19, 223, 0, 4, 116, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 143, 118, 19, 142, 0, 0, 0, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 19, 19, 224, 0, 4, 116, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 39, 19, 0, 4, 116, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, 4, 116, 0, 167, + 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, 87, + 7, 7, 7, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 14, 0, 0, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, 0, 0, 0, 7, 7, 7, 7, 7, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 7, 7, 7, 7, 7, 7, 7, 87, 0, 0, 0, 0, 0, 0, 0, 0, - 7, 7, 7, 7, 7, 7, 7, 7, 17, 0, 64, 19, 19, 19, 19, 61, - 0, 73, 146, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 215, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 142, 216, 217, 218, - 219, 135, 0, 0, 0, 220, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 221, 0, 0, 0, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 7, 87, 7, 7, 7, 14, 4, 116, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 141, 123, 0, + 7, 7, 7, 7, 7, 7, 115, 0, 147, 0, 4, 116, 196, 7, 7, 172, + 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 7, 
7, 7, 7, 7, 7, 7, 7, 17, 0, 62, 19, 19, 19, 19, 115, + 0, 72, 148, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 225, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, 7, 17, + 7, 87, 7, 226, 227, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 144, 228, 229, 230, + 231, 137, 0, 0, 0, 232, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 233, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 20, 7, 7, 7, 7, 7, - 7, 7, 7, 20, 222, 223, 7, 224, 98, 7, 7, 7, 7, 7, 7, 7, - 25, 225, 20, 20, 7, 7, 7, 226, 153, 107, 59, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 139, 7, 7, 7, 59, 7, 7, 130, 7, 7, 7, 130, + 7, 7, 7, 20, 234, 235, 7, 236, 102, 7, 7, 7, 7, 7, 7, 7, + 25, 237, 20, 20, 7, 7, 7, 238, 155, 107, 67, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 141, 7, 7, 7, 67, 7, 7, 132, 7, 7, 7, 132, 7, 7, 20, 7, 7, 7, 20, 7, 7, 14, 7, 7, 7, 14, 7, 7, - 7, 59, 7, 7, 7, 59, 7, 7, 130, 227, 4, 4, 4, 4, 4, 4, - 98, 7, 7, 7, 228, 6, 130, 229, 166, 230, 228, 152, 228, 130, 130, 82, - 7, 24, 7, 145, 231, 24, 7, 145, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 232, 233, 233, 233, - 234, 0, 0, 0, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, + 7, 67, 7, 7, 7, 67, 7, 7, 132, 239, 4, 4, 4, 4, 4, 4, + 7, 7, 7, 7, 7, 7, 7, 7, 17, 0, 115, 0, 0, 0, 0, 0, + 102, 7, 7, 7, 240, 6, 132, 241, 168, 242, 240, 154, 240, 132, 132, 82, + 7, 24, 7, 147, 243, 24, 7, 147, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 7, 7, 7, 74, 7, 7, 7, 74, 7, 7, + 7, 74, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 244, 245, 245, 245, + 246, 0, 0, 0, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, 0, }; @@ -4060,32 +4384,33 @@ static RE_UINT8 re_word_break_stage_4[] = { 10, 11, 10, 0, 0, 12, 13, 14, 0, 15, 13, 0, 9, 10, 16, 17, 16, 18, 9, 19, 0, 20, 21, 21, 9, 22, 17, 23, 0, 24, 10, 22, 25, 9, 9, 25, 26, 21, 27, 9, 28, 0, 29, 0, 30, 21, 
21, 31, - 32, 31, 33, 33, 34, 0, 35, 36, 37, 38, 0, 39, 40, 38, 41, 21, - 42, 43, 44, 9, 9, 45, 21, 46, 21, 47, 48, 27, 49, 50, 0, 51, - 52, 9, 40, 8, 9, 53, 54, 0, 49, 9, 21, 16, 55, 0, 56, 21, - 21, 57, 57, 58, 57, 0, 22, 9, 0, 21, 21, 40, 21, 9, 53, 59, - 57, 21, 53, 60, 30, 8, 9, 50, 50, 9, 20, 17, 16, 59, 21, 61, - 61, 62, 0, 63, 0, 25, 16, 0, 10, 64, 22, 65, 16, 48, 40, 63, - 61, 58, 66, 0, 8, 20, 0, 60, 27, 67, 22, 8, 31, 58, 19, 0, - 0, 68, 69, 8, 10, 17, 22, 16, 65, 22, 64, 19, 16, 68, 40, 68, - 48, 58, 19, 63, 9, 8, 16, 45, 21, 48, 0, 32, 68, 8, 0, 13, - 65, 0, 10, 45, 48, 62, 17, 9, 9, 28, 70, 63, 21, 71, 68, 0, - 66, 21, 40, 0, 72, 0, 31, 73, 21, 58, 58, 0, 0, 74, 66, 68, - 9, 57, 21, 73, 0, 70, 63, 21, 58, 68, 48, 61, 30, 73, 68, 21, - 75, 58, 0, 28, 10, 9, 10, 30, 53, 73, 53, 0, 76, 0, 21, 0, - 0, 66, 63, 77, 78, 0, 9, 16, 73, 0, 9, 41, 0, 30, 21, 44, - 9, 21, 9, 0, 79, 9, 21, 27, 72, 8, 40, 21, 44, 52, 53, 80, - 81, 81, 9, 20, 17, 22, 9, 17, 0, 82, 83, 0, 0, 84, 85, 86, - 0, 11, 87, 88, 0, 87, 37, 89, 37, 37, 0, 64, 13, 64, 8, 16, - 22, 25, 16, 9, 0, 8, 16, 13, 0, 17, 64, 41, 27, 0, 90, 91, - 92, 93, 94, 94, 95, 94, 94, 95, 49, 0, 21, 96, 50, 10, 97, 97, - 41, 9, 64, 0, 9, 58, 63, 0, 73, 68, 17, 98, 8, 10, 40, 58, - 64, 9, 0, 99, 100, 33, 33, 34, 33, 101, 102, 100, 103, 88, 11, 87, - 0, 104, 5, 105, 9, 106, 0, 107, 108, 0, 0, 109, 94, 110, 17, 19, - 111, 0, 10, 25, 19, 50, 57, 32, 40, 14, 21, 112, 44, 19, 93, 0, - 58, 30, 113, 37, 114, 21, 40, 30, 68, 58, 68, 73, 13, 65, 8, 22, - 25, 8, 10, 8, 25, 10, 9, 60, 65, 50, 81, 0, 81, 8, 8, 8, - 0, 115, 116, 116, 14, 0, + 32, 31, 33, 33, 34, 0, 35, 36, 37, 38, 0, 39, 40, 41, 42, 21, + 43, 44, 45, 9, 9, 46, 21, 47, 21, 48, 49, 27, 50, 51, 0, 52, + 53, 9, 40, 8, 9, 54, 55, 0, 50, 9, 21, 16, 56, 0, 57, 21, + 21, 58, 58, 59, 58, 0, 0, 21, 21, 9, 54, 60, 58, 21, 54, 61, + 58, 8, 9, 51, 51, 9, 22, 9, 20, 17, 16, 60, 21, 62, 62, 63, + 0, 64, 0, 25, 16, 0, 30, 8, 10, 65, 22, 66, 16, 49, 40, 64, + 62, 59, 67, 0, 8, 
20, 0, 61, 27, 68, 22, 8, 31, 59, 19, 0, + 0, 69, 70, 8, 10, 17, 22, 16, 66, 22, 65, 19, 16, 69, 40, 69, + 49, 59, 19, 64, 21, 8, 16, 46, 21, 49, 0, 32, 9, 8, 0, 13, + 66, 0, 10, 46, 49, 63, 17, 9, 69, 8, 9, 28, 71, 64, 21, 72, + 69, 0, 67, 21, 40, 0, 21, 40, 73, 0, 31, 74, 21, 59, 59, 0, + 0, 75, 67, 69, 9, 58, 21, 74, 0, 71, 64, 21, 59, 69, 49, 62, + 30, 74, 69, 21, 76, 59, 0, 28, 10, 9, 10, 30, 54, 74, 54, 0, + 77, 0, 21, 0, 0, 67, 64, 78, 79, 0, 9, 16, 74, 0, 9, 42, + 0, 30, 21, 45, 9, 21, 9, 0, 80, 9, 21, 27, 73, 8, 40, 21, + 45, 53, 54, 81, 82, 82, 9, 20, 17, 22, 9, 17, 0, 83, 84, 0, + 0, 85, 86, 87, 0, 11, 88, 89, 0, 88, 37, 90, 37, 37, 0, 65, + 13, 65, 8, 16, 22, 25, 16, 9, 0, 8, 16, 13, 0, 17, 65, 42, + 27, 0, 91, 92, 93, 94, 95, 95, 96, 95, 95, 96, 50, 0, 21, 97, + 9, 26, 51, 10, 98, 98, 42, 9, 65, 0, 9, 59, 64, 59, 74, 69, + 17, 99, 8, 10, 0, 16, 40, 59, 65, 9, 0, 100, 101, 33, 33, 34, + 33, 102, 103, 101, 104, 89, 11, 88, 0, 105, 5, 106, 9, 107, 0, 108, + 109, 0, 0, 110, 95, 111, 17, 19, 112, 0, 10, 25, 19, 51, 58, 32, + 9, 99, 40, 14, 21, 113, 42, 13, 45, 19, 114, 0, 54, 69, 21, 25, + 74, 19, 94, 0, 16, 32, 37, 0, 59, 30, 115, 37, 116, 21, 40, 30, + 69, 59, 69, 74, 13, 66, 8, 22, 25, 8, 10, 8, 25, 10, 9, 61, + 66, 51, 82, 0, 82, 8, 8, 8, 0, 117, 118, 118, 14, 0, }; static RE_UINT8 re_word_break_stage_5[] = { @@ -4094,34 +4419,34 @@ static RE_UINT8 re_word_break_stage_5[] = { 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, 0, 0, 0, 16, 0, 6, 0, 0, 0, 0, 11, 0, 0, 9, 0, 0, 0, 11, 0, 12, 11, 11, 0, 0, 0, 0, 11, 11, 0, 0, 0, 12, 11, 0, 0, 0, - 11, 0, 11, 0, 7, 7, 7, 7, 11, 0, 11, 11, 11, 11, 13, 0, + 11, 0, 11, 0, 7, 7, 7, 7, 11, 0, 11, 11, 11, 11, 13, 11, 0, 0, 11, 12, 11, 11, 0, 11, 11, 11, 0, 7, 7, 7, 11, 11, 0, 11, 0, 0, 0, 13, 0, 0, 0, 7, 7, 7, 7, 7, 0, 7, 0, 7, 7, 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 11, - 12, 0, 0, 0, 9, 9, 9, 9, 9, 0, 0, 0, 13, 13, 0, 0, - 7, 7, 7, 0, 11, 11, 11, 7, 15, 15, 0, 15, 13, 0, 11, 11, - 7, 11, 11, 11, 0, 11, 7, 7, 7, 9, 0, 
7, 7, 11, 11, 7, - 7, 0, 7, 7, 15, 15, 11, 11, 11, 0, 0, 11, 0, 0, 0, 9, - 11, 7, 11, 11, 11, 11, 7, 7, 7, 11, 0, 0, 13, 0, 11, 0, - 7, 7, 11, 7, 11, 7, 7, 7, 7, 7, 0, 0, 7, 11, 7, 7, - 0, 0, 15, 15, 7, 0, 0, 7, 7, 7, 11, 0, 0, 0, 0, 7, - 0, 0, 0, 11, 0, 11, 11, 0, 0, 7, 0, 0, 11, 7, 0, 0, - 0, 0, 7, 7, 0, 0, 7, 11, 0, 0, 7, 0, 7, 0, 7, 0, - 15, 15, 0, 0, 7, 0, 0, 0, 0, 7, 0, 7, 15, 15, 7, 7, - 11, 0, 7, 7, 7, 7, 9, 0, 11, 7, 11, 0, 7, 7, 7, 11, - 7, 11, 11, 0, 0, 11, 0, 11, 7, 7, 9, 9, 14, 14, 0, 0, - 14, 0, 0, 12, 6, 6, 9, 9, 9, 9, 9, 0, 16, 0, 0, 0, - 13, 0, 0, 0, 9, 0, 9, 9, 0, 10, 10, 10, 10, 10, 0, 0, - 0, 7, 7, 10, 10, 0, 0, 0, 10, 10, 10, 10, 10, 10, 10, 0, - 7, 7, 0, 11, 11, 11, 7, 11, 11, 7, 7, 0, 0, 3, 7, 3, - 3, 0, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, 13, 0, 0, 12, - 0, 16, 16, 16, 13, 12, 0, 0, 11, 0, 0, 9, 0, 0, 0, 14, - 0, 0, 12, 13, 0, 0, 10, 10, 10, 10, 7, 7, 0, 9, 9, 9, - 7, 0, 15, 15, 7, 7, 7, 9, 9, 9, 9, 7, 0, 0, 8, 8, - 8, 8, 8, 8, + 12, 0, 0, 0, 9, 9, 9, 9, 9, 9, 0, 0, 13, 13, 0, 0, + 7, 7, 7, 0, 9, 0, 0, 0, 11, 11, 11, 7, 15, 15, 0, 15, + 13, 0, 11, 11, 7, 11, 11, 11, 0, 11, 7, 7, 7, 9, 0, 7, + 7, 11, 11, 7, 7, 0, 7, 7, 15, 15, 11, 11, 11, 0, 0, 11, + 0, 0, 0, 9, 11, 7, 11, 11, 11, 11, 7, 7, 7, 11, 0, 0, + 13, 0, 11, 0, 7, 7, 11, 7, 11, 7, 7, 7, 7, 7, 0, 0, + 7, 11, 7, 7, 0, 0, 15, 15, 7, 0, 0, 7, 7, 7, 11, 0, + 0, 0, 0, 7, 0, 0, 0, 11, 0, 11, 11, 0, 0, 7, 0, 0, + 11, 7, 0, 0, 0, 0, 7, 7, 0, 0, 7, 11, 0, 0, 7, 0, + 7, 0, 7, 0, 15, 15, 0, 0, 7, 0, 0, 0, 0, 7, 0, 7, + 15, 15, 7, 7, 11, 0, 7, 7, 7, 7, 9, 0, 11, 7, 11, 0, + 7, 7, 7, 11, 7, 11, 11, 0, 0, 11, 0, 11, 7, 7, 9, 9, + 14, 14, 0, 0, 14, 0, 0, 12, 6, 6, 9, 9, 9, 9, 9, 0, + 16, 0, 0, 0, 13, 0, 0, 0, 9, 0, 9, 9, 0, 10, 10, 10, + 10, 10, 0, 0, 0, 7, 7, 10, 10, 0, 0, 0, 10, 10, 10, 10, + 10, 10, 10, 0, 7, 7, 0, 11, 11, 11, 7, 11, 11, 7, 7, 0, + 0, 3, 7, 3, 3, 0, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, + 13, 0, 0, 12, 0, 16, 16, 16, 13, 12, 0, 0, 11, 0, 0, 9, + 0, 0, 0, 14, 0, 0, 12, 13, 0, 0, 10, 10, 10, 10, 
7, 7, + 0, 9, 9, 9, 7, 0, 15, 15, 15, 15, 11, 0, 7, 7, 7, 9, + 9, 9, 9, 7, 0, 0, 8, 8, 8, 8, 8, 8, }; -/* Word_Break: 3946 bytes. */ +/* Word_Break: 4298 bytes. */ RE_UINT32 re_get_word_break(RE_UINT32 ch) { RE_UINT32 code; @@ -4149,14 +4474,14 @@ RE_UINT32 re_get_word_break(RE_UINT32 ch) { /* Grapheme_Cluster_Break. */ static RE_UINT8 re_grapheme_cluster_break_stage_1[] = { - 0, 1, 2, 2, 2, 3, 4, 5, 6, 2, 2, 7, 2, 2, 8, 9, + 0, 1, 2, 2, 2, 3, 4, 5, 6, 2, 2, 7, 2, 8, 9, 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 11, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, }; @@ -4173,15 +4498,17 @@ static RE_UINT8 re_grapheme_cluster_break_stage_2[] = { 35, 36, 37, 38, 39, 40, 34, 41, 42, 42, 42, 42, 42, 42, 42, 42, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 43, 1, 1, 44, 45, - 1, 46, 1, 1, 1, 1, 1, 1, 1, 1, 47, 1, 1, 1, 1, 1, - 48, 49, 1, 1, 1, 1, 50, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 51, + 1, 46, 47, 48, 1, 1, 1, 1, 1, 1, 49, 1, 1, 1, 1, 1, + 50, 51, 52, 53, 54, 55, 56, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 57, 58, 1, 1, 1, 59, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 52, 53, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 54, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 42, 55, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 1, 61, 62, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 63, 1, 1, 1, 1, 1, 1, 1, + 1, 64, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 42, 65, 42, 42, 42, 
42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }; @@ -4193,124 +4520,138 @@ static RE_UINT8 re_grapheme_cluster_break_stage_3[] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, 5, 8, 9, 2, 2, 2, 10, 11, 2, 2, 12, 5, 2, 13, 2, 2, 2, 2, 2, 14, 15, 2, 3, 16, 2, 5, 17, 2, 2, 2, 2, 2, 18, 13, 2, 2, 12, 19, - 2, 20, 21, 2, 2, 22, 2, 2, 2, 2, 2, 2, 2, 2, 23, 24, - 25, 2, 2, 26, 27, 28, 29, 2, 30, 2, 2, 31, 32, 33, 29, 2, - 34, 2, 2, 35, 36, 16, 2, 37, 34, 2, 2, 35, 38, 2, 29, 2, - 30, 2, 2, 39, 32, 40, 29, 2, 41, 2, 2, 42, 43, 33, 2, 2, - 44, 2, 2, 45, 46, 47, 29, 2, 48, 2, 2, 49, 50, 47, 29, 2, - 48, 2, 2, 42, 51, 33, 29, 2, 48, 2, 2, 2, 52, 53, 2, 48, - 2, 2, 2, 54, 55, 2, 2, 2, 2, 2, 2, 56, 57, 2, 2, 2, - 2, 58, 2, 59, 2, 2, 2, 60, 61, 62, 5, 63, 64, 2, 2, 2, - 2, 2, 65, 66, 2, 67, 13, 68, 69, 70, 2, 2, 2, 2, 2, 2, - 71, 71, 71, 71, 71, 71, 72, 72, 72, 72, 73, 74, 74, 74, 74, 74, - 2, 2, 2, 2, 2, 65, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 75, 2, 75, 2, 29, 2, 29, 2, 2, 2, 76, 77, 78, 2, 2, - 79, 2, 2, 2, 2, 2, 2, 2, 2, 2, 80, 2, 2, 2, 2, 2, - 2, 2, 81, 82, 2, 2, 2, 2, 2, 2, 2, 83, 2, 2, 2, 2, - 2, 84, 2, 2, 2, 85, 86, 87, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 20, 21, 2, 2, 22, 2, 2, 2, 2, 2, 2, 2, 2, 23, 5, + 24, 2, 2, 25, 26, 27, 28, 2, 29, 2, 2, 30, 31, 32, 28, 2, + 33, 2, 2, 34, 35, 16, 2, 36, 33, 2, 2, 34, 37, 2, 28, 2, + 29, 2, 2, 38, 31, 39, 28, 2, 40, 2, 2, 41, 42, 32, 2, 2, + 43, 2, 2, 44, 45, 46, 28, 2, 29, 2, 2, 47, 48, 46, 28, 2, + 29, 2, 2, 41, 49, 32, 28, 2, 50, 2, 2, 2, 51, 52, 2, 50, + 2, 2, 2, 53, 54, 2, 2, 2, 2, 2, 2, 55, 56, 2, 2, 2, + 2, 57, 2, 58, 2, 2, 2, 59, 60, 61, 5, 62, 63, 2, 2, 2, + 2, 2, 64, 65, 2, 66, 13, 67, 68, 69, 2, 2, 2, 2, 2, 2, + 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 72, 73, 73, 73, 73, 73, + 2, 2, 2, 2, 2, 64, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 74, 2, 74, 2, 28, 2, 28, 2, 2, 2, 75, 76, 77, 2, 2, + 78, 2, 2, 2, 2, 2, 2, 2, 2, 2, 79, 2, 2, 2, 2, 2, + 2, 2, 80, 81, 2, 2, 2, 2, 2, 2, 2, 82, 2, 2, 2, 2, + 2, 83, 2, 2, 
2, 84, 85, 86, 2, 2, 2, 87, 2, 2, 2, 2, 88, 2, 2, 89, 90, 2, 12, 19, 91, 2, 92, 2, 2, 2, 93, 94, 2, 2, 95, 96, 2, 2, 2, 2, 2, 2, 2, 2, 2, 97, 98, 99, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 100, 101, - 102, 2, 103, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 5, 5, 13, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 104, 105, - 2, 2, 2, 2, 2, 2, 2, 104, 2, 2, 2, 2, 2, 2, 5, 5, - 2, 2, 106, 2, 2, 2, 2, 2, 2, 107, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 104, 108, 2, 104, 2, 2, 2, 2, 2, 105, - 109, 2, 110, 2, 2, 2, 2, 2, 111, 2, 2, 112, 113, 2, 5, 105, - 2, 2, 114, 2, 115, 94, 71, 116, 25, 2, 2, 117, 118, 2, 2, 2, - 2, 2, 119, 120, 121, 2, 2, 2, 2, 2, 2, 122, 16, 2, 123, 124, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 125, 2, - 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, - 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, - 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, - 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, - 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, - 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, - 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, - 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 131, 72, 132, 74, 74, 133, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 100, + 101, 2, 102, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 5, 5, 13, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 103, 104, + 2, 2, 2, 2, 2, 2, 2, 103, 2, 2, 2, 2, 2, 2, 5, 5, + 2, 2, 105, 2, 2, 2, 2, 2, 2, 106, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 103, 107, 2, 103, 2, 2, 2, 2, 2, 104, + 108, 2, 109, 2, 2, 2, 2, 2, 110, 2, 2, 111, 112, 2, 5, 104, + 2, 2, 113, 2, 114, 94, 70, 115, 24, 2, 2, 116, 117, 2, 118, 2, + 2, 2, 119, 120, 121, 2, 2, 122, 2, 2, 2, 123, 16, 2, 124, 125, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 126, 2, + 127, 128, 129, 130, 129, 131, 129, 127, 128, 129, 130, 
129, 131, 129, 127, 128, + 129, 130, 129, 131, 129, 127, 128, 129, 130, 129, 131, 129, 127, 128, 129, 130, + 129, 131, 129, 127, 128, 129, 130, 129, 131, 129, 127, 128, 129, 130, 129, 131, + 129, 127, 128, 129, 130, 129, 131, 129, 127, 128, 129, 130, 129, 131, 129, 127, + 128, 129, 130, 129, 131, 129, 127, 128, 129, 130, 129, 131, 129, 127, 128, 129, + 130, 129, 131, 129, 127, 128, 129, 130, 129, 131, 129, 127, 128, 129, 130, 129, + 131, 129, 127, 128, 129, 130, 129, 131, 129, 127, 128, 129, 130, 129, 131, 129, + 129, 130, 129, 131, 129, 127, 128, 129, 130, 129, 132, 71, 133, 73, 73, 134, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 134, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 5, 2, 100, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 45, 2, 2, 2, 2, 2, 135, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 70, - 136, 2, 2, 137, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 138, 2, 2, 139, 100, 2, 2, 2, 91, 2, 2, 140, 2, 2, 2, 2, - 141, 2, 142, 143, 2, 2, 2, 2, 91, 2, 2, 144, 118, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 145, 146, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 147, 148, 149, 104, 141, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 150, 151, 152, 2, 153, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 75, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 154, 155, + 2, 135, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 5, 2, 136, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 44, 2, 2, 2, 2, 2, 137, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 69, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 13, 2, + 2, 2, 2, 2, 2, 2, 2, 138, 2, 2, 2, 2, 2, 2, 2, 2, + 139, 2, 2, 140, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 46, 2, + 141, 2, 2, 142, 143, 2, 2, 103, 91, 2, 2, 144, 2, 2, 2, 2, + 145, 2, 146, 147, 2, 2, 2, 148, 91, 2, 2, 149, 117, 2, 2, 2, + 2, 2, 150, 151, 2, 2, 2, 2, 2, 2, 2, 2, 2, 103, 152, 2, + 29, 2, 2, 30, 153, 32, 154, 147, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 155, 156, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
103, 157, 13, 2, 2, 2, + 2, 2, 2, 158, 13, 2, 2, 2, 2, 2, 159, 160, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 147, + 2, 2, 2, 143, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 161, 162, 163, 103, 145, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 164, 165, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 166, 167, 168, 2, 169, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 74, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 143, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 170, 171, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, }; static RE_UINT8 re_grapheme_cluster_break_stage_4[] = { 0, 0, 1, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 5, 6, 6, 6, 6, 7, 6, 8, 3, 9, 6, 6, 6, - 6, 6, 6, 10, 11, 10, 3, 3, 0, 12, 3, 3, 6, 6, 13, 12, - 3, 3, 7, 6, 14, 3, 3, 3, 3, 15, 6, 16, 6, 17, 18, 8, - 19, 3, 3, 3, 6, 6, 13, 3, 3, 15, 6, 6, 6, 3, 3, 3, - 3, 15, 10, 6, 6, 9, 9, 8, 3, 3, 9, 3, 3, 6, 6, 6, - 6, 6, 6, 13, 20, 3, 3, 3, 3, 3, 21, 22, 23, 6, 24, 25, - 9, 6, 3, 3, 15, 3, 3, 3, 26, 3, 3, 3, 3, 3, 3, 27, - 23, 28, 29, 30, 3, 7, 3, 3, 31, 3, 3, 3, 3, 3, 3, 22, - 32, 7, 17, 8, 8, 19, 3, 3, 23, 10, 33, 30, 3, 3, 3, 18, - 3, 15, 3, 3, 34, 3, 3, 3, 3, 3, 3, 21, 35, 36, 37, 30, - 38, 3, 3, 3, 3, 3, 3, 15, 24, 39, 18, 8, 3, 11, 3, 3, - 36, 3, 3, 3, 3, 3, 3, 40, 41, 42, 37, 8, 23, 22, 37, 30, - 3, 3, 34, 7, 43, 44, 45, 46, 47, 6, 13, 3, 3, 7, 6, 13, - 47, 6, 10, 14, 3, 3, 6, 8, 3, 3, 8, 3, 3, 48, 19, 36, - 9, 6, 6, 20, 6, 18, 3, 9, 6, 6, 9, 6, 6, 6, 6, 14, - 3, 34, 3, 3, 3, 3, 3, 9, 49, 6, 31, 32, 3, 36, 8, 15, - 9, 14, 3, 3, 34, 32, 3, 19, 3, 3, 3, 19, 50, 50, 50, 50, - 51, 51, 51, 51, 51, 51, 52, 52, 52, 52, 52, 52, 15, 14, 3, 3, - 3, 53, 6, 54, 45, 41, 23, 6, 6, 3, 3, 19, 3, 3, 7, 55, - 3, 3, 19, 3, 20, 46, 24, 3, 41, 45, 23, 3, 3, 38, 56, 3, - 3, 7, 57, 3, 3, 58, 6, 13, 44, 9, 6, 24, 46, 6, 6, 17, - 6, 59, 3, 3, 3, 49, 20, 24, 41, 59, 3, 3, 60, 3, 3, 3, - 61, 54, 53, 62, 3, 21, 54, 63, 54, 3, 3, 3, 3, 45, 45, 6, - 6, 43, 3, 3, 13, 6, 6, 6, 49, 6, 
14, 19, 36, 14, 3, 3, - 6, 13, 3, 3, 3, 3, 3, 6, 3, 3, 4, 64, 3, 3, 0, 65, - 3, 3, 3, 7, 8, 3, 3, 3, 3, 3, 15, 6, 3, 3, 11, 3, - 13, 6, 6, 8, 34, 34, 7, 3, 66, 67, 3, 3, 62, 3, 3, 3, - 3, 45, 45, 45, 45, 14, 3, 3, 3, 15, 6, 8, 3, 7, 6, 6, - 50, 50, 50, 68, 7, 43, 54, 24, 59, 3, 3, 3, 3, 3, 9, 20, - 67, 32, 3, 3, 7, 3, 3, 69, 18, 17, 14, 15, 3, 3, 66, 54, - 3, 70, 3, 3, 66, 25, 35, 30, 71, 72, 72, 72, 72, 72, 72, 71, - 72, 72, 72, 72, 72, 72, 71, 72, 72, 71, 72, 72, 72, 3, 3, 3, - 51, 73, 74, 52, 52, 52, 52, 3, 3, 3, 3, 34, 0, 0, 0, 3, - 9, 11, 3, 6, 3, 3, 13, 7, 75, 3, 3, 3, 3, 3, 6, 6, - 46, 20, 32, 5, 13, 3, 3, 3, 3, 7, 6, 23, 6, 14, 3, 3, - 66, 43, 6, 20, 3, 3, 7, 25, 6, 53, 3, 3, 38, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 45, 76, 3, 77, 8, 61, 78, 0, 79, 6, - 13, 9, 6, 3, 3, 3, 15, 8, 3, 80, 81, 81, 81, 81, 81, 81, + 6, 6, 6, 10, 11, 10, 3, 3, 0, 12, 3, 3, 6, 6, 13, 14, + 3, 3, 7, 6, 15, 3, 3, 3, 3, 16, 6, 17, 6, 18, 19, 8, + 20, 3, 3, 3, 6, 6, 13, 3, 3, 16, 6, 6, 6, 3, 3, 3, + 3, 16, 10, 6, 6, 9, 9, 8, 3, 3, 9, 3, 3, 6, 6, 6, + 21, 3, 3, 3, 3, 3, 22, 23, 24, 6, 25, 26, 9, 6, 3, 3, + 16, 3, 3, 3, 27, 3, 3, 3, 3, 3, 3, 28, 24, 29, 30, 31, + 3, 7, 3, 3, 32, 3, 3, 3, 3, 3, 3, 23, 33, 7, 18, 8, + 8, 20, 3, 3, 24, 10, 34, 31, 3, 3, 3, 19, 3, 16, 3, 3, + 35, 3, 3, 3, 3, 3, 3, 22, 36, 37, 38, 31, 25, 3, 3, 3, + 3, 3, 3, 16, 25, 39, 19, 8, 3, 11, 3, 3, 3, 3, 3, 40, + 41, 42, 38, 8, 24, 23, 38, 31, 37, 3, 3, 3, 3, 3, 35, 7, + 43, 44, 45, 46, 47, 6, 13, 3, 3, 7, 6, 13, 47, 6, 10, 15, + 3, 3, 6, 8, 3, 3, 8, 3, 3, 48, 20, 37, 9, 6, 6, 21, + 6, 19, 3, 9, 6, 6, 9, 6, 6, 6, 6, 15, 3, 35, 3, 3, + 3, 3, 3, 9, 49, 6, 32, 33, 3, 37, 8, 16, 9, 15, 3, 3, + 35, 33, 3, 20, 3, 3, 3, 20, 50, 50, 50, 50, 51, 51, 51, 51, + 51, 51, 52, 52, 52, 52, 52, 52, 16, 15, 3, 3, 3, 53, 6, 54, + 45, 41, 24, 6, 6, 3, 3, 20, 3, 3, 7, 55, 3, 3, 20, 3, + 21, 46, 25, 3, 41, 45, 24, 3, 3, 56, 57, 3, 3, 7, 58, 3, + 3, 59, 6, 13, 44, 9, 6, 25, 46, 6, 6, 18, 6, 6, 6, 13, + 6, 60, 3, 3, 3, 49, 21, 25, 41, 60, 
3, 3, 61, 3, 3, 3, + 62, 54, 53, 8, 3, 22, 54, 63, 54, 3, 3, 3, 3, 45, 45, 6, + 6, 43, 3, 3, 13, 6, 6, 6, 49, 6, 15, 20, 37, 15, 8, 3, + 6, 8, 3, 6, 3, 3, 4, 64, 3, 3, 0, 65, 3, 3, 3, 7, + 8, 3, 3, 3, 3, 3, 16, 6, 3, 3, 11, 3, 13, 6, 6, 8, + 35, 35, 7, 3, 66, 67, 3, 3, 68, 3, 3, 3, 3, 45, 45, 45, + 45, 15, 3, 3, 3, 16, 6, 8, 3, 7, 6, 6, 50, 50, 50, 69, + 7, 43, 54, 25, 60, 3, 3, 3, 3, 20, 3, 3, 3, 3, 9, 21, + 67, 33, 3, 3, 7, 3, 3, 70, 3, 3, 3, 15, 19, 18, 15, 16, + 3, 3, 66, 54, 3, 71, 3, 3, 66, 26, 36, 31, 72, 73, 73, 73, + 73, 73, 73, 72, 73, 73, 73, 73, 73, 73, 72, 73, 73, 72, 73, 73, + 73, 3, 3, 3, 51, 74, 75, 52, 52, 52, 52, 3, 3, 3, 3, 35, + 6, 6, 6, 8, 0, 0, 0, 3, 3, 16, 13, 3, 9, 11, 3, 6, + 3, 3, 13, 7, 76, 3, 3, 3, 3, 3, 6, 6, 6, 13, 3, 3, + 46, 21, 33, 5, 13, 3, 3, 3, 3, 7, 6, 24, 6, 15, 3, 3, + 7, 3, 3, 3, 66, 43, 6, 21, 3, 3, 3, 46, 54, 49, 3, 3, + 46, 6, 13, 3, 25, 30, 30, 68, 37, 16, 6, 15, 58, 6, 77, 63, + 49, 3, 3, 3, 43, 8, 45, 53, 46, 6, 21, 63, 3, 3, 7, 26, + 6, 53, 3, 3, 56, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 78, + 3, 3, 3, 11, 0, 3, 3, 3, 3, 79, 8, 62, 80, 0, 81, 6, + 13, 9, 6, 3, 3, 3, 16, 8, 3, 82, 83, 83, 83, 83, 83, 83, }; static RE_UINT8 re_grapheme_cluster_break_stage_5[] = { 3, 3, 3, 3, 3, 3, 2, 3, 3, 1, 3, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 4, 4, 4, 4, 0, 0, 0, 4, 4, 4, 0, 0, 0, 4, 4, 4, 4, 4, 0, 4, 0, 4, 4, 0, - 3, 0, 0, 0, 4, 4, 4, 0, 4, 0, 0, 0, 0, 0, 4, 4, - 4, 3, 0, 4, 4, 0, 0, 4, 4, 0, 4, 4, 0, 4, 0, 0, - 4, 4, 4, 6, 0, 0, 4, 6, 4, 0, 6, 6, 6, 4, 4, 4, - 4, 6, 6, 6, 6, 4, 6, 6, 0, 4, 6, 6, 4, 0, 4, 6, - 4, 0, 0, 6, 6, 0, 0, 6, 6, 4, 0, 0, 0, 4, 4, 6, - 6, 4, 4, 0, 4, 6, 0, 6, 0, 0, 4, 0, 4, 6, 6, 0, - 0, 0, 6, 6, 6, 0, 6, 6, 0, 6, 6, 6, 6, 0, 4, 4, + 3, 3, 0, 0, 4, 4, 4, 0, 3, 0, 0, 0, 4, 0, 0, 0, + 0, 0, 4, 4, 4, 3, 0, 4, 4, 0, 0, 4, 4, 0, 4, 4, + 0, 4, 0, 0, 4, 4, 4, 6, 0, 0, 4, 6, 4, 0, 6, 6, + 6, 4, 4, 4, 4, 6, 6, 6, 6, 4, 6, 6, 0, 4, 6, 6, + 4, 0, 4, 6, 4, 0, 0, 6, 6, 0, 0, 6, 6, 4, 0, 0, + 0, 4, 4, 6, 6, 4, 4, 0, 
4, 6, 0, 6, 0, 0, 4, 0, + 4, 6, 6, 0, 0, 0, 6, 6, 6, 0, 6, 6, 6, 0, 4, 4, 4, 0, 6, 4, 6, 6, 4, 6, 6, 0, 4, 6, 6, 6, 4, 4, 4, 0, 4, 0, 6, 6, 6, 6, 6, 6, 6, 4, 0, 4, 0, 6, 0, 4, 0, 4, 4, 6, 4, 4, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 4, 4, 6, 4, 4, 4, 6, 6, 4, 4, 3, 0, - 0, 0, 6, 0, 4, 6, 6, 4, 0, 6, 4, 6, 6, 0, 0, 0, - 4, 4, 6, 0, 0, 6, 4, 4, 6, 6, 0, 0, 6, 4, 6, 4, + 0, 6, 6, 6, 0, 0, 6, 0, 4, 6, 6, 4, 0, 6, 4, 6, + 6, 0, 0, 0, 4, 4, 6, 0, 0, 6, 4, 4, 6, 4, 6, 4, 4, 4, 3, 3, 3, 3, 3, 0, 0, 0, 0, 6, 6, 4, 4, 6, - 7, 0, 0, 0, 4, 6, 0, 0, 0, 6, 4, 0, 10, 11, 11, 11, - 11, 11, 11, 11, 8, 8, 8, 0, 0, 0, 0, 9, 6, 4, 6, 0, - 6, 6, 6, 0, 0, 4, 6, 4, 4, 4, 4, 3, 3, 3, 3, 4, - 0, 0, 5, 5, 5, 5, 5, 5, + 6, 6, 0, 0, 7, 0, 0, 0, 4, 6, 0, 0, 0, 6, 4, 0, + 10, 11, 11, 11, 11, 11, 11, 11, 8, 8, 8, 0, 0, 0, 0, 9, + 6, 4, 6, 0, 4, 6, 4, 6, 6, 6, 6, 0, 0, 4, 6, 4, + 4, 4, 4, 3, 3, 3, 3, 4, 0, 0, 5, 5, 5, 5, 5, 5, }; -/* Grapheme_Cluster_Break: 2336 bytes. */ +/* Grapheme_Cluster_Break: 2600 bytes. */ RE_UINT32 re_get_grapheme_cluster_break(RE_UINT32 ch) { RE_UINT32 code; @@ -4339,8 +4680,8 @@ RE_UINT32 re_get_grapheme_cluster_break(RE_UINT32 ch) { static RE_UINT8 re_sentence_break_stage_1[] = { 0, 1, 2, 3, 4, 5, 5, 5, 5, 6, 7, 5, 5, 8, 9, 10, - 11, 12, 13, 14, 9, 9, 15, 9, 9, 9, 9, 16, 9, 17, 18, 9, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 19, 20, 9, 9, 9, 21, + 11, 12, 13, 14, 9, 9, 15, 9, 9, 9, 9, 16, 9, 17, 18, 19, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 20, 21, 9, 9, 9, 22, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, @@ -4352,35 +4693,36 @@ static RE_UINT8 re_sentence_break_stage_1[] = { 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 22, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 23, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 
9, 9, 9, 9, 9, 9, 9, 9, }; static RE_UINT8 re_sentence_break_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 17, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, - 31, 32, 33, 34, 35, 33, 33, 36, 33, 37, 33, 33, 38, 39, 40, 33, - 41, 42, 33, 33, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 43, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 44, - 17, 17, 17, 17, 45, 17, 46, 47, 48, 49, 50, 51, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 52, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 17, 53, 54, 17, 55, 56, 57, - 58, 59, 60, 61, 62, 33, 33, 33, 63, 64, 65, 66, 67, 33, 33, 33, - 68, 69, 33, 33, 33, 33, 70, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 17, 17, 17, 71, 72, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 17, 17, 17, 17, 73, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 17, 17, 74, 33, 33, 33, 33, 75, - 76, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 77, 78, 33, 79, 80, 81, 82, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 83, 33, - 17, 17, 17, 17, 17, 17, 84, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 85, 86, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 17, 17, 86, 33, 33, 33, 33, 33, - 87, 88, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 17, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 33, 33, 36, 33, 37, 33, 33, 38, 39, 40, 33, + 41, 42, 33, 33, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 43, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 
17, 17, 17, 17, 17, 17, 17, 17, 44, + 17, 17, 17, 17, 45, 17, 46, 47, 48, 49, 50, 51, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 52, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 33, 17, 53, 54, 17, 55, 56, 57, + 58, 59, 60, 61, 62, 63, 17, 64, 65, 66, 67, 68, 69, 33, 33, 33, + 70, 71, 72, 73, 74, 75, 76, 33, 77, 33, 78, 33, 33, 33, 33, 33, + 17, 17, 17, 79, 80, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 17, 17, 17, 17, 81, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 17, 17, 82, 83, 33, 33, 33, 84, + 85, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 86, 33, 33, 33, + 33, 87, 88, 33, 89, 90, 91, 92, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 93, 33, 33, 33, 33, 33, 94, 33, + 33, 95, 33, 33, 33, 33, 96, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 17, 17, 17, 17, 17, 17, 97, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 98, 99, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 17, 17, 99, 33, 33, 33, 33, 33, + 100, 101, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, }; static RE_UINT16 re_sentence_break_stage_3[] = { @@ -4389,46 +4731,52 @@ static RE_UINT16 re_sentence_break_stage_3[] = { 29, 30, 18, 8, 31, 8, 32, 8, 8, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 41, 41, 44, 45, 46, 47, 48, 41, 41, 49, 50, 51, 52, 53, 54, 55, 55, 56, 55, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, 67, 68, 69, 70, 71, 72, 73, 62, 71, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 73, 83, 84, 85, 86, 83, 87, 88, 89, 90, 91, 92, 93, - 94, 95, 96, 55, 97, 98, 99, 55, 100, 101, 102, 103, 104, 105, 106, 55, - 41, 107, 108, 109, 110, 29, 111, 112, 41, 41, 41, 41, 41, 41, 41, 41, - 41, 41, 113, 41, 114, 115, 116, 41, 117, 41, 118, 119, 120, 41, 41, 121, - 94, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 122, 123, 41, 41, 124, - 125, 126, 127, 128, 41, 129, 130, 131, 132, 41, 41, 133, 41, 134, 41, 135, - 136, 137, 138, 139, 41, 
140, 141, 55, 142, 41, 143, 144, 145, 146, 55, 55, - 147, 129, 148, 149, 150, 151, 41, 152, 41, 153, 154, 155, 55, 55, 156, 157, - 18, 18, 18, 18, 18, 18, 23, 158, 8, 8, 8, 8, 159, 8, 8, 8, - 160, 161, 162, 163, 161, 164, 165, 166, 167, 168, 169, 170, 171, 55, 172, 173, - 174, 175, 176, 30, 177, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, - 178, 179, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 180, 30, 181, - 55, 55, 182, 183, 55, 55, 184, 185, 55, 55, 55, 55, 186, 55, 187, 188, - 29, 189, 190, 191, 8, 8, 8, 192, 18, 193, 41, 194, 195, 196, 196, 23, - 197, 198, 55, 55, 55, 55, 55, 55, 199, 200, 94, 41, 201, 94, 41, 112, - 202, 203, 41, 41, 204, 205, 55, 206, 41, 41, 41, 41, 41, 135, 55, 55, - 41, 41, 41, 41, 41, 41, 207, 55, 41, 41, 41, 41, 207, 55, 206, 208, - 209, 210, 8, 211, 212, 41, 41, 213, 214, 215, 8, 216, 217, 218, 55, 219, - 220, 221, 41, 222, 223, 129, 224, 225, 50, 226, 227, 136, 58, 228, 229, 55, - 41, 230, 231, 232, 41, 233, 234, 235, 236, 237, 55, 55, 55, 55, 41, 238, - 41, 41, 41, 41, 41, 239, 240, 241, 41, 41, 41, 242, 41, 41, 243, 55, - 244, 245, 246, 41, 41, 247, 248, 41, 41, 249, 206, 41, 250, 41, 251, 252, - 253, 254, 255, 256, 41, 41, 41, 257, 258, 2, 259, 260, 261, 262, 263, 264, - 265, 266, 267, 55, 41, 41, 41, 205, 55, 55, 41, 121, 55, 55, 55, 268, - 55, 55, 55, 55, 136, 41, 269, 55, 262, 206, 270, 55, 271, 41, 272, 55, - 29, 273, 274, 41, 271, 131, 55, 55, 275, 276, 135, 55, 55, 55, 55, 55, - 135, 243, 55, 55, 41, 277, 55, 55, 278, 279, 280, 136, 55, 55, 55, 55, - 41, 135, 135, 281, 55, 55, 55, 55, 41, 41, 282, 55, 55, 55, 55, 55, - 150, 283, 284, 79, 150, 285, 286, 287, 150, 288, 289, 55, 150, 228, 290, 55, - 55, 55, 55, 55, 41, 291, 131, 55, 41, 41, 41, 204, 55, 55, 55, 55, - 41, 41, 41, 292, 55, 55, 55, 55, 41, 204, 55, 55, 55, 55, 55, 55, - 41, 293, 55, 55, 55, 55, 55, 55, 41, 41, 294, 295, 296, 55, 55, 55, - 297, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 298, 299, 300, 55, 55, - 55, 55, 301, 55, 55, 55, 55, 55, 302, 303, 304, 305, 306, 
307, 308, 309, - 310, 311, 312, 313, 314, 302, 303, 315, 305, 316, 317, 318, 309, 319, 320, 321, - 322, 323, 324, 189, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 55, 55, - 41, 41, 41, 41, 41, 41, 195, 55, 41, 121, 41, 41, 41, 41, 41, 41, - 271, 55, 55, 55, 55, 55, 55, 55, 335, 336, 336, 336, 55, 55, 55, 55, - 23, 23, 23, 23, 23, 23, 23, 337, + 66, 67, 68, 69, 70, 71, 72, 73, 74, 71, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 73, 84, 85, 86, 87, 84, 88, 89, 90, 91, 92, 93, 94, + 95, 96, 97, 55, 98, 99, 100, 55, 101, 102, 103, 104, 105, 106, 107, 55, + 41, 108, 109, 110, 111, 29, 112, 113, 41, 41, 41, 41, 41, 41, 41, 41, + 41, 41, 114, 41, 115, 116, 117, 41, 118, 41, 119, 120, 121, 41, 41, 122, + 95, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 123, 124, 41, 41, 125, + 126, 127, 128, 129, 41, 130, 131, 132, 133, 41, 41, 134, 41, 135, 41, 136, + 137, 138, 139, 140, 41, 141, 142, 55, 143, 41, 144, 145, 146, 147, 55, 55, + 148, 130, 149, 150, 151, 152, 41, 153, 41, 154, 155, 156, 55, 55, 157, 158, + 18, 18, 18, 18, 18, 18, 23, 159, 8, 8, 8, 8, 160, 8, 8, 8, + 161, 162, 163, 164, 162, 165, 166, 167, 168, 169, 170, 171, 172, 55, 173, 174, + 175, 176, 177, 30, 178, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 179, 180, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 181, 30, 182, + 55, 55, 183, 184, 55, 55, 185, 186, 55, 55, 55, 55, 187, 55, 188, 189, + 29, 190, 191, 192, 8, 8, 8, 193, 18, 194, 41, 195, 196, 197, 197, 23, + 198, 199, 200, 55, 55, 55, 55, 55, 201, 202, 95, 41, 203, 95, 41, 113, + 204, 205, 41, 41, 206, 207, 55, 208, 41, 41, 41, 41, 41, 136, 55, 55, + 41, 41, 41, 41, 41, 41, 209, 55, 41, 41, 41, 41, 209, 55, 208, 210, + 211, 212, 8, 213, 214, 41, 41, 215, 216, 217, 8, 218, 219, 220, 55, 221, + 222, 223, 41, 224, 225, 130, 226, 227, 50, 228, 229, 230, 58, 231, 232, 233, + 41, 234, 235, 236, 41, 237, 238, 239, 240, 241, 242, 243, 55, 55, 41, 244, + 41, 41, 41, 41, 41, 245, 246, 247, 41, 41, 41, 248, 41, 41, 249, 55, + 250, 251, 252, 41, 41, 253, 254, 41, 41, 255, 
208, 41, 256, 41, 257, 258, + 259, 260, 261, 262, 41, 41, 41, 263, 264, 2, 265, 266, 267, 137, 268, 269, + 270, 271, 272, 55, 41, 41, 41, 207, 55, 55, 41, 122, 55, 55, 55, 273, + 55, 55, 55, 55, 230, 41, 274, 275, 41, 208, 276, 277, 278, 41, 279, 55, + 29, 280, 281, 41, 278, 132, 55, 55, 41, 282, 41, 283, 55, 55, 55, 55, + 41, 196, 136, 257, 55, 55, 55, 55, 284, 285, 136, 196, 137, 55, 55, 55, + 136, 249, 55, 55, 41, 286, 55, 55, 287, 288, 289, 230, 230, 55, 103, 290, + 41, 136, 136, 56, 253, 55, 55, 55, 41, 41, 291, 55, 55, 55, 55, 55, + 151, 292, 293, 294, 151, 295, 296, 297, 151, 298, 299, 300, 151, 231, 301, 55, + 302, 303, 55, 55, 55, 208, 304, 305, 74, 71, 306, 307, 55, 55, 55, 55, + 55, 55, 55, 55, 41, 47, 308, 55, 55, 55, 55, 55, 41, 309, 310, 55, + 41, 47, 311, 55, 41, 312, 132, 55, 55, 55, 55, 55, 55, 29, 18, 313, + 55, 55, 55, 55, 55, 55, 41, 314, 41, 41, 41, 41, 314, 55, 55, 55, + 41, 41, 41, 206, 55, 55, 55, 55, 41, 206, 55, 55, 55, 55, 55, 55, + 41, 314, 137, 315, 55, 55, 208, 316, 41, 317, 318, 319, 121, 55, 55, 55, + 41, 41, 320, 321, 322, 55, 55, 55, 323, 55, 55, 55, 55, 55, 55, 55, + 41, 41, 41, 324, 325, 326, 55, 55, 55, 55, 55, 327, 328, 329, 55, 55, + 55, 55, 330, 55, 55, 55, 55, 55, 331, 332, 333, 334, 335, 336, 337, 338, + 339, 340, 341, 342, 343, 331, 332, 344, 334, 345, 346, 347, 338, 348, 349, 350, + 351, 352, 353, 190, 354, 355, 356, 357, 41, 41, 41, 41, 41, 41, 358, 55, + 359, 360, 361, 362, 363, 364, 55, 55, 55, 365, 366, 366, 367, 55, 55, 55, + 55, 55, 55, 368, 55, 55, 55, 55, 41, 41, 41, 41, 41, 41, 196, 55, + 41, 122, 41, 41, 41, 41, 41, 41, 278, 55, 55, 55, 55, 55, 55, 55, + 369, 370, 370, 370, 55, 55, 55, 55, 23, 23, 23, 23, 23, 23, 23, 371, }; static RE_UINT8 re_sentence_break_stage_4[] = { @@ -4444,163 +4792,180 @@ static RE_UINT8 re_sentence_break_stage_4[] = { 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 37, 15, 15, 15, 15, 15, 15, 15, 15, 38, 36, 39, 40, 36, 36, 41, 0, 0, 0, 15, 42, 0, 43, 0, 0, 0, 0, 44, 44, 44, 44, 
44, 44, 44, 44, - 44, 44, 44, 44, 25, 45, 46, 39, 0, 47, 22, 48, 32, 11, 11, 11, - 49, 11, 11, 15, 15, 15, 15, 15, 15, 15, 15, 50, 33, 34, 25, 25, - 25, 25, 25, 25, 15, 51, 30, 32, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 15, 15, 15, 15, 52, 44, 53, 25, 25, 25, 25, 25, - 28, 26, 26, 29, 25, 25, 25, 25, 25, 25, 0, 0, 10, 11, 11, 11, - 11, 11, 11, 11, 11, 22, 54, 55, 14, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 56, 0, 57, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 58, - 59, 58, 0, 0, 36, 36, 36, 36, 36, 36, 60, 0, 36, 0, 0, 0, - 61, 62, 0, 63, 44, 44, 64, 65, 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 66, 44, 44, 44, 44, 44, 7, 7, 67, 68, 69, 36, 36, 36, - 36, 36, 36, 36, 36, 70, 44, 71, 44, 72, 73, 74, 7, 7, 75, 76, - 77, 0, 0, 78, 79, 36, 36, 36, 36, 36, 36, 36, 44, 44, 44, 44, - 44, 44, 64, 80, 36, 36, 36, 36, 36, 81, 44, 44, 82, 0, 0, 0, - 7, 7, 75, 36, 36, 36, 36, 36, 36, 36, 66, 44, 44, 41, 83, 0, - 36, 36, 36, 36, 36, 81, 84, 44, 44, 85, 85, 86, 0, 0, 0, 0, - 36, 36, 36, 36, 36, 36, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 87, 36, 36, 88, 0, 0, 0, 0, 0, 44, 44, 44, 44, 44, 44, 64, - 44, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 81, 89, - 44, 44, 44, 44, 85, 44, 36, 36, 81, 90, 7, 7, 80, 36, 80, 36, - 57, 80, 36, 76, 76, 36, 36, 36, 36, 36, 87, 36, 43, 40, 41, 89, - 44, 91, 91, 92, 0, 93, 0, 94, 81, 95, 7, 7, 41, 0, 0, 0, - 57, 80, 60, 96, 76, 36, 36, 36, 36, 36, 87, 36, 87, 97, 41, 73, - 64, 93, 91, 86, 98, 0, 80, 43, 0, 95, 7, 7, 74, 99, 0, 0, - 57, 80, 36, 94, 94, 36, 36, 36, 36, 36, 87, 36, 87, 80, 41, 89, - 44, 58, 58, 86, 88, 0, 0, 0, 81, 95, 7, 7, 0, 0, 0, 0, - 44, 91, 91, 86, 0, 100, 0, 94, 81, 95, 7, 7, 54, 0, 0, 0, - 101, 80, 60, 40, 87, 41, 97, 87, 96, 88, 60, 40, 36, 36, 41, 100, - 64, 100, 73, 86, 88, 93, 0, 0, 0, 95, 7, 7, 0, 0, 0, 0, - 57, 80, 36, 87, 87, 36, 36, 36, 36, 36, 87, 36, 36, 80, 41, 102, - 44, 73, 73, 86, 0, 59, 41, 0, 100, 80, 36, 87, 87, 36, 36, 36, - 36, 36, 87, 36, 36, 80, 41, 89, 44, 73, 73, 86, 0, 59, 0, 103, - 81, 95, 7, 7, 
97, 0, 0, 0, 36, 36, 36, 36, 36, 36, 60, 102, - 44, 73, 73, 92, 0, 93, 0, 0, 81, 95, 7, 7, 0, 0, 40, 36, - 100, 80, 36, 36, 36, 60, 40, 36, 36, 36, 36, 36, 94, 36, 36, 54, - 36, 60, 104, 93, 44, 105, 44, 44, 0, 0, 0, 0, 100, 0, 0, 0, - 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 79, 44, 64, 0, - 36, 66, 44, 64, 7, 7, 106, 0, 97, 76, 43, 54, 0, 36, 80, 36, - 80, 107, 40, 80, 79, 44, 58, 82, 36, 43, 44, 86, 7, 7, 106, 36, - 88, 0, 0, 0, 0, 0, 86, 0, 7, 7, 106, 0, 0, 108, 109, 110, - 36, 36, 80, 36, 36, 36, 36, 36, 36, 36, 36, 88, 57, 44, 44, 44, - 44, 73, 36, 85, 44, 44, 57, 44, 44, 44, 44, 44, 44, 44, 44, 111, - 0, 104, 0, 0, 0, 0, 0, 0, 36, 36, 66, 44, 44, 44, 44, 112, - 7, 7, 113, 0, 36, 81, 74, 81, 89, 72, 44, 74, 85, 69, 36, 36, - 81, 44, 44, 84, 7, 7, 114, 86, 11, 49, 0, 115, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 60, 36, 36, 36, 87, 41, 36, 60, 87, 41, - 36, 36, 87, 41, 36, 36, 36, 36, 36, 36, 36, 36, 87, 41, 36, 60, - 87, 41, 36, 36, 36, 60, 36, 36, 36, 36, 36, 36, 87, 41, 36, 36, - 36, 36, 36, 36, 36, 36, 60, 57, 116, 9, 117, 0, 0, 0, 0, 0, - 36, 36, 36, 36, 0, 0, 0, 0, 36, 36, 36, 36, 36, 88, 0, 0, - 36, 36, 36, 118, 36, 36, 36, 36, 119, 36, 36, 36, 36, 36, 120, 121, - 36, 36, 60, 40, 88, 0, 0, 0, 36, 36, 36, 87, 81, 111, 0, 0, - 36, 36, 36, 36, 81, 122, 0, 0, 36, 36, 36, 36, 81, 0, 0, 0, - 36, 36, 36, 87, 123, 0, 0, 0, 36, 36, 36, 36, 36, 44, 44, 44, - 44, 44, 44, 44, 44, 96, 0, 99, 7, 7, 106, 0, 0, 0, 0, 0, - 124, 0, 125, 126, 7, 7, 106, 0, 36, 36, 36, 36, 36, 36, 0, 0, - 36, 36, 127, 0, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 0, 0, - 36, 36, 36, 36, 36, 36, 36, 88, 44, 44, 44, 0, 44, 44, 44, 0, - 0, 90, 7, 7, 36, 36, 36, 36, 36, 36, 36, 41, 36, 88, 0, 0, - 36, 36, 36, 0, 44, 44, 44, 44, 69, 36, 86, 0, 7, 7, 106, 0, - 36, 36, 36, 36, 36, 66, 44, 0, 36, 36, 36, 36, 36, 85, 44, 64, - 44, 44, 44, 44, 44, 44, 44, 91, 7, 7, 106, 0, 7, 7, 106, 0, - 0, 96, 128, 0, 0, 0, 0, 0, 44, 69, 36, 36, 36, 36, 36, 36, - 44, 69, 36, 0, 7, 7, 113, 129, 0, 0, 93, 44, 
44, 0, 0, 0, - 112, 36, 36, 36, 36, 36, 36, 36, 85, 44, 44, 74, 7, 7, 75, 36, - 36, 81, 44, 44, 44, 0, 0, 0, 36, 44, 44, 44, 44, 44, 9, 117, - 7, 7, 106, 80, 7, 7, 75, 36, 36, 36, 36, 36, 36, 36, 36, 130, - 0, 0, 0, 0, 64, 44, 44, 44, 44, 44, 69, 79, 81, 131, 0, 0, - 44, 64, 0, 0, 0, 0, 0, 44, 25, 25, 25, 25, 25, 34, 15, 27, - 15, 15, 11, 11, 15, 39, 11, 132, 15, 15, 11, 11, 15, 15, 11, 11, - 15, 39, 11, 132, 15, 15, 133, 133, 15, 15, 11, 11, 15, 15, 15, 39, - 15, 15, 11, 11, 15, 134, 11, 135, 46, 134, 11, 136, 15, 46, 11, 0, - 15, 15, 11, 136, 46, 134, 11, 136, 137, 137, 138, 139, 140, 141, 142, 142, - 0, 143, 144, 145, 0, 0, 146, 147, 0, 148, 147, 0, 0, 0, 0, 149, - 61, 150, 61, 61, 21, 0, 0, 151, 0, 0, 0, 146, 15, 15, 15, 42, - 0, 0, 0, 0, 44, 44, 44, 44, 44, 44, 44, 44, 111, 0, 0, 0, - 47, 152, 153, 154, 23, 115, 10, 132, 0, 155, 48, 156, 11, 38, 157, 33, - 0, 158, 39, 159, 0, 0, 0, 0, 160, 38, 88, 0, 0, 0, 0, 0, - 0, 0, 142, 0, 0, 0, 0, 0, 0, 0, 146, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 161, 11, 11, 15, 15, 39, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 4, 162, 0, 0, 142, 142, 142, 5, 0, 0, - 0, 146, 0, 0, 0, 0, 0, 0, 0, 163, 142, 142, 0, 0, 0, 0, - 4, 142, 142, 142, 142, 142, 121, 0, 0, 0, 0, 0, 0, 0, 142, 0, - 0, 0, 0, 0, 0, 0, 0, 5, 11, 11, 11, 22, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 24, 31, 164, 26, 32, 25, 29, 15, 33, - 25, 42, 152, 165, 53, 0, 0, 0, 15, 166, 0, 21, 36, 36, 36, 36, - 36, 36, 0, 96, 0, 0, 0, 93, 36, 36, 36, 36, 36, 60, 0, 0, - 36, 60, 36, 60, 36, 60, 36, 60, 142, 142, 142, 5, 0, 0, 0, 5, - 142, 142, 5, 167, 0, 0, 0, 0, 168, 80, 142, 142, 5, 142, 142, 169, - 80, 36, 81, 44, 80, 41, 36, 88, 36, 36, 36, 36, 36, 60, 59, 80, - 0, 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 80, 36, 36, 36, - 36, 36, 36, 60, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 60, 0, - 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 36, 88, 0, 0, 0, 0, - 36, 36, 36, 36, 36, 36, 36, 170, 36, 36, 36, 171, 36, 36, 36, 36, - 7, 7, 75, 0, 0, 0, 0, 0, 25, 25, 25, 172, 64, 44, 44, 173, - 25, 25, 25, 
25, 25, 25, 0, 93, 36, 36, 36, 36, 174, 9, 0, 0, - 0, 0, 0, 0, 0, 96, 36, 36, 175, 25, 25, 25, 27, 25, 25, 25, - 25, 25, 25, 25, 15, 15, 26, 30, 25, 25, 176, 177, 25, 0, 0, 0, - 25, 25, 178, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 179, 36, - 180, 180, 66, 36, 36, 36, 36, 36, 66, 44, 0, 0, 0, 0, 0, 0, - 36, 36, 36, 36, 36, 129, 0, 0, 74, 36, 36, 36, 36, 36, 36, 36, - 44, 111, 0, 129, 7, 7, 106, 0, 44, 44, 44, 44, 74, 36, 96, 0, - 36, 81, 44, 174, 36, 36, 36, 36, 36, 66, 44, 44, 44, 0, 0, 0, - 36, 36, 36, 36, 66, 44, 44, 44, 111, 0, 147, 96, 7, 7, 106, 0, - 36, 36, 85, 44, 44, 64, 0, 0, 66, 36, 36, 86, 7, 7, 106, 181, - 36, 36, 36, 36, 36, 60, 182, 0, 36, 36, 36, 36, 89, 72, 69, 81, - 127, 0, 0, 0, 0, 0, 96, 41, 36, 36, 66, 44, 183, 184, 0, 0, - 80, 60, 80, 60, 80, 60, 0, 0, 36, 60, 36, 60, 0, 0, 0, 0, - 66, 44, 185, 86, 7, 7, 106, 0, 36, 0, 0, 0, 36, 36, 36, 36, - 36, 60, 96, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, + 44, 44, 44, 44, 25, 45, 46, 47, 0, 48, 22, 49, 32, 11, 11, 11, + 50, 11, 11, 15, 15, 15, 15, 15, 15, 15, 15, 51, 33, 34, 25, 25, + 25, 25, 25, 25, 15, 52, 30, 32, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 15, 15, 15, 15, 53, 44, 54, 25, 25, 25, 25, 25, + 28, 26, 26, 29, 25, 25, 25, 25, 25, 25, 25, 25, 10, 11, 11, 11, + 11, 11, 11, 11, 11, 22, 55, 56, 14, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 57, 0, 58, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 59, + 60, 59, 0, 0, 36, 36, 36, 36, 36, 36, 61, 0, 36, 0, 0, 0, + 62, 63, 0, 64, 44, 44, 65, 66, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 67, 44, 44, 44, 44, 44, 7, 7, 68, 69, 70, 36, 36, 36, + 36, 36, 36, 36, 36, 71, 44, 72, 44, 73, 74, 75, 7, 7, 76, 77, + 78, 0, 0, 79, 80, 36, 36, 36, 36, 36, 36, 36, 44, 44, 44, 44, + 44, 44, 65, 81, 36, 36, 36, 36, 36, 82, 44, 44, 83, 0, 0, 0, + 7, 7, 76, 36, 36, 36, 36, 36, 36, 36, 67, 44, 44, 41, 84, 0, + 36, 36, 36, 36, 36, 82, 85, 44, 44, 86, 86, 87, 0, 0, 0, 0, + 36, 36, 36, 36, 36, 36, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 36, 36, 36, 36, 61, 0, 0, 0, 0, 44, 44, 44, 
44, 44, 44, 44, + 44, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 82, 88, + 44, 44, 44, 44, 86, 44, 36, 36, 82, 89, 7, 7, 81, 36, 36, 36, + 86, 81, 36, 77, 77, 36, 36, 36, 36, 36, 90, 36, 43, 40, 41, 88, + 44, 91, 91, 92, 0, 93, 0, 94, 82, 95, 7, 7, 41, 0, 0, 0, + 58, 81, 61, 96, 77, 36, 36, 36, 36, 36, 90, 36, 90, 97, 41, 74, + 65, 93, 91, 87, 98, 0, 81, 43, 0, 95, 7, 7, 75, 99, 0, 0, + 58, 81, 36, 94, 94, 36, 36, 36, 36, 36, 90, 36, 90, 81, 41, 88, + 44, 59, 59, 87, 100, 0, 0, 0, 82, 95, 7, 7, 0, 0, 0, 0, + 58, 81, 36, 77, 77, 36, 36, 36, 44, 91, 91, 87, 0, 101, 0, 94, + 82, 95, 7, 7, 55, 0, 0, 0, 102, 81, 61, 40, 90, 41, 97, 90, + 96, 100, 61, 40, 36, 36, 41, 101, 65, 101, 74, 87, 100, 93, 0, 0, + 0, 95, 7, 7, 0, 0, 0, 0, 44, 81, 36, 90, 90, 36, 36, 36, + 36, 36, 90, 36, 36, 36, 41, 103, 44, 74, 74, 87, 0, 60, 41, 0, + 58, 81, 36, 90, 90, 36, 36, 36, 36, 36, 90, 36, 36, 81, 41, 88, + 44, 74, 74, 87, 0, 60, 0, 104, 82, 95, 7, 7, 97, 0, 0, 0, + 36, 36, 36, 36, 36, 36, 61, 103, 44, 74, 74, 92, 0, 93, 0, 0, + 82, 95, 7, 7, 0, 0, 40, 36, 101, 81, 36, 36, 36, 61, 40, 36, + 36, 36, 36, 36, 94, 36, 36, 55, 36, 61, 105, 93, 44, 106, 44, 44, + 0, 95, 7, 7, 101, 0, 0, 0, 81, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 80, 44, 65, 0, 36, 67, 44, 65, 7, 7, 107, 0, + 97, 77, 43, 55, 0, 36, 81, 36, 81, 108, 40, 81, 80, 44, 59, 83, + 36, 43, 44, 87, 7, 7, 107, 36, 100, 0, 0, 0, 0, 0, 87, 0, + 7, 7, 107, 0, 0, 109, 110, 111, 36, 36, 81, 36, 36, 36, 36, 36, + 36, 36, 36, 100, 58, 44, 44, 44, 44, 74, 36, 86, 44, 44, 58, 44, + 44, 44, 44, 44, 44, 44, 44, 112, 0, 105, 0, 0, 0, 0, 0, 0, + 36, 36, 67, 44, 44, 44, 44, 113, 7, 7, 114, 0, 36, 82, 75, 82, + 88, 73, 44, 75, 86, 70, 36, 36, 82, 44, 44, 85, 7, 7, 115, 87, + 11, 50, 0, 116, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 61, 36, + 36, 36, 90, 41, 36, 61, 90, 41, 36, 36, 90, 41, 36, 36, 36, 36, + 36, 36, 36, 36, 90, 41, 36, 61, 90, 41, 36, 36, 36, 61, 36, 36, + 36, 36, 36, 36, 90, 41, 36, 36, 36, 36, 36, 36, 36, 36, 61, 58, 
+ 117, 9, 118, 0, 0, 0, 0, 0, 36, 36, 36, 36, 0, 0, 0, 0, + 36, 36, 36, 36, 36, 100, 0, 0, 36, 36, 36, 119, 36, 36, 36, 36, + 120, 36, 36, 36, 36, 36, 121, 122, 36, 36, 61, 40, 36, 36, 100, 0, + 36, 36, 36, 90, 82, 112, 0, 0, 36, 36, 36, 36, 82, 123, 0, 0, + 36, 36, 36, 36, 82, 0, 0, 0, 36, 36, 36, 90, 124, 0, 0, 0, + 36, 36, 36, 36, 36, 44, 44, 44, 44, 44, 44, 44, 44, 96, 0, 99, + 7, 7, 107, 0, 0, 0, 0, 0, 125, 0, 126, 127, 7, 7, 107, 0, + 36, 36, 36, 36, 36, 36, 0, 0, 36, 36, 128, 0, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 41, 0, 0, 36, 36, 36, 36, 36, 36, 36, 61, + 44, 44, 44, 0, 44, 44, 44, 0, 0, 89, 7, 7, 36, 36, 36, 36, + 36, 36, 36, 41, 36, 100, 0, 0, 36, 36, 36, 0, 44, 44, 44, 44, + 70, 36, 87, 0, 7, 7, 107, 0, 36, 36, 36, 36, 36, 67, 44, 0, + 36, 36, 36, 36, 36, 86, 44, 65, 44, 44, 44, 44, 44, 44, 44, 91, + 7, 7, 107, 0, 7, 7, 107, 0, 0, 96, 129, 0, 44, 44, 44, 65, + 44, 70, 36, 36, 36, 36, 36, 36, 44, 70, 36, 0, 7, 7, 114, 130, + 0, 0, 93, 44, 44, 0, 0, 0, 113, 36, 36, 36, 36, 36, 36, 36, + 86, 44, 44, 75, 7, 7, 76, 36, 36, 82, 44, 44, 44, 0, 0, 0, + 36, 44, 44, 44, 44, 44, 9, 118, 7, 7, 107, 81, 7, 7, 76, 36, + 36, 36, 36, 36, 36, 36, 36, 131, 0, 0, 0, 0, 65, 44, 44, 44, + 44, 44, 70, 80, 82, 132, 87, 0, 44, 44, 44, 44, 44, 87, 0, 44, + 25, 25, 25, 25, 25, 34, 15, 27, 15, 15, 11, 11, 15, 39, 11, 133, + 15, 15, 11, 11, 15, 15, 11, 11, 15, 39, 11, 133, 15, 15, 134, 134, + 15, 15, 11, 11, 15, 15, 15, 39, 15, 15, 11, 11, 15, 135, 11, 136, + 46, 135, 11, 137, 15, 46, 11, 0, 15, 15, 11, 137, 46, 135, 11, 137, + 138, 138, 139, 140, 141, 142, 143, 143, 0, 144, 145, 146, 0, 0, 147, 148, + 0, 149, 148, 0, 0, 0, 0, 150, 62, 151, 62, 62, 21, 0, 0, 152, + 0, 0, 0, 147, 15, 15, 15, 42, 0, 0, 0, 0, 44, 44, 44, 44, + 44, 44, 44, 44, 112, 0, 0, 0, 48, 153, 154, 155, 23, 116, 10, 133, + 0, 156, 49, 157, 11, 38, 158, 33, 0, 159, 39, 160, 0, 0, 0, 0, + 161, 38, 100, 0, 0, 0, 0, 0, 0, 0, 143, 0, 0, 0, 0, 0, + 0, 0, 147, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 162, 11, 11, + 15, 15, 39, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 143, + 122, 0, 143, 143, 143, 5, 0, 0, 0, 147, 0, 0, 0, 0, 0, 0, + 0, 163, 143, 143, 0, 0, 0, 0, 4, 143, 143, 143, 143, 143, 122, 0, + 0, 0, 0, 0, 0, 0, 143, 0, 0, 0, 0, 0, 0, 0, 0, 5, + 11, 11, 11, 22, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 24, + 31, 164, 26, 32, 25, 29, 15, 33, 25, 42, 153, 165, 54, 0, 0, 0, + 15, 166, 0, 21, 36, 36, 36, 36, 36, 36, 0, 96, 0, 0, 0, 93, + 36, 36, 36, 36, 36, 61, 0, 0, 36, 61, 36, 61, 36, 61, 36, 61, + 143, 143, 143, 5, 0, 0, 0, 5, 143, 143, 5, 167, 0, 0, 0, 118, + 168, 0, 0, 0, 0, 0, 0, 0, 169, 81, 143, 143, 5, 143, 143, 170, + 81, 36, 82, 44, 81, 41, 36, 100, 36, 36, 36, 36, 36, 61, 60, 81, + 0, 81, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 81, 36, 36, 36, + 36, 36, 36, 61, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 61, 0, + 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 36, 100, 0, 0, 0, 0, + 36, 36, 36, 36, 36, 36, 36, 171, 36, 36, 36, 172, 36, 36, 36, 36, + 7, 7, 76, 0, 0, 0, 0, 0, 25, 25, 25, 173, 65, 44, 44, 174, + 25, 25, 25, 25, 25, 25, 25, 175, 36, 36, 36, 36, 176, 9, 0, 0, + 0, 0, 0, 0, 0, 96, 36, 36, 177, 25, 25, 25, 27, 25, 25, 25, + 25, 25, 25, 25, 15, 15, 26, 30, 25, 25, 178, 179, 25, 27, 25, 25, + 25, 25, 31, 133, 133, 0, 0, 0, 0, 0, 0, 0, 0, 96, 180, 36, + 181, 181, 67, 36, 36, 36, 36, 36, 67, 44, 0, 0, 0, 0, 0, 0, + 36, 36, 36, 36, 36, 130, 0, 0, 75, 36, 36, 36, 36, 36, 36, 36, + 44, 112, 0, 130, 7, 7, 107, 0, 44, 44, 44, 44, 75, 36, 96, 0, + 36, 82, 44, 176, 36, 36, 36, 36, 36, 67, 44, 44, 44, 0, 0, 0, + 36, 36, 36, 36, 36, 36, 36, 100, 36, 36, 36, 36, 67, 44, 44, 44, + 112, 0, 148, 96, 7, 7, 107, 0, 36, 80, 36, 36, 7, 7, 76, 61, + 36, 36, 86, 44, 44, 65, 0, 0, 67, 36, 36, 87, 7, 7, 107, 182, + 36, 36, 36, 36, 36, 61, 183, 75, 36, 36, 36, 36, 88, 73, 70, 82, + 128, 0, 0, 0, 0, 0, 96, 41, 36, 36, 67, 44, 184, 185, 0, 0, + 81, 61, 81, 61, 81, 61, 0, 0, 36, 61, 36, 61, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 24, 15, 0, 39, 0, 0, 0, 0, 0, 0, + 67, 44, 186, 87, 7, 7, 107, 0, 36, 0, 0, 0, 36, 36, 
36, 36, + 36, 61, 96, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 36, 36, 36, 41, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 0, - 15, 24, 0, 0, 186, 15, 0, 187, 36, 36, 87, 36, 36, 60, 36, 43, - 94, 87, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 0, 0, 0, - 0, 0, 0, 0, 96, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 188, + 15, 24, 0, 0, 187, 15, 0, 188, 36, 36, 90, 36, 36, 61, 36, 43, + 94, 90, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 0, 0, 0, + 0, 0, 0, 0, 96, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 189, 36, 36, 36, 36, 40, 36, 36, 36, 36, 36, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 36, 36, 36, 0, 44, 44, 44, 44, 189, 4, 121, 0, - 44, 64, 0, 0, 190, 169, 142, 142, 142, 191, 121, 0, 6, 192, 193, 162, - 140, 0, 0, 0, 36, 87, 36, 36, 36, 36, 36, 36, 36, 36, 36, 194, - 56, 0, 5, 6, 0, 0, 195, 9, 14, 15, 15, 15, 15, 15, 16, 196, - 197, 198, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 81, - 36, 36, 36, 36, 36, 36, 36, 60, 40, 36, 40, 36, 40, 36, 40, 88, - 0, 0, 0, 0, 0, 0, 199, 0, 36, 36, 36, 80, 36, 36, 36, 36, - 36, 60, 36, 36, 36, 36, 60, 94, 36, 36, 36, 41, 36, 36, 36, 41, - 0, 0, 0, 0, 0, 0, 0, 98, 36, 36, 36, 36, 88, 0, 0, 0, - 36, 36, 60, 0, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 36, 41, - 36, 0, 36, 36, 80, 41, 0, 0, 11, 11, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 36, 36, 36, 36, 36, 41, 87, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 94, 88, 76, 36, 36, 36, 36, 36, 36, 0, 40, - 85, 59, 0, 44, 36, 80, 80, 36, 36, 36, 36, 36, 36, 0, 64, 93, - 0, 0, 0, 0, 0, 129, 0, 0, 36, 36, 36, 36, 60, 0, 0, 0, - 36, 36, 88, 0, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 44, 44, - 44, 185, 117, 0, 0, 0, 0, 0, 36, 36, 36, 36, 44, 44, 64, 200, - 147, 0, 0, 0, 36, 36, 36, 36, 36, 36, 88, 0, 7, 7, 106, 0, - 36, 66, 44, 44, 44, 201, 7, 7, 181, 0, 0, 0, 0, 0, 0, 0, - 69, 202, 0, 0, 7, 7, 106, 0, 36, 36, 66, 44, 44, 44, 0, 0, - 60, 0, 0, 0, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 88, 0, - 36, 88, 0, 0, 85, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 64, - 0, 0, 0, 93, 112, 36, 36, 36, 41, 0, 0, 
0, 0, 0, 0, 0, - 0, 57, 86, 57, 203, 61, 204, 44, 64, 57, 44, 0, 0, 0, 0, 0, - 0, 0, 100, 86, 0, 0, 0, 0, 100, 111, 0, 0, 0, 0, 0, 0, - 11, 11, 11, 11, 11, 11, 154, 15, 15, 15, 15, 15, 15, 11, 11, 11, - 11, 11, 11, 154, 15, 134, 15, 15, 15, 15, 11, 11, 11, 11, 11, 11, - 154, 15, 15, 15, 15, 15, 15, 48, 47, 205, 10, 48, 11, 154, 166, 14, - 15, 14, 15, 15, 11, 11, 11, 11, 11, 11, 154, 15, 15, 15, 15, 15, - 15, 49, 22, 10, 11, 48, 11, 206, 15, 15, 15, 15, 15, 15, 49, 22, - 11, 155, 161, 11, 206, 15, 15, 15, 15, 15, 15, 11, 11, 11, 11, 11, - 11, 154, 15, 15, 15, 15, 15, 15, 11, 11, 11, 154, 15, 15, 15, 15, - 154, 15, 15, 15, 15, 15, 15, 11, 11, 11, 11, 11, 11, 154, 15, 15, - 15, 15, 15, 15, 11, 11, 11, 11, 15, 39, 11, 11, 11, 11, 11, 11, - 206, 15, 15, 15, 15, 15, 24, 15, 33, 11, 11, 11, 11, 11, 22, 15, - 15, 15, 15, 15, 15, 134, 15, 11, 11, 11, 11, 11, 11, 206, 15, 15, - 15, 15, 15, 24, 15, 33, 11, 11, 15, 15, 134, 15, 11, 11, 11, 11, - 11, 11, 206, 15, 15, 15, 15, 15, 24, 15, 27, 95, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 36, 80, 36, 36, 36, 36, 36, 36, - 97, 76, 80, 36, 60, 36, 107, 0, 103, 96, 107, 80, 97, 76, 107, 107, - 97, 76, 60, 36, 60, 36, 80, 43, 36, 36, 94, 36, 36, 36, 36, 0, - 80, 80, 94, 36, 36, 36, 36, 0, 20, 0, 0, 0, 0, 0, 0, 0, - 61, 61, 61, 61, 61, 61, 61, 61, 44, 44, 44, 44, 0, 0, 0, 0, + 0, 0, 0, 0, 36, 36, 36, 0, 44, 44, 44, 44, 190, 4, 122, 0, + 44, 44, 44, 87, 191, 170, 143, 143, 143, 192, 122, 0, 6, 193, 194, 195, + 141, 0, 0, 0, 36, 90, 36, 36, 36, 36, 36, 36, 36, 36, 36, 196, + 57, 0, 5, 6, 0, 0, 197, 9, 14, 15, 15, 15, 15, 15, 16, 198, + 199, 200, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 82, + 40, 36, 40, 36, 40, 36, 40, 100, 0, 0, 0, 0, 0, 0, 201, 0, + 36, 36, 36, 81, 36, 36, 36, 36, 36, 61, 36, 36, 36, 36, 61, 94, + 36, 36, 36, 41, 36, 36, 36, 41, 0, 0, 0, 0, 0, 0, 0, 98, + 36, 36, 36, 36, 100, 0, 0, 0, 112, 0, 0, 0, 0, 0, 0, 0, + 36, 36, 61, 0, 36, 36, 36, 36, 36, 36, 36, 36, 36, 82, 65, 0, + 36, 36, 36, 36, 36, 36, 36, 41, 36, 
0, 36, 36, 81, 41, 0, 0, + 11, 11, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 36, 36, 36, 36, + 36, 36, 0, 0, 36, 36, 36, 36, 36, 0, 0, 0, 0, 0, 0, 0, + 36, 41, 90, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 94, 100, 77, + 36, 36, 36, 36, 36, 36, 0, 40, 86, 60, 0, 44, 36, 81, 81, 36, + 36, 36, 36, 36, 36, 0, 65, 93, 0, 0, 0, 0, 0, 130, 0, 0, + 36, 185, 0, 0, 0, 0, 0, 0, 36, 36, 100, 0, 0, 0, 0, 0, + 36, 36, 36, 36, 36, 36, 44, 44, 44, 186, 118, 0, 0, 0, 0, 0, + 0, 95, 7, 7, 0, 0, 0, 93, 36, 36, 36, 36, 44, 44, 65, 202, + 148, 0, 0, 0, 36, 36, 36, 36, 36, 36, 100, 0, 7, 7, 107, 0, + 36, 67, 44, 44, 44, 203, 7, 7, 182, 0, 0, 0, 36, 36, 36, 36, + 36, 36, 36, 36, 67, 104, 0, 0, 70, 204, 0, 57, 7, 7, 205, 0, + 36, 36, 36, 36, 94, 36, 36, 36, 36, 36, 36, 44, 44, 44, 206, 118, + 36, 36, 36, 36, 36, 36, 36, 67, 44, 44, 65, 0, 7, 7, 107, 0, + 44, 91, 91, 87, 0, 93, 0, 81, 82, 101, 44, 112, 44, 112, 0, 0, + 44, 94, 0, 0, 7, 7, 107, 0, 36, 36, 36, 67, 44, 87, 44, 44, + 207, 0, 57, 0, 0, 0, 0, 0, 123, 100, 0, 0, 7, 7, 107, 0, + 36, 36, 67, 44, 44, 44, 0, 0, 7, 7, 107, 0, 0, 0, 0, 96, + 36, 36, 36, 36, 36, 36, 100, 0, 7, 7, 107, 130, 0, 0, 0, 0, + 36, 36, 36, 41, 44, 208, 0, 0, 36, 36, 36, 36, 44, 186, 118, 0, + 36, 118, 0, 0, 7, 7, 107, 0, 96, 36, 36, 36, 36, 36, 0, 81, + 36, 100, 0, 0, 86, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 65, + 0, 0, 0, 93, 113, 36, 36, 36, 41, 0, 0, 0, 0, 0, 0, 0, + 36, 36, 61, 0, 36, 36, 36, 100, 36, 36, 100, 0, 36, 36, 41, 209, + 62, 0, 0, 0, 0, 0, 0, 0, 0, 58, 87, 58, 210, 62, 211, 44, + 65, 58, 44, 0, 0, 0, 0, 0, 0, 0, 101, 87, 0, 0, 0, 0, + 101, 112, 0, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 11, 155, 15, + 15, 15, 15, 15, 15, 11, 11, 11, 11, 11, 11, 155, 15, 135, 15, 15, + 15, 15, 11, 11, 11, 11, 11, 11, 155, 15, 15, 15, 15, 15, 15, 49, + 48, 212, 10, 49, 11, 155, 166, 14, 15, 14, 15, 15, 11, 11, 11, 11, + 11, 11, 155, 15, 15, 15, 15, 15, 15, 50, 22, 10, 11, 49, 11, 213, + 15, 15, 15, 15, 15, 15, 50, 22, 11, 156, 162, 11, 213, 15, 15, 15, + 15, 15, 15, 11, 
11, 11, 11, 11, 11, 155, 15, 15, 15, 15, 15, 15, + 11, 11, 11, 155, 15, 15, 15, 15, 155, 15, 15, 15, 15, 15, 15, 11, + 11, 11, 11, 11, 11, 155, 15, 15, 15, 15, 15, 15, 11, 11, 11, 11, + 15, 39, 11, 11, 11, 11, 11, 11, 213, 15, 15, 15, 15, 15, 24, 15, + 33, 11, 11, 11, 11, 11, 22, 15, 15, 15, 15, 15, 15, 135, 15, 11, + 11, 11, 11, 11, 11, 213, 15, 15, 15, 15, 15, 24, 15, 33, 11, 11, + 15, 15, 135, 15, 11, 11, 11, 11, 11, 11, 213, 15, 15, 15, 15, 15, + 24, 15, 27, 95, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 36, 100, 0, 0, 44, 65, 0, 0, 36, 81, 36, 36, 36, 36, 36, 36, + 97, 77, 81, 36, 61, 36, 108, 0, 104, 96, 108, 81, 97, 77, 108, 108, + 97, 77, 61, 36, 61, 36, 81, 43, 36, 36, 94, 36, 36, 36, 36, 0, + 81, 81, 94, 36, 36, 36, 36, 0, 0, 0, 0, 0, 11, 11, 11, 11, + 11, 11, 133, 0, 11, 11, 11, 11, 11, 11, 133, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 163, 122, 0, 20, 0, 0, 0, 0, 0, 0, 0, + 62, 62, 62, 62, 62, 62, 62, 62, 44, 44, 44, 44, 0, 0, 0, 0, }; static RE_UINT8 re_sentence_break_stage_5[] = { @@ -4615,50 +4980,52 @@ static RE_UINT8 re_sentence_break_stage_5[] = { 7, 8, 8, 8, 7, 7, 8, 8, 8, 7, 7, 7, 8, 7, 7, 9, 9, 9, 9, 9, 9, 7, 7, 7, 7, 9, 9, 9, 7, 7, 0, 0, 0, 0, 9, 9, 9, 9, 0, 0, 7, 0, 0, 0, 9, 0, 9, 0, - 3, 3, 3, 3, 9, 0, 8, 7, 0, 0, 7, 7, 0, 0, 8, 0, - 8, 0, 8, 8, 8, 8, 0, 8, 7, 7, 7, 8, 8, 7, 0, 8, - 8, 7, 0, 3, 3, 3, 8, 7, 0, 9, 0, 0, 12, 14, 12, 0, - 0, 12, 0, 0, 0, 3, 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, - 9, 9, 9, 0, 5, 5, 5, 5, 5, 0, 0, 0, 14, 14, 0, 0, - 3, 3, 3, 0, 5, 0, 0, 12, 9, 9, 9, 3, 10, 10, 0, 10, - 10, 0, 9, 9, 3, 9, 9, 9, 12, 9, 3, 3, 3, 5, 0, 3, - 3, 9, 9, 3, 3, 0, 3, 3, 3, 3, 9, 9, 10, 10, 9, 9, - 9, 0, 0, 9, 12, 12, 12, 0, 0, 0, 0, 5, 9, 3, 9, 9, - 0, 9, 9, 9, 9, 9, 3, 3, 3, 9, 0, 0, 14, 12, 9, 0, - 3, 3, 9, 3, 9, 3, 3, 3, 3, 3, 0, 0, 9, 0, 9, 9, - 9, 0, 0, 0, 3, 9, 3, 3, 12, 12, 10, 10, 3, 0, 0, 3, + 3, 3, 3, 3, 9, 0, 8, 7, 0, 0, 7, 7, 7, 7, 0, 8, + 0, 0, 8, 0, 8, 0, 8, 8, 8, 8, 0, 8, 7, 7, 7, 8, + 8, 7, 0, 8, 8, 7, 0, 3, 3, 3, 8, 7, 0, 9, 0, 0, + 0, 14, 0, 0, 
0, 12, 0, 0, 0, 3, 3, 3, 3, 3, 0, 3, + 0, 3, 3, 0, 9, 9, 9, 0, 5, 5, 5, 5, 5, 5, 0, 0, + 14, 14, 0, 0, 3, 3, 3, 0, 5, 0, 0, 12, 9, 9, 9, 3, + 10, 10, 0, 10, 10, 0, 9, 9, 3, 9, 9, 9, 12, 9, 3, 3, + 3, 5, 0, 3, 3, 9, 9, 3, 3, 0, 3, 3, 3, 3, 9, 9, + 10, 10, 9, 9, 9, 0, 0, 9, 12, 12, 12, 0, 0, 0, 0, 5, + 9, 3, 9, 9, 0, 9, 9, 9, 9, 9, 3, 3, 3, 9, 0, 0, + 14, 12, 9, 0, 3, 3, 9, 3, 9, 3, 3, 3, 3, 3, 0, 0, + 3, 9, 3, 3, 12, 12, 10, 10, 9, 0, 9, 9, 3, 0, 0, 3, 3, 3, 9, 0, 0, 0, 0, 3, 9, 9, 0, 9, 0, 0, 10, 10, 0, 0, 0, 9, 0, 9, 9, 0, 0, 3, 0, 0, 9, 3, 0, 0, - 0, 0, 3, 3, 0, 0, 3, 9, 0, 9, 3, 3, 0, 0, 9, 0, - 0, 0, 3, 0, 3, 0, 3, 0, 10, 10, 0, 0, 0, 9, 0, 9, - 0, 3, 0, 3, 0, 3, 13, 13, 13, 13, 3, 3, 3, 0, 0, 0, - 3, 3, 3, 9, 10, 10, 12, 12, 10, 10, 3, 3, 0, 8, 0, 0, - 0, 0, 12, 0, 12, 0, 0, 0, 9, 0, 12, 9, 6, 9, 9, 9, - 9, 9, 9, 13, 13, 0, 0, 0, 3, 12, 12, 0, 9, 0, 3, 3, - 0, 0, 14, 12, 14, 12, 0, 3, 3, 3, 5, 0, 9, 3, 9, 0, - 12, 12, 12, 12, 0, 0, 12, 12, 9, 9, 12, 12, 3, 9, 9, 0, - 8, 8, 0, 0, 0, 8, 0, 8, 7, 0, 7, 7, 8, 0, 7, 0, - 8, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 5, 3, 3, 5, 5, - 0, 0, 0, 14, 14, 0, 0, 0, 13, 13, 13, 13, 11, 0, 0, 0, - 4, 4, 5, 5, 5, 5, 5, 6, 0, 13, 13, 0, 12, 12, 0, 0, - 0, 13, 13, 12, 0, 0, 0, 6, 5, 0, 5, 5, 0, 13, 13, 7, - 0, 0, 0, 8, 0, 0, 7, 8, 8, 8, 7, 7, 8, 0, 8, 0, - 8, 8, 0, 7, 9, 7, 0, 0, 0, 8, 7, 7, 0, 0, 7, 0, - 9, 9, 9, 8, 0, 0, 8, 8, 13, 13, 13, 0, 0, 0, 13, 13, + 9, 0, 0, 0, 0, 0, 3, 3, 0, 0, 3, 9, 0, 9, 3, 3, + 0, 0, 9, 0, 0, 0, 3, 0, 3, 0, 3, 0, 10, 10, 0, 0, + 0, 9, 0, 9, 0, 3, 0, 3, 0, 3, 13, 13, 13, 13, 3, 3, + 3, 0, 0, 0, 3, 3, 3, 9, 10, 10, 12, 12, 10, 10, 3, 3, + 0, 8, 0, 0, 0, 0, 12, 0, 12, 0, 0, 0, 9, 0, 12, 9, + 6, 9, 9, 9, 9, 9, 9, 13, 13, 0, 0, 0, 3, 12, 12, 0, + 9, 0, 3, 3, 0, 0, 14, 12, 14, 12, 0, 3, 3, 3, 5, 0, + 9, 3, 9, 0, 12, 12, 12, 12, 0, 0, 12, 12, 9, 9, 12, 12, + 3, 9, 9, 0, 8, 8, 0, 0, 0, 8, 0, 8, 7, 0, 7, 7, + 8, 0, 7, 0, 8, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 5, + 3, 3, 5, 5, 0, 0, 0, 14, 14, 0, 0, 0, 13, 13, 13, 13, + 11, 
0, 0, 0, 4, 4, 5, 5, 5, 5, 5, 6, 0, 13, 13, 0, + 12, 12, 0, 0, 0, 13, 13, 12, 0, 0, 0, 6, 5, 0, 5, 5, + 0, 13, 13, 7, 0, 0, 0, 8, 0, 0, 7, 8, 8, 8, 7, 7, + 8, 0, 8, 0, 8, 8, 0, 7, 9, 7, 0, 0, 0, 8, 7, 7, + 0, 0, 7, 0, 9, 9, 9, 8, 0, 0, 8, 8, 0, 0, 13, 13, 8, 7, 7, 8, 7, 8, 7, 3, 7, 7, 0, 7, 0, 0, 12, 9, - 6, 14, 12, 0, 0, 13, 13, 13, 9, 9, 0, 12, 9, 0, 12, 12, - 8, 7, 9, 3, 3, 3, 0, 9, 3, 3, 0, 12, 0, 0, 8, 7, - 9, 0, 0, 8, 7, 8, 7, 0, 8, 7, 8, 0, 7, 7, 7, 9, - 9, 9, 3, 9, 0, 12, 12, 12, 0, 0, 9, 3, 12, 12, 9, 9, - 9, 3, 3, 0, 3, 3, 3, 12, 0, 0, 0, 7, 0, 9, 3, 9, - 9, 9, 13, 13, 14, 14, 0, 14, 0, 14, 14, 0, 13, 0, 0, 13, - 0, 14, 12, 12, 14, 13, 13, 13, 9, 0, 0, 5, 0, 0, 14, 0, - 0, 13, 0, 13, 13, 12, 13, 13, 14, 0, 9, 9, 0, 5, 5, 5, - 0, 5, 12, 12, 3, 0, 10, 10, 9, 12, 12, 0, 3, 3, 3, 5, - 5, 5, 5, 3, 0, 8, 8, 0, 8, 0, 7, 7, + 0, 0, 13, 0, 6, 14, 12, 0, 0, 13, 13, 13, 9, 9, 0, 12, + 9, 0, 12, 12, 8, 7, 9, 3, 3, 3, 0, 9, 7, 7, 0, 3, + 3, 3, 0, 12, 0, 0, 8, 7, 9, 0, 0, 8, 7, 8, 7, 0, + 7, 7, 7, 9, 9, 9, 3, 9, 0, 12, 12, 12, 0, 0, 9, 3, + 12, 12, 9, 9, 9, 3, 3, 0, 3, 3, 3, 12, 0, 0, 0, 7, + 0, 9, 3, 9, 9, 9, 13, 13, 14, 14, 0, 14, 0, 14, 14, 0, + 13, 0, 0, 13, 0, 14, 12, 12, 14, 13, 13, 13, 13, 13, 13, 0, + 9, 0, 0, 5, 0, 0, 14, 0, 0, 13, 0, 13, 13, 12, 13, 13, + 14, 0, 9, 9, 0, 5, 5, 5, 0, 5, 12, 12, 3, 0, 10, 10, + 9, 12, 12, 0, 10, 10, 9, 0, 12, 12, 0, 12, 3, 0, 12, 12, + 3, 12, 0, 0, 0, 3, 3, 12, 3, 3, 3, 5, 5, 5, 5, 3, + 0, 8, 8, 0, 8, 0, 7, 7, }; -/* Sentence_Break: 5596 bytes. */ +/* Sentence_Break: 6120 bytes. 
*/ RE_UINT32 re_get_sentence_break(RE_UINT32 ch) { RE_UINT32 code; @@ -4771,10 +5138,10 @@ static RE_UINT8 re_alphabetic_stage_1[] = { static RE_UINT8 re_alphabetic_stage_2[] = { 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, - 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, + 15, 16, 17, 18, 19, 13, 20, 13, 13, 13, 13, 13, 13, 21, 13, 13, + 13, 13, 13, 13, 13, 13, 22, 23, 13, 13, 24, 13, 13, 25, 26, 13, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, + 7, 7, 7, 7, 27, 7, 28, 29, 13, 13, 13, 13, 13, 13, 13, 30, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, }; @@ -4787,13 +5154,15 @@ static RE_UINT8 re_alphabetic_stage_3[] = { 1, 1, 1, 1, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39, 1, 1, 1, 1, 40, 1, 41, 42, 43, 44, 45, 46, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 47, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 48, 49, 1, 50, 51, 52, 53, 54, 55, 56, 57, 31, 31, 31, - 58, 59, 60, 61, 62, 31, 31, 31, 63, 64, 31, 31, 31, 31, 65, 31, - 1, 1, 1, 66, 67, 31, 31, 31, 1, 1, 1, 1, 68, 31, 31, 31, - 1, 1, 69, 31, 31, 31, 31, 70, 71, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 72, 73, 74, 75, 31, 31, 31, 31, 31, 31, 76, 31, - 1, 1, 1, 1, 1, 1, 77, 1, 1, 1, 1, 1, 1, 1, 1, 78, - 79, 31, 31, 31, 31, 31, 31, 31, 1, 1, 79, 31, 31, 31, 31, 31, + 31, 1, 48, 49, 1, 50, 51, 52, 53, 54, 55, 56, 57, 58, 1, 59, + 60, 61, 62, 63, 64, 31, 31, 31, 65, 66, 67, 68, 69, 70, 71, 31, + 72, 31, 73, 31, 31, 31, 31, 31, 1, 1, 1, 74, 75, 31, 31, 31, + 1, 1, 1, 1, 76, 31, 31, 31, 1, 1, 77, 78, 31, 31, 31, 79, + 80, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 81, 31, 31, 31, + 31, 31, 31, 31, 82, 83, 84, 85, 86, 31, 31, 31, 31, 31, 87, 31, + 31, 88, 31, 31, 31, 31, 31, 31, 1, 1, 1, 1, 1, 1, 89, 1, + 1, 1, 1, 1, 1, 1, 1, 90, 91, 31, 31, 31, 
31, 31, 31, 31, + 1, 1, 91, 31, 31, 31, 31, 31, }; static RE_UINT8 re_alphabetic_stage_4[] = { @@ -4802,98 +5171,108 @@ static RE_UINT8 re_alphabetic_stage_4[] = { 4, 4, 4, 4, 12, 4, 4, 4, 4, 13, 14, 15, 16, 17, 18, 19, 20, 4, 21, 22, 4, 4, 23, 24, 25, 4, 26, 4, 4, 27, 28, 29, 30, 31, 32, 0, 0, 33, 0, 34, 4, 35, 36, 37, 38, 39, 40, 41, - 42, 43, 44, 45, 46, 47, 48, 49, 38, 47, 50, 51, 52, 53, 54, 0, - 55, 56, 57, 49, 58, 56, 59, 60, 58, 61, 62, 63, 64, 65, 66, 67, - 15, 68, 69, 0, 70, 71, 72, 0, 73, 0, 74, 75, 76, 77, 0, 0, - 4, 78, 25, 79, 80, 4, 81, 82, 4, 4, 83, 4, 84, 85, 86, 4, - 87, 4, 88, 0, 89, 4, 4, 90, 15, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 91, 1, 4, 4, 92, 93, 94, 94, 95, 4, 96, 97, 0, - 0, 4, 4, 98, 4, 99, 4, 100, 77, 101, 25, 102, 4, 103, 104, 0, - 105, 4, 106, 107, 0, 108, 0, 0, 4, 109, 110, 0, 4, 111, 4, 112, - 4, 100, 113, 114, 0, 0, 0, 115, 4, 4, 4, 4, 4, 4, 0, 0, - 116, 4, 117, 114, 4, 118, 119, 120, 0, 0, 0, 121, 122, 0, 0, 0, - 123, 124, 125, 4, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 127, 4, 104, 4, 128, 106, 4, 4, 4, 4, 129, - 4, 81, 4, 130, 131, 132, 132, 4, 0, 133, 0, 0, 0, 0, 0, 0, - 134, 135, 15, 4, 136, 15, 4, 82, 137, 138, 4, 4, 139, 68, 0, 25, - 4, 4, 4, 4, 4, 100, 0, 0, 4, 4, 4, 4, 4, 4, 31, 0, - 4, 4, 4, 4, 31, 0, 25, 114, 140, 141, 4, 142, 143, 4, 4, 89, - 144, 145, 4, 4, 146, 147, 0, 148, 149, 16, 4, 94, 4, 4, 49, 150, - 28, 99, 151, 77, 4, 152, 133, 0, 4, 131, 153, 154, 4, 106, 155, 156, - 157, 158, 0, 0, 0, 0, 4, 147, 4, 4, 4, 4, 4, 159, 160, 105, - 4, 4, 4, 161, 4, 4, 162, 0, 163, 164, 165, 4, 4, 27, 166, 4, - 4, 114, 25, 4, 167, 4, 16, 168, 0, 0, 0, 169, 4, 4, 4, 77, - 0, 1, 1, 170, 4, 106, 171, 0, 172, 173, 174, 0, 4, 4, 4, 68, - 0, 0, 4, 90, 0, 0, 0, 0, 0, 0, 0, 0, 77, 4, 175, 0, - 106, 25, 147, 0, 114, 4, 176, 0, 4, 4, 4, 4, 114, 0, 0, 0, - 177, 178, 100, 0, 0, 0, 0, 0, 100, 162, 0, 0, 4, 179, 0, 0, - 180, 94, 0, 77, 0, 0, 0, 0, 4, 100, 100, 151, 0, 0, 0, 0, - 4, 4, 126, 0, 0, 0, 0, 0, 4, 4, 181, 0, 145, 32, 25, 
126, - 4, 151, 0, 0, 4, 4, 182, 0, 0, 0, 0, 0, 4, 100, 0, 0, - 4, 4, 4, 139, 0, 0, 0, 0, 4, 4, 4, 183, 0, 0, 0, 0, - 4, 139, 0, 0, 0, 0, 0, 0, 4, 32, 0, 0, 0, 0, 0, 0, - 4, 4, 184, 106, 166, 0, 0, 0, 185, 0, 0, 0, 0, 0, 0, 0, - 4, 4, 186, 4, 187, 188, 189, 4, 190, 191, 192, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 193, 194, 82, 186, 186, 128, 128, 195, 195, 196, 0, - 189, 197, 198, 199, 200, 201, 0, 0, 4, 4, 4, 4, 4, 4, 131, 0, - 4, 90, 4, 4, 4, 4, 4, 4, 114, 0, 0, 0, 0, 0, 0, 0, + 42, 43, 44, 45, 46, 47, 48, 49, 50, 47, 51, 52, 53, 54, 55, 0, + 56, 57, 58, 49, 59, 60, 61, 62, 59, 63, 64, 65, 66, 67, 68, 69, + 15, 70, 71, 0, 72, 73, 74, 0, 75, 0, 76, 77, 78, 79, 0, 0, + 4, 80, 25, 81, 82, 4, 83, 84, 4, 4, 85, 4, 86, 87, 88, 4, + 89, 4, 90, 0, 91, 4, 4, 92, 15, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 93, 1, 4, 4, 94, 95, 96, 96, 97, 4, 98, 99, 0, + 0, 4, 4, 100, 4, 101, 4, 102, 103, 104, 25, 105, 4, 106, 107, 0, + 108, 4, 103, 109, 0, 110, 0, 0, 4, 111, 112, 0, 4, 113, 4, 114, + 4, 102, 115, 116, 0, 0, 0, 117, 4, 4, 4, 4, 4, 4, 0, 118, + 119, 4, 120, 116, 4, 121, 122, 123, 0, 0, 0, 124, 125, 0, 0, 0, + 126, 127, 128, 4, 129, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 130, 4, 107, 4, 131, 103, 4, 4, 4, 4, 132, + 4, 83, 4, 133, 134, 135, 135, 4, 0, 136, 0, 0, 0, 0, 0, 0, + 137, 138, 15, 4, 139, 15, 4, 84, 140, 141, 4, 4, 142, 70, 0, 25, + 4, 4, 4, 4, 4, 102, 0, 0, 4, 4, 4, 4, 4, 4, 31, 0, + 4, 4, 4, 4, 31, 0, 25, 116, 143, 144, 4, 145, 146, 4, 4, 91, + 147, 148, 4, 4, 149, 150, 0, 147, 151, 16, 4, 96, 4, 4, 49, 152, + 28, 101, 33, 79, 4, 153, 136, 154, 4, 134, 155, 156, 4, 103, 157, 158, + 159, 160, 84, 161, 0, 0, 4, 162, 4, 4, 4, 4, 4, 163, 164, 108, + 4, 4, 4, 165, 4, 4, 166, 0, 167, 168, 169, 4, 4, 27, 170, 4, + 4, 116, 25, 4, 171, 4, 16, 172, 0, 0, 0, 173, 4, 4, 4, 79, + 0, 1, 1, 174, 4, 103, 175, 0, 176, 177, 178, 0, 4, 4, 4, 70, + 0, 0, 4, 92, 0, 0, 0, 0, 0, 0, 0, 0, 79, 4, 179, 0, + 4, 25, 101, 70, 116, 4, 180, 0, 4, 4, 4, 4, 116, 0, 0, 0, + 4, 181, 4, 49, 0, 0, 0, 0, 4, 
134, 102, 16, 0, 0, 0, 0, + 182, 183, 102, 134, 103, 0, 0, 0, 102, 166, 0, 0, 4, 184, 0, 0, + 185, 96, 0, 79, 79, 0, 76, 186, 4, 102, 102, 33, 27, 0, 0, 0, + 4, 4, 129, 0, 0, 0, 0, 0, 4, 4, 187, 0, 148, 32, 25, 129, + 4, 33, 25, 188, 4, 4, 189, 0, 190, 191, 0, 0, 0, 25, 4, 129, + 50, 47, 192, 49, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 193, 0, + 0, 0, 0, 0, 4, 194, 0, 0, 4, 103, 195, 0, 4, 102, 0, 0, + 0, 0, 0, 0, 0, 4, 4, 196, 0, 0, 0, 0, 0, 0, 4, 32, + 4, 4, 4, 4, 32, 0, 0, 0, 4, 4, 4, 142, 0, 0, 0, 0, + 4, 142, 0, 0, 0, 0, 0, 0, 4, 32, 103, 0, 0, 0, 25, 155, + 4, 134, 49, 197, 91, 0, 0, 0, 4, 4, 198, 103, 170, 0, 0, 0, + 199, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 200, 201, 0, 0, 0, + 4, 4, 202, 4, 203, 204, 205, 4, 206, 207, 208, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 209, 210, 84, 202, 202, 131, 131, 211, 211, 212, 0, + 4, 4, 4, 4, 4, 4, 186, 0, 205, 213, 214, 215, 216, 217, 0, 0, + 0, 25, 218, 218, 107, 0, 0, 0, 4, 4, 4, 4, 4, 4, 134, 0, + 4, 92, 4, 4, 4, 4, 4, 4, 116, 0, 0, 0, 0, 0, 0, 0, }; static RE_UINT8 re_alphabetic_stage_5[] = { 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, 32, 0, 0, 0, - 0, 0, 223, 60, 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, - 3, 252, 255, 255, 255, 0, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, + 0, 0, 223, 188, 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, + 3, 252, 255, 255, 255, 255, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, 255, 0, 0, 0, 0, 0, 255, 191, 182, 0, 255, 255, 255, 7, 7, 0, 0, 0, 255, 7, 255, 255, 255, 254, 0, 192, 255, 255, 255, 255, 239, 31, 254, 225, 0, 156, 0, 0, 255, 255, 0, 224, 255, 255, 255, 255, 3, 0, 0, 252, 255, 255, 255, 7, 48, 4, 255, 255, 255, 252, 255, 31, 0, 0, - 255, 255, 255, 1, 253, 31, 0, 0, 240, 3, 255, 127, 255, 255, 255, 239, - 255, 223, 225, 255, 15, 0, 254, 254, 238, 159, 249, 255, 255, 253, 197, 227, + 255, 255, 255, 1, 255, 255, 7, 0, 240, 3, 255, 255, 255, 255, 255, 239, + 255, 223, 225, 255, 15, 0, 254, 255, 
239, 159, 249, 255, 255, 253, 197, 227, 159, 89, 128, 176, 15, 0, 3, 0, 238, 135, 249, 255, 255, 253, 109, 195, 135, 25, 2, 94, 0, 0, 63, 0, 238, 191, 251, 255, 255, 253, 237, 227, - 191, 27, 1, 0, 15, 0, 0, 0, 159, 25, 192, 176, 15, 0, 2, 0, - 236, 199, 61, 214, 24, 199, 255, 195, 199, 29, 129, 0, 238, 223, 253, 255, - 255, 253, 239, 227, 223, 29, 96, 3, 236, 223, 253, 255, 223, 29, 96, 64, - 15, 0, 6, 0, 255, 255, 255, 231, 223, 93, 128, 0, 15, 0, 0, 252, - 236, 255, 127, 252, 255, 255, 251, 47, 127, 128, 95, 255, 0, 0, 12, 0, - 255, 255, 255, 7, 127, 32, 0, 0, 150, 37, 240, 254, 174, 236, 255, 59, - 95, 32, 0, 240, 1, 0, 0, 0, 255, 254, 255, 255, 255, 31, 254, 255, - 3, 255, 255, 254, 255, 255, 255, 31, 255, 255, 127, 249, 231, 193, 255, 255, - 127, 64, 0, 48, 191, 32, 255, 255, 255, 255, 255, 247, 255, 61, 127, 61, - 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, 255, 255, 61, 255, - 255, 255, 255, 135, 255, 255, 0, 0, 255, 255, 31, 0, 255, 159, 255, 255, - 255, 199, 1, 0, 255, 223, 15, 0, 255, 255, 15, 0, 255, 223, 13, 0, - 255, 255, 207, 255, 255, 1, 128, 16, 255, 255, 255, 0, 255, 7, 255, 255, - 255, 255, 63, 0, 255, 15, 255, 1, 255, 63, 31, 0, 255, 15, 255, 255, - 255, 3, 0, 0, 255, 255, 255, 15, 255, 255, 255, 127, 254, 255, 31, 0, - 128, 0, 0, 0, 255, 255, 239, 255, 239, 15, 0, 0, 255, 243, 0, 252, - 191, 255, 3, 0, 0, 224, 0, 252, 255, 255, 255, 63, 0, 222, 111, 0, - 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, - 255, 31, 220, 31, 0, 0, 2, 128, 0, 0, 255, 31, 132, 252, 47, 62, - 80, 189, 255, 243, 224, 67, 0, 0, 255, 1, 0, 0, 0, 0, 192, 255, - 255, 127, 255, 255, 31, 120, 12, 0, 255, 128, 0, 0, 255, 255, 127, 0, - 127, 127, 127, 127, 0, 128, 0, 0, 224, 0, 0, 0, 254, 3, 62, 31, - 255, 255, 127, 224, 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, - 255, 31, 255, 255, 0, 12, 0, 0, 255, 127, 240, 143, 255, 255, 255, 128, - 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, 255, 7, 0, 0, - 0, 0, 0, 255, 187, 
247, 255, 255, 0, 0, 252, 8, 255, 255, 7, 0, - 255, 255, 247, 255, 255, 63, 0, 0, 255, 255, 127, 4, 5, 0, 0, 56, - 255, 255, 60, 0, 126, 126, 126, 0, 127, 127, 0, 0, 15, 0, 255, 255, + 191, 27, 1, 0, 15, 0, 0, 0, 238, 159, 249, 255, 159, 25, 192, 176, + 15, 0, 2, 0, 236, 199, 61, 214, 24, 199, 255, 195, 199, 29, 129, 0, + 239, 223, 253, 255, 255, 253, 255, 227, 223, 29, 96, 3, 238, 223, 253, 255, + 255, 253, 239, 227, 223, 29, 96, 64, 15, 0, 6, 0, 255, 255, 255, 231, + 223, 93, 128, 0, 15, 0, 0, 252, 236, 255, 127, 252, 255, 255, 251, 47, + 127, 128, 95, 255, 0, 0, 12, 0, 255, 255, 255, 7, 127, 32, 0, 0, + 150, 37, 240, 254, 174, 236, 255, 59, 95, 32, 0, 240, 1, 0, 0, 0, + 255, 254, 255, 255, 255, 31, 254, 255, 3, 255, 255, 254, 255, 255, 255, 31, + 255, 255, 127, 249, 231, 193, 255, 255, 127, 64, 0, 48, 191, 32, 255, 255, + 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, + 61, 255, 127, 255, 255, 255, 61, 255, 255, 255, 255, 135, 255, 255, 0, 0, + 255, 255, 31, 0, 255, 159, 255, 255, 255, 199, 255, 1, 255, 223, 15, 0, + 255, 255, 15, 0, 255, 223, 13, 0, 255, 255, 207, 255, 255, 1, 128, 16, + 255, 255, 255, 0, 255, 7, 255, 255, 255, 255, 63, 0, 255, 255, 255, 127, + 255, 15, 255, 1, 255, 63, 31, 0, 255, 15, 255, 255, 255, 3, 0, 0, + 255, 255, 255, 15, 254, 255, 31, 0, 128, 0, 0, 0, 255, 255, 239, 255, + 239, 15, 0, 0, 255, 243, 0, 252, 191, 255, 3, 0, 0, 224, 0, 252, + 255, 255, 255, 63, 0, 222, 111, 0, 128, 255, 31, 0, 255, 255, 63, 63, + 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, + 0, 0, 2, 128, 0, 0, 255, 31, 132, 252, 47, 62, 80, 189, 255, 243, + 224, 67, 0, 0, 255, 1, 0, 0, 0, 0, 192, 255, 255, 127, 255, 255, + 31, 120, 12, 0, 255, 128, 0, 0, 255, 255, 127, 0, 127, 127, 127, 127, + 0, 128, 0, 0, 224, 0, 0, 0, 254, 3, 62, 31, 255, 255, 127, 224, + 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 255, 255, + 0, 12, 0, 0, 255, 127, 240, 143, 255, 255, 255, 191, 0, 0, 128, 255, + 252, 255, 
255, 255, 255, 121, 255, 255, 255, 63, 3, 0, 187, 247, 255, 255, + 0, 0, 252, 8, 255, 255, 247, 255, 223, 255, 0, 124, 255, 63, 0, 0, + 255, 255, 127, 196, 5, 0, 0, 56, 255, 255, 60, 0, 126, 126, 126, 0, + 127, 127, 255, 255, 48, 0, 0, 0, 255, 7, 0, 0, 15, 0, 255, 255, 127, 248, 255, 255, 255, 63, 255, 255, 255, 255, 255, 3, 127, 0, 248, 224, 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, 0, 0, 255, 15, 0, 0, 223, 255, 192, 255, 255, 255, 252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, 255, 255, 1, 0, - 15, 255, 62, 0, 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, - 111, 240, 239, 254, 63, 0, 0, 0, 30, 0, 0, 0, 7, 0, 0, 0, - 31, 0, 255, 255, 3, 0, 0, 0, 255, 255, 223, 255, 255, 255, 255, 223, + 15, 255, 62, 0, 255, 0, 255, 255, 63, 253, 255, 255, 255, 255, 191, 145, + 255, 255, 255, 192, 111, 240, 239, 254, 31, 0, 0, 0, 63, 0, 0, 0, + 255, 255, 71, 0, 30, 0, 0, 4, 255, 255, 251, 255, 255, 255, 159, 0, + 159, 25, 128, 224, 179, 0, 0, 0, 255, 255, 63, 127, 17, 0, 0, 0, + 0, 0, 0, 128, 248, 255, 255, 224, 31, 0, 255, 255, 3, 0, 0, 0, + 255, 7, 255, 31, 255, 1, 255, 67, 255, 255, 223, 255, 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, 255, 253, 255, 255, 247, 15, 0, 0, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, - 255, 251, 255, 15, 238, 251, 255, 15, + 255, 251, 255, 15, 238, 251, 255, 15, 255, 3, 255, 255, }; -/* Alphabetic: 1817 bytes. */ +/* Alphabetic: 2005 bytes. 
*/ RE_UINT32 re_get_alphabetic(RE_UINT32 ch) { RE_UINT32 code; @@ -4927,34 +5306,37 @@ static RE_UINT8 re_lowercase_stage_1[] = { }; static RE_UINT8 re_lowercase_stage_2[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 5, - 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 0, 1, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 5, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, + 8, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }; static RE_UINT8 re_lowercase_stage_3[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 5, - 6, 3, 7, 3, 3, 3, 8, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, 10, 3, 11, - 3, 3, 12, 3, 3, 3, 3, 3, 3, 3, 13, 14, 3, 3, 3, 3, + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 7, 8, 9, 10, 11, 6, 6, 12, 6, 6, 6, + 6, 6, 6, 6, 13, 14, 6, 6, 6, 6, 6, 6, 6, 6, 15, 16, + 6, 6, 6, 17, 6, 6, 6, 6, 6, 6, 6, 18, 6, 6, 6, 19, + 6, 6, 6, 6, 20, 6, 6, 6, 21, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 22, 23, 24, 25, }; static RE_UINT8 re_lowercase_stage_4[] = { 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 5, 13, 14, 15, 16, 17, 18, 19, 0, 0, 20, 21, 22, 23, 24, 25, 0, 26, 15, 5, 27, 5, 28, 5, 5, 29, 0, 30, 31, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, 15, 0, 0, 5, 5, 5, 5, 32, 5, 5, 5, 33, 34, 35, 36, 34, 37, 38, 39, 0, 0, 0, 40, 41, 0, 0, 0, 42, 43, 44, 26, 45, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 26, 46, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 26, 47, 48, 5, 5, 5, 49, 15, 50, 0, 0, 0, 0, 0, 0, - 0, 0, 5, 51, 52, 0, 0, 0, 0, 53, 5, 54, 55, 56, 0, 57, - 0, 0, 0, 0, 0, 0, 0, 0, 58, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 59, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 61, 62, 63, 31, 64, 65, 66, 67, 68, 69, 70, 71, 72, 61, 62, 73, - 31, 64, 74, 
60, 67, 75, 76, 77, 78, 74, 79, 26, 80, 67, 81, 0, + 0, 0, 0, 0, 0, 0, 26, 46, 0, 26, 47, 48, 5, 5, 5, 49, + 15, 50, 0, 0, 0, 0, 0, 0, 0, 0, 5, 51, 52, 0, 0, 0, + 0, 53, 5, 54, 55, 56, 0, 57, 0, 26, 58, 59, 0, 0, 0, 0, + 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 61, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, + 63, 64, 65, 31, 66, 67, 68, 69, 70, 71, 72, 73, 74, 63, 64, 75, + 31, 66, 76, 62, 69, 77, 78, 79, 80, 76, 81, 26, 82, 69, 83, 0, }; static RE_UINT8 re_lowercase_stage_5[] = { @@ -4965,23 +5347,23 @@ static RE_UINT8 re_lowercase_stage_5[] = { 255, 255, 239, 255, 255, 255, 255, 1, 3, 0, 0, 0, 31, 0, 0, 0, 32, 0, 0, 0, 0, 0, 138, 60, 0, 0, 1, 0, 0, 240, 255, 255, 255, 127, 227, 170, 170, 170, 47, 25, 0, 0, 255, 255, 2, 168, 170, 170, - 84, 213, 170, 170, 170, 0, 0, 0, 254, 255, 255, 255, 255, 0, 0, 0, + 84, 213, 170, 170, 170, 170, 0, 0, 254, 255, 255, 255, 255, 0, 0, 0, 170, 170, 234, 191, 255, 0, 63, 0, 255, 0, 255, 0, 63, 0, 255, 0, 255, 0, 255, 63, 255, 0, 223, 64, 220, 0, 207, 0, 255, 0, 220, 0, 0, 0, 2, 128, 0, 0, 255, 31, 0, 196, 8, 0, 0, 128, 16, 50, 192, 67, 0, 0, 16, 0, 0, 0, 255, 3, 0, 0, 255, 255, 255, 127, 98, 21, 218, 63, 26, 80, 8, 0, 191, 32, 0, 0, 170, 42, 0, 0, - 170, 170, 170, 0, 168, 170, 171, 170, 170, 170, 255, 149, 170, 80, 10, 0, - 170, 2, 0, 0, 0, 0, 0, 7, 127, 0, 248, 0, 0, 255, 255, 255, - 255, 255, 0, 0, 0, 0, 0, 252, 255, 255, 15, 0, 0, 192, 223, 255, - 252, 255, 255, 15, 0, 0, 192, 235, 239, 255, 0, 0, 0, 252, 255, 255, - 15, 0, 0, 192, 255, 255, 255, 0, 0, 0, 252, 255, 255, 15, 0, 0, - 192, 255, 255, 255, 0, 192, 255, 255, 0, 0, 192, 255, 63, 0, 0, 0, - 252, 255, 255, 247, 3, 0, 0, 240, 255, 255, 223, 15, 255, 127, 63, 0, - 255, 253, 0, 0, 247, 11, 0, 0, + 170, 170, 170, 58, 168, 170, 171, 170, 170, 170, 255, 149, 170, 80, 186, 170, + 170, 2, 0, 0, 0, 0, 0, 7, 255, 255, 255, 247, 48, 0, 0, 0, + 127, 0, 248, 0, 0, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 252, + 255, 255, 15, 0, 0, 192, 223, 255, 252, 255, 255, 15, 0, 0, 
192, 235, + 239, 255, 0, 0, 0, 252, 255, 255, 15, 0, 0, 192, 255, 255, 255, 0, + 0, 0, 252, 255, 255, 15, 0, 0, 192, 255, 255, 255, 0, 192, 255, 255, + 0, 0, 192, 255, 63, 0, 0, 0, 252, 255, 255, 247, 3, 0, 0, 240, + 255, 255, 223, 15, 255, 127, 63, 0, 255, 253, 0, 0, 247, 11, 0, 0, }; -/* Lowercase: 697 bytes. */ +/* Lowercase: 745 bytes. */ RE_UINT32 re_get_lowercase(RE_UINT32 ch) { RE_UINT32 code; @@ -4991,13 +5373,13 @@ RE_UINT32 re_get_lowercase(RE_UINT32 ch) { f = ch >> 16; code = ch ^ (f << 16); - pos = (RE_UINT32)re_lowercase_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; + pos = (RE_UINT32)re_lowercase_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; pos = (RE_UINT32)re_lowercase_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_lowercase_stage_3[pos + f] << 4; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_lowercase_stage_3[pos + f] << 3; f = code >> 5; code ^= f << 5; pos = (RE_UINT32)re_lowercase_stage_4[pos + f] << 5; @@ -5015,57 +5397,60 @@ static RE_UINT8 re_uppercase_stage_1[] = { }; static RE_UINT8 re_uppercase_stage_2[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 5, - 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 0, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, + 8, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 10, 1, 1, 1, 11, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }; static RE_UINT8 re_uppercase_stage_3[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 5, - 6, 3, 7, 3, 3, 3, 8, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 10, - 3, 3, 11, 3, 3, 3, 3, 3, 3, 3, 12, 13, 3, 3, 3, 3, + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, + 6, 10, 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 12, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, 
6, 6, 6, 6, 6, 15, + 6, 6, 6, 6, 16, 6, 6, 6, 17, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 18, 19, 20, 21, 6, 22, 6, 6, 6, 6, 6, 6, }; static RE_UINT8 re_uppercase_stage_4[] = { 0, 0, 1, 0, 0, 0, 2, 0, 3, 4, 5, 6, 7, 8, 9, 10, 3, 11, 12, 0, 0, 0, 0, 0, 0, 0, 0, 13, 14, 15, 16, 17, 18, 19, 0, 3, 20, 3, 21, 3, 3, 22, 23, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 18, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 24, 0, 3, 3, 3, 3, 25, 3, 3, 3, 26, 27, 28, 29, 0, 30, 31, 32, - 0, 0, 0, 0, 0, 0, 0, 0, 33, 34, 35, 19, 36, 0, 0, 0, - 0, 0, 0, 0, 0, 37, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 18, 38, 0, 39, 3, 3, 3, 40, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 3, 41, 42, 0, 0, 0, 0, 43, 3, 44, 45, 46, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 18, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 33, 34, 35, 19, 36, 0, 0, 0, 0, 0, 0, 0, 0, 37, 19, 0, + 18, 38, 0, 39, 3, 3, 3, 40, 0, 0, 3, 41, 42, 0, 0, 0, + 0, 43, 3, 44, 45, 46, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, + 18, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0, 0, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 48, 49, 50, 51, 61, 62, 54, 55, 51, 63, 64, 65, 66, 37, 38, 54, 67, 68, 0, + 0, 54, 69, 69, 55, 0, 0, 0, }; static RE_UINT8 re_uppercase_stage_5[] = { 0, 0, 0, 0, 254, 255, 255, 7, 255, 255, 127, 127, 85, 85, 85, 85, 85, 85, 85, 170, 170, 84, 85, 85, 85, 85, 85, 43, 214, 206, 219, 177, 213, 210, 174, 17, 144, 164, 170, 74, 85, 85, 210, 85, 85, 85, 5, 108, - 122, 85, 0, 0, 0, 0, 69, 0, 64, 215, 254, 255, 251, 15, 0, 0, + 122, 85, 0, 0, 0, 0, 69, 128, 64, 215, 254, 255, 251, 15, 0, 0, 0, 128, 28, 85, 85, 85, 144, 230, 255, 255, 255, 255, 255, 255, 0, 0, - 1, 84, 85, 85, 171, 42, 85, 85, 85, 0, 254, 255, 255, 255, 127, 0, + 1, 84, 85, 85, 171, 42, 85, 85, 85, 85, 254, 255, 255, 255, 127, 0, 191, 32, 0, 0, 85, 85, 21, 64, 0, 255, 0, 63, 0, 255, 0, 255, 0, 63, 0, 170, 0, 255, 0, 0, 0, 0, 0, 15, 0, 15, 0, 15, 0, 31, 0, 15, 132, 56, 39, 62, 80, 61, 15, 192, 32, 
0, 0, 0, 8, 0, 0, 0, 0, 0, 192, 255, 255, 127, 0, 0, 157, 234, 37, 192, - 5, 40, 4, 0, 85, 21, 0, 0, 85, 85, 85, 0, 84, 85, 84, 85, - 85, 85, 0, 106, 85, 40, 5, 0, 85, 5, 0, 0, 255, 0, 0, 0, + 5, 40, 4, 0, 85, 21, 0, 0, 85, 85, 85, 5, 84, 85, 84, 85, + 85, 85, 0, 106, 85, 40, 69, 85, 85, 61, 3, 0, 255, 0, 0, 0, 255, 255, 255, 3, 0, 0, 240, 255, 255, 63, 0, 0, 0, 255, 255, 255, 3, 0, 0, 208, 100, 222, 63, 0, 0, 0, 255, 255, 255, 3, 0, 0, 176, 231, 223, 31, 0, 0, 0, 123, 95, 252, 1, 0, 0, 240, 255, 255, 63, 0, 0, 0, 3, 0, 0, 240, 255, 255, 63, 0, 1, 0, 0, 0, 252, 255, 255, 7, 0, 0, 0, 240, 255, 255, 31, 0, 255, 1, 0, 0, - 0, 4, 0, 0, + 0, 4, 0, 0, 255, 3, 255, 255, }; -/* Uppercase: 629 bytes. */ +/* Uppercase: 673 bytes. */ RE_UINT32 re_get_uppercase(RE_UINT32 ch) { RE_UINT32 code; @@ -5075,13 +5460,13 @@ RE_UINT32 re_get_uppercase(RE_UINT32 ch) { f = ch >> 16; code = ch ^ (f << 16); - pos = (RE_UINT32)re_uppercase_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; + pos = (RE_UINT32)re_uppercase_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; pos = (RE_UINT32)re_uppercase_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_uppercase_stage_3[pos + f] << 4; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_uppercase_stage_3[pos + f] << 3; f = code >> 5; code ^= f << 5; pos = (RE_UINT32)re_uppercase_stage_4[pos + f] << 5; @@ -5099,56 +5484,60 @@ static RE_UINT8 re_cased_stage_1[] = { }; static RE_UINT8 re_cased_stage_2[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 5, - 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 0, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 6, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8, + 9, 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 11, 1, 1, 1, 12, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }; static RE_UINT8 re_cased_stage_3[] = { - 0, 1, 2, 3, 
3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 5, 6, - 7, 3, 8, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 11, 3, 12, - 3, 3, 13, 3, 3, 3, 3, 3, 3, 3, 14, 15, 3, 3, 3, 3, + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, 10, + 11, 12, 6, 6, 13, 6, 6, 6, 6, 6, 6, 6, 14, 15, 6, 6, + 6, 6, 6, 6, 6, 6, 16, 17, 6, 6, 6, 18, 6, 6, 6, 6, + 6, 6, 6, 19, 6, 6, 6, 20, 6, 6, 6, 6, 21, 6, 6, 6, + 22, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 23, 24, 25, 26, + 6, 27, 6, 6, 6, 6, 6, 6, }; static RE_UINT8 re_cased_stage_4[] = { 0, 0, 1, 1, 0, 2, 3, 3, 4, 4, 4, 4, 4, 5, 6, 4, 4, 4, 4, 4, 7, 8, 9, 10, 0, 0, 11, 12, 13, 14, 4, 15, 4, 4, 4, 4, 16, 4, 4, 4, 4, 17, 18, 19, 20, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 4, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, - 4, 4, 4, 4, 4, 4, 4, 4, 22, 4, 23, 24, 4, 25, 26, 27, - 0, 0, 0, 28, 29, 0, 0, 0, 30, 31, 32, 4, 33, 0, 0, 0, - 0, 0, 0, 0, 0, 34, 4, 35, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 21, 0, + 4, 4, 4, 4, 4, 4, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, + 22, 4, 23, 24, 4, 25, 26, 27, 0, 0, 0, 28, 29, 0, 0, 0, + 30, 31, 32, 4, 33, 0, 0, 0, 0, 0, 0, 0, 0, 34, 4, 35, 4, 36, 37, 4, 4, 4, 4, 38, 4, 21, 0, 0, 0, 0, 0, 0, - 0, 0, 4, 39, 40, 0, 0, 0, 0, 41, 4, 4, 42, 43, 0, 44, - 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, - 4, 4, 46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 4, 47, 4, 48, 49, 50, 4, 51, 52, 53, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 54, 55, 5, 47, 47, 36, 36, 56, 56, 57, 0, + 0, 0, 4, 39, 24, 0, 0, 0, 0, 40, 4, 4, 41, 42, 0, 43, + 0, 44, 5, 45, 0, 0, 0, 0, 46, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 0, 0, 0, 0, 0, 4, 4, 47, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 4, 4, 0, 4, 4, 48, 4, 49, 50, 51, 4, + 52, 53, 54, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 55, 56, 5, + 48, 48, 36, 36, 57, 57, 58, 0, 0, 44, 59, 59, 35, 0, 0, 0, }; static RE_UINT8 
re_cased_stage_5[] = { 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 255, 247, 240, 255, 255, 255, 255, 255, 239, 255, 255, 255, 255, 1, 3, 0, 0, 0, 31, 0, 0, 0, 32, 0, 0, 0, - 0, 0, 207, 60, 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, - 3, 252, 255, 255, 255, 0, 254, 255, 255, 255, 127, 0, 254, 255, 255, 255, + 0, 0, 207, 188, 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, + 3, 252, 255, 255, 255, 255, 254, 255, 255, 255, 127, 0, 254, 255, 255, 255, 255, 0, 0, 0, 191, 32, 0, 0, 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 255, 63, 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, 0, 0, 2, 128, 0, 0, 255, 31, 132, 252, 47, 62, 80, 189, 31, 242, 224, 67, 0, 0, 24, 0, 0, 0, 0, 0, 192, 255, 255, 3, 0, 0, 255, 127, 255, 255, 255, 255, 255, 127, 31, 120, 12, 0, 255, 63, 0, 0, - 255, 255, 255, 0, 252, 255, 255, 255, 255, 120, 15, 0, 255, 7, 0, 0, - 0, 0, 0, 7, 127, 0, 248, 0, 255, 255, 0, 0, 255, 255, 223, 255, - 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, - 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, - 255, 253, 255, 255, 247, 15, 0, 0, + 252, 255, 255, 255, 255, 120, 255, 255, 255, 63, 3, 0, 0, 0, 0, 7, + 0, 0, 255, 255, 48, 0, 0, 0, 127, 0, 248, 0, 255, 255, 0, 0, + 255, 255, 223, 255, 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, + 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, + 253, 255, 255, 247, 255, 253, 255, 255, 247, 15, 0, 0, 255, 3, 255, 255, }; -/* Cased: 617 bytes. */ +/* Cased: 681 bytes. 
*/ RE_UINT32 re_get_cased(RE_UINT32 ch) { RE_UINT32 code; @@ -5158,13 +5547,13 @@ RE_UINT32 re_get_cased(RE_UINT32 ch) { f = ch >> 16; code = ch ^ (f << 16); - pos = (RE_UINT32)re_cased_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; + pos = (RE_UINT32)re_cased_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; pos = (RE_UINT32)re_cased_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_cased_stage_3[pos + f] << 4; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_cased_stage_3[pos + f] << 3; f = code >> 5; code ^= f << 5; pos = (RE_UINT32)re_cased_stage_4[pos + f] << 5; @@ -5186,9 +5575,9 @@ static RE_UINT8 re_case_ignorable_stage_2[] = { 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 11, 12, 13, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 14, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 15, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 15, 7, 7, 16, 7, 7, 17, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 16, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 18, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, }; static RE_UINT8 re_case_ignorable_stage_3[] = { @@ -5197,10 +5586,11 @@ static RE_UINT8 re_case_ignorable_stage_3[] = { 26, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 28, 29, 1, 30, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 31, 1, 1, 1, 32, 1, 33, 34, 35, 36, 37, 38, 1, 1, 1, 1, - 1, 1, 1, 39, 1, 1, 40, 41, 1, 42, 1, 1, 1, 1, 1, 1, - 1, 1, 43, 1, 1, 1, 1, 1, 44, 45, 1, 1, 1, 1, 46, 1, - 1, 1, 1, 1, 1, 1, 1, 47, 1, 48, 49, 1, 1, 1, 1, 1, - 50, 51, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 39, 1, 1, 40, 41, 1, 42, 43, 44, 1, 1, 1, 1, + 1, 1, 45, 1, 1, 1, 1, 1, 46, 47, 48, 49, 50, 51, 52, 1, + 1, 1, 53, 54, 1, 1, 1, 55, 1, 1, 1, 1, 56, 1, 1, 1, + 1, 57, 58, 1, 1, 1, 1, 1, 59, 1, 1, 1, 1, 1, 1, 1, + 60, 61, 1, 1, 1, 1, 1, 1, }; static RE_UINT8 re_case_ignorable_stage_4[] = { @@ -5210,7 +5600,7 @@ static RE_UINT8 re_case_ignorable_stage_4[] = { 15, 0, 16, 17, 0, 0, 18, 19, 20, 5, 21, 0, 0, 22, 0, 23, 24, 25, 
26, 0, 0, 0, 0, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 33, 37, 38, 36, 33, 39, 35, 32, 40, 41, 35, 42, 0, 43, 0, - 0, 44, 45, 35, 0, 40, 46, 35, 0, 0, 34, 35, 0, 0, 47, 0, + 3, 44, 45, 35, 32, 40, 46, 35, 32, 0, 34, 35, 0, 0, 47, 0, 0, 48, 49, 0, 0, 50, 51, 0, 52, 53, 0, 54, 55, 56, 57, 0, 0, 58, 59, 60, 61, 0, 0, 33, 0, 0, 62, 0, 0, 0, 0, 0, 63, 63, 64, 64, 0, 65, 66, 0, 67, 0, 68, 0, 0, 69, 0, 0, @@ -5222,24 +5612,29 @@ static RE_UINT8 re_case_ignorable_stage_4[] = { 101, 102, 0, 0, 103, 0, 0, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 106, 0, 0, 107, 108, 0, 0, 109, 6, 78, 0, 17, 110, 0, 0, 52, 111, 112, 0, 0, 0, 0, 113, 114, - 0, 115, 116, 0, 28, 117, 100, 0, 0, 118, 119, 17, 0, 120, 121, 122, - 0, 0, 0, 0, 0, 0, 0, 123, 2, 0, 0, 0, 0, 124, 78, 0, - 125, 126, 127, 0, 0, 0, 0, 108, 1, 2, 3, 17, 44, 0, 0, 128, - 0, 0, 0, 0, 0, 0, 0, 129, 130, 131, 0, 0, 0, 0, 0, 0, - 32, 132, 126, 0, 78, 133, 0, 0, 28, 134, 0, 0, 78, 135, 0, 0, - 0, 0, 0, 0, 0, 136, 0, 0, 0, 0, 0, 0, 137, 0, 0, 0, - 0, 0, 0, 138, 139, 140, 0, 0, 0, 0, 141, 0, 0, 0, 0, 0, - 32, 6, 6, 6, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 142, + 0, 115, 116, 0, 28, 117, 100, 112, 0, 118, 119, 120, 0, 121, 122, 123, + 0, 0, 87, 0, 0, 0, 0, 124, 2, 0, 0, 0, 0, 125, 78, 0, + 126, 25, 127, 0, 0, 0, 0, 128, 1, 2, 3, 17, 44, 0, 0, 129, + 0, 0, 0, 0, 0, 0, 0, 130, 0, 0, 0, 0, 0, 0, 0, 3, + 0, 0, 0, 131, 0, 0, 0, 0, 132, 133, 0, 0, 0, 0, 0, 112, + 32, 134, 135, 128, 78, 136, 0, 0, 28, 137, 0, 138, 78, 139, 0, 0, + 0, 140, 0, 0, 0, 0, 128, 141, 32, 33, 3, 142, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 143, 144, 0, 0, 0, 0, 0, 0, 145, 3, 0, + 0, 146, 3, 0, 0, 147, 0, 0, 0, 0, 0, 0, 0, 0, 0, 148, + 0, 149, 75, 0, 0, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, + 0, 0, 0, 0, 151, 75, 0, 0, 0, 0, 0, 152, 153, 154, 0, 0, + 0, 0, 155, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 149, 0, + 32, 6, 6, 6, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 156, }; static RE_UINT8 re_case_ignorable_stage_5[] = { 0, 0, 0, 0, 128, 64, 0, 4, 0, 0, 0, 64, 1, 0, 0, 0, 0, 161, 144, 
1, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, 48, 4, 176, 0, 0, 0, 248, 3, 0, 0, 0, 0, 0, 2, 0, 0, 254, 255, - 255, 255, 255, 191, 182, 0, 0, 0, 0, 0, 16, 0, 31, 0, 255, 23, + 255, 255, 255, 191, 182, 0, 0, 0, 0, 0, 16, 0, 63, 0, 255, 23, 1, 248, 255, 255, 0, 0, 1, 0, 0, 0, 192, 191, 255, 61, 0, 0, 0, 128, 2, 0, 255, 7, 0, 0, 192, 255, 1, 0, 0, 248, 63, 4, - 0, 0, 192, 255, 255, 63, 0, 0, 0, 0, 0, 14, 240, 255, 255, 127, + 0, 0, 192, 255, 255, 63, 0, 0, 0, 0, 0, 14, 240, 255, 255, 255, 7, 0, 0, 0, 0, 0, 0, 20, 254, 33, 254, 0, 12, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 16, 30, 32, 0, 0, 12, 0, 0, 0, 6, 0, 0, 0, 134, 57, 2, 0, 0, 0, 35, 0, 190, 33, 0, 0, @@ -5251,27 +5646,31 @@ static RE_UINT8 re_case_ignorable_stage_5[] = { 1, 0, 30, 0, 100, 32, 0, 32, 0, 0, 0, 224, 0, 0, 28, 0, 0, 0, 12, 0, 0, 0, 176, 63, 64, 254, 143, 32, 0, 120, 0, 0, 8, 0, 0, 0, 0, 2, 0, 0, 135, 1, 4, 14, 0, 0, 128, 9, - 0, 0, 64, 127, 229, 31, 248, 159, 128, 0, 0, 0, 15, 0, 0, 0, - 0, 0, 208, 23, 0, 248, 15, 0, 3, 0, 0, 0, 60, 11, 0, 0, + 0, 0, 64, 127, 229, 31, 248, 159, 128, 0, 255, 127, 15, 0, 0, 0, + 0, 0, 208, 23, 0, 248, 15, 0, 3, 0, 0, 0, 60, 59, 0, 0, 64, 163, 3, 0, 0, 240, 207, 0, 0, 0, 0, 63, 0, 0, 247, 255, - 253, 33, 16, 0, 0, 240, 255, 255, 255, 7, 0, 1, 0, 0, 0, 248, - 127, 0, 0, 240, 0, 0, 0, 160, 3, 224, 0, 224, 0, 224, 0, 96, + 253, 33, 16, 3, 0, 240, 255, 255, 255, 7, 0, 1, 0, 0, 0, 248, + 255, 255, 63, 240, 0, 0, 0, 160, 3, 224, 0, 224, 0, 224, 0, 96, 0, 248, 0, 3, 144, 124, 0, 0, 223, 255, 2, 128, 0, 0, 255, 31, 255, 255, 1, 0, 0, 0, 0, 48, 0, 128, 3, 0, 0, 128, 0, 128, 0, 128, 0, 0, 32, 0, 0, 0, 0, 60, 62, 8, 0, 0, 0, 126, 0, 0, 0, 112, 0, 0, 32, 0, 0, 16, 0, 0, 0, 128, 247, 191, - 0, 0, 0, 128, 0, 0, 3, 0, 0, 7, 0, 0, 68, 8, 0, 0, + 0, 0, 0, 176, 0, 0, 3, 0, 0, 7, 0, 0, 68, 8, 0, 0, 96, 0, 0, 0, 16, 0, 0, 0, 255, 255, 3, 0, 192, 63, 0, 0, 128, 255, 3, 0, 0, 0, 200, 19, 0, 126, 102, 0, 8, 16, 0, 0, - 0, 0, 157, 193, 2, 0, 0, 32, 0, 48, 88, 0, 32, 33, 0, 0, - 0, 0, 252, 255, 255, 
255, 8, 0, 127, 0, 0, 0, 0, 0, 36, 0, - 8, 0, 0, 14, 0, 0, 0, 32, 110, 240, 0, 0, 0, 0, 0, 135, - 0, 0, 0, 255, 0, 0, 120, 38, 128, 239, 31, 0, 0, 0, 192, 127, - 0, 40, 191, 0, 0, 128, 255, 255, 128, 3, 248, 255, 231, 15, 0, 0, - 0, 60, 0, 0, 28, 0, 0, 0, 255, 255, 0, 0, + 0, 0, 1, 16, 0, 0, 157, 193, 2, 0, 0, 32, 0, 48, 88, 0, + 32, 33, 0, 0, 0, 0, 252, 255, 255, 255, 8, 0, 0, 0, 36, 0, + 0, 0, 0, 128, 8, 0, 0, 14, 0, 0, 0, 32, 0, 0, 192, 7, + 110, 240, 0, 0, 0, 0, 0, 135, 0, 0, 0, 255, 127, 0, 0, 0, + 0, 0, 120, 38, 128, 239, 31, 0, 0, 0, 8, 0, 0, 0, 192, 127, + 0, 128, 211, 0, 248, 7, 0, 0, 192, 31, 31, 0, 0, 0, 248, 133, + 13, 0, 0, 0, 0, 0, 60, 176, 0, 0, 248, 167, 0, 40, 191, 0, + 0, 0, 31, 0, 0, 0, 127, 0, 0, 128, 255, 255, 0, 0, 0, 96, + 128, 3, 248, 255, 231, 15, 0, 0, 0, 60, 0, 0, 28, 0, 0, 0, + 255, 255, 0, 0, }; -/* Case_Ignorable: 1254 bytes. */ +/* Case_Ignorable: 1406 bytes. */ RE_UINT32 re_get_case_ignorable(RE_UINT32 ch) { RE_UINT32 code; @@ -5308,7 +5707,7 @@ static RE_UINT8 re_changes_when_lowercased_stage_1[] = { static RE_UINT8 re_changes_when_lowercased_stage_2[] = { 0, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, - 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 8, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }; @@ -5317,7 +5716,7 @@ static RE_UINT8 re_changes_when_lowercased_stage_3[] = { 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, 6, 10, 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 12, 6, 6, 6, 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, 6, 6, 6, 6, 6, 15, - 6, 6, 6, 6, 16, 6, 6, 6, + 6, 6, 6, 6, 16, 6, 6, 6, 17, 6, 6, 6, 6, 6, 6, 6, }; static RE_UINT8 re_changes_when_lowercased_stage_4[] = { @@ -5329,25 +5728,25 @@ static RE_UINT8 re_changes_when_lowercased_stage_4[] = { 0, 33, 0, 19, 34, 0, 0, 0, 0, 0, 0, 0, 0, 35, 19, 0, 18, 36, 0, 37, 3, 3, 3, 38, 0, 0, 3, 39, 40, 0, 0, 0, 0, 41, 3, 42, 43, 44, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 18, 45, 0, 0, 0, 0, 0, 0, + 18, 45, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0, 0, }; static RE_UINT8 re_changes_when_lowercased_stage_5[] = { 0, 0, 0, 0, 254, 255, 255, 7, 255, 255, 127, 127, 85, 85, 85, 85, 85, 85, 85, 170, 170, 84, 85, 85, 85, 85, 85, 43, 214, 206, 219, 177, 213, 210, 174, 17, 176, 173, 170, 74, 85, 85, 214, 85, 85, 85, 5, 108, - 122, 85, 0, 0, 0, 0, 69, 0, 64, 215, 254, 255, 251, 15, 0, 0, + 122, 85, 0, 0, 0, 0, 69, 128, 64, 215, 254, 255, 251, 15, 0, 0, 0, 128, 0, 85, 85, 85, 144, 230, 255, 255, 255, 255, 255, 255, 0, 0, - 1, 84, 85, 85, 171, 42, 85, 85, 85, 0, 254, 255, 255, 255, 127, 0, + 1, 84, 85, 85, 171, 42, 85, 85, 85, 85, 254, 255, 255, 255, 127, 0, 191, 32, 0, 0, 85, 85, 21, 64, 0, 255, 0, 63, 0, 255, 0, 255, 0, 63, 0, 170, 0, 255, 0, 0, 0, 255, 0, 31, 0, 31, 0, 15, 0, 31, 0, 31, 64, 12, 4, 0, 8, 0, 0, 0, 0, 0, 192, 255, 255, 127, 0, 0, 157, 234, 37, 192, 5, 40, 4, 0, 85, 21, 0, 0, - 85, 85, 85, 0, 84, 85, 84, 85, 85, 85, 0, 106, 85, 40, 5, 0, - 85, 5, 0, 0, 255, 0, 0, 0, + 85, 85, 85, 5, 84, 85, 84, 85, 85, 85, 0, 106, 85, 40, 69, 85, + 85, 61, 3, 0, 255, 0, 0, 0, }; -/* Changes_When_Lowercased: 490 bytes. */ +/* Changes_When_Lowercased: 506 bytes. 
*/ RE_UINT32 re_get_changes_when_lowercased(RE_UINT32 ch) { RE_UINT32 code; @@ -5384,7 +5783,7 @@ static RE_UINT8 re_changes_when_uppercased_stage_1[] = { static RE_UINT8 re_changes_when_uppercased_stage_2[] = { 0, 1, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, - 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 7, 1, 1, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }; @@ -5393,6 +5792,7 @@ static RE_UINT8 re_changes_when_uppercased_stage_3[] = { 6, 6, 6, 6, 6, 7, 8, 9, 6, 10, 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 12, 13, 6, 6, 6, 6, 6, 6, 6, 6, 14, 15, 6, 6, 6, 16, 6, 6, 6, 17, 6, 6, 6, 6, 18, 6, 6, 6, + 19, 6, 6, 6, 6, 6, 6, 6, }; static RE_UINT8 re_changes_when_uppercased_stage_4[] = { @@ -5405,27 +5805,27 @@ static RE_UINT8 re_changes_when_uppercased_stage_4[] = { 0, 23, 42, 43, 5, 5, 5, 44, 24, 45, 0, 0, 0, 0, 0, 0, 0, 0, 5, 46, 47, 0, 0, 0, 0, 48, 5, 49, 50, 51, 0, 0, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 53, 54, 0, 0, 0, 0, 0, + 0, 53, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, }; static RE_UINT8 re_changes_when_uppercased_stage_5[] = { 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 0, 0, 0, 128, 255, 255, 127, 255, 170, 170, 170, 170, 170, 170, 170, 84, 85, 171, 170, 170, 170, 170, 170, 212, 41, 17, 36, 70, 42, 33, 81, 162, 96, 91, 85, 181, - 170, 170, 45, 170, 168, 170, 10, 144, 133, 170, 223, 10, 105, 139, 38, 32, - 9, 31, 4, 0, 32, 0, 0, 0, 0, 0, 138, 56, 0, 0, 1, 0, - 0, 240, 255, 255, 255, 127, 227, 170, 170, 170, 39, 9, 0, 0, 255, 255, - 255, 255, 255, 255, 2, 168, 170, 170, 84, 213, 170, 170, 170, 0, 0, 0, + 170, 170, 45, 170, 168, 170, 10, 144, 133, 170, 223, 26, 107, 155, 38, 32, + 137, 31, 4, 64, 32, 0, 0, 0, 0, 0, 138, 56, 0, 0, 1, 0, + 0, 240, 255, 255, 255, 127, 227, 170, 170, 170, 47, 9, 0, 0, 255, 255, + 255, 255, 255, 255, 2, 168, 170, 170, 84, 213, 170, 170, 170, 170, 0, 0, 254, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 34, 170, 170, 234, 15, 255, 0, 
63, 0, 255, 0, 255, 0, 63, 0, 255, 0, 255, 0, 255, 63, 255, 255, 223, 80, 220, 16, 207, 0, 255, 0, 220, 16, 0, 64, 0, 0, 16, 0, 0, 0, 255, 3, 0, 0, 255, 255, 255, 127, 98, 21, 72, 0, - 10, 80, 8, 0, 191, 32, 0, 0, 170, 42, 0, 0, 170, 170, 170, 0, - 168, 170, 168, 170, 170, 170, 0, 148, 170, 16, 10, 0, 170, 2, 0, 0, + 10, 80, 8, 0, 191, 32, 0, 0, 170, 42, 0, 0, 170, 170, 170, 10, + 168, 170, 168, 170, 170, 170, 0, 148, 170, 16, 138, 170, 170, 2, 0, 0, 127, 0, 248, 0, 0, 255, 255, 255, 255, 255, 0, 0, }; -/* Changes_When_Uppercased: 534 bytes. */ +/* Changes_When_Uppercased: 550 bytes. */ RE_UINT32 re_get_changes_when_uppercased(RE_UINT32 ch) { RE_UINT32 code; @@ -5462,7 +5862,7 @@ static RE_UINT8 re_changes_when_titlecased_stage_1[] = { static RE_UINT8 re_changes_when_titlecased_stage_2[] = { 0, 1, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, - 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 7, 1, 1, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }; @@ -5471,6 +5871,7 @@ static RE_UINT8 re_changes_when_titlecased_stage_3[] = { 6, 6, 6, 6, 6, 7, 8, 9, 6, 10, 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 12, 13, 6, 6, 6, 6, 6, 6, 6, 6, 14, 15, 6, 6, 6, 16, 6, 6, 6, 17, 6, 6, 6, 6, 18, 6, 6, 6, + 19, 6, 6, 6, 6, 6, 6, 6, }; static RE_UINT8 re_changes_when_titlecased_stage_4[] = { @@ -5483,27 +5884,27 @@ static RE_UINT8 re_changes_when_titlecased_stage_4[] = { 0, 23, 42, 43, 5, 5, 5, 44, 24, 45, 0, 0, 0, 0, 0, 0, 0, 0, 5, 46, 47, 0, 0, 0, 0, 48, 5, 49, 50, 51, 0, 0, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 53, 54, 0, 0, 0, 0, 0, + 0, 53, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, }; static RE_UINT8 re_changes_when_titlecased_stage_5[] = { 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 0, 0, 0, 128, 255, 255, 127, 255, 170, 170, 170, 170, 170, 170, 170, 84, 85, 171, 170, 170, 170, 170, 170, 212, 41, 17, 36, 70, 42, 33, 81, 162, 208, 86, 85, 181, - 170, 170, 43, 170, 168, 170, 10, 144, 133, 
170, 223, 10, 105, 139, 38, 32, - 9, 31, 4, 0, 32, 0, 0, 0, 0, 0, 138, 56, 0, 0, 1, 0, - 0, 240, 255, 255, 255, 127, 227, 170, 170, 170, 39, 9, 0, 0, 255, 255, - 255, 255, 255, 255, 2, 168, 170, 170, 84, 213, 170, 170, 170, 0, 0, 0, + 170, 170, 43, 170, 168, 170, 10, 144, 133, 170, 223, 26, 107, 155, 38, 32, + 137, 31, 4, 64, 32, 0, 0, 0, 0, 0, 138, 56, 0, 0, 1, 0, + 0, 240, 255, 255, 255, 127, 227, 170, 170, 170, 47, 9, 0, 0, 255, 255, + 255, 255, 255, 255, 2, 168, 170, 170, 84, 213, 170, 170, 170, 170, 0, 0, 254, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 34, 170, 170, 234, 15, 255, 0, 63, 0, 255, 0, 255, 0, 63, 0, 255, 0, 255, 0, 255, 63, 255, 0, 223, 64, 220, 0, 207, 0, 255, 0, 220, 0, 0, 64, 0, 0, 16, 0, 0, 0, 255, 3, 0, 0, 255, 255, 255, 127, 98, 21, 72, 0, - 10, 80, 8, 0, 191, 32, 0, 0, 170, 42, 0, 0, 170, 170, 170, 0, - 168, 170, 168, 170, 170, 170, 0, 148, 170, 16, 10, 0, 170, 2, 0, 0, + 10, 80, 8, 0, 191, 32, 0, 0, 170, 42, 0, 0, 170, 170, 170, 10, + 168, 170, 168, 170, 170, 170, 0, 148, 170, 16, 138, 170, 170, 2, 0, 0, 127, 0, 248, 0, 0, 255, 255, 255, 255, 255, 0, 0, }; -/* Changes_When_Titlecased: 534 bytes. */ +/* Changes_When_Titlecased: 550 bytes. 
*/ RE_UINT32 re_get_changes_when_titlecased(RE_UINT32 ch) { RE_UINT32 code; @@ -5540,7 +5941,7 @@ static RE_UINT8 re_changes_when_casefolded_stage_1[] = { static RE_UINT8 re_changes_when_casefolded_stage_2[] = { 0, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, - 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 8, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }; @@ -5549,7 +5950,7 @@ static RE_UINT8 re_changes_when_casefolded_stage_3[] = { 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, 6, 10, 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 12, 6, 6, 6, 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, 6, 15, 6, 6, 6, 16, - 6, 6, 6, 6, 17, 6, 6, 6, + 6, 6, 6, 6, 17, 6, 6, 6, 18, 6, 6, 6, 6, 6, 6, 6, }; static RE_UINT8 re_changes_when_casefolded_stage_4[] = { @@ -5562,25 +5963,26 @@ static RE_UINT8 re_changes_when_casefolded_stage_4[] = { 20, 39, 0, 40, 4, 4, 4, 41, 0, 0, 4, 42, 43, 0, 0, 0, 0, 44, 4, 45, 46, 47, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 20, 49, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 20, 0, 0, }; static RE_UINT8 re_changes_when_casefolded_stage_5[] = { 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 255, 255, 127, 255, 85, 85, 85, 85, 85, 85, 85, 170, 170, 86, 85, 85, 85, 85, 85, 171, 214, 206, 219, 177, 213, 210, 174, 17, 176, 173, 170, 74, 85, 85, 214, 85, - 85, 85, 5, 108, 122, 85, 0, 0, 32, 0, 0, 0, 0, 0, 69, 0, + 85, 85, 5, 108, 122, 85, 0, 0, 32, 0, 0, 0, 0, 0, 69, 128, 64, 215, 254, 255, 251, 15, 0, 0, 4, 128, 99, 85, 85, 85, 179, 230, 255, 255, 255, 255, 255, 255, 0, 0, 1, 84, 85, 85, 171, 42, 85, 85, - 85, 0, 254, 255, 255, 255, 127, 0, 128, 0, 0, 0, 191, 32, 0, 0, + 85, 85, 254, 255, 255, 255, 127, 0, 128, 0, 0, 0, 191, 32, 0, 0, 85, 85, 21, 76, 0, 255, 0, 63, 0, 255, 0, 255, 0, 63, 0, 170, 0, 255, 0, 0, 255, 255, 156, 31, 156, 31, 0, 15, 0, 31, 156, 31, 64, 12, 4, 0, 8, 0, 0, 0, 0, 0, 192, 255, 255, 127, 0, 0, - 157, 234, 37, 192, 5, 40, 4, 0, 85, 21, 0, 0, 85, 85, 85, 0, - 
84, 85, 84, 85, 85, 85, 0, 106, 85, 40, 5, 0, 85, 5, 0, 0, + 157, 234, 37, 192, 5, 40, 4, 0, 85, 21, 0, 0, 85, 85, 85, 5, + 84, 85, 84, 85, 85, 85, 0, 106, 85, 40, 69, 85, 85, 61, 3, 0, 127, 0, 248, 0, 255, 0, 0, 0, }; -/* Changes_When_Casefolded: 514 bytes. */ +/* Changes_When_Casefolded: 530 bytes. */ RE_UINT32 re_get_changes_when_casefolded(RE_UINT32 ch) { RE_UINT32 code; @@ -5617,7 +6019,7 @@ static RE_UINT8 re_changes_when_casemapped_stage_1[] = { static RE_UINT8 re_changes_when_casemapped_stage_2[] = { 0, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, - 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 8, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }; @@ -5626,7 +6028,7 @@ static RE_UINT8 re_changes_when_casemapped_stage_3[] = { 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, 10, 6, 11, 6, 6, 12, 6, 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, 6, 6, 6, 6, 6, 6, 15, 16, 6, 6, 6, 17, 6, 6, 6, 18, - 6, 6, 6, 6, 19, 6, 6, 6, + 6, 6, 6, 6, 19, 6, 6, 6, 20, 6, 6, 6, 6, 6, 6, 6, }; static RE_UINT8 re_changes_when_casemapped_stage_4[] = { @@ -5640,25 +6042,26 @@ static RE_UINT8 re_changes_when_casemapped_stage_4[] = { 4, 24, 0, 0, 0, 0, 0, 0, 0, 0, 4, 42, 43, 0, 0, 0, 0, 44, 4, 45, 46, 47, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 4, 4, 49, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 4, 4, 0, }; static RE_UINT8 re_changes_when_casemapped_stage_5[] = { 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 255, 254, 255, 223, 255, 247, 255, 243, 255, 179, - 240, 255, 255, 255, 253, 255, 15, 252, 255, 255, 223, 10, 105, 139, 38, 32, - 9, 31, 4, 0, 32, 0, 0, 0, 0, 0, 207, 56, 64, 215, 255, 255, - 251, 255, 255, 255, 255, 255, 227, 255, 255, 255, 183, 239, 3, 252, 255, 255, - 255, 0, 254, 255, 255, 255, 127, 0, 254, 255, 255, 255, 255, 0, 0, 0, + 240, 255, 255, 255, 253, 255, 15, 252, 255, 255, 223, 26, 107, 155, 38, 32, + 137, 31, 4, 64, 32, 0, 0, 0, 0, 
0, 207, 184, 64, 215, 255, 255, + 251, 255, 255, 255, 255, 255, 227, 255, 255, 255, 191, 239, 3, 252, 255, 255, + 255, 255, 254, 255, 255, 255, 127, 0, 254, 255, 255, 255, 255, 0, 0, 0, 191, 32, 0, 0, 0, 0, 0, 34, 255, 255, 255, 79, 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 255, 63, 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, 64, 12, 4, 0, 0, 64, 0, 0, 24, 0, 0, 0, 0, 0, 192, 255, 255, 3, 0, 0, 255, 127, 255, 255, 255, 255, 255, 127, - 255, 255, 109, 192, 15, 120, 12, 0, 255, 63, 0, 0, 255, 255, 255, 0, - 252, 255, 252, 255, 255, 255, 0, 254, 255, 56, 15, 0, 255, 7, 0, 0, + 255, 255, 109, 192, 15, 120, 12, 0, 255, 63, 0, 0, 255, 255, 255, 15, + 252, 255, 252, 255, 255, 255, 0, 254, 255, 56, 207, 255, 255, 63, 3, 0, 127, 0, 248, 0, 255, 255, 0, 0, }; -/* Changes_When_Casemapped: 530 bytes. */ +/* Changes_When_Casemapped: 546 bytes. */ RE_UINT32 re_get_changes_when_casemapped(RE_UINT32 ch) { RE_UINT32 code; @@ -5694,10 +6097,10 @@ static RE_UINT8 re_id_start_stage_1[] = { static RE_UINT8 re_id_start_stage_2[] = { 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, - 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, + 15, 16, 17, 18, 19, 13, 20, 13, 13, 13, 13, 13, 13, 21, 13, 13, + 13, 13, 13, 13, 13, 13, 22, 23, 13, 13, 24, 13, 13, 25, 13, 13, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, + 7, 7, 7, 7, 26, 7, 27, 28, 13, 13, 13, 13, 13, 13, 13, 29, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, }; @@ -5710,13 +6113,14 @@ static RE_UINT8 re_id_start_stage_3[] = { 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 46, 47, 1, 48, 49, 50, 51, 52, 53, 54, 55, 31, 31, 
31, - 56, 57, 58, 59, 60, 31, 31, 31, 61, 62, 31, 31, 31, 31, 63, 31, - 1, 1, 1, 64, 65, 31, 31, 31, 1, 1, 1, 1, 66, 31, 31, 31, - 1, 1, 67, 31, 31, 31, 31, 68, 69, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 70, 71, 72, 73, 31, 31, 31, 31, 31, 31, 74, 31, - 1, 1, 1, 1, 1, 1, 75, 1, 1, 1, 1, 1, 1, 1, 1, 76, - 77, 31, 31, 31, 31, 31, 31, 31, 1, 1, 77, 31, 31, 31, 31, 31, + 31, 1, 46, 47, 1, 48, 49, 50, 51, 52, 53, 54, 55, 56, 1, 57, + 58, 59, 60, 61, 62, 31, 31, 31, 63, 64, 65, 66, 67, 68, 69, 31, + 70, 31, 71, 31, 31, 31, 31, 31, 1, 1, 1, 72, 73, 31, 31, 31, + 1, 1, 1, 1, 74, 31, 31, 31, 1, 1, 75, 76, 31, 31, 31, 77, + 78, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 79, 31, 31, 31, + 31, 31, 31, 31, 80, 81, 82, 83, 84, 31, 31, 31, 31, 31, 85, 31, + 1, 1, 1, 1, 1, 1, 86, 1, 1, 1, 1, 1, 1, 1, 1, 87, + 88, 31, 31, 31, 31, 31, 31, 31, 1, 1, 88, 31, 31, 31, 31, 31, }; static RE_UINT8 re_id_start_stage_4[] = { @@ -5725,94 +6129,104 @@ static RE_UINT8 re_id_start_stage_4[] = { 4, 4, 4, 4, 11, 4, 4, 4, 4, 12, 13, 14, 15, 0, 16, 17, 0, 4, 18, 19, 4, 4, 20, 21, 22, 23, 24, 4, 4, 25, 26, 27, 28, 29, 30, 0, 0, 31, 0, 0, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 36, 45, 48, 49, 50, 51, 46, 0, - 52, 53, 54, 47, 52, 53, 55, 56, 52, 57, 58, 59, 60, 61, 62, 0, - 14, 63, 62, 0, 64, 65, 66, 0, 67, 0, 68, 69, 70, 0, 0, 0, - 4, 71, 72, 73, 74, 4, 75, 76, 4, 4, 77, 4, 78, 79, 80, 4, - 81, 4, 82, 0, 23, 4, 4, 83, 14, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 84, 1, 4, 4, 85, 86, 87, 87, 88, 4, 89, 90, 0, - 0, 4, 4, 91, 4, 92, 4, 93, 94, 0, 16, 95, 4, 96, 97, 0, - 98, 4, 83, 0, 0, 99, 0, 0, 100, 89, 101, 0, 102, 103, 4, 104, - 4, 105, 106, 107, 0, 0, 0, 108, 4, 4, 4, 4, 4, 4, 0, 0, - 109, 4, 110, 107, 4, 111, 112, 113, 0, 0, 0, 114, 115, 0, 0, 0, - 116, 117, 118, 4, 119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 120, 121, 4, 4, 4, 4, 122, 4, 75, 4, 123, 98, 124, 124, 0, - 125, 126, 14, 4, 127, 14, 4, 76, 100, 128, 4, 4, 129, 82, 0, 16, - 4, 4, 4, 4, 4, 93, 0, 0, 4, 4, 4, 4, 4, 
4, 69, 0, - 4, 4, 4, 4, 69, 0, 16, 107, 130, 131, 4, 132, 91, 4, 4, 23, - 133, 134, 4, 4, 135, 18, 0, 136, 137, 138, 4, 89, 134, 89, 0, 139, - 26, 140, 62, 94, 32, 141, 142, 0, 4, 119, 143, 144, 4, 145, 146, 147, - 148, 149, 0, 0, 0, 0, 4, 138, 4, 4, 4, 4, 4, 150, 151, 152, - 4, 4, 4, 153, 4, 4, 154, 0, 155, 156, 157, 4, 4, 87, 158, 4, - 4, 107, 16, 4, 159, 4, 15, 160, 0, 0, 0, 161, 4, 4, 4, 94, - 0, 1, 1, 162, 4, 121, 163, 0, 164, 165, 166, 0, 4, 4, 4, 82, - 0, 0, 4, 83, 0, 0, 0, 0, 0, 0, 0, 0, 94, 4, 167, 0, - 121, 16, 18, 0, 107, 4, 168, 0, 4, 4, 4, 4, 107, 0, 0, 0, - 169, 170, 93, 0, 0, 0, 0, 0, 93, 154, 0, 0, 4, 171, 0, 0, - 172, 89, 0, 94, 0, 0, 0, 0, 4, 93, 93, 141, 0, 0, 0, 0, - 4, 4, 119, 0, 0, 0, 0, 0, 102, 91, 0, 0, 102, 23, 16, 119, - 102, 62, 0, 0, 102, 141, 173, 0, 0, 0, 0, 0, 4, 18, 0, 0, - 4, 4, 4, 129, 0, 0, 0, 0, 4, 4, 4, 138, 0, 0, 0, 0, - 4, 129, 0, 0, 0, 0, 0, 0, 4, 30, 0, 0, 0, 0, 0, 0, - 4, 4, 174, 0, 158, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, - 4, 4, 175, 4, 176, 177, 178, 4, 179, 180, 181, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 182, 183, 76, 175, 175, 120, 120, 184, 184, 143, 0, - 178, 185, 186, 187, 188, 189, 0, 0, 4, 4, 4, 4, 4, 4, 98, 0, - 4, 83, 4, 4, 4, 4, 4, 4, 107, 0, 0, 0, 0, 0, 0, 0, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 45, 49, 50, 51, 52, 46, 0, + 53, 54, 55, 47, 53, 56, 57, 58, 53, 59, 60, 61, 62, 63, 64, 0, + 14, 65, 64, 0, 66, 67, 68, 0, 69, 0, 70, 71, 72, 0, 0, 0, + 4, 73, 74, 75, 76, 4, 77, 78, 4, 4, 79, 4, 80, 81, 82, 4, + 83, 4, 84, 0, 23, 4, 4, 85, 14, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 86, 1, 4, 4, 87, 88, 89, 89, 90, 4, 91, 92, 0, + 0, 4, 4, 93, 4, 94, 4, 95, 96, 0, 16, 97, 4, 98, 99, 0, + 100, 4, 85, 0, 0, 101, 0, 0, 102, 91, 103, 0, 104, 105, 4, 106, + 4, 107, 108, 109, 0, 0, 0, 110, 4, 4, 4, 4, 4, 4, 0, 0, + 111, 4, 112, 109, 4, 113, 114, 115, 0, 0, 0, 116, 117, 0, 0, 0, + 118, 119, 120, 4, 121, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 122, 96, 4, 4, 4, 4, 123, 4, 77, 4, 124, 100, 125, 125, 0, + 126, 127, 14, 4, 128, 14, 4, 78, 102, 
129, 4, 4, 130, 84, 0, 16, + 4, 4, 4, 4, 4, 95, 0, 0, 4, 4, 4, 4, 4, 4, 71, 0, + 4, 4, 4, 4, 71, 0, 16, 109, 131, 132, 4, 133, 109, 4, 4, 23, + 134, 135, 4, 4, 136, 137, 0, 134, 138, 139, 4, 91, 135, 91, 0, 140, + 26, 141, 64, 142, 32, 31, 143, 144, 4, 121, 145, 146, 4, 147, 148, 149, + 150, 151, 78, 152, 0, 0, 4, 139, 4, 4, 4, 4, 4, 153, 154, 155, + 4, 4, 4, 156, 4, 4, 157, 0, 158, 159, 160, 4, 4, 89, 161, 4, + 4, 109, 16, 4, 162, 4, 15, 163, 0, 0, 0, 164, 4, 4, 4, 142, + 0, 1, 1, 165, 4, 96, 166, 0, 167, 168, 169, 0, 4, 4, 4, 84, + 0, 0, 4, 85, 0, 0, 0, 0, 0, 0, 0, 0, 142, 4, 170, 0, + 4, 16, 171, 95, 109, 4, 172, 0, 4, 4, 4, 4, 109, 0, 0, 0, + 4, 173, 4, 107, 0, 0, 0, 0, 4, 100, 95, 15, 0, 0, 0, 0, + 174, 175, 95, 100, 96, 0, 0, 0, 95, 157, 0, 0, 4, 176, 0, 0, + 177, 91, 0, 142, 142, 0, 70, 178, 4, 95, 95, 31, 89, 0, 0, 0, + 4, 4, 121, 0, 0, 0, 0, 0, 104, 93, 0, 0, 104, 23, 16, 121, + 104, 64, 16, 179, 104, 31, 180, 0, 181, 98, 0, 0, 0, 16, 96, 0, + 48, 45, 182, 47, 0, 0, 0, 0, 0, 0, 0, 0, 4, 23, 183, 0, + 0, 0, 0, 0, 4, 130, 0, 0, 4, 23, 184, 0, 4, 18, 0, 0, + 0, 0, 0, 0, 0, 4, 4, 185, 0, 0, 0, 0, 0, 0, 4, 30, + 4, 4, 4, 4, 30, 0, 0, 0, 4, 4, 4, 130, 0, 0, 0, 0, + 4, 130, 0, 0, 0, 0, 0, 0, 4, 30, 96, 0, 0, 0, 16, 186, + 4, 23, 107, 187, 23, 0, 0, 0, 4, 4, 188, 0, 161, 0, 0, 0, + 47, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 189, 190, 0, 0, 0, + 4, 4, 191, 4, 192, 193, 194, 4, 195, 196, 197, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 198, 199, 78, 191, 191, 122, 122, 200, 200, 145, 0, + 4, 4, 4, 4, 4, 4, 178, 0, 194, 201, 202, 203, 204, 205, 0, 0, + 4, 4, 4, 4, 4, 4, 100, 0, 4, 85, 4, 4, 4, 4, 4, 4, + 109, 0, 0, 0, 0, 0, 0, 0, }; static RE_UINT8 re_id_start_stage_5[] = { 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 255, 255, 127, 255, - 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, 0, 0, 223, 60, + 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, 0, 0, 223, 188, 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, 3, 252, 255, 255, - 255, 0, 254, 255, 255, 255, 127, 2, 254, 
255, 255, 255, 255, 0, 0, 0, + 255, 255, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, 255, 0, 0, 0, 0, 0, 255, 255, 255, 7, 7, 0, 255, 7, 0, 0, 0, 192, 254, 255, 255, 255, 47, 0, 96, 192, 0, 156, 0, 0, 253, 255, 255, 255, 0, 0, 0, 224, 255, 255, 63, 0, 2, 0, 0, 252, 255, 255, 255, 7, 48, 4, - 255, 255, 63, 4, 16, 1, 0, 0, 255, 255, 255, 1, 253, 31, 0, 0, - 240, 255, 255, 255, 255, 255, 255, 35, 0, 0, 1, 255, 3, 0, 254, 254, - 224, 159, 249, 255, 255, 253, 197, 35, 0, 64, 0, 176, 3, 0, 3, 0, + 255, 255, 63, 4, 16, 1, 0, 0, 255, 255, 255, 1, 255, 255, 7, 0, + 240, 255, 255, 255, 255, 255, 255, 35, 0, 0, 1, 255, 3, 0, 254, 255, + 225, 159, 249, 255, 255, 253, 197, 35, 0, 64, 0, 176, 3, 0, 3, 0, 224, 135, 249, 255, 255, 253, 109, 3, 0, 0, 0, 94, 0, 0, 28, 0, 224, 191, 251, 255, 255, 253, 237, 35, 0, 0, 1, 0, 3, 0, 0, 0, - 0, 0, 0, 176, 3, 0, 2, 0, 232, 199, 61, 214, 24, 199, 255, 3, - 224, 223, 253, 255, 255, 253, 239, 35, 0, 0, 0, 3, 0, 0, 0, 64, - 3, 0, 6, 0, 255, 255, 255, 39, 0, 64, 0, 0, 3, 0, 0, 252, - 224, 255, 127, 252, 255, 255, 251, 47, 127, 0, 0, 0, 255, 255, 13, 0, - 150, 37, 240, 254, 174, 236, 13, 32, 95, 0, 0, 240, 1, 0, 0, 0, - 255, 254, 255, 255, 255, 31, 0, 0, 0, 31, 0, 0, 255, 7, 0, 128, - 0, 0, 63, 60, 98, 192, 225, 255, 3, 64, 0, 0, 191, 32, 255, 255, - 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, - 61, 255, 127, 255, 255, 255, 61, 255, 255, 255, 255, 7, 255, 255, 31, 0, - 255, 159, 255, 255, 255, 199, 1, 0, 255, 223, 3, 0, 255, 255, 3, 0, - 255, 223, 1, 0, 255, 255, 15, 0, 0, 0, 128, 16, 255, 255, 255, 0, - 255, 5, 255, 255, 255, 255, 63, 0, 255, 255, 255, 31, 255, 63, 31, 0, - 255, 15, 0, 0, 254, 0, 0, 0, 255, 255, 127, 0, 128, 0, 0, 0, - 224, 255, 255, 255, 224, 15, 0, 0, 248, 255, 255, 255, 1, 192, 0, 252, - 63, 0, 0, 0, 15, 0, 0, 0, 0, 224, 0, 252, 255, 255, 255, 63, - 0, 222, 99, 0, 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 95, - 220, 31, 207, 15, 255, 31, 220, 31, 0, 0, 2, 128, 0, 0, 255, 31, - 132, 
252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 0, 255, 1, 0, 0, - 255, 127, 255, 255, 255, 255, 255, 127, 31, 120, 12, 0, 255, 128, 0, 0, - 127, 127, 127, 127, 224, 0, 0, 0, 254, 3, 62, 31, 255, 255, 127, 248, - 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 255, 255, 0, 12, 0, 0, - 255, 127, 0, 128, 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, - 0, 0, 0, 255, 187, 247, 255, 255, 7, 0, 0, 0, 0, 0, 252, 8, - 63, 0, 255, 255, 255, 255, 7, 0, 0, 128, 0, 0, 247, 15, 0, 0, - 255, 255, 127, 4, 255, 255, 98, 62, 5, 0, 0, 56, 255, 7, 28, 0, - 126, 126, 126, 0, 127, 127, 0, 0, 15, 0, 255, 255, 127, 248, 255, 255, - 255, 255, 255, 15, 255, 63, 255, 255, 255, 255, 255, 3, 127, 0, 248, 160, - 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, - 0, 0, 255, 15, 0, 0, 223, 255, 192, 255, 255, 255, 252, 252, 252, 28, - 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, 255, 255, 1, 0, - 15, 255, 62, 0, 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, - 1, 0, 239, 254, 30, 0, 0, 0, 31, 0, 1, 0, 255, 255, 223, 255, + 224, 159, 249, 255, 0, 0, 0, 176, 3, 0, 2, 0, 232, 199, 61, 214, + 24, 199, 255, 3, 224, 223, 253, 255, 255, 253, 255, 35, 0, 0, 0, 3, + 255, 253, 239, 35, 0, 0, 0, 64, 3, 0, 6, 0, 255, 255, 255, 39, + 0, 64, 0, 0, 3, 0, 0, 252, 224, 255, 127, 252, 255, 255, 251, 47, + 127, 0, 0, 0, 255, 255, 13, 0, 150, 37, 240, 254, 174, 236, 13, 32, + 95, 0, 0, 240, 1, 0, 0, 0, 255, 254, 255, 255, 255, 31, 0, 0, + 0, 31, 0, 0, 255, 7, 0, 128, 0, 0, 63, 60, 98, 192, 225, 255, + 3, 64, 0, 0, 191, 32, 255, 255, 255, 255, 255, 247, 255, 61, 127, 61, + 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, 255, 255, 61, 255, + 255, 255, 255, 7, 255, 255, 31, 0, 255, 159, 255, 255, 255, 199, 255, 1, + 255, 223, 3, 0, 255, 255, 3, 0, 255, 223, 1, 0, 255, 255, 15, 0, + 0, 0, 128, 16, 255, 255, 255, 0, 255, 5, 255, 255, 255, 255, 63, 0, + 255, 255, 255, 127, 255, 63, 31, 0, 255, 15, 0, 0, 254, 0, 0, 0, + 255, 255, 127, 0, 128, 0, 0, 0, 224, 255, 
255, 255, 224, 15, 0, 0, + 248, 255, 255, 255, 1, 192, 0, 252, 63, 0, 0, 0, 15, 0, 0, 0, + 0, 224, 0, 252, 255, 255, 255, 63, 0, 222, 99, 0, 255, 255, 63, 63, + 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, + 0, 0, 2, 128, 0, 0, 255, 31, 132, 252, 47, 63, 80, 253, 255, 243, + 224, 67, 0, 0, 255, 1, 0, 0, 255, 127, 255, 255, 31, 120, 12, 0, + 255, 128, 0, 0, 127, 127, 127, 127, 224, 0, 0, 0, 254, 3, 62, 31, + 255, 255, 127, 248, 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 255, 255, + 0, 12, 0, 0, 255, 127, 0, 128, 0, 0, 128, 255, 252, 255, 255, 255, + 255, 121, 255, 255, 255, 63, 3, 0, 187, 247, 255, 255, 7, 0, 0, 0, + 0, 0, 252, 8, 63, 0, 255, 255, 255, 255, 255, 31, 0, 128, 0, 0, + 223, 255, 0, 124, 247, 15, 0, 0, 255, 255, 127, 196, 255, 255, 98, 62, + 5, 0, 0, 56, 255, 7, 28, 0, 126, 126, 126, 0, 127, 127, 255, 255, + 48, 0, 0, 0, 15, 0, 255, 255, 127, 248, 255, 255, 255, 255, 255, 15, + 255, 63, 255, 255, 255, 255, 255, 3, 127, 0, 248, 160, 255, 253, 127, 95, + 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, 0, 0, 255, 15, + 0, 0, 223, 255, 192, 255, 255, 255, 252, 252, 252, 28, 255, 239, 255, 255, + 127, 255, 255, 183, 255, 63, 255, 63, 255, 255, 1, 0, 255, 7, 255, 255, + 15, 255, 62, 0, 255, 0, 255, 255, 63, 253, 255, 255, 255, 255, 191, 145, + 255, 255, 255, 192, 1, 0, 239, 254, 31, 0, 0, 0, 255, 255, 71, 0, + 30, 0, 0, 4, 255, 255, 251, 255, 0, 0, 0, 224, 176, 0, 0, 0, + 16, 0, 0, 0, 0, 0, 0, 128, 255, 63, 0, 0, 248, 255, 255, 224, + 31, 0, 1, 0, 255, 7, 255, 31, 255, 1, 255, 3, 255, 255, 223, 255, 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, 255, 253, 255, 255, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, 238, 251, 255, 15, }; -/* ID_Start: 1753 bytes. */ +/* ID_Start: 1921 bytes. 
*/ RE_UINT32 re_get_id_start(RE_UINT32 ch) { RE_UINT32 code; @@ -5849,12 +6263,12 @@ static RE_UINT8 re_id_continue_stage_1[] = { static RE_UINT8 re_id_continue_stage_2[] = { 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, - 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, + 15, 16, 17, 18, 19, 13, 20, 13, 13, 13, 13, 13, 13, 21, 13, 13, + 13, 13, 13, 13, 13, 13, 22, 23, 13, 13, 24, 13, 13, 25, 13, 13, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, + 7, 7, 7, 7, 26, 7, 27, 28, 13, 13, 13, 13, 13, 13, 13, 29, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 28, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 30, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, }; static RE_UINT8 re_id_continue_stage_3[] = { @@ -5865,118 +6279,128 @@ static RE_UINT8 re_id_continue_stage_3[] = { 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 46, 47, 1, 48, 49, 50, 51, 52, 53, 54, 55, 31, 31, 31, - 56, 57, 58, 59, 60, 31, 31, 31, 61, 62, 31, 31, 31, 31, 63, 31, - 1, 1, 1, 64, 65, 31, 31, 31, 1, 1, 1, 1, 66, 31, 31, 31, - 1, 1, 67, 31, 31, 31, 31, 68, 69, 31, 31, 31, 31, 31, 31, 31, - 31, 70, 71, 31, 72, 73, 74, 75, 31, 31, 31, 31, 31, 31, 76, 31, - 1, 1, 1, 1, 1, 1, 77, 1, 1, 1, 1, 1, 1, 1, 1, 78, - 79, 31, 31, 31, 31, 31, 31, 31, 1, 1, 79, 31, 31, 31, 31, 31, - 31, 80, 31, 31, 31, 31, 31, 31, + 31, 1, 46, 47, 1, 48, 49, 50, 51, 52, 53, 54, 55, 56, 1, 57, + 58, 59, 60, 61, 62, 31, 31, 31, 63, 64, 65, 66, 67, 68, 69, 31, + 70, 31, 71, 31, 31, 31, 31, 31, 1, 1, 1, 72, 73, 31, 31, 31, + 1, 1, 1, 1, 74, 31, 31, 31, 1, 1, 75, 76, 31, 31, 31, 77, + 78, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 79, 31, 31, 31, + 31, 80, 81, 31, 82, 
83, 84, 85, 86, 31, 31, 31, 31, 31, 87, 31, + 1, 1, 1, 1, 1, 1, 88, 1, 1, 1, 1, 1, 1, 1, 1, 89, + 90, 31, 31, 31, 31, 31, 31, 31, 1, 1, 90, 31, 31, 31, 31, 31, + 31, 91, 31, 31, 31, 31, 31, 31, }; static RE_UINT8 re_id_continue_stage_4[] = { 0, 1, 2, 3, 0, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 8, 6, 6, 6, 9, 10, 11, 6, 12, - 6, 6, 6, 6, 13, 6, 6, 6, 6, 14, 15, 16, 14, 17, 18, 19, - 20, 6, 6, 21, 6, 6, 22, 23, 24, 6, 25, 6, 6, 26, 6, 27, - 6, 28, 29, 0, 0, 30, 0, 31, 6, 6, 6, 32, 33, 34, 35, 36, - 37, 38, 39, 40, 41, 42, 43, 44, 33, 42, 45, 46, 47, 48, 49, 50, - 51, 52, 53, 44, 54, 55, 56, 57, 54, 58, 59, 60, 61, 62, 63, 64, - 16, 65, 66, 0, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 0, - 6, 6, 77, 6, 78, 6, 79, 80, 6, 6, 81, 6, 82, 83, 84, 6, - 85, 6, 58, 86, 87, 6, 6, 88, 16, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 89, 3, 6, 6, 90, 91, 88, 92, 93, 6, 6, 94, 95, - 96, 6, 6, 97, 6, 98, 6, 99, 75, 100, 101, 102, 6, 103, 104, 0, - 29, 6, 105, 106, 107, 108, 0, 0, 6, 6, 109, 110, 6, 6, 6, 92, - 6, 97, 111, 78, 0, 0, 112, 113, 6, 6, 6, 6, 6, 6, 6, 114, - 115, 6, 116, 78, 6, 117, 118, 119, 0, 120, 121, 122, 123, 0, 123, 124, - 125, 126, 127, 6, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 6, 129, 105, 6, 6, 6, 6, 130, 6, 79, 6, 131, 113, 132, 132, 6, - 133, 134, 16, 6, 135, 16, 6, 80, 136, 137, 6, 6, 138, 65, 0, 24, - 6, 6, 6, 6, 6, 99, 0, 0, 6, 6, 6, 6, 6, 6, 139, 0, - 6, 6, 6, 6, 139, 0, 24, 78, 140, 141, 6, 142, 143, 6, 6, 26, - 144, 145, 6, 6, 146, 147, 0, 148, 6, 149, 6, 92, 6, 6, 150, 151, - 6, 152, 92, 75, 6, 6, 153, 0, 6, 113, 154, 155, 6, 6, 156, 157, - 158, 159, 0, 0, 0, 0, 6, 160, 6, 6, 6, 6, 6, 161, 162, 29, - 6, 6, 6, 152, 6, 6, 163, 0, 164, 165, 166, 6, 6, 26, 167, 6, - 6, 78, 24, 6, 168, 6, 149, 169, 87, 170, 171, 172, 6, 6, 6, 75, - 1, 2, 3, 101, 6, 105, 173, 0, 174, 175, 176, 0, 6, 6, 6, 65, - 0, 0, 6, 88, 0, 0, 0, 177, 0, 0, 0, 0, 75, 6, 178, 0, - 105, 24, 147, 0, 78, 6, 179, 0, 6, 6, 6, 6, 78, 95, 0, 0, - 180, 181, 99, 0, 0, 0, 0, 0, 99, 163, 0, 0, 
6, 182, 0, 0, - 183, 184, 0, 75, 0, 0, 0, 0, 6, 99, 99, 185, 0, 0, 0, 0, - 6, 6, 128, 0, 0, 0, 0, 0, 6, 6, 186, 50, 6, 65, 24, 187, - 6, 188, 0, 0, 6, 6, 150, 0, 0, 0, 0, 0, 6, 97, 95, 0, - 6, 6, 6, 138, 0, 0, 0, 0, 6, 6, 6, 189, 0, 0, 0, 0, - 6, 138, 0, 0, 0, 0, 0, 0, 6, 190, 0, 0, 0, 0, 0, 0, - 6, 6, 191, 105, 192, 0, 0, 0, 193, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 194, 195, 196, 0, 0, 0, 0, 197, 0, 0, 0, 0, 0, - 6, 6, 188, 6, 198, 199, 200, 6, 201, 202, 203, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 204, 205, 80, 188, 188, 129, 129, 206, 206, 207, 6, - 200, 208, 209, 210, 211, 212, 0, 0, 6, 6, 6, 6, 6, 6, 113, 0, - 6, 88, 6, 6, 6, 6, 6, 6, 78, 0, 0, 0, 0, 0, 0, 0, - 6, 6, 6, 6, 6, 6, 6, 87, + 6, 6, 6, 6, 13, 6, 6, 6, 6, 14, 15, 16, 17, 18, 19, 20, + 21, 6, 6, 22, 6, 6, 23, 24, 25, 6, 26, 6, 6, 27, 6, 28, + 6, 29, 30, 0, 0, 31, 0, 32, 6, 6, 6, 33, 34, 35, 36, 37, + 38, 39, 40, 41, 42, 43, 44, 45, 46, 43, 47, 48, 49, 50, 51, 52, + 53, 54, 55, 45, 56, 57, 58, 59, 56, 60, 61, 62, 63, 64, 65, 66, + 16, 67, 68, 0, 69, 70, 71, 0, 72, 73, 74, 75, 76, 77, 78, 0, + 6, 6, 79, 6, 80, 6, 81, 82, 6, 6, 83, 6, 84, 85, 86, 6, + 87, 6, 60, 88, 89, 6, 6, 90, 16, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 91, 3, 6, 6, 92, 93, 90, 94, 95, 6, 6, 96, 97, + 98, 6, 6, 99, 6, 100, 6, 101, 102, 103, 104, 105, 6, 106, 107, 0, + 30, 6, 102, 108, 109, 110, 0, 0, 6, 6, 111, 112, 6, 6, 6, 94, + 6, 99, 113, 80, 0, 0, 114, 115, 6, 6, 6, 6, 6, 6, 6, 116, + 117, 6, 118, 80, 6, 119, 120, 121, 0, 122, 123, 124, 125, 0, 125, 126, + 127, 128, 129, 6, 130, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 6, 131, 102, 6, 6, 6, 6, 132, 6, 81, 6, 133, 134, 135, 135, 6, + 136, 137, 16, 6, 138, 16, 6, 82, 139, 140, 6, 6, 141, 67, 0, 25, + 6, 6, 6, 6, 6, 101, 0, 0, 6, 6, 6, 6, 6, 6, 142, 0, + 6, 6, 6, 6, 142, 0, 25, 80, 143, 144, 6, 145, 18, 6, 6, 27, + 146, 147, 6, 6, 148, 149, 0, 146, 6, 150, 6, 94, 6, 6, 151, 152, + 6, 153, 94, 77, 6, 6, 154, 102, 6, 134, 155, 156, 6, 6, 157, 158, + 159, 160, 82, 161, 0, 0, 6, 162, 6, 6, 6, 6, 6, 163, 164, 30, 
+ 6, 6, 6, 153, 6, 6, 165, 0, 166, 167, 168, 6, 6, 27, 169, 6, + 6, 80, 25, 6, 170, 6, 150, 171, 89, 172, 173, 174, 6, 6, 6, 77, + 1, 2, 3, 104, 6, 102, 175, 0, 176, 177, 178, 0, 6, 6, 6, 67, + 0, 0, 6, 90, 0, 0, 0, 179, 0, 0, 0, 0, 77, 6, 180, 181, + 6, 25, 100, 67, 80, 6, 182, 0, 6, 6, 6, 6, 80, 97, 0, 0, + 6, 183, 6, 184, 0, 0, 0, 0, 6, 134, 101, 150, 0, 0, 0, 0, + 185, 186, 101, 134, 102, 0, 0, 0, 101, 165, 0, 0, 6, 187, 0, 0, + 188, 189, 0, 77, 77, 0, 74, 190, 6, 101, 101, 31, 27, 0, 0, 0, + 6, 6, 130, 0, 0, 0, 0, 0, 6, 6, 190, 191, 6, 67, 25, 192, + 6, 193, 25, 194, 6, 6, 195, 0, 196, 99, 0, 0, 0, 25, 6, 197, + 46, 43, 198, 199, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 200, 0, + 0, 0, 0, 0, 6, 201, 181, 0, 6, 6, 202, 0, 6, 99, 97, 0, + 0, 0, 0, 0, 0, 6, 6, 203, 0, 0, 0, 0, 0, 0, 6, 204, + 6, 6, 6, 6, 204, 0, 0, 0, 6, 6, 6, 141, 0, 0, 0, 0, + 6, 141, 0, 0, 0, 0, 0, 0, 6, 204, 102, 97, 0, 0, 25, 105, + 6, 134, 205, 206, 89, 0, 0, 0, 6, 6, 207, 102, 208, 0, 0, 0, + 209, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 210, 211, 0, 0, 0, + 0, 0, 0, 212, 213, 214, 0, 0, 0, 0, 215, 0, 0, 0, 0, 0, + 6, 6, 193, 6, 216, 217, 218, 6, 219, 220, 221, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 222, 223, 82, 193, 193, 131, 131, 224, 224, 225, 6, + 6, 6, 6, 6, 6, 6, 226, 0, 218, 227, 228, 229, 230, 231, 0, 0, + 6, 6, 6, 6, 6, 6, 134, 0, 6, 90, 6, 6, 6, 6, 6, 6, + 80, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 89, }; static RE_UINT8 re_id_continue_stage_5[] = { 0, 0, 0, 0, 0, 0, 255, 3, 254, 255, 255, 135, 254, 255, 255, 7, 0, 4, 160, 4, 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, - 31, 80, 0, 0, 255, 255, 223, 60, 192, 215, 255, 255, 251, 255, 255, 255, - 255, 255, 191, 255, 251, 252, 255, 255, 255, 0, 254, 255, 255, 255, 127, 2, - 254, 255, 255, 255, 255, 255, 255, 191, 182, 0, 255, 255, 255, 7, 7, 0, - 0, 0, 255, 7, 255, 195, 255, 255, 255, 255, 239, 159, 255, 253, 255, 159, - 0, 0, 255, 255, 255, 231, 255, 255, 255, 255, 3, 0, 255, 255, 63, 4, - 255, 63, 0, 0, 255, 255, 255, 15, 253, 31, 0, 0, 240, 
255, 255, 127, - 207, 255, 254, 254, 238, 159, 249, 255, 255, 253, 197, 243, 159, 121, 128, 176, - 207, 255, 3, 0, 238, 135, 249, 255, 255, 253, 109, 211, 135, 57, 2, 94, - 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 243, 191, 59, 1, 0, - 207, 255, 0, 0, 159, 57, 192, 176, 207, 255, 2, 0, 236, 199, 61, 214, - 24, 199, 255, 195, 199, 61, 129, 0, 192, 255, 0, 0, 238, 223, 253, 255, - 255, 253, 239, 227, 223, 61, 96, 3, 236, 223, 253, 255, 255, 253, 239, 243, - 223, 61, 96, 64, 207, 255, 6, 0, 255, 255, 255, 231, 223, 125, 128, 0, - 207, 255, 0, 252, 236, 255, 127, 252, 255, 255, 251, 47, 127, 132, 95, 255, - 0, 0, 12, 0, 255, 255, 255, 7, 255, 127, 255, 3, 150, 37, 240, 254, - 174, 236, 255, 59, 95, 63, 255, 243, 1, 0, 0, 3, 255, 3, 160, 194, - 255, 254, 255, 255, 255, 31, 254, 255, 223, 255, 255, 254, 255, 255, 255, 31, - 64, 0, 0, 0, 255, 3, 255, 255, 255, 255, 255, 63, 191, 32, 255, 255, - 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, - 61, 255, 127, 255, 255, 255, 61, 255, 0, 254, 3, 0, 255, 255, 0, 0, - 255, 255, 31, 0, 255, 159, 255, 255, 255, 199, 1, 0, 255, 223, 31, 0, - 255, 255, 15, 0, 255, 223, 13, 0, 255, 255, 143, 48, 255, 3, 0, 0, - 0, 56, 255, 3, 255, 255, 255, 0, 255, 7, 255, 255, 255, 255, 63, 0, - 255, 15, 255, 15, 192, 255, 255, 255, 255, 63, 31, 0, 255, 15, 255, 255, - 255, 3, 255, 7, 255, 255, 255, 127, 255, 255, 255, 159, 255, 3, 255, 3, - 128, 0, 0, 0, 255, 15, 255, 3, 0, 248, 15, 0, 255, 227, 255, 255, - 0, 0, 247, 255, 255, 255, 127, 0, 127, 0, 0, 240, 255, 255, 63, 63, - 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, - 0, 0, 0, 128, 1, 0, 16, 0, 0, 0, 2, 128, 0, 0, 255, 31, - 226, 255, 1, 0, 132, 252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 0, - 255, 1, 0, 0, 255, 127, 255, 255, 31, 248, 15, 0, 255, 128, 0, 128, - 127, 127, 127, 127, 224, 0, 0, 0, 254, 255, 62, 31, 255, 255, 127, 254, - 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 0, 0, - 255, 31, 255, 255, 
255, 15, 0, 0, 255, 255, 240, 191, 255, 255, 255, 128, - 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, 255, 7, 0, 0, - 0, 0, 0, 255, 255, 0, 0, 0, 31, 0, 255, 3, 255, 255, 255, 8, - 255, 63, 255, 255, 1, 128, 255, 3, 255, 63, 255, 3, 255, 255, 127, 12, - 7, 0, 0, 56, 255, 255, 124, 0, 126, 126, 126, 0, 127, 127, 0, 0, - 255, 55, 255, 3, 15, 0, 255, 255, 127, 248, 255, 255, 255, 255, 255, 3, - 127, 0, 248, 224, 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, - 255, 255, 252, 255, 0, 0, 255, 15, 127, 0, 24, 0, 0, 224, 0, 0, - 0, 0, 223, 255, 252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, - 255, 63, 255, 63, 0, 0, 0, 32, 255, 255, 1, 0, 15, 255, 62, 0, - 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, 111, 240, 239, 254, - 255, 255, 15, 135, 255, 255, 7, 0, 127, 0, 0, 0, 255, 1, 255, 3, - 255, 255, 223, 255, 7, 0, 0, 0, 255, 255, 255, 1, 31, 0, 255, 255, - 0, 128, 255, 255, 3, 0, 0, 0, 224, 227, 7, 248, 231, 15, 0, 0, - 0, 60, 0, 0, 28, 0, 0, 0, 255, 255, 255, 223, 100, 222, 255, 235, - 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, - 63, 255, 255, 255, 253, 255, 255, 247, 255, 253, 255, 255, 247, 207, 255, 255, - 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, - 238, 251, 255, 15, + 31, 80, 0, 0, 255, 255, 223, 188, 192, 215, 255, 255, 251, 255, 255, 255, + 255, 255, 191, 255, 251, 252, 255, 255, 255, 255, 254, 255, 255, 255, 127, 2, + 254, 255, 255, 255, 255, 0, 254, 255, 255, 255, 255, 191, 182, 0, 255, 255, + 255, 7, 7, 0, 0, 0, 255, 7, 255, 195, 255, 255, 255, 255, 239, 159, + 255, 253, 255, 159, 0, 0, 255, 255, 255, 231, 255, 255, 255, 255, 3, 0, + 255, 255, 63, 4, 255, 63, 0, 0, 255, 255, 255, 15, 255, 255, 7, 0, + 240, 255, 255, 255, 207, 255, 254, 255, 239, 159, 249, 255, 255, 253, 197, 243, + 159, 121, 128, 176, 207, 255, 3, 0, 238, 135, 249, 255, 255, 253, 109, 211, + 135, 57, 2, 94, 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 243, + 191, 59, 1, 0, 207, 
255, 0, 0, 238, 159, 249, 255, 159, 57, 192, 176, + 207, 255, 2, 0, 236, 199, 61, 214, 24, 199, 255, 195, 199, 61, 129, 0, + 192, 255, 0, 0, 239, 223, 253, 255, 255, 253, 255, 227, 223, 61, 96, 3, + 238, 223, 253, 255, 255, 253, 239, 243, 223, 61, 96, 64, 207, 255, 6, 0, + 255, 255, 255, 231, 223, 125, 128, 0, 207, 255, 0, 252, 236, 255, 127, 252, + 255, 255, 251, 47, 127, 132, 95, 255, 192, 255, 12, 0, 255, 255, 255, 7, + 255, 127, 255, 3, 150, 37, 240, 254, 174, 236, 255, 59, 95, 63, 255, 243, + 1, 0, 0, 3, 255, 3, 160, 194, 255, 254, 255, 255, 255, 31, 254, 255, + 223, 255, 255, 254, 255, 255, 255, 31, 64, 0, 0, 0, 255, 3, 255, 255, + 255, 255, 255, 63, 191, 32, 255, 255, 255, 255, 255, 247, 255, 61, 127, 61, + 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, 255, 255, 61, 255, + 0, 254, 3, 0, 255, 255, 0, 0, 255, 255, 31, 0, 255, 159, 255, 255, + 255, 199, 255, 1, 255, 223, 31, 0, 255, 255, 15, 0, 255, 223, 13, 0, + 255, 255, 143, 48, 255, 3, 0, 0, 0, 56, 255, 3, 255, 255, 255, 0, + 255, 7, 255, 255, 255, 255, 63, 0, 255, 255, 255, 127, 255, 15, 255, 15, + 192, 255, 255, 255, 255, 63, 31, 0, 255, 15, 255, 255, 255, 3, 255, 7, + 255, 255, 255, 159, 255, 3, 255, 3, 128, 0, 255, 63, 255, 15, 255, 3, + 0, 248, 15, 0, 255, 227, 255, 255, 0, 0, 247, 255, 255, 255, 127, 3, + 255, 255, 63, 240, 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 95, + 220, 31, 207, 15, 255, 31, 220, 31, 0, 0, 0, 128, 1, 0, 16, 0, + 0, 0, 2, 128, 0, 0, 255, 31, 226, 255, 1, 0, 132, 252, 47, 63, + 80, 253, 255, 243, 224, 67, 0, 0, 255, 1, 0, 0, 255, 127, 255, 255, + 31, 248, 15, 0, 255, 128, 0, 128, 255, 255, 127, 0, 127, 127, 127, 127, + 224, 0, 0, 0, 254, 255, 62, 31, 255, 255, 127, 254, 224, 255, 255, 255, + 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 0, 0, 255, 31, 255, 255, + 255, 15, 0, 0, 255, 255, 240, 191, 0, 0, 128, 255, 252, 255, 255, 255, + 255, 121, 255, 255, 255, 63, 3, 0, 255, 0, 0, 0, 31, 0, 255, 3, + 255, 255, 255, 8, 255, 63, 255, 255, 1, 128, 255, 3, 255, 63, 
255, 3, + 255, 255, 127, 252, 7, 0, 0, 56, 255, 255, 124, 0, 126, 126, 126, 0, + 127, 127, 255, 255, 48, 0, 0, 0, 255, 55, 255, 3, 15, 0, 255, 255, + 127, 248, 255, 255, 255, 255, 255, 3, 127, 0, 248, 224, 255, 253, 127, 95, + 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, 0, 0, 255, 15, + 255, 63, 24, 0, 0, 224, 0, 0, 0, 0, 223, 255, 252, 252, 252, 28, + 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, 0, 0, 0, 32, + 255, 255, 1, 0, 1, 0, 0, 0, 15, 255, 62, 0, 255, 0, 255, 255, + 15, 0, 0, 0, 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, + 111, 240, 239, 254, 255, 255, 15, 135, 127, 0, 0, 0, 192, 255, 0, 128, + 255, 1, 255, 3, 255, 255, 223, 255, 255, 255, 79, 0, 31, 0, 255, 7, + 255, 255, 251, 255, 255, 7, 255, 3, 159, 57, 128, 224, 207, 31, 31, 0, + 191, 0, 255, 3, 255, 255, 63, 255, 17, 0, 255, 3, 255, 3, 0, 128, + 255, 255, 255, 1, 15, 0, 255, 3, 248, 255, 255, 224, 31, 0, 255, 255, + 0, 128, 255, 255, 3, 0, 0, 0, 255, 7, 255, 31, 255, 1, 255, 99, + 224, 227, 7, 248, 231, 15, 0, 0, 0, 60, 0, 0, 28, 0, 0, 0, + 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, + 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, + 255, 253, 255, 255, 247, 207, 255, 255, 31, 0, 127, 0, 150, 254, 247, 10, + 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, 238, 251, 255, 15, }; -/* ID_Continue: 1894 bytes. */ +/* ID_Continue: 2074 bytes. 
*/ RE_UINT32 re_get_id_continue(RE_UINT32 ch) { RE_UINT32 code; @@ -6012,10 +6436,10 @@ static RE_UINT8 re_xid_start_stage_1[] = { static RE_UINT8 re_xid_start_stage_2[] = { 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, - 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, + 15, 16, 17, 18, 19, 13, 20, 13, 13, 13, 13, 13, 13, 21, 13, 13, + 13, 13, 13, 13, 13, 13, 22, 23, 13, 13, 24, 13, 13, 25, 13, 13, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, + 7, 7, 7, 7, 26, 7, 27, 28, 13, 13, 13, 13, 13, 13, 13, 29, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, }; @@ -6028,13 +6452,14 @@ static RE_UINT8 re_xid_start_stage_3[] = { 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 31, 31, 31, - 57, 58, 59, 60, 61, 31, 31, 31, 62, 63, 31, 31, 31, 31, 64, 31, - 1, 1, 1, 65, 66, 31, 31, 31, 1, 1, 1, 1, 67, 31, 31, 31, - 1, 1, 68, 31, 31, 31, 31, 69, 70, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 71, 72, 73, 74, 31, 31, 31, 31, 31, 31, 75, 31, - 1, 1, 1, 1, 1, 1, 76, 1, 1, 1, 1, 1, 1, 1, 1, 77, - 78, 31, 31, 31, 31, 31, 31, 31, 1, 1, 78, 31, 31, 31, 31, 31, + 31, 1, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 1, 58, + 59, 60, 61, 62, 63, 31, 31, 31, 64, 65, 66, 67, 68, 69, 70, 31, + 71, 31, 72, 31, 31, 31, 31, 31, 1, 1, 1, 73, 74, 31, 31, 31, + 1, 1, 1, 1, 75, 31, 31, 31, 1, 1, 76, 77, 31, 31, 31, 78, + 79, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 80, 31, 31, 31, + 31, 31, 31, 31, 81, 82, 83, 84, 85, 31, 31, 31, 31, 31, 86, 31, + 1, 1, 1, 1, 1, 1, 87, 1, 1, 1, 1, 1, 1, 1, 1, 88, + 89, 31, 31, 31, 31, 31, 31, 31, 1, 1, 89, 31, 31, 
31, 31, 31, }; static RE_UINT8 re_xid_start_stage_4[] = { @@ -6043,95 +6468,104 @@ static RE_UINT8 re_xid_start_stage_4[] = { 4, 4, 4, 4, 11, 4, 4, 4, 4, 12, 13, 14, 15, 0, 16, 17, 0, 4, 18, 19, 4, 4, 20, 21, 22, 23, 24, 4, 4, 25, 26, 27, 28, 29, 30, 0, 0, 31, 0, 0, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 36, 45, 48, 49, 50, 51, 46, 0, - 52, 53, 54, 47, 52, 53, 55, 56, 52, 57, 58, 59, 60, 61, 62, 0, - 14, 63, 62, 0, 64, 65, 66, 0, 67, 0, 68, 69, 70, 0, 0, 0, - 4, 71, 72, 73, 74, 4, 75, 76, 4, 4, 77, 4, 78, 79, 80, 4, - 81, 4, 82, 0, 23, 4, 4, 83, 14, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 84, 1, 4, 4, 85, 86, 87, 87, 88, 4, 89, 90, 0, - 0, 4, 4, 91, 4, 92, 4, 93, 94, 0, 16, 95, 4, 96, 97, 0, - 98, 4, 83, 0, 0, 99, 0, 0, 100, 89, 101, 0, 102, 103, 4, 104, - 4, 105, 106, 107, 0, 0, 0, 108, 4, 4, 4, 4, 4, 4, 0, 0, - 109, 4, 110, 107, 4, 111, 112, 113, 0, 0, 0, 114, 115, 0, 0, 0, - 116, 117, 118, 4, 119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 120, 121, 4, 4, 4, 4, 122, 4, 75, 4, 123, 98, 124, 124, 0, - 125, 126, 14, 4, 127, 14, 4, 76, 100, 128, 4, 4, 129, 82, 0, 16, - 4, 4, 4, 4, 4, 93, 0, 0, 4, 4, 4, 4, 4, 4, 69, 0, - 4, 4, 4, 4, 69, 0, 16, 107, 130, 131, 4, 132, 91, 4, 4, 23, - 133, 134, 4, 4, 135, 18, 0, 136, 137, 138, 4, 89, 134, 89, 0, 139, - 26, 140, 62, 94, 32, 141, 142, 0, 4, 119, 143, 144, 4, 145, 146, 147, - 148, 149, 0, 0, 0, 0, 4, 138, 4, 4, 4, 4, 4, 150, 151, 152, - 4, 4, 4, 153, 4, 4, 154, 0, 155, 156, 157, 4, 4, 87, 158, 4, - 4, 4, 107, 32, 4, 4, 4, 4, 4, 107, 16, 4, 159, 4, 15, 160, - 0, 0, 0, 161, 4, 4, 4, 94, 0, 1, 1, 162, 107, 121, 163, 0, - 164, 165, 166, 0, 4, 4, 4, 82, 0, 0, 4, 83, 0, 0, 0, 0, - 0, 0, 0, 0, 94, 4, 167, 0, 121, 16, 18, 0, 107, 4, 168, 0, - 4, 4, 4, 4, 107, 0, 0, 0, 169, 170, 93, 0, 0, 0, 0, 0, - 93, 154, 0, 0, 4, 171, 0, 0, 172, 89, 0, 94, 0, 0, 0, 0, - 4, 93, 93, 141, 0, 0, 0, 0, 4, 4, 119, 0, 0, 0, 0, 0, - 102, 91, 0, 0, 102, 23, 16, 119, 102, 62, 0, 0, 102, 141, 173, 0, - 0, 0, 0, 0, 4, 18, 0, 0, 4, 4, 4, 129, 
0, 0, 0, 0, - 4, 4, 4, 138, 0, 0, 0, 0, 4, 129, 0, 0, 0, 0, 0, 0, - 4, 30, 0, 0, 0, 0, 0, 0, 4, 4, 174, 0, 158, 0, 0, 0, - 47, 0, 0, 0, 0, 0, 0, 0, 4, 4, 175, 4, 176, 177, 178, 4, - 179, 180, 181, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 182, 183, 76, - 175, 175, 120, 120, 184, 184, 143, 0, 178, 185, 186, 187, 188, 189, 0, 0, - 4, 4, 4, 4, 4, 4, 98, 0, 4, 83, 4, 4, 4, 4, 4, 4, - 107, 0, 0, 0, 0, 0, 0, 0, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 45, 49, 50, 51, 52, 46, 0, + 53, 54, 55, 47, 53, 56, 57, 58, 53, 59, 60, 61, 62, 63, 64, 0, + 14, 65, 64, 0, 66, 67, 68, 0, 69, 0, 70, 71, 72, 0, 0, 0, + 4, 73, 74, 75, 76, 4, 77, 78, 4, 4, 79, 4, 80, 81, 82, 4, + 83, 4, 84, 0, 23, 4, 4, 85, 14, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 86, 1, 4, 4, 87, 88, 89, 89, 90, 4, 91, 92, 0, + 0, 4, 4, 93, 4, 94, 4, 95, 96, 0, 16, 97, 4, 98, 99, 0, + 100, 4, 85, 0, 0, 101, 0, 0, 102, 91, 103, 0, 104, 105, 4, 106, + 4, 107, 108, 109, 0, 0, 0, 110, 4, 4, 4, 4, 4, 4, 0, 0, + 111, 4, 112, 109, 4, 113, 114, 115, 0, 0, 0, 116, 117, 0, 0, 0, + 118, 119, 120, 4, 121, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 122, 96, 4, 4, 4, 4, 123, 4, 77, 4, 124, 100, 125, 125, 0, + 126, 127, 14, 4, 128, 14, 4, 78, 102, 129, 4, 4, 130, 84, 0, 16, + 4, 4, 4, 4, 4, 95, 0, 0, 4, 4, 4, 4, 4, 4, 71, 0, + 4, 4, 4, 4, 71, 0, 16, 109, 131, 132, 4, 133, 109, 4, 4, 23, + 134, 135, 4, 4, 136, 137, 0, 134, 138, 139, 4, 91, 135, 91, 0, 140, + 26, 141, 64, 142, 32, 31, 143, 144, 4, 121, 145, 146, 4, 147, 148, 149, + 150, 151, 78, 152, 0, 0, 4, 139, 4, 4, 4, 4, 4, 153, 154, 155, + 4, 4, 4, 156, 4, 4, 157, 0, 158, 159, 160, 4, 4, 89, 161, 4, + 4, 4, 109, 32, 4, 4, 4, 4, 4, 109, 16, 4, 162, 4, 15, 163, + 0, 0, 0, 164, 4, 4, 4, 142, 0, 1, 1, 165, 109, 96, 166, 0, + 167, 168, 169, 0, 4, 4, 4, 84, 0, 0, 4, 85, 0, 0, 0, 0, + 0, 0, 0, 0, 142, 4, 170, 0, 4, 16, 171, 95, 109, 4, 172, 0, + 4, 4, 4, 4, 109, 0, 0, 0, 4, 173, 4, 107, 0, 0, 0, 0, + 4, 100, 95, 15, 0, 0, 0, 0, 174, 175, 95, 100, 96, 0, 0, 0, + 95, 157, 0, 0, 4, 176, 0, 0, 177, 91, 0, 142, 142, 0, 
70, 178, + 4, 95, 95, 31, 89, 0, 0, 0, 4, 4, 121, 0, 0, 0, 0, 0, + 104, 93, 0, 0, 104, 23, 16, 121, 104, 64, 16, 179, 104, 31, 180, 0, + 181, 98, 0, 0, 0, 16, 96, 0, 48, 45, 182, 47, 0, 0, 0, 0, + 0, 0, 0, 0, 4, 23, 183, 0, 0, 0, 0, 0, 4, 130, 0, 0, + 4, 23, 184, 0, 4, 18, 0, 0, 0, 0, 0, 0, 0, 4, 4, 185, + 0, 0, 0, 0, 0, 0, 4, 30, 4, 4, 4, 4, 30, 0, 0, 0, + 4, 4, 4, 130, 0, 0, 0, 0, 4, 130, 0, 0, 0, 0, 0, 0, + 4, 30, 96, 0, 0, 0, 16, 186, 4, 23, 107, 187, 23, 0, 0, 0, + 4, 4, 188, 0, 161, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 4, 189, 190, 0, 0, 0, 4, 4, 191, 4, 192, 193, 194, 4, + 195, 196, 197, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 198, 199, 78, + 191, 191, 122, 122, 200, 200, 145, 0, 4, 4, 4, 4, 4, 4, 178, 0, + 194, 201, 202, 203, 204, 205, 0, 0, 4, 4, 4, 4, 4, 4, 100, 0, + 4, 85, 4, 4, 4, 4, 4, 4, 109, 0, 0, 0, 0, 0, 0, 0, }; static RE_UINT8 re_xid_start_stage_5[] = { 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 255, 255, 127, 255, - 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, 0, 0, 223, 56, + 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, 0, 0, 223, 184, 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, 3, 252, 255, 255, - 255, 0, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, 255, 0, 0, 0, + 255, 255, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, 255, 0, 0, 0, 0, 0, 255, 255, 255, 7, 7, 0, 255, 7, 0, 0, 0, 192, 254, 255, 255, 255, 47, 0, 96, 192, 0, 156, 0, 0, 253, 255, 255, 255, 0, 0, 0, 224, 255, 255, 63, 0, 2, 0, 0, 252, 255, 255, 255, 7, 48, 4, - 255, 255, 63, 4, 16, 1, 0, 0, 255, 255, 255, 1, 253, 31, 0, 0, - 240, 255, 255, 255, 255, 255, 255, 35, 0, 0, 1, 255, 3, 0, 254, 254, - 224, 159, 249, 255, 255, 253, 197, 35, 0, 64, 0, 176, 3, 0, 3, 0, + 255, 255, 63, 4, 16, 1, 0, 0, 255, 255, 255, 1, 255, 255, 7, 0, + 240, 255, 255, 255, 255, 255, 255, 35, 0, 0, 1, 255, 3, 0, 254, 255, + 225, 159, 249, 255, 255, 253, 197, 35, 0, 64, 0, 176, 3, 0, 3, 0, 224, 135, 249, 255, 255, 253, 109, 3, 0, 0, 0, 94, 0, 0, 28, 0, 224, 191, 251, 255, 255, 253, 
237, 35, 0, 0, 1, 0, 3, 0, 0, 0, - 0, 0, 0, 176, 3, 0, 2, 0, 232, 199, 61, 214, 24, 199, 255, 3, - 224, 223, 253, 255, 255, 253, 239, 35, 0, 0, 0, 3, 0, 0, 0, 64, - 3, 0, 6, 0, 255, 255, 255, 39, 0, 64, 0, 0, 3, 0, 0, 252, - 224, 255, 127, 252, 255, 255, 251, 47, 127, 0, 0, 0, 255, 255, 5, 0, - 150, 37, 240, 254, 174, 236, 5, 32, 95, 0, 0, 240, 1, 0, 0, 0, - 255, 254, 255, 255, 255, 31, 0, 0, 0, 31, 0, 0, 255, 7, 0, 128, - 0, 0, 63, 60, 98, 192, 225, 255, 3, 64, 0, 0, 191, 32, 255, 255, - 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, - 61, 255, 127, 255, 255, 255, 61, 255, 255, 255, 255, 7, 255, 255, 31, 0, - 255, 159, 255, 255, 255, 199, 1, 0, 255, 223, 3, 0, 255, 255, 3, 0, - 255, 223, 1, 0, 255, 255, 15, 0, 0, 0, 128, 16, 255, 255, 255, 0, - 255, 5, 255, 255, 255, 255, 63, 0, 255, 255, 255, 31, 255, 63, 31, 0, - 255, 15, 0, 0, 254, 0, 0, 0, 255, 255, 127, 0, 128, 0, 0, 0, - 224, 255, 255, 255, 224, 15, 0, 0, 248, 255, 255, 255, 1, 192, 0, 252, - 63, 0, 0, 0, 15, 0, 0, 0, 0, 224, 0, 252, 255, 255, 255, 63, - 0, 222, 99, 0, 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 95, - 220, 31, 207, 15, 255, 31, 220, 31, 0, 0, 2, 128, 0, 0, 255, 31, - 132, 252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 0, 255, 1, 0, 0, - 255, 127, 255, 255, 255, 255, 255, 127, 31, 120, 12, 0, 255, 128, 0, 0, - 127, 127, 127, 127, 224, 0, 0, 0, 254, 3, 62, 31, 255, 255, 127, 224, - 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 255, 255, 0, 12, 0, 0, - 255, 127, 0, 128, 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, - 0, 0, 0, 255, 187, 247, 255, 255, 7, 0, 0, 0, 0, 0, 252, 8, - 63, 0, 255, 255, 255, 255, 7, 0, 0, 128, 0, 0, 247, 15, 0, 0, - 255, 255, 127, 4, 255, 255, 98, 62, 5, 0, 0, 56, 255, 7, 28, 0, - 126, 126, 126, 0, 127, 127, 0, 0, 15, 0, 255, 255, 127, 248, 255, 255, - 255, 255, 255, 15, 255, 63, 255, 255, 255, 255, 255, 3, 127, 0, 248, 160, - 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, - 0, 0, 255, 3, 0, 0, 138, 170, 
192, 255, 255, 255, 252, 252, 252, 28, - 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, 255, 255, 1, 0, - 15, 255, 62, 0, 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, - 1, 0, 239, 254, 30, 0, 0, 0, 31, 0, 1, 0, 255, 255, 223, 255, + 224, 159, 249, 255, 0, 0, 0, 176, 3, 0, 2, 0, 232, 199, 61, 214, + 24, 199, 255, 3, 224, 223, 253, 255, 255, 253, 255, 35, 0, 0, 0, 3, + 255, 253, 239, 35, 0, 0, 0, 64, 3, 0, 6, 0, 255, 255, 255, 39, + 0, 64, 0, 0, 3, 0, 0, 252, 224, 255, 127, 252, 255, 255, 251, 47, + 127, 0, 0, 0, 255, 255, 5, 0, 150, 37, 240, 254, 174, 236, 5, 32, + 95, 0, 0, 240, 1, 0, 0, 0, 255, 254, 255, 255, 255, 31, 0, 0, + 0, 31, 0, 0, 255, 7, 0, 128, 0, 0, 63, 60, 98, 192, 225, 255, + 3, 64, 0, 0, 191, 32, 255, 255, 255, 255, 255, 247, 255, 61, 127, 61, + 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, 255, 255, 61, 255, + 255, 255, 255, 7, 255, 255, 31, 0, 255, 159, 255, 255, 255, 199, 255, 1, + 255, 223, 3, 0, 255, 255, 3, 0, 255, 223, 1, 0, 255, 255, 15, 0, + 0, 0, 128, 16, 255, 255, 255, 0, 255, 5, 255, 255, 255, 255, 63, 0, + 255, 255, 255, 127, 255, 63, 31, 0, 255, 15, 0, 0, 254, 0, 0, 0, + 255, 255, 127, 0, 128, 0, 0, 0, 224, 255, 255, 255, 224, 15, 0, 0, + 248, 255, 255, 255, 1, 192, 0, 252, 63, 0, 0, 0, 15, 0, 0, 0, + 0, 224, 0, 252, 255, 255, 255, 63, 0, 222, 99, 0, 255, 255, 63, 63, + 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, + 0, 0, 2, 128, 0, 0, 255, 31, 132, 252, 47, 63, 80, 253, 255, 243, + 224, 67, 0, 0, 255, 1, 0, 0, 255, 127, 255, 255, 31, 120, 12, 0, + 255, 128, 0, 0, 127, 127, 127, 127, 224, 0, 0, 0, 254, 3, 62, 31, + 255, 255, 127, 224, 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 255, 255, + 0, 12, 0, 0, 255, 127, 0, 128, 0, 0, 128, 255, 252, 255, 255, 255, + 255, 121, 255, 255, 255, 63, 3, 0, 187, 247, 255, 255, 7, 0, 0, 0, + 0, 0, 252, 8, 63, 0, 255, 255, 255, 255, 255, 31, 0, 128, 0, 0, + 223, 255, 0, 124, 247, 15, 0, 0, 255, 255, 127, 196, 255, 255, 98, 62, + 5, 0, 0, 56, 
255, 7, 28, 0, 126, 126, 126, 0, 127, 127, 255, 255, + 48, 0, 0, 0, 15, 0, 255, 255, 127, 248, 255, 255, 255, 255, 255, 15, + 255, 63, 255, 255, 255, 255, 255, 3, 127, 0, 248, 160, 255, 253, 127, 95, + 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, 0, 0, 255, 3, + 0, 0, 138, 170, 192, 255, 255, 255, 252, 252, 252, 28, 255, 239, 255, 255, + 127, 255, 255, 183, 255, 63, 255, 63, 255, 255, 1, 0, 255, 7, 255, 255, + 15, 255, 62, 0, 255, 0, 255, 255, 63, 253, 255, 255, 255, 255, 191, 145, + 255, 255, 255, 192, 1, 0, 239, 254, 31, 0, 0, 0, 255, 255, 71, 0, + 30, 0, 0, 4, 255, 255, 251, 255, 0, 0, 0, 224, 176, 0, 0, 0, + 16, 0, 0, 0, 0, 0, 0, 128, 255, 63, 0, 0, 248, 255, 255, 224, + 31, 0, 1, 0, 255, 7, 255, 31, 255, 1, 255, 3, 255, 255, 223, 255, 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, 255, 253, 255, 255, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, 238, 251, 255, 15, }; -/* XID_Start: 1761 bytes. */ +/* XID_Start: 1929 bytes. 
*/ RE_UINT32 re_get_xid_start(RE_UINT32 ch) { RE_UINT32 code; @@ -6168,12 +6602,12 @@ static RE_UINT8 re_xid_continue_stage_1[] = { static RE_UINT8 re_xid_continue_stage_2[] = { 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, - 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, + 15, 16, 17, 18, 19, 13, 20, 13, 13, 13, 13, 13, 13, 21, 13, 13, + 13, 13, 13, 13, 13, 13, 22, 23, 13, 13, 24, 13, 13, 25, 13, 13, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, + 7, 7, 7, 7, 26, 7, 27, 28, 13, 13, 13, 13, 13, 13, 13, 29, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 28, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 30, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, }; static RE_UINT8 re_xid_continue_stage_3[] = { @@ -6184,118 +6618,129 @@ static RE_UINT8 re_xid_continue_stage_3[] = { 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 31, 31, 31, - 57, 58, 59, 60, 61, 31, 31, 31, 62, 63, 31, 31, 31, 31, 64, 31, - 1, 1, 1, 65, 66, 31, 31, 31, 1, 1, 1, 1, 67, 31, 31, 31, - 1, 1, 68, 31, 31, 31, 31, 69, 70, 31, 31, 31, 31, 31, 31, 31, - 31, 71, 72, 31, 73, 74, 75, 76, 31, 31, 31, 31, 31, 31, 77, 31, - 1, 1, 1, 1, 1, 1, 78, 1, 1, 1, 1, 1, 1, 1, 1, 79, - 80, 31, 31, 31, 31, 31, 31, 31, 1, 1, 80, 31, 31, 31, 31, 31, - 31, 81, 31, 31, 31, 31, 31, 31, + 31, 1, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 1, 58, + 59, 60, 61, 62, 63, 31, 31, 31, 64, 65, 66, 67, 68, 69, 70, 31, + 71, 31, 72, 31, 31, 31, 31, 31, 1, 1, 1, 73, 74, 31, 31, 31, + 1, 1, 1, 1, 75, 31, 31, 31, 1, 1, 76, 77, 31, 31, 31, 78, + 79, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 80, 31, 31, 31, + 31, 81, 82, 
31, 83, 84, 85, 86, 87, 31, 31, 31, 31, 31, 88, 31, + 1, 1, 1, 1, 1, 1, 89, 1, 1, 1, 1, 1, 1, 1, 1, 90, + 91, 31, 31, 31, 31, 31, 31, 31, 1, 1, 91, 31, 31, 31, 31, 31, + 31, 92, 31, 31, 31, 31, 31, 31, }; static RE_UINT8 re_xid_continue_stage_4[] = { 0, 1, 2, 3, 0, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 8, 6, 6, 6, 9, 10, 11, 6, 12, - 6, 6, 6, 6, 13, 6, 6, 6, 6, 14, 15, 16, 14, 17, 18, 19, - 20, 6, 6, 21, 6, 6, 22, 23, 24, 6, 25, 6, 6, 26, 6, 27, - 6, 28, 29, 0, 0, 30, 0, 31, 6, 6, 6, 32, 33, 34, 35, 36, - 37, 38, 39, 40, 41, 42, 43, 44, 33, 42, 45, 46, 47, 48, 49, 50, - 51, 52, 53, 44, 54, 55, 56, 57, 54, 58, 59, 60, 61, 62, 63, 64, - 16, 65, 66, 0, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 0, - 6, 6, 77, 6, 78, 6, 79, 80, 6, 6, 81, 6, 82, 83, 84, 6, - 85, 6, 58, 86, 87, 6, 6, 88, 16, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 89, 3, 6, 6, 90, 91, 88, 92, 93, 6, 6, 94, 95, - 96, 6, 6, 97, 6, 98, 6, 99, 75, 100, 101, 102, 6, 103, 104, 0, - 29, 6, 105, 106, 107, 108, 0, 0, 6, 6, 109, 110, 6, 6, 6, 92, - 6, 97, 111, 78, 0, 0, 112, 113, 6, 6, 6, 6, 6, 6, 6, 114, - 115, 6, 116, 78, 6, 117, 118, 119, 0, 120, 121, 122, 123, 0, 123, 124, - 125, 126, 127, 6, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 6, 129, 105, 6, 6, 6, 6, 130, 6, 79, 6, 131, 113, 132, 132, 6, - 133, 134, 16, 6, 135, 16, 6, 80, 136, 137, 6, 6, 138, 65, 0, 24, - 6, 6, 6, 6, 6, 99, 0, 0, 6, 6, 6, 6, 6, 6, 139, 0, - 6, 6, 6, 6, 139, 0, 24, 78, 140, 141, 6, 142, 143, 6, 6, 26, - 144, 145, 6, 6, 146, 147, 0, 148, 6, 149, 6, 92, 6, 6, 150, 151, - 6, 152, 92, 75, 6, 6, 153, 0, 6, 113, 154, 155, 6, 6, 156, 157, - 158, 159, 0, 0, 0, 0, 6, 160, 6, 6, 6, 6, 6, 161, 162, 29, - 6, 6, 6, 152, 6, 6, 163, 0, 164, 165, 166, 6, 6, 26, 167, 6, - 6, 6, 78, 168, 6, 6, 6, 6, 6, 78, 24, 6, 169, 6, 149, 1, - 87, 170, 171, 172, 6, 6, 6, 75, 1, 2, 3, 101, 6, 105, 173, 0, - 174, 175, 176, 0, 6, 6, 6, 65, 0, 0, 6, 88, 0, 0, 0, 177, - 0, 0, 0, 0, 75, 6, 178, 0, 105, 24, 147, 0, 78, 6, 179, 0, - 6, 6, 6, 6, 78, 95, 0, 0, 180, 181, 
99, 0, 0, 0, 0, 0, - 99, 163, 0, 0, 6, 182, 0, 0, 183, 184, 0, 75, 0, 0, 0, 0, - 6, 99, 99, 185, 0, 0, 0, 0, 6, 6, 128, 0, 0, 0, 0, 0, - 6, 6, 186, 50, 6, 65, 24, 187, 6, 188, 0, 0, 6, 6, 150, 0, - 0, 0, 0, 0, 6, 97, 95, 0, 6, 6, 6, 138, 0, 0, 0, 0, - 6, 6, 6, 189, 0, 0, 0, 0, 6, 138, 0, 0, 0, 0, 0, 0, - 6, 190, 0, 0, 0, 0, 0, 0, 6, 6, 191, 105, 192, 0, 0, 0, - 193, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 194, 195, 196, 0, 0, - 0, 0, 197, 0, 0, 0, 0, 0, 6, 6, 188, 6, 198, 199, 200, 6, - 201, 202, 203, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 204, 205, 80, - 188, 188, 129, 129, 206, 206, 207, 6, 200, 208, 209, 210, 211, 212, 0, 0, - 6, 6, 6, 6, 6, 6, 113, 0, 6, 88, 6, 6, 6, 6, 6, 6, - 78, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 87, + 6, 6, 6, 6, 13, 6, 6, 6, 6, 14, 15, 16, 17, 18, 19, 20, + 21, 6, 6, 22, 6, 6, 23, 24, 25, 6, 26, 6, 6, 27, 6, 28, + 6, 29, 30, 0, 0, 31, 0, 32, 6, 6, 6, 33, 34, 35, 36, 37, + 38, 39, 40, 41, 42, 43, 44, 45, 46, 43, 47, 48, 49, 50, 51, 52, + 53, 54, 55, 45, 56, 57, 58, 59, 56, 60, 61, 62, 63, 64, 65, 66, + 16, 67, 68, 0, 69, 70, 71, 0, 72, 73, 74, 75, 76, 77, 78, 0, + 6, 6, 79, 6, 80, 6, 81, 82, 6, 6, 83, 6, 84, 85, 86, 6, + 87, 6, 60, 88, 89, 6, 6, 90, 16, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 91, 3, 6, 6, 92, 93, 90, 94, 95, 6, 6, 96, 97, + 98, 6, 6, 99, 6, 100, 6, 101, 102, 103, 104, 105, 6, 106, 107, 0, + 30, 6, 102, 108, 109, 110, 0, 0, 6, 6, 111, 112, 6, 6, 6, 94, + 6, 99, 113, 80, 0, 0, 114, 115, 6, 6, 6, 6, 6, 6, 6, 116, + 117, 6, 118, 80, 6, 119, 120, 121, 0, 122, 123, 124, 125, 0, 125, 126, + 127, 128, 129, 6, 130, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 6, 131, 102, 6, 6, 6, 6, 132, 6, 81, 6, 133, 134, 135, 135, 6, + 136, 137, 16, 6, 138, 16, 6, 82, 139, 140, 6, 6, 141, 67, 0, 25, + 6, 6, 6, 6, 6, 101, 0, 0, 6, 6, 6, 6, 6, 6, 142, 0, + 6, 6, 6, 6, 142, 0, 25, 80, 143, 144, 6, 145, 18, 6, 6, 27, + 146, 147, 6, 6, 148, 149, 0, 146, 6, 150, 6, 94, 6, 6, 151, 152, + 6, 153, 94, 77, 6, 6, 154, 102, 6, 134, 155, 156, 6, 6, 157, 158, + 159, 160, 82, 161, 0, 0, 6, 
162, 6, 6, 6, 6, 6, 163, 164, 30, + 6, 6, 6, 153, 6, 6, 165, 0, 166, 167, 168, 6, 6, 27, 169, 6, + 6, 6, 80, 32, 6, 6, 6, 6, 6, 80, 25, 6, 170, 6, 150, 1, + 89, 171, 172, 173, 6, 6, 6, 77, 1, 2, 3, 104, 6, 102, 174, 0, + 175, 176, 177, 0, 6, 6, 6, 67, 0, 0, 6, 90, 0, 0, 0, 178, + 0, 0, 0, 0, 77, 6, 179, 180, 6, 25, 100, 67, 80, 6, 181, 0, + 6, 6, 6, 6, 80, 97, 0, 0, 6, 182, 6, 183, 0, 0, 0, 0, + 6, 134, 101, 150, 0, 0, 0, 0, 184, 185, 101, 134, 102, 0, 0, 0, + 101, 165, 0, 0, 6, 186, 0, 0, 187, 188, 0, 77, 77, 0, 74, 189, + 6, 101, 101, 31, 27, 0, 0, 0, 6, 6, 130, 0, 0, 0, 0, 0, + 6, 6, 189, 190, 6, 67, 25, 191, 6, 192, 25, 193, 6, 6, 194, 0, + 195, 99, 0, 0, 0, 25, 6, 196, 46, 43, 197, 198, 0, 0, 0, 0, + 0, 0, 0, 0, 6, 6, 199, 0, 0, 0, 0, 0, 6, 200, 180, 0, + 6, 6, 201, 0, 6, 99, 97, 0, 0, 0, 0, 0, 0, 6, 6, 202, + 0, 0, 0, 0, 0, 0, 6, 203, 6, 6, 6, 6, 203, 0, 0, 0, + 6, 6, 6, 141, 0, 0, 0, 0, 6, 141, 0, 0, 0, 0, 0, 0, + 6, 203, 102, 97, 0, 0, 25, 105, 6, 134, 204, 205, 89, 0, 0, 0, + 6, 6, 206, 102, 207, 0, 0, 0, 208, 0, 0, 0, 0, 0, 0, 0, + 6, 6, 6, 209, 210, 0, 0, 0, 0, 0, 0, 211, 212, 213, 0, 0, + 0, 0, 214, 0, 0, 0, 0, 0, 6, 6, 192, 6, 215, 216, 217, 6, + 218, 219, 220, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 221, 222, 82, + 192, 192, 131, 131, 223, 223, 224, 6, 6, 6, 6, 6, 6, 6, 225, 0, + 217, 226, 227, 228, 229, 230, 0, 0, 6, 6, 6, 6, 6, 6, 134, 0, + 6, 90, 6, 6, 6, 6, 6, 6, 80, 0, 0, 0, 0, 0, 0, 0, + 6, 6, 6, 6, 6, 6, 6, 89, }; static RE_UINT8 re_xid_continue_stage_5[] = { 0, 0, 0, 0, 0, 0, 255, 3, 254, 255, 255, 135, 254, 255, 255, 7, 0, 4, 160, 4, 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, - 31, 80, 0, 0, 255, 255, 223, 56, 192, 215, 255, 255, 251, 255, 255, 255, - 255, 255, 191, 255, 251, 252, 255, 255, 255, 0, 254, 255, 255, 255, 127, 2, - 254, 255, 255, 255, 255, 255, 255, 191, 182, 0, 255, 255, 255, 7, 7, 0, - 0, 0, 255, 7, 255, 195, 255, 255, 255, 255, 239, 159, 255, 253, 255, 159, - 0, 0, 255, 255, 255, 231, 255, 255, 255, 255, 3, 0, 255, 255, 63, 
4, - 255, 63, 0, 0, 255, 255, 255, 15, 253, 31, 0, 0, 240, 255, 255, 127, - 207, 255, 254, 254, 238, 159, 249, 255, 255, 253, 197, 243, 159, 121, 128, 176, - 207, 255, 3, 0, 238, 135, 249, 255, 255, 253, 109, 211, 135, 57, 2, 94, - 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 243, 191, 59, 1, 0, - 207, 255, 0, 0, 159, 57, 192, 176, 207, 255, 2, 0, 236, 199, 61, 214, - 24, 199, 255, 195, 199, 61, 129, 0, 192, 255, 0, 0, 238, 223, 253, 255, - 255, 253, 239, 227, 223, 61, 96, 3, 236, 223, 253, 255, 255, 253, 239, 243, - 223, 61, 96, 64, 207, 255, 6, 0, 255, 255, 255, 231, 223, 125, 128, 0, - 207, 255, 0, 252, 236, 255, 127, 252, 255, 255, 251, 47, 127, 132, 95, 255, - 0, 0, 12, 0, 255, 255, 255, 7, 255, 127, 255, 3, 150, 37, 240, 254, - 174, 236, 255, 59, 95, 63, 255, 243, 1, 0, 0, 3, 255, 3, 160, 194, - 255, 254, 255, 255, 255, 31, 254, 255, 223, 255, 255, 254, 255, 255, 255, 31, - 64, 0, 0, 0, 255, 3, 255, 255, 255, 255, 255, 63, 191, 32, 255, 255, - 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, - 61, 255, 127, 255, 255, 255, 61, 255, 0, 254, 3, 0, 255, 255, 0, 0, - 255, 255, 31, 0, 255, 159, 255, 255, 255, 199, 1, 0, 255, 223, 31, 0, - 255, 255, 15, 0, 255, 223, 13, 0, 255, 255, 143, 48, 255, 3, 0, 0, - 0, 56, 255, 3, 255, 255, 255, 0, 255, 7, 255, 255, 255, 255, 63, 0, - 255, 15, 255, 15, 192, 255, 255, 255, 255, 63, 31, 0, 255, 15, 255, 255, - 255, 3, 255, 7, 255, 255, 255, 127, 255, 255, 255, 159, 255, 3, 255, 3, - 128, 0, 0, 0, 255, 15, 255, 3, 0, 248, 15, 0, 255, 227, 255, 255, - 0, 0, 247, 255, 255, 255, 127, 0, 127, 0, 0, 240, 255, 255, 63, 63, - 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, - 0, 0, 0, 128, 1, 0, 16, 0, 0, 0, 2, 128, 0, 0, 255, 31, - 226, 255, 1, 0, 132, 252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 0, - 255, 1, 0, 0, 255, 127, 255, 255, 31, 248, 15, 0, 255, 128, 0, 128, - 127, 127, 127, 127, 224, 0, 0, 0, 254, 255, 62, 31, 255, 255, 127, 230, - 224, 255, 255, 255, 255, 63, 254, 
255, 255, 127, 0, 0, 255, 31, 0, 0, - 255, 31, 255, 255, 255, 15, 0, 0, 255, 255, 240, 191, 255, 255, 255, 128, - 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, 255, 7, 0, 0, - 0, 0, 0, 255, 255, 0, 0, 0, 31, 0, 255, 3, 255, 255, 255, 8, - 255, 63, 255, 255, 1, 128, 255, 3, 255, 63, 255, 3, 255, 255, 127, 12, - 7, 0, 0, 56, 255, 255, 124, 0, 126, 126, 126, 0, 127, 127, 0, 0, - 255, 55, 255, 3, 15, 0, 255, 255, 127, 248, 255, 255, 255, 255, 255, 3, - 127, 0, 248, 224, 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, - 240, 255, 255, 255, 255, 255, 252, 255, 127, 0, 24, 0, 0, 224, 0, 0, - 0, 0, 138, 170, 252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, - 255, 63, 255, 63, 0, 0, 0, 32, 255, 255, 1, 0, 15, 255, 62, 0, + 31, 80, 0, 0, 255, 255, 223, 184, 192, 215, 255, 255, 251, 255, 255, 255, + 255, 255, 191, 255, 251, 252, 255, 255, 255, 255, 254, 255, 255, 255, 127, 2, + 254, 255, 255, 255, 255, 0, 254, 255, 255, 255, 255, 191, 182, 0, 255, 255, + 255, 7, 7, 0, 0, 0, 255, 7, 255, 195, 255, 255, 255, 255, 239, 159, + 255, 253, 255, 159, 0, 0, 255, 255, 255, 231, 255, 255, 255, 255, 3, 0, + 255, 255, 63, 4, 255, 63, 0, 0, 255, 255, 255, 15, 255, 255, 7, 0, + 240, 255, 255, 255, 207, 255, 254, 255, 239, 159, 249, 255, 255, 253, 197, 243, + 159, 121, 128, 176, 207, 255, 3, 0, 238, 135, 249, 255, 255, 253, 109, 211, + 135, 57, 2, 94, 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 243, + 191, 59, 1, 0, 207, 255, 0, 0, 238, 159, 249, 255, 159, 57, 192, 176, + 207, 255, 2, 0, 236, 199, 61, 214, 24, 199, 255, 195, 199, 61, 129, 0, + 192, 255, 0, 0, 239, 223, 253, 255, 255, 253, 255, 227, 223, 61, 96, 3, + 238, 223, 253, 255, 255, 253, 239, 243, 223, 61, 96, 64, 207, 255, 6, 0, + 255, 255, 255, 231, 223, 125, 128, 0, 207, 255, 0, 252, 236, 255, 127, 252, + 255, 255, 251, 47, 127, 132, 95, 255, 192, 255, 12, 0, 255, 255, 255, 7, + 255, 127, 255, 3, 150, 37, 240, 254, 174, 236, 255, 59, 95, 63, 255, 243, + 1, 0, 0, 3, 255, 3, 160, 194, 255, 254, 255, 
255, 255, 31, 254, 255, + 223, 255, 255, 254, 255, 255, 255, 31, 64, 0, 0, 0, 255, 3, 255, 255, + 255, 255, 255, 63, 191, 32, 255, 255, 255, 255, 255, 247, 255, 61, 127, 61, + 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, 255, 255, 61, 255, + 0, 254, 3, 0, 255, 255, 0, 0, 255, 255, 31, 0, 255, 159, 255, 255, + 255, 199, 255, 1, 255, 223, 31, 0, 255, 255, 15, 0, 255, 223, 13, 0, + 255, 255, 143, 48, 255, 3, 0, 0, 0, 56, 255, 3, 255, 255, 255, 0, + 255, 7, 255, 255, 255, 255, 63, 0, 255, 255, 255, 127, 255, 15, 255, 15, + 192, 255, 255, 255, 255, 63, 31, 0, 255, 15, 255, 255, 255, 3, 255, 7, + 255, 255, 255, 159, 255, 3, 255, 3, 128, 0, 255, 63, 255, 15, 255, 3, + 0, 248, 15, 0, 255, 227, 255, 255, 0, 0, 247, 255, 255, 255, 127, 3, + 255, 255, 63, 240, 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 95, + 220, 31, 207, 15, 255, 31, 220, 31, 0, 0, 0, 128, 1, 0, 16, 0, + 0, 0, 2, 128, 0, 0, 255, 31, 226, 255, 1, 0, 132, 252, 47, 63, + 80, 253, 255, 243, 224, 67, 0, 0, 255, 1, 0, 0, 255, 127, 255, 255, + 31, 248, 15, 0, 255, 128, 0, 128, 255, 255, 127, 0, 127, 127, 127, 127, + 224, 0, 0, 0, 254, 255, 62, 31, 255, 255, 127, 230, 224, 255, 255, 255, + 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 0, 0, 255, 31, 255, 255, + 255, 15, 0, 0, 255, 255, 240, 191, 0, 0, 128, 255, 252, 255, 255, 255, + 255, 121, 255, 255, 255, 63, 3, 0, 255, 0, 0, 0, 31, 0, 255, 3, + 255, 255, 255, 8, 255, 63, 255, 255, 1, 128, 255, 3, 255, 63, 255, 3, + 255, 255, 127, 252, 7, 0, 0, 56, 255, 255, 124, 0, 126, 126, 126, 0, + 127, 127, 255, 255, 48, 0, 0, 0, 255, 55, 255, 3, 15, 0, 255, 255, + 127, 248, 255, 255, 255, 255, 255, 3, 127, 0, 248, 224, 255, 253, 127, 95, + 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, 255, 63, 24, 0, + 0, 224, 0, 0, 0, 0, 138, 170, 252, 252, 252, 28, 255, 239, 255, 255, + 127, 255, 255, 183, 255, 63, 255, 63, 0, 0, 0, 32, 255, 255, 1, 0, + 1, 0, 0, 0, 15, 255, 62, 0, 255, 0, 255, 255, 15, 0, 0, 0, 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 
255, 192, 111, 240, 239, 254, - 255, 255, 15, 135, 255, 255, 7, 0, 127, 0, 0, 0, 255, 1, 255, 3, - 255, 255, 223, 255, 7, 0, 0, 0, 255, 255, 255, 1, 31, 0, 255, 255, - 0, 128, 255, 255, 3, 0, 0, 0, 224, 227, 7, 248, 231, 15, 0, 0, - 0, 60, 0, 0, 28, 0, 0, 0, 255, 255, 255, 223, 100, 222, 255, 235, - 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, - 63, 255, 255, 255, 253, 255, 255, 247, 255, 253, 255, 255, 247, 207, 255, 255, - 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, - 238, 251, 255, 15, + 255, 255, 15, 135, 127, 0, 0, 0, 192, 255, 0, 128, 255, 1, 255, 3, + 255, 255, 223, 255, 255, 255, 79, 0, 31, 0, 255, 7, 255, 255, 251, 255, + 255, 7, 255, 3, 159, 57, 128, 224, 207, 31, 31, 0, 191, 0, 255, 3, + 255, 255, 63, 255, 17, 0, 255, 3, 255, 3, 0, 128, 255, 255, 255, 1, + 15, 0, 255, 3, 248, 255, 255, 224, 31, 0, 255, 255, 0, 128, 255, 255, + 3, 0, 0, 0, 255, 7, 255, 31, 255, 1, 255, 99, 224, 227, 7, 248, + 231, 15, 0, 0, 0, 60, 0, 0, 28, 0, 0, 0, 255, 255, 255, 223, + 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, + 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, 255, 253, 255, 255, + 247, 207, 255, 255, 31, 0, 127, 0, 150, 254, 247, 10, 132, 234, 150, 170, + 150, 247, 247, 94, 255, 251, 255, 15, 238, 251, 255, 15, }; -/* XID_Continue: 1902 bytes. */ +/* XID_Continue: 2078 bytes. */ RE_UINT32 re_get_xid_continue(RE_UINT32 ch) { RE_UINT32 code; @@ -6324,25 +6769,25 @@ RE_UINT32 re_get_xid_continue(RE_UINT32 ch) { /* Default_Ignorable_Code_Point. 
*/ static RE_UINT8 re_default_ignorable_code_point_stage_1[] = { - 0, 1, 1, 2, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, + 0, 1, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 2, 2, 2, + 2, 2, }; static RE_UINT8 re_default_ignorable_code_point_stage_2[] = { 0, 1, 2, 3, 4, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 6, 1, 1, 7, 1, 1, 1, 1, 1, - 8, 8, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 8, 1, 1, 1, 1, 1, + 9, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }; static RE_UINT8 re_default_ignorable_code_point_stage_3[] = { 0, 1, 1, 2, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 5, 6, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 8, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 9, 10, 1, 11, 1, 1, 1, 1, 1, 1, - 12, 12, 12, 12, 12, 12, 12, 12, + 1, 1, 1, 1, 1, 1, 9, 10, 1, 1, 1, 1, 11, 1, 1, 1, + 1, 12, 1, 1, 1, 1, 1, 1, 13, 13, 13, 13, 13, 13, 13, 13, }; static RE_UINT8 re_default_ignorable_code_point_stage_4[] = { @@ -6351,18 +6796,18 @@ static RE_UINT8 re_default_ignorable_code_point_stage_4[] = { 0, 0, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 8, 9, 0, 10, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 4, - 0, 0, 0, 0, 0, 5, 0, 12, 0, 0, 0, 13, 0, 0, 0, 0, - 14, 14, 14, 14, 14, 14, 14, 14, + 0, 0, 0, 0, 0, 5, 0, 12, 0, 0, 0, 0, 0, 13, 0, 0, + 0, 0, 0, 14, 0, 0, 0, 0, 15, 15, 15, 15, 15, 15, 15, 15, }; static RE_UINT8 re_default_ignorable_code_point_stage_5[] = { 0, 0, 0, 0, 0, 32, 0, 0, 0, 128, 0, 0, 0, 0, 0, 16, 0, 0, 0, 128, 1, 0, 0, 0, 0, 0, 48, 0, 0, 120, 0, 0, 0, 248, 0, 0, 0, 124, 0, 0, 255, 255, 0, 0, 16, 0, 0, 0, - 0, 0, 255, 1, 0, 0, 248, 7, 255, 255, 255, 255, + 0, 0, 255, 1, 15, 0, 0, 0, 0, 0, 248, 7, 
255, 255, 255, 255, }; -/* Default_Ignorable_Code_Point: 344 bytes. */ +/* Default_Ignorable_Code_Point: 370 bytes. */ RE_UINT32 re_get_default_ignorable_code_point(RE_UINT32 ch) { RE_UINT32 code; @@ -6370,9 +6815,9 @@ RE_UINT32 re_get_default_ignorable_code_point(RE_UINT32 ch) { RE_UINT32 pos; RE_UINT32 value; - f = ch >> 14; - code = ch ^ (f << 14); - pos = (RE_UINT32)re_default_ignorable_code_point_stage_1[f] << 3; + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_default_ignorable_code_point_stage_1[f] << 4; f = code >> 11; code ^= f << 11; pos = (RE_UINT32)re_default_ignorable_code_point_stage_2[pos + f] << 3; @@ -6400,9 +6845,9 @@ static RE_UINT8 re_grapheme_extend_stage_2[] = { 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 11, 12, 13, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 14, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 15, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 15, 7, 7, 16, 7, 7, 17, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 16, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 18, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, }; static RE_UINT8 re_grapheme_extend_stage_3[] = { @@ -6411,10 +6856,11 @@ static RE_UINT8 re_grapheme_extend_stage_3[] = { 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 25, 0, 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 28, 29, 30, 31, 0, 0, 0, 0, - 0, 0, 0, 32, 0, 0, 33, 34, 0, 35, 0, 0, 0, 0, 0, 0, - 0, 0, 36, 0, 0, 0, 0, 0, 37, 38, 0, 0, 0, 0, 39, 0, - 0, 0, 0, 0, 0, 0, 0, 40, 0, 41, 42, 0, 0, 0, 0, 0, - 0, 43, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 32, 0, 0, 33, 34, 0, 35, 36, 37, 0, 0, 0, 0, + 0, 0, 38, 0, 0, 0, 0, 0, 39, 40, 41, 42, 43, 44, 45, 0, + 0, 0, 46, 47, 0, 0, 0, 48, 0, 0, 0, 0, 49, 0, 0, 0, + 0, 50, 51, 0, 0, 0, 0, 0, 52, 0, 0, 0, 0, 0, 0, 0, + 0, 53, 0, 0, 0, 0, 0, 0, }; static RE_UINT8 re_grapheme_extend_stage_4[] = { @@ -6423,23 +6869,28 @@ static RE_UINT8 re_grapheme_extend_stage_4[] = { 7, 0, 8, 9, 0, 0, 10, 11, 12, 13, 14, 0, 0, 15, 0, 
16, 17, 18, 19, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 24, 28, 29, 30, 31, 28, 29, 32, 24, 25, 33, 34, 24, 35, 36, 37, 0, - 0, 38, 39, 24, 0, 40, 41, 24, 0, 36, 27, 24, 0, 0, 42, 0, - 0, 43, 44, 0, 0, 45, 46, 0, 47, 48, 0, 49, 50, 51, 52, 0, - 0, 53, 54, 55, 56, 0, 0, 0, 0, 0, 57, 0, 0, 0, 0, 0, - 58, 58, 59, 59, 0, 60, 61, 0, 62, 0, 0, 0, 0, 63, 0, 0, - 0, 64, 0, 0, 0, 0, 0, 0, 65, 0, 66, 67, 0, 0, 0, 0, - 68, 69, 35, 16, 70, 71, 0, 72, 0, 73, 0, 0, 0, 0, 74, 75, - 0, 0, 0, 0, 0, 0, 1, 76, 77, 0, 0, 0, 0, 0, 13, 78, - 0, 0, 0, 0, 0, 0, 0, 79, 0, 0, 0, 80, 0, 0, 0, 1, - 0, 81, 0, 0, 82, 0, 0, 0, 0, 0, 0, 83, 80, 0, 0, 84, - 85, 86, 0, 0, 0, 0, 87, 88, 0, 89, 90, 0, 21, 91, 0, 0, - 0, 92, 93, 0, 0, 94, 25, 95, 0, 0, 0, 0, 0, 0, 0, 96, - 36, 0, 0, 0, 0, 0, 0, 0, 2, 97, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 98, - 99, 100, 0, 0, 0, 0, 0, 0, 25, 101, 97, 0, 70, 102, 0, 0, - 21, 103, 0, 0, 70, 104, 0, 0, 0, 0, 0, 0, 0, 105, 0, 0, - 0, 0, 0, 0, 106, 0, 0, 0, 0, 0, 0, 107, 108, 109, 0, 0, - 0, 0, 110, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, + 38, 39, 40, 24, 25, 41, 42, 24, 25, 36, 27, 24, 0, 0, 43, 0, + 0, 44, 45, 0, 0, 46, 47, 0, 48, 49, 0, 50, 51, 52, 53, 0, + 0, 54, 55, 56, 57, 0, 0, 0, 0, 0, 58, 0, 0, 0, 0, 0, + 59, 59, 60, 60, 0, 61, 62, 0, 63, 0, 0, 0, 0, 64, 0, 0, + 0, 65, 0, 0, 0, 0, 0, 0, 66, 0, 67, 68, 0, 69, 0, 0, + 70, 71, 35, 16, 72, 73, 0, 74, 0, 75, 0, 0, 0, 0, 76, 77, + 0, 0, 0, 0, 0, 0, 1, 78, 79, 0, 0, 0, 0, 0, 13, 80, + 0, 0, 0, 0, 0, 0, 0, 81, 0, 0, 0, 82, 0, 0, 0, 1, + 0, 83, 0, 0, 84, 0, 0, 0, 0, 0, 0, 85, 82, 0, 0, 86, + 87, 88, 0, 0, 0, 0, 89, 90, 0, 91, 92, 0, 21, 93, 0, 94, + 0, 95, 96, 29, 0, 97, 25, 98, 0, 0, 0, 0, 0, 0, 0, 99, + 36, 0, 0, 0, 0, 0, 0, 0, 2, 100, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 101, + 0, 0, 0, 0, 0, 0, 0, 38, 0, 0, 0, 102, 0, 0, 0, 0, + 103, 104, 0, 0, 0, 0, 0, 88, 25, 105, 106, 82, 72, 107, 0, 0, + 21, 108, 0, 109, 72, 110, 0, 0, 0, 111, 0, 0, 0, 0, 82, 112, + 
25, 26, 113, 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 115, 116, 0, + 0, 0, 0, 0, 0, 117, 38, 0, 0, 118, 38, 0, 0, 119, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 120, 0, 121, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 122, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, + 0, 0, 0, 124, 125, 126, 0, 0, 0, 0, 127, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 121, 0, 1, 1, 1, 1, 1, 1, 1, 2, }; static RE_UINT8 re_grapheme_extend_stage_5[] = { @@ -6448,32 +6899,36 @@ static RE_UINT8 re_grapheme_extend_stage_5[] = { 0, 248, 255, 255, 0, 0, 1, 0, 0, 0, 192, 159, 159, 61, 0, 0, 0, 0, 2, 0, 0, 0, 255, 255, 255, 7, 0, 0, 192, 255, 1, 0, 0, 248, 15, 0, 0, 0, 192, 251, 239, 62, 0, 0, 0, 0, 0, 14, - 240, 255, 255, 127, 7, 0, 0, 0, 0, 0, 0, 20, 254, 33, 254, 0, + 240, 255, 255, 255, 7, 0, 0, 0, 0, 0, 0, 20, 254, 33, 254, 0, 12, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 80, 30, 32, 128, 0, 6, 0, 0, 0, 0, 0, 0, 16, 134, 57, 2, 0, 0, 0, 35, 0, 190, 33, 0, 0, 0, 0, 0, 208, 30, 32, 192, 0, 4, 0, 0, 0, - 0, 0, 0, 64, 1, 32, 128, 0, 0, 0, 0, 192, 193, 61, 96, 0, - 0, 0, 0, 144, 68, 48, 96, 0, 0, 132, 92, 128, 0, 0, 242, 7, - 128, 127, 0, 0, 0, 0, 242, 27, 0, 63, 0, 0, 0, 0, 0, 3, - 0, 0, 160, 2, 0, 0, 254, 127, 223, 224, 255, 254, 255, 255, 255, 31, - 64, 0, 0, 0, 0, 224, 253, 102, 0, 0, 0, 195, 1, 0, 30, 0, - 100, 32, 0, 32, 0, 0, 0, 224, 0, 0, 28, 0, 0, 0, 12, 0, - 0, 0, 176, 63, 64, 254, 15, 32, 0, 56, 0, 0, 0, 2, 0, 0, - 135, 1, 4, 14, 0, 0, 128, 9, 0, 0, 64, 127, 229, 31, 248, 159, - 15, 0, 0, 0, 0, 0, 208, 23, 3, 0, 0, 0, 60, 11, 0, 0, - 64, 163, 3, 0, 0, 240, 207, 0, 0, 0, 247, 255, 253, 33, 16, 0, - 127, 0, 0, 240, 0, 48, 0, 0, 255, 255, 1, 0, 0, 128, 3, 0, - 0, 0, 0, 128, 0, 252, 0, 0, 0, 0, 0, 6, 0, 128, 247, 63, - 0, 0, 3, 0, 68, 8, 0, 0, 96, 0, 0, 0, 16, 0, 0, 0, - 255, 255, 3, 0, 192, 63, 0, 0, 128, 255, 3, 0, 0, 0, 200, 19, - 0, 126, 102, 0, 8, 16, 0, 0, 0, 0, 157, 193, 0, 48, 64, 0, - 32, 33, 0, 0, 127, 0, 0, 0, 0, 0, 0, 32, 110, 240, 0, 0, - 0, 0, 0, 135, 0, 0, 0, 255, 0, 0, 120, 6, 128, 239, 31, 0, - 0, 0, 192, 127, 0, 40, 191, 0, 0, 
128, 7, 0, 160, 195, 7, 248, - 231, 15, 0, 0, 0, 60, 0, 0, 28, 0, 0, 0, + 0, 0, 0, 64, 1, 32, 128, 0, 1, 0, 0, 0, 0, 0, 0, 192, + 193, 61, 96, 0, 0, 0, 0, 144, 68, 48, 96, 0, 0, 132, 92, 128, + 0, 0, 242, 7, 128, 127, 0, 0, 0, 0, 242, 27, 0, 63, 0, 0, + 0, 0, 0, 3, 0, 0, 160, 2, 0, 0, 254, 127, 223, 224, 255, 254, + 255, 255, 255, 31, 64, 0, 0, 0, 0, 224, 253, 102, 0, 0, 0, 195, + 1, 0, 30, 0, 100, 32, 0, 32, 0, 0, 0, 224, 0, 0, 28, 0, + 0, 0, 12, 0, 0, 0, 176, 63, 64, 254, 15, 32, 0, 56, 0, 0, + 0, 2, 0, 0, 135, 1, 4, 14, 0, 0, 128, 9, 0, 0, 64, 127, + 229, 31, 248, 159, 0, 0, 255, 127, 15, 0, 0, 0, 0, 0, 208, 23, + 3, 0, 0, 0, 60, 59, 0, 0, 64, 163, 3, 0, 0, 240, 207, 0, + 0, 0, 247, 255, 253, 33, 16, 3, 255, 255, 63, 240, 0, 48, 0, 0, + 255, 255, 1, 0, 0, 128, 3, 0, 0, 0, 0, 128, 0, 252, 0, 0, + 0, 0, 0, 6, 0, 128, 247, 63, 0, 0, 3, 0, 68, 8, 0, 0, + 96, 0, 0, 0, 16, 0, 0, 0, 255, 255, 3, 0, 192, 63, 0, 0, + 128, 255, 3, 0, 0, 0, 200, 19, 32, 0, 0, 0, 0, 126, 102, 0, + 8, 16, 0, 0, 0, 0, 157, 193, 0, 48, 64, 0, 32, 33, 0, 0, + 255, 63, 0, 0, 0, 0, 0, 32, 0, 0, 192, 7, 110, 240, 0, 0, + 0, 0, 0, 135, 0, 0, 0, 255, 127, 0, 0, 0, 0, 0, 120, 6, + 128, 239, 31, 0, 0, 0, 8, 0, 0, 0, 192, 127, 0, 128, 211, 0, + 248, 7, 0, 0, 1, 0, 128, 0, 192, 31, 31, 0, 0, 0, 249, 165, + 13, 0, 0, 0, 0, 128, 60, 176, 0, 0, 248, 167, 0, 40, 191, 0, + 0, 0, 31, 0, 0, 0, 127, 0, 0, 128, 7, 0, 0, 0, 0, 96, + 160, 195, 7, 248, 231, 15, 0, 0, 0, 60, 0, 0, 28, 0, 0, 0, }; -/* Grapheme_Extend: 1062 bytes. */ +/* Grapheme_Extend: 1226 bytes. */ RE_UINT32 re_get_grapheme_extend(RE_UINT32 ch) { RE_UINT32 code; @@ -6502,158 +6957,175 @@ RE_UINT32 re_get_grapheme_extend(RE_UINT32 ch) { /* Grapheme_Base. 
*/ static RE_UINT8 re_grapheme_base_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 10, 12, 13, 14, + 3, 3, 3, 3, 3, 15, 10, 16, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, }; static RE_UINT8 re_grapheme_base_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, - 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 24, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 25, 7, 26, 27, 13, 13, 13, 13, 13, 13, 13, 28, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 10, 10, 19, 20, 21, 22, 23, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 24, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 25, + 10, 10, 26, 27, 28, 29, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 30, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 31, 31, + 10, 50, 51, 31, 31, 31, 31, 31, 10, 10, 52, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 10, 53, 31, 54, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 
31, 55, 31, 31, 31, 31, 31, 56, 31, + 31, 31, 31, 31, 31, 31, 31, 31, 57, 58, 59, 60, 31, 31, 31, 31, + 31, 31, 31, 31, 61, 31, 31, 62, 63, 64, 65, 66, 67, 31, 31, 31, + 10, 10, 10, 68, 10, 10, 10, 10, 10, 10, 10, 69, 70, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 10, 70, 31, 31, }; static RE_UINT8 re_grapheme_base_stage_3[] = { - 0, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 1, 16, 17, 1, 1, 18, 19, 20, 21, 22, 23, 24, 25, 1, 26, - 27, 28, 1, 29, 30, 1, 1, 31, 1, 1, 1, 32, 33, 34, 35, 36, - 37, 38, 39, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 41, - 1, 1, 1, 1, 42, 1, 43, 44, 45, 46, 47, 48, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 49, 50, 50, 50, 50, 50, 50, 50, 50, - 50, 1, 51, 52, 1, 53, 54, 55, 56, 57, 58, 59, 60, 50, 50, 50, - 61, 62, 63, 64, 65, 50, 66, 50, 67, 68, 50, 50, 50, 50, 69, 50, - 1, 1, 1, 70, 71, 50, 50, 50, 1, 1, 1, 1, 72, 50, 50, 50, - 1, 1, 73, 50, 50, 50, 50, 74, 75, 50, 50, 50, 50, 50, 50, 50, - 76, 77, 78, 79, 80, 81, 82, 83, 50, 50, 50, 50, 50, 50, 84, 50, - 85, 86, 87, 88, 89, 90, 91, 92, 1, 1, 1, 1, 1, 1, 93, 1, - 1, 1, 1, 1, 1, 1, 1, 94, 95, 50, 50, 50, 50, 50, 50, 50, - 1, 1, 95, 50, 50, 50, 50, 50, + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 3, + 3, 3, 7, 3, 8, 9, 10, 11, 12, 13, 3, 14, 15, 16, 17, 18, + 19, 20, 21, 4, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, + 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 3, 3, 3, 3, 3, 54, 55, 56, 57, 58, 59, 60, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 61, 62, 63, 64, 65, 66, + 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 4, 78, 79, 80, 81, + 82, 83, 4, 84, 3, 3, 3, 4, 3, 3, 3, 3, 85, 86, 87, 88, + 89, 90, 91, 4, 3, 3, 92, 3, 3, 3, 3, 3, 3, 3, 3, 93, + 94, 95, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 96, 97, 98, + 99, 100, 3, 101, 102, 103, 104, 105, 3, 106, 107, 108, 3, 3, 3, 109, + 110, 111, 112, 3, 113, 3, 114, 115, 100, 3, 3, 1, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 70, 3, 3, 3, 3, 3, 3, 3, 3, 116, + 3, 
3, 117, 118, 3, 3, 3, 3, 119, 120, 121, 122, 3, 3, 123, 124, + 125, 68, 3, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 4, 137, + 3, 3, 3, 3, 3, 3, 115, 138, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 3, 3, 3, 3, 3, 139, 3, 140, 141, 142, 3, 143, + 3, 3, 3, 3, 3, 144, 145, 146, 147, 148, 3, 149, 111, 3, 150, 151, + 152, 153, 3, 93, 154, 3, 155, 156, 4, 4, 61, 157, 115, 158, 159, 160, + 3, 3, 161, 4, 162, 163, 4, 4, 3, 3, 3, 3, 164, 165, 4, 4, + 166, 167, 168, 4, 169, 4, 170, 4, 171, 172, 173, 174, 175, 176, 177, 4, + 3, 178, 4, 4, 4, 4, 4, 4, 4, 179, 4, 4, 4, 4, 4, 4, + 180, 181, 182, 183, 184, 185, 186, 187, 188, 4, 189, 190, 191, 192, 4, 4, + 4, 4, 193, 194, 4, 4, 195, 196, 197, 198, 199, 200, 4, 4, 4, 4, + 4, 4, 0, 201, 4, 4, 4, 4, 4, 4, 4, 62, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 202, 4, 3, 203, 4, 4, 4, 4, 4, 4, + 204, 4, 4, 4, 4, 4, 4, 4, 62, 205, 4, 206, 207, 208, 209, 4, + 4, 4, 4, 4, 3, 210, 211, 4, 212, 4, 4, 4, 4, 4, 4, 4, + 3, 213, 214, 4, 4, 4, 4, 4, 3, 3, 3, 70, 215, 216, 217, 218, + 3, 219, 4, 4, 3, 220, 4, 4, 3, 221, 222, 223, 224, 225, 3, 3, + 3, 3, 226, 3, 3, 3, 3, 227, 3, 3, 3, 228, 4, 4, 4, 4, + 229, 230, 231, 232, 4, 4, 4, 4, 73, 3, 233, 234, 235, 73, 236, 237, + 238, 239, 4, 4, 240, 241, 3, 242, 3, 3, 3, 1, 3, 243, 244, 3, + 3, 245, 3, 246, 3, 108, 3, 247, 248, 249, 250, 4, 4, 4, 4, 4, + 3, 3, 3, 251, 3, 3, 3, 3, 3, 3, 3, 3, 60, 3, 3, 3, + 218, 4, 4, 4, 4, 4, 4, 4, }; static RE_UINT8 re_grapheme_base_stage_4[] = { - 0, 1, 1, 2, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 4, 5, 6, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, - 1, 8, 9, 10, 11, 12, 13, 14, 15, 1, 16, 17, 1, 1, 18, 19, - 20, 21, 22, 1, 1, 23, 1, 24, 25, 26, 27, 0, 0, 28, 0, 0, - 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 33, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 56, 60, 61, 62, 63, 64, 65, 66, 10, 67, 68, 0, 69, 70, 71, 0, - 72, 73, 74, 75, 76, 77, 78, 0, 1, 79, 80, 81, 82, 1, 83, 1, - 1, 1, 84, 1, 85, 86, 87, 1, 88, 1, 89, 90, 91, 1, 1, 92, - 
1, 1, 1, 1, 90, 1, 1, 93, 94, 95, 96, 97, 1, 98, 99, 100, - 101, 1, 1, 102, 1, 103, 1, 104, 90, 105, 106, 107, 1, 108, 109, 1, - 110, 1, 111, 112, 100, 113, 0, 0, 114, 115, 116, 117, 118, 119, 1, 120, - 1, 121, 122, 1, 0, 0, 123, 124, 1, 1, 1, 1, 1, 1, 0, 0, - 125, 1, 126, 127, 1, 128, 129, 130, 131, 132, 1, 133, 134, 89, 0, 0, - 1, 1, 1, 1, 135, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 136, - 1, 137, 16, 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 138, 0, 0, 0, 0, 0, 1, 139, 2, 1, 1, 1, 1, 140, - 1, 83, 1, 141, 142, 143, 143, 0, 1, 144, 0, 0, 145, 1, 1, 136, - 1, 1, 1, 1, 1, 1, 104, 146, 1, 135, 10, 1, 147, 1, 1, 1, - 148, 149, 1, 1, 139, 89, 1, 150, 2, 1, 1, 1, 1, 1, 1, 2, - 1, 1, 1, 1, 1, 104, 1, 1, 1, 1, 1, 1, 1, 1, 151, 0, - 1, 1, 1, 1, 152, 1, 153, 1, 1, 154, 1, 155, 102, 1, 1, 156, - 1, 1, 1, 1, 157, 16, 0, 158, 159, 160, 1, 102, 1, 1, 161, 162, - 1, 163, 164, 90, 29, 165, 166, 0, 1, 167, 168, 144, 1, 169, 170, 171, - 172, 173, 0, 0, 0, 0, 1, 174, 1, 1, 1, 1, 1, 150, 175, 144, - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 176, 1, 1, 91, 0, - 177, 178, 179, 1, 1, 1, 180, 1, 1, 1, 181, 1, 182, 1, 183, 184, - 185, 181, 186, 187, 1, 1, 1, 90, 10, 1, 1, 1, 127, 2, 188, 189, - 190, 191, 192, 0, 1, 1, 1, 89, 193, 194, 1, 1, 195, 0, 181, 90, - 0, 0, 0, 0, 90, 1, 93, 0, 2, 150, 16, 0, 196, 1, 197, 0, - 1, 1, 1, 1, 127, 198, 0, 0, 199, 200, 201, 0, 0, 0, 0, 0, - 202, 203, 0, 0, 1, 204, 0, 0, 205, 136, 206, 1, 0, 0, 0, 0, - 1, 207, 208, 209, 0, 0, 0, 0, 1, 1, 210, 0, 0, 0, 0, 0, - 0, 0, 0, 2, 0, 0, 0, 0, 211, 102, 212, 21, 118, 213, 214, 215, - 29, 216, 217, 0, 118, 218, 215, 0, 0, 0, 0, 0, 1, 219, 198, 0, - 1, 1, 1, 220, 0, 0, 0, 0, 1, 1, 1, 221, 0, 0, 0, 0, - 1, 220, 0, 0, 0, 0, 0, 0, 1, 222, 0, 0, 0, 0, 0, 0, - 1, 1, 223, 2, 224, 0, 0, 0, 225, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 104, 1, 226, 1, 227, 228, 229, 127, 0, - 1, 1, 230, 0, 0, 0, 0, 0, 1, 1, 142, 96, 0, 0, 0, 0, - 1, 1, 128, 1, 231, 232, 233, 1, 234, 235, 236, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 237, 1, 1, 1, 1, 1, 1, 1, 
1, 238, 1, - 233, 239, 240, 241, 242, 243, 0, 244, 1, 108, 1, 1, 136, 245, 246, 0, - 131, 139, 1, 108, 89, 0, 0, 247, 248, 89, 249, 0, 0, 0, 0, 0, - 1, 250, 1, 90, 136, 1, 251, 93, 1, 2, 211, 1, 1, 1, 1, 252, - 1, 127, 150, 183, 0, 0, 0, 253, 1, 1, 254, 0, 1, 1, 255, 0, - 1, 1, 1, 136, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 142, 0, - 1, 92, 1, 1, 1, 1, 1, 1, 127, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 1, 1, 1, 1, 2, 0, 0, 3, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 4, 5, 1, 6, 1, 7, 1, 1, 1, + 1, 1, 1, 8, 1, 9, 8, 1, 10, 0, 0, 11, 12, 1, 13, 14, + 15, 16, 1, 1, 13, 0, 1, 8, 1, 17, 18, 1, 19, 20, 1, 0, + 21, 1, 1, 1, 1, 1, 22, 23, 1, 1, 13, 24, 1, 25, 26, 2, + 1, 27, 0, 0, 0, 0, 1, 28, 29, 1, 1, 30, 31, 32, 33, 1, + 34, 35, 36, 37, 38, 39, 40, 41, 42, 35, 36, 43, 44, 45, 15, 46, + 47, 6, 36, 48, 49, 44, 40, 50, 51, 35, 36, 52, 53, 39, 40, 54, + 55, 56, 57, 58, 59, 44, 15, 13, 60, 20, 36, 61, 62, 63, 40, 64, + 65, 20, 36, 66, 67, 11, 40, 68, 65, 20, 1, 69, 70, 0, 40, 71, + 72, 73, 1, 74, 75, 76, 15, 46, 8, 1, 1, 77, 78, 41, 0, 0, + 79, 80, 81, 82, 83, 84, 0, 0, 1, 4, 1, 85, 86, 1, 87, 88, + 89, 0, 0, 90, 91, 13, 0, 0, 1, 1, 87, 92, 1, 93, 8, 94, + 95, 3, 1, 1, 96, 1, 1, 1, 97, 98, 1, 1, 97, 1, 1, 99, + 100, 101, 1, 1, 1, 100, 1, 1, 1, 13, 1, 87, 1, 102, 1, 1, + 1, 1, 1, 14, 1, 87, 1, 1, 1, 1, 1, 103, 3, 50, 1, 104, + 1, 50, 3, 44, 1, 1, 1, 105, 106, 107, 102, 102, 13, 102, 1, 1, + 1, 1, 1, 54, 1, 1, 108, 1, 1, 1, 1, 22, 1, 2, 109, 110, + 111, 1, 19, 14, 1, 1, 41, 1, 102, 112, 1, 1, 1, 113, 1, 1, + 1, 114, 115, 28, 102, 102, 19, 0, 116, 1, 1, 117, 118, 1, 13, 107, + 119, 1, 120, 1, 1, 1, 121, 122, 1, 1, 41, 123, 124, 1, 1, 1, + 54, 125, 126, 127, 1, 128, 1, 1, 128, 129, 1, 19, 1, 1, 1, 130, + 130, 131, 1, 132, 13, 1, 133, 1, 1, 1, 0, 33, 2, 87, 1, 19, + 102, 1, 1, 1, 1, 1, 1, 13, 1, 1, 75, 0, 13, 0, 1, 1, + 1, 1, 1, 134, 1, 135, 1, 124, 36, 50, 0, 0, 1, 1, 2, 1, + 1, 2, 1, 1, 1, 1, 2, 136, 1, 1, 96, 1, 1, 1, 133, 44, + 1, 75, 137, 137, 137, 137, 0, 0, 28, 0, 0, 0, 1, 138, 1, 1, + 1, 1, 
1, 139, 1, 22, 0, 41, 1, 1, 102, 1, 8, 1, 1, 1, + 1, 140, 1, 1, 141, 1, 19, 8, 2, 1, 1, 13, 1, 1, 139, 1, + 87, 0, 0, 0, 87, 1, 1, 1, 75, 1, 1, 1, 1, 1, 41, 0, + 1, 1, 2, 142, 1, 19, 1, 1, 1, 1, 1, 143, 2, 1, 19, 50, + 0, 0, 0, 144, 145, 1, 146, 102, 147, 102, 0, 148, 1, 1, 149, 1, + 75, 150, 1, 87, 29, 1, 1, 151, 152, 153, 130, 2, 1, 1, 154, 155, + 156, 84, 1, 157, 1, 1, 1, 158, 159, 160, 161, 22, 162, 163, 137, 1, + 1, 1, 164, 0, 1, 1, 165, 102, 140, 1, 1, 41, 1, 1, 19, 1, + 1, 102, 0, 0, 75, 166, 1, 167, 168, 1, 1, 1, 50, 29, 1, 1, + 0, 1, 1, 1, 1, 119, 1, 1, 54, 0, 0, 19, 0, 102, 0, 1, + 1, 169, 170, 130, 1, 1, 1, 87, 1, 19, 1, 2, 171, 172, 137, 173, + 157, 1, 101, 174, 19, 19, 0, 0, 175, 1, 1, 176, 87, 41, 44, 0, + 0, 1, 1, 87, 1, 44, 8, 41, 13, 1, 1, 22, 1, 152, 1, 1, + 177, 22, 0, 0, 1, 19, 102, 0, 1, 1, 54, 1, 1, 1, 178, 0, + 1, 1, 1, 75, 1, 22, 54, 0, 179, 1, 1, 180, 1, 181, 1, 1, + 1, 2, 144, 0, 1, 182, 1, 58, 1, 1, 1, 183, 44, 184, 1, 139, + 54, 103, 1, 1, 1, 1, 0, 0, 1, 1, 185, 75, 1, 1, 1, 71, + 1, 135, 1, 186, 1, 187, 188, 0, 103, 0, 0, 0, 0, 0, 1, 2, + 20, 1, 1, 54, 189, 119, 1, 0, 119, 1, 1, 190, 50, 1, 103, 102, + 29, 1, 191, 15, 139, 1, 1, 192, 119, 1, 1, 193, 194, 13, 8, 14, + 1, 6, 2, 195, 0, 0, 0, 1, 1, 2, 28, 102, 51, 35, 36, 196, + 197, 21, 139, 0, 1, 1, 1, 198, 199, 102, 0, 0, 1, 1, 2, 200, + 201, 0, 0, 0, 1, 1, 1, 202, 62, 102, 0, 0, 1, 1, 203, 204, + 102, 0, 0, 0, 1, 1, 1, 205, 1, 103, 0, 0, 1, 1, 2, 14, + 1, 1, 2, 0, 1, 2, 153, 0, 0, 1, 19, 206, 1, 1, 1, 144, + 22, 138, 6, 207, 1, 0, 0, 0, 14, 1, 1, 2, 0, 29, 0, 0, + 50, 0, 0, 0, 1, 1, 13, 87, 103, 208, 0, 0, 1, 1, 9, 1, + 1, 1, 209, 0, 210, 1, 153, 1, 1, 19, 0, 0, 211, 0, 0, 0, + 1, 75, 1, 50, 1, 130, 1, 1, 1, 3, 212, 30, 213, 1, 1, 1, + 214, 215, 1, 216, 217, 20, 1, 1, 1, 1, 135, 1, 161, 1, 1, 1, + 218, 0, 0, 0, 213, 1, 219, 220, 221, 222, 223, 224, 138, 41, 225, 41, + 0, 0, 0, 50, 1, 139, 2, 8, 8, 8, 1, 22, 87, 1, 2, 1, + 1, 13, 0, 0, 0, 0, 15, 1, 28, 1, 1, 13, 103, 50, 0, 0, + 1, 1, 
87, 1, 1, 1, 1, 19, 2, 116, 1, 54, 13, 1, 1, 138, + 1, 1, 213, 1, 226, 1, 1, 1, 1, 0, 87, 139, 1, 14, 0, 0, + 41, 1, 1, 1, 54, 102, 1, 1, 54, 1, 19, 0, 1, 75, 0, 0, }; static RE_UINT8 re_grapheme_base_stage_5[] = { - 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 127, 255, 223, 255, 255, - 0, 0, 255, 124, 240, 215, 255, 255, 251, 255, 255, 255, 7, 252, 255, 255, - 255, 0, 254, 255, 255, 255, 127, 254, 254, 255, 255, 255, 255, 134, 0, 0, - 0, 0, 0, 64, 73, 0, 255, 255, 255, 7, 31, 0, 192, 255, 0, 200, - 255, 7, 0, 0, 255, 255, 254, 255, 255, 255, 63, 64, 96, 194, 255, 255, - 255, 63, 253, 255, 255, 255, 0, 0, 0, 224, 255, 255, 63, 0, 2, 0, - 255, 7, 240, 7, 255, 255, 63, 4, 16, 1, 255, 127, 255, 255, 255, 65, - 253, 31, 0, 0, 248, 255, 255, 255, 255, 255, 255, 235, 1, 222, 1, 255, - 243, 255, 255, 254, 236, 159, 249, 255, 255, 253, 197, 163, 129, 89, 0, 176, - 195, 255, 255, 15, 232, 135, 249, 255, 255, 253, 109, 195, 1, 0, 0, 94, - 192, 255, 28, 0, 232, 191, 251, 255, 255, 253, 237, 227, 1, 26, 1, 0, - 195, 255, 3, 0, 255, 253, 237, 35, 129, 25, 0, 176, 195, 255, 255, 0, - 232, 199, 61, 214, 24, 199, 255, 131, 198, 29, 1, 0, 192, 255, 255, 7, - 238, 223, 253, 255, 255, 253, 239, 35, 30, 0, 0, 3, 195, 255, 0, 255, - 236, 223, 253, 255, 255, 253, 239, 99, 155, 13, 0, 64, 195, 255, 6, 0, - 255, 255, 255, 167, 193, 93, 0, 0, 195, 255, 63, 254, 236, 255, 127, 252, - 255, 255, 251, 47, 127, 0, 3, 127, 0, 0, 28, 0, 255, 255, 13, 128, - 127, 128, 255, 15, 150, 37, 240, 254, 174, 236, 13, 32, 95, 0, 255, 243, - 255, 255, 255, 252, 255, 255, 95, 253, 255, 254, 255, 255, 255, 31, 0, 128, - 32, 31, 0, 0, 0, 0, 0, 192, 191, 223, 255, 7, 255, 31, 2, 153, - 255, 255, 255, 60, 254, 255, 225, 255, 155, 223, 255, 223, 191, 32, 255, 255, - 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, - 255, 255, 61, 255, 255, 255, 255, 7, 255, 255, 255, 31, 255, 255, 255, 3, - 255, 255, 31, 0, 255, 255, 1, 0, 255, 223, 3, 0, 255, 255, 99, 0, - 255, 255, 3, 0, 255, 223, 
1, 0, 255, 255, 79, 192, 191, 1, 240, 31, - 255, 3, 255, 3, 255, 7, 255, 3, 255, 255, 255, 0, 255, 5, 255, 255, - 255, 255, 63, 0, 120, 14, 251, 1, 241, 255, 255, 255, 255, 63, 31, 0, - 255, 15, 255, 255, 255, 3, 255, 199, 255, 255, 127, 198, 255, 255, 191, 0, - 26, 224, 7, 0, 255, 63, 0, 0, 240, 255, 255, 255, 255, 255, 47, 232, - 251, 15, 255, 255, 255, 7, 240, 31, 252, 255, 255, 255, 195, 244, 255, 255, - 191, 92, 12, 240, 255, 15, 48, 248, 255, 227, 255, 255, 255, 0, 8, 0, - 2, 222, 111, 0, 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 255, 63, - 255, 255, 223, 255, 223, 255, 207, 239, 255, 255, 220, 127, 255, 7, 255, 255, - 255, 128, 255, 255, 0, 0, 243, 255, 255, 127, 255, 31, 255, 3, 255, 255, - 255, 255, 15, 0, 127, 0, 0, 0, 255, 31, 255, 3, 255, 127, 255, 255, - 255, 127, 12, 254, 255, 128, 1, 0, 255, 255, 127, 0, 127, 127, 127, 127, - 255, 255, 255, 15, 255, 255, 255, 251, 0, 0, 255, 15, 255, 255, 127, 248, - 224, 255, 255, 255, 255, 63, 254, 255, 15, 0, 255, 255, 255, 31, 0, 0, - 255, 31, 255, 255, 127, 0, 255, 255, 255, 15, 0, 0, 255, 127, 8, 192, - 255, 255, 252, 0, 255, 127, 15, 0, 0, 0, 0, 255, 187, 247, 255, 255, - 159, 15, 255, 3, 15, 192, 255, 3, 0, 0, 252, 15, 63, 192, 255, 255, - 127, 0, 12, 128, 255, 255, 55, 236, 255, 191, 255, 195, 255, 129, 25, 0, - 247, 47, 255, 243, 255, 255, 98, 62, 5, 0, 0, 248, 255, 207, 63, 0, - 126, 126, 126, 0, 127, 127, 0, 0, 223, 30, 255, 3, 127, 248, 255, 255, - 255, 63, 255, 255, 127, 0, 248, 160, 255, 255, 127, 95, 219, 255, 255, 255, - 3, 0, 248, 255, 0, 0, 255, 255, 255, 255, 252, 255, 255, 0, 0, 0, - 0, 0, 255, 63, 0, 0, 255, 3, 255, 255, 247, 255, 127, 15, 223, 255, - 252, 252, 252, 28, 127, 127, 0, 48, 255, 239, 255, 255, 127, 255, 255, 183, - 255, 63, 255, 63, 135, 255, 255, 255, 255, 255, 143, 255, 255, 7, 255, 15, - 255, 255, 255, 191, 15, 255, 63, 0, 255, 3, 0, 0, 63, 253, 255, 255, - 255, 255, 191, 145, 255, 255, 191, 255, 255, 255, 255, 143, 255, 255, 255, 131, - 255, 255, 255, 192, 1, 0, 239, 254, 
255, 0, 255, 1, 255, 255, 63, 254, - 255, 255, 63, 255, 255, 255, 7, 255, 255, 1, 0, 0, 253, 255, 255, 255, - 128, 63, 252, 255, 255, 255, 135, 217, 3, 0, 255, 255, 255, 1, 255, 3, - 127, 16, 192, 255, 15, 0, 0, 0, 255, 255, 63, 128, 255, 215, 64, 0, - 255, 127, 0, 0, 7, 0, 15, 0, 255, 255, 255, 1, 31, 0, 255, 255, - 0, 0, 248, 255, 3, 0, 0, 0, 127, 254, 255, 255, 95, 60, 0, 0, - 24, 240, 255, 255, 255, 195, 255, 255, 35, 0, 0, 0, 255, 255, 255, 223, - 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, - 95, 252, 253, 255, 63, 255, 255, 255, 255, 207, 255, 255, 150, 254, 247, 10, - 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, 238, 251, 255, 15, - 0, 0, 3, 0, 255, 127, 254, 127, 254, 255, 254, 255, 192, 255, 255, 255, - 7, 0, 255, 255, 255, 1, 3, 0, 1, 0, 191, 255, 223, 7, 0, 0, - 255, 255, 255, 30, 0, 0, 0, 248, 225, 255, 0, 0, 63, 0, 0, 0, + 0, 0, 255, 255, 255, 127, 255, 223, 255, 252, 240, 215, 251, 255, 7, 252, + 254, 255, 127, 254, 255, 230, 0, 64, 73, 0, 255, 7, 31, 0, 192, 255, + 0, 200, 63, 64, 96, 194, 255, 63, 253, 255, 0, 224, 63, 0, 2, 0, + 240, 7, 63, 4, 16, 1, 255, 65, 7, 0, 248, 255, 255, 235, 1, 222, + 1, 255, 243, 255, 237, 159, 249, 255, 255, 253, 197, 163, 129, 89, 0, 176, + 195, 255, 255, 15, 232, 135, 109, 195, 1, 0, 0, 94, 28, 0, 232, 191, + 237, 227, 1, 26, 3, 0, 236, 159, 237, 35, 129, 25, 255, 0, 232, 199, + 61, 214, 24, 199, 255, 131, 198, 29, 238, 223, 255, 35, 30, 0, 0, 3, + 0, 255, 236, 223, 239, 99, 155, 13, 6, 0, 255, 167, 193, 93, 63, 254, + 236, 255, 127, 252, 251, 47, 127, 0, 3, 127, 13, 128, 127, 128, 150, 37, + 240, 254, 174, 236, 13, 32, 95, 0, 255, 243, 95, 253, 255, 254, 255, 31, + 0, 128, 32, 31, 0, 192, 191, 223, 2, 153, 255, 60, 225, 255, 155, 223, + 191, 32, 255, 61, 127, 61, 61, 127, 61, 255, 127, 255, 255, 3, 255, 1, + 99, 0, 79, 192, 191, 1, 240, 31, 255, 5, 120, 14, 251, 1, 241, 255, + 255, 199, 127, 198, 191, 0, 26, 224, 240, 255, 47, 232, 251, 15, 252, 255, + 195, 196, 191, 
92, 12, 240, 48, 248, 255, 227, 8, 0, 2, 222, 111, 0, + 63, 63, 255, 170, 223, 255, 207, 239, 220, 127, 255, 128, 207, 255, 63, 255, + 12, 254, 127, 127, 255, 251, 15, 0, 127, 248, 224, 255, 8, 192, 252, 0, + 128, 255, 187, 247, 159, 15, 15, 192, 252, 15, 63, 192, 12, 128, 55, 236, + 255, 191, 255, 195, 255, 129, 25, 0, 247, 47, 255, 239, 98, 62, 5, 0, + 0, 248, 255, 207, 126, 126, 126, 0, 48, 0, 223, 30, 248, 160, 127, 95, + 219, 255, 247, 255, 127, 15, 252, 252, 252, 28, 0, 48, 255, 183, 135, 255, + 143, 255, 15, 255, 15, 128, 63, 253, 191, 145, 191, 255, 255, 143, 255, 192, + 239, 254, 31, 248, 7, 255, 3, 30, 0, 254, 128, 63, 135, 217, 127, 16, + 119, 0, 63, 128, 255, 33, 44, 63, 237, 163, 158, 57, 6, 90, 242, 0, + 3, 79, 254, 3, 7, 88, 255, 215, 64, 0, 7, 128, 32, 0, 255, 224, + 255, 147, 95, 60, 24, 240, 35, 0, 100, 222, 239, 255, 191, 231, 223, 223, + 255, 123, 95, 252, 159, 255, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, + 247, 94, 238, 251, 231, 255, }; -/* Grapheme_Base: 2169 bytes. */ +/* Grapheme_Base: 2438 bytes. 
*/ RE_UINT32 re_get_grapheme_base(RE_UINT32 ch) { RE_UINT32 code; @@ -6661,18 +7133,18 @@ RE_UINT32 re_get_grapheme_base(RE_UINT32 ch) { RE_UINT32 pos; RE_UINT32 value; - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_grapheme_base_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_grapheme_base_stage_1[f] << 4; + f = code >> 9; + code ^= f << 9; pos = (RE_UINT32)re_grapheme_base_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_grapheme_base_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_grapheme_base_stage_4[pos + f] << 5; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_grapheme_base_stage_3[pos + f] << 2; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_grapheme_base_stage_4[pos + f] << 4; pos += code; value = (re_grapheme_base_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; @@ -6682,35 +7154,37 @@ RE_UINT32 re_get_grapheme_base(RE_UINT32 ch) { /* Grapheme_Link. 
*/ static RE_UINT8 re_grapheme_link_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, + 0, 1, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, }; static RE_UINT8 re_grapheme_link_stage_2[] = { - 0, 1, 2, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 2, 3, 4, 5, 0, 0, 0, 0, 6, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 9, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; static RE_UINT8 re_grapheme_link_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 3, 4, - 5, 0, 0, 0, 0, 0, 0, 6, 0, 0, 7, 8, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 9, 0, 0, 10, 11, 12, 13, 0, 0, 0, 0, - 0, 0, 14, 0, 0, 0, 0, 0, 15, 16, 0, 0, 0, 0, 17, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 2, 3, 0, 0, 4, 5, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 6, 7, 0, 0, 0, 0, 8, 0, 9, 10, + 0, 0, 11, 0, 0, 0, 0, 0, 12, 9, 13, 14, 0, 15, 0, 16, + 0, 0, 0, 0, 17, 0, 0, 0, 18, 19, 20, 14, 21, 22, 1, 0, + 0, 23, 0, 17, 17, 24, 0, 0, }; static RE_UINT8 re_grapheme_link_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, - 0, 0, 1, 0, 0, 0, 2, 0, 0, 3, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 4, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, - 6, 6, 0, 0, 0, 0, 7, 0, 0, 0, 0, 8, 0, 0, 0, 0, - 0, 0, 4, 0, 0, 9, 0, 10, 0, 0, 0, 11, 0, 0, 0, 0, - 12, 0, 0, 0, 0, 0, 4, 0, 0, 0, 13, 0, 0, 0, 8, 0, - 0, 0, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 15, 0, 0, - 0, 16, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 14, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 3, 0, 0, + 4, 0, 0, 0, 0, 5, 0, 0, 6, 6, 0, 0, 0, 0, 7, 0, + 0, 0, 0, 8, 
0, 0, 4, 0, 0, 9, 0, 10, 0, 0, 0, 11, + 12, 0, 0, 0, 0, 0, 13, 0, 0, 0, 8, 0, 0, 0, 0, 14, + 0, 0, 0, 1, 0, 11, 0, 0, 0, 0, 12, 11, 0, 15, 0, 0, + 0, 16, 0, 0, 0, 17, 0, 0, 0, 0, 0, 2, 0, 0, 18, 0, + 0, 14, 0, 0, }; static RE_UINT8 re_grapheme_link_stage_5[] = { @@ -6718,10 +7192,10 @@ static RE_UINT8 re_grapheme_link_stage_5[] = { 16, 0, 0, 0, 0, 0, 0, 6, 0, 0, 16, 0, 0, 0, 4, 0, 1, 0, 0, 0, 0, 12, 0, 0, 0, 0, 12, 0, 0, 0, 0, 128, 64, 0, 0, 0, 0, 0, 8, 0, 0, 0, 64, 0, 0, 0, 0, 2, - 0, 0, 24, 0, + 0, 0, 24, 0, 0, 0, 32, 0, 4, 0, 0, 0, }; -/* Grapheme_Link: 374 bytes. */ +/* Grapheme_Link: 396 bytes. */ RE_UINT32 re_get_grapheme_link(RE_UINT32 ch) { RE_UINT32 code; @@ -6729,15 +7203,15 @@ RE_UINT32 re_get_grapheme_link(RE_UINT32 ch) { RE_UINT32 pos; RE_UINT32 value; - f = ch >> 15; - code = ch ^ (f << 15); + f = ch >> 14; + code = ch ^ (f << 14); pos = (RE_UINT32)re_grapheme_link_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; + f = code >> 10; + code ^= f << 10; pos = (RE_UINT32)re_grapheme_link_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_grapheme_link_stage_3[pos + f] << 3; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_grapheme_link_stage_3[pos + f] << 2; f = code >> 5; code ^= f << 5; pos = (RE_UINT32)re_grapheme_link_stage_4[pos + f] << 5; @@ -6927,7 +7401,7 @@ static RE_UINT8 re_dash_stage_4[] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 5, 6, 7, 1, 1, 1, 1, 1, - 8, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, + 8, 1, 1, 1, 1, 1, 1, 1, 9, 3, 1, 1, 1, 1, 1, 1, 10, 1, 11, 1, 1, 1, 1, 1, 12, 13, 1, 1, 14, 1, 1, 1, }; @@ -7041,24 +7515,25 @@ static RE_UINT8 re_quotation_mark_stage_2[] = { static RE_UINT8 re_quotation_mark_stage_3[] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 4, + 2, 1, 1, 1, 1, 1, 1, 3, 4, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 5, }; 
static RE_UINT8 re_quotation_mark_stage_4[] = { 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 3, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, - 1, 5, 1, 1, 6, 7, 1, 1, + 3, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, + 5, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 1, 7, 8, 1, 1, }; static RE_UINT8 re_quotation_mark_stage_5[] = { 0, 0, 0, 0, 132, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 8, 0, 0, 0, 255, 0, 0, 0, 6, - 0, 240, 0, 224, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, - 132, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, + 4, 0, 0, 0, 0, 0, 0, 0, 0, 240, 0, 224, 0, 0, 0, 0, + 30, 0, 0, 0, 0, 0, 0, 0, 132, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 12, 0, 0, 0, }; -/* Quotation_Mark: 193 bytes. */ +/* Quotation_Mark: 209 bytes. */ RE_UINT32 re_get_quotation_mark(RE_UINT32 ch) { RE_UINT32 code; @@ -7087,7 +7562,7 @@ RE_UINT32 re_get_quotation_mark(RE_UINT32 ch) { /* Terminal_Punctuation. */ static RE_UINT8 re_terminal_punctuation_stage_1[] = { - 0, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 2, 3, 4, 5, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -7099,20 +7574,23 @@ static RE_UINT8 re_terminal_punctuation_stage_2[] = { 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 12, 13, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 14, - 15, 9, 16, 9, 17, 9, 9, 9, 9, 18, 9, 9, 9, 9, 9, 9, + 15, 9, 16, 9, 17, 18, 9, 9, 9, 19, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 20, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 21, }; static RE_UINT8 re_terminal_punctuation_stage_3[] = { 0, 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 4, 5, 6, 7, 8, 9, 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 11, 1, 12, 1, - 13, 1, 1, 1, 1, 1, 14, 1, 1, 1, 1, 1, 15, 16, 1, 17, - 18, 1, 19, 1, 1, 20, 21, 1, 22, 1, 1, 1, 1, 1, 1, 1, - 23, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 24, 1, 1, 1, 25, 1, 1, 1, 1, 1, 1, 
1, - 1, 26, 1, 1, 27, 28, 1, 1, 29, 30, 31, 32, 33, 34, 1, 35, - 1, 1, 1, 1, 36, 1, 37, 1, 1, 1, 1, 1, 1, 1, 1, 38, - 39, 1, 40, 1, 1, 1, 41, 1, 42, 43, 44, 45, 1, 1, 1, 1, - 46, 1, 1, 1, 1, 1, 1, 1, + 13, 1, 1, 1, 1, 1, 14, 1, 1, 1, 1, 1, 15, 16, 17, 18, + 19, 1, 20, 1, 1, 21, 22, 1, 23, 1, 1, 1, 1, 1, 1, 1, + 24, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 25, 1, 1, 1, 26, 1, 1, 1, 1, 1, 1, 1, + 1, 27, 1, 1, 28, 29, 1, 1, 30, 31, 32, 33, 34, 35, 1, 36, + 1, 1, 1, 1, 37, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 39, + 40, 1, 41, 1, 42, 43, 44, 45, 46, 47, 48, 49, 50, 1, 1, 1, + 1, 1, 1, 51, 52, 1, 1, 1, 53, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 54, 55, 56, 1, 1, 41, 1, 1, 1, 1, 1, 1, }; static RE_UINT8 re_terminal_punctuation_stage_4[] = { @@ -7120,14 +7598,17 @@ static RE_UINT8 re_terminal_punctuation_stage_4[] = { 4, 0, 5, 0, 6, 0, 0, 0, 0, 0, 7, 0, 8, 0, 0, 0, 0, 0, 0, 9, 0, 10, 2, 0, 0, 0, 0, 11, 0, 0, 12, 0, 13, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 15, 0, 0, 0, 16, - 0, 0, 0, 17, 0, 0, 18, 0, 19, 0, 0, 0, 0, 0, 11, 0, - 0, 20, 0, 0, 0, 0, 21, 0, 0, 22, 0, 23, 0, 24, 25, 0, - 0, 26, 0, 0, 27, 0, 0, 0, 0, 0, 0, 23, 28, 0, 0, 0, - 0, 0, 0, 29, 0, 0, 0, 30, 0, 0, 31, 0, 0, 32, 0, 0, - 0, 0, 25, 0, 0, 0, 33, 0, 0, 0, 34, 35, 0, 0, 0, 36, - 0, 0, 37, 0, 1, 0, 0, 38, 34, 0, 39, 0, 0, 0, 40, 0, - 34, 0, 0, 0, 0, 41, 0, 0, 0, 0, 42, 0, 0, 23, 43, 0, - 0, 0, 44, 0, 0, 0, 45, 0, 0, 0, 0, 46, + 0, 0, 0, 17, 0, 18, 0, 0, 0, 0, 19, 0, 20, 0, 0, 0, + 0, 0, 11, 0, 0, 21, 0, 0, 0, 0, 22, 0, 0, 23, 0, 24, + 0, 25, 26, 0, 0, 27, 28, 0, 29, 0, 0, 0, 0, 0, 0, 24, + 30, 0, 0, 0, 0, 0, 0, 31, 0, 0, 0, 32, 0, 0, 33, 0, + 0, 34, 0, 0, 0, 0, 26, 0, 0, 0, 35, 0, 0, 0, 36, 37, + 0, 0, 0, 38, 0, 0, 39, 0, 1, 0, 0, 40, 36, 0, 41, 0, + 0, 0, 42, 0, 36, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 43, + 0, 44, 0, 0, 45, 0, 0, 0, 0, 0, 46, 0, 0, 24, 47, 0, + 0, 0, 48, 0, 0, 0, 49, 0, 0, 50, 0, 0, 0, 0, 51, 0, + 0, 0, 29, 0, 0, 0, 0, 52, 0, 0, 0, 33, 0, 0, 0, 53, + 0, 54, 55, 0, }; static RE_UINT8 
re_terminal_punctuation_stage_5[] = { @@ -7135,17 +7616,19 @@ static RE_UINT8 re_terminal_punctuation_stage_5[] = { 0, 2, 0, 0, 8, 0, 0, 0, 0, 16, 0, 136, 0, 0, 16, 0, 255, 23, 0, 0, 0, 0, 0, 3, 0, 0, 255, 127, 48, 0, 0, 0, 0, 0, 0, 12, 0, 225, 7, 0, 0, 12, 0, 0, 254, 1, 0, 0, - 0, 96, 0, 0, 0, 56, 0, 0, 0, 0, 112, 4, 60, 3, 0, 0, - 0, 15, 0, 0, 0, 0, 0, 236, 0, 0, 0, 248, 0, 0, 0, 192, - 0, 0, 0, 48, 128, 3, 0, 0, 0, 64, 0, 0, 6, 0, 0, 0, - 0, 224, 0, 0, 0, 0, 248, 0, 0, 0, 192, 0, 0, 192, 0, 0, - 0, 128, 0, 0, 0, 0, 0, 224, 0, 0, 0, 128, 0, 0, 3, 0, - 0, 8, 0, 0, 0, 0, 247, 0, 18, 0, 0, 0, 0, 0, 1, 0, - 0, 0, 128, 0, 0, 0, 0, 252, 128, 63, 0, 0, 3, 0, 0, 0, - 14, 0, 0, 0, 96, 0, 0, 0, 0, 0, 15, 0, + 0, 96, 0, 0, 0, 56, 0, 0, 0, 0, 96, 0, 0, 0, 112, 4, + 60, 3, 0, 0, 0, 15, 0, 0, 0, 0, 0, 236, 0, 0, 0, 248, + 0, 0, 0, 192, 0, 0, 0, 48, 128, 3, 0, 0, 0, 64, 0, 16, + 2, 0, 0, 0, 6, 0, 0, 0, 0, 224, 0, 0, 0, 0, 248, 0, + 0, 0, 192, 0, 0, 192, 0, 0, 0, 128, 0, 0, 0, 0, 0, 224, + 0, 0, 0, 128, 0, 0, 3, 0, 0, 8, 0, 0, 0, 0, 247, 0, + 18, 0, 0, 0, 0, 0, 1, 0, 0, 0, 128, 0, 0, 0, 63, 0, + 0, 0, 0, 252, 0, 0, 0, 30, 128, 63, 0, 0, 3, 0, 0, 0, + 14, 0, 0, 0, 96, 32, 0, 0, 0, 0, 0, 31, 60, 2, 0, 0, + 0, 0, 31, 0, 0, 0, 32, 0, 0, 0, 128, 3, 16, 0, 0, 0, }; -/* Terminal_Punctuation: 676 bytes. */ +/* Terminal_Punctuation: 808 bytes. */ RE_UINT32 re_get_terminal_punctuation(RE_UINT32 ch) { RE_UINT32 code; @@ -7354,26 +7837,28 @@ RE_UINT32 re_get_ascii_hex_digit(RE_UINT32 ch) { /* Other_Alphabetic. 
*/ static RE_UINT8 re_other_alphabetic_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, }; static RE_UINT8 re_other_alphabetic_stage_2[] = { 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, - 6, 10, 11, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 12, 6, 6, + 10, 11, 12, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 13, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 14, 6, 6, 6, 6, 6, 6, 15, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, }; static RE_UINT8 re_other_alphabetic_stage_3[] = { 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 0, 0, 14, 0, 0, 0, 15, 16, 17, 18, 19, 20, 0, 0, 0, - 0, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 0, - 24, 25, 26, 27, 0, 0, 0, 0, 0, 0, 0, 28, 0, 0, 0, 0, - 0, 0, 29, 0, 0, 0, 0, 0, 30, 31, 0, 0, 0, 0, 32, 0, - 0, 0, 0, 0, 0, 0, 0, 33, + 13, 0, 0, 14, 0, 0, 0, 15, 16, 17, 18, 19, 20, 21, 0, 0, + 0, 0, 0, 0, 22, 0, 0, 0, 0, 0, 0, 0, 0, 23, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, + 25, 26, 27, 28, 0, 0, 0, 0, 0, 0, 0, 29, 0, 0, 0, 0, + 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, + 32, 33, 34, 35, 36, 37, 38, 0, 0, 0, 0, 39, 0, 0, 0, 40, + 0, 0, 0, 0, 41, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, }; static RE_UINT8 re_other_alphabetic_stage_4[] = { @@ -7381,26 +7866,31 @@ static RE_UINT8 re_other_alphabetic_stage_4[] = { 0, 0, 0, 0, 0, 2, 3, 0, 4, 0, 5, 6, 0, 0, 7, 8, 9, 10, 0, 0, 0, 11, 0, 0, 12, 13, 0, 0, 0, 0, 0, 14, 15, 16, 17, 18, 19, 20, 21, 18, 19, 20, 22, 23, 19, 20, 24, 18, - 19, 20, 25, 18, 26, 20, 27, 0, 19, 20, 28, 18, 18, 20, 28, 18, - 18, 20, 29, 18, 18, 0, 30, 31, 0, 32, 33, 0, 0, 34, 33, 0, + 19, 20, 25, 18, 26, 20, 27, 0, 15, 20, 28, 18, 19, 20, 28, 18, + 19, 20, 29, 18, 18, 0, 30, 31, 0, 32, 33, 0, 0, 34, 33, 0, 0, 0, 0, 35, 36, 37, 0, 0, 0, 38, 39, 40, 41, 0, 0, 
0, 0, 0, 42, 0, 0, 0, 0, 0, 31, 31, 31, 31, 0, 43, 44, 0, 0, 0, 0, 0, 0, 45, 0, 0, 0, 46, 0, 0, 0, 10, 47, 0, 48, 0, 49, 50, 0, 0, 0, 0, 51, 52, 15, 0, 53, 54, 0, 55, - 0, 56, 0, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, 43, 57, 58, - 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, 0, 59, 42, 0, 0, 0, - 0, 60, 0, 0, 61, 62, 15, 0, 0, 63, 64, 0, 15, 62, 0, 0, - 0, 65, 66, 0, 0, 67, 0, 68, 0, 0, 0, 0, 0, 0, 0, 69, - 70, 0, 0, 0, 0, 0, 0, 0, 71, 0, 0, 0, 0, 0, 0, 0, - 53, 72, 73, 0, 26, 74, 0, 0, 53, 64, 0, 0, 53, 75, 0, 0, - 0, 0, 0, 0, 0, 76, 0, 0, 0, 0, 35, 77, 0, 0, 0, 0, + 0, 56, 0, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, 0, 0, 57, + 0, 0, 0, 0, 0, 43, 58, 59, 0, 0, 0, 0, 0, 0, 0, 58, + 0, 0, 0, 60, 42, 0, 0, 0, 0, 61, 0, 0, 62, 63, 15, 0, + 0, 64, 65, 0, 15, 63, 0, 0, 0, 66, 67, 0, 0, 68, 0, 69, + 0, 0, 0, 0, 0, 0, 0, 70, 71, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 72, 0, 0, 0, 0, 73, 0, 0, 0, 0, 0, 0, 0, + 53, 74, 75, 0, 26, 76, 0, 0, 53, 65, 0, 0, 53, 77, 0, 0, + 0, 78, 0, 0, 0, 0, 42, 44, 19, 20, 21, 18, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 10, 62, 0, 0, 0, 0, 0, 0, 79, 0, 0, + 0, 80, 81, 0, 0, 82, 0, 0, 0, 83, 0, 0, 0, 0, 0, 0, + 0, 0, 35, 84, 0, 0, 0, 0, 0, 0, 0, 0, 71, 0, 0, 0, + 0, 10, 85, 85, 59, 0, 0, 0, }; static RE_UINT8 re_other_alphabetic_stage_5[] = { 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 255, 191, 182, 0, 0, 0, 0, 0, 255, 7, 0, 248, 255, 254, 0, 0, 1, 0, 0, 0, 192, 31, 158, 33, 0, 0, 0, 0, 2, 0, 0, 0, 255, 255, 192, 255, 1, 0, - 0, 0, 192, 248, 239, 30, 0, 0, 240, 3, 255, 127, 15, 0, 0, 0, + 0, 0, 192, 248, 239, 30, 0, 0, 240, 3, 255, 255, 15, 0, 0, 0, 0, 0, 0, 204, 255, 223, 224, 0, 12, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 192, 159, 25, 128, 0, 135, 25, 2, 0, 0, 0, 35, 0, 191, 27, 0, 0, 159, 25, 192, 0, 4, 0, 0, 0, 199, 29, 128, 0, @@ -7411,15 +7901,17 @@ static RE_UINT8 re_other_alphabetic_stage_5[] = { 255, 1, 0, 0, 0, 2, 0, 0, 255, 15, 255, 1, 1, 3, 0, 0, 0, 0, 128, 15, 0, 0, 224, 127, 254, 255, 31, 0, 31, 0, 0, 0, 0, 0, 224, 255, 7, 0, 0, 0, 254, 51, 0, 0, 128, 255, 3, 0, - 240, 255, 63, 0, 255, 255, 255, 
255, 255, 3, 0, 0, 0, 0, 240, 15, - 248, 0, 0, 0, 3, 0, 0, 0, 0, 0, 240, 255, 192, 7, 0, 0, - 128, 255, 7, 0, 0, 254, 127, 0, 8, 48, 0, 0, 0, 0, 157, 65, - 0, 248, 32, 0, 248, 7, 0, 0, 0, 0, 0, 64, 110, 240, 0, 0, - 0, 0, 0, 255, 63, 0, 0, 0, 0, 0, 255, 1, 0, 0, 248, 255, - 0, 248, 63, 0, 255, 255, 255, 127, + 240, 255, 63, 0, 128, 255, 31, 0, 255, 255, 255, 255, 255, 3, 0, 0, + 0, 0, 240, 15, 248, 0, 0, 0, 3, 0, 0, 0, 0, 0, 240, 255, + 192, 7, 0, 0, 128, 255, 7, 0, 0, 254, 127, 0, 8, 48, 0, 0, + 0, 0, 157, 65, 0, 248, 32, 0, 248, 7, 0, 0, 0, 0, 0, 64, + 0, 0, 192, 7, 110, 240, 0, 0, 0, 0, 0, 255, 63, 0, 0, 0, + 0, 0, 255, 1, 0, 0, 248, 255, 0, 240, 159, 0, 0, 128, 63, 127, + 0, 0, 255, 127, 1, 0, 0, 0, 0, 248, 63, 0, 0, 0, 127, 0, + 255, 255, 255, 127, 255, 3, 255, 255, }; -/* Other_Alphabetic: 786 bytes. */ +/* Other_Alphabetic: 929 bytes. */ RE_UINT32 re_get_other_alphabetic(RE_UINT32 ch) { RE_UINT32 code; @@ -7427,9 +7919,9 @@ RE_UINT32 re_get_other_alphabetic(RE_UINT32 ch) { RE_UINT32 pos; RE_UINT32 value; - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_other_alphabetic_stage_1[f] << 4; + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_other_alphabetic_stage_1[f] << 5; f = code >> 11; code ^= f << 11; pos = (RE_UINT32)re_other_alphabetic_stage_2[pos + f] << 3; @@ -7518,8 +8010,8 @@ static RE_UINT8 re_diacritic_stage_1[] = { static RE_UINT8 re_diacritic_stage_2[] = { 0, 1, 2, 3, 4, 5, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 7, 8, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, - 4, 4, 10, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 11, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 12, 4, 4, 4, 4, 4, + 10, 11, 12, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 13, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 14, 4, 4, 15, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, }; @@ -7530,8 +8022,9 @@ static RE_UINT8 re_diacritic_stage_3[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 24, 1, 25, 1, 26, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 28, 29, 30, 
31, 32, 1, 1, 1, 1, 1, 1, 1, 33, 1, 1, 34, 35, - 36, 37, 1, 1, 1, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 39, - 1, 40, 1, 1, 1, 1, 1, 1, + 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 37, 1, 1, 1, 1, 1, + 38, 39, 40, 41, 42, 43, 44, 1, 1, 1, 45, 1, 1, 1, 1, 46, + 1, 47, 1, 1, 1, 1, 1, 1, 48, 1, 1, 1, 1, 1, 1, 1, }; static RE_UINT8 re_diacritic_stage_4[] = { @@ -7544,18 +8037,22 @@ static RE_UINT8 re_diacritic_stage_4[] = { 0, 0, 25, 0, 0, 22, 25, 0, 0, 0, 25, 0, 0, 0, 26, 0, 0, 0, 27, 0, 0, 0, 28, 0, 20, 29, 0, 0, 30, 0, 31, 0, 0, 32, 0, 0, 33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34, 0, - 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 0, - 0, 37, 38, 39, 0, 40, 0, 0, 0, 41, 0, 42, 0, 0, 4, 43, - 0, 44, 5, 17, 0, 0, 45, 46, 0, 0, 0, 0, 0, 47, 48, 49, - 0, 0, 0, 0, 0, 0, 0, 50, 0, 51, 0, 0, 0, 0, 0, 0, - 0, 52, 0, 0, 53, 0, 0, 22, 0, 0, 0, 54, 0, 0, 0, 55, - 56, 57, 0, 0, 58, 0, 0, 20, 0, 0, 0, 0, 0, 0, 38, 59, - 0, 60, 61, 0, 0, 61, 2, 0, 0, 0, 0, 62, 0, 15, 63, 64, - 0, 0, 0, 0, 0, 0, 0, 65, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 66, 0, 0, 0, 0, 0, 0, 0, 1, 2, 67, 68, 0, 0, 69, - 0, 0, 0, 0, 0, 70, 0, 0, 0, 71, 0, 0, 0, 0, 2, 0, - 0, 0, 0, 0, 0, 41, 0, 0, 0, 0, 0, 0, 72, 0, 0, 0, - 0, 0, 0, 73, 74, 75, 0, 0, + 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 0, 37, 0, 0, + 0, 38, 39, 40, 0, 41, 0, 0, 0, 42, 0, 43, 0, 0, 4, 44, + 0, 45, 5, 17, 0, 0, 46, 47, 0, 0, 0, 0, 0, 48, 49, 50, + 0, 0, 0, 0, 0, 0, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, + 0, 53, 0, 0, 54, 0, 0, 22, 0, 0, 0, 55, 56, 0, 0, 57, + 58, 59, 0, 0, 60, 0, 0, 20, 0, 0, 0, 0, 0, 0, 39, 61, + 0, 62, 63, 0, 0, 63, 2, 64, 0, 0, 0, 65, 0, 15, 66, 67, + 0, 0, 68, 0, 0, 0, 0, 69, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 70, 0, 0, 0, 0, 0, 0, 0, 1, 2, 71, 72, 0, 0, 73, + 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 74, + 0, 0, 0, 0, 0, 75, 0, 0, 0, 76, 0, 63, 0, 0, 2, 0, + 0, 77, 0, 0, 0, 0, 0, 78, 0, 22, 25, 79, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 15, 2, 0, + 0, 15, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81, + 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 83, 84, 85, 0, 0, 
+ 0, 0, 0, 0, 0, 0, 86, 0, }; static RE_UINT8 re_diacritic_stage_5[] = { @@ -7568,19 +8065,22 @@ static RE_UINT8 re_diacritic_stage_5[] = { 0, 0, 2, 0, 0, 32, 0, 0, 0, 4, 0, 0, 128, 95, 0, 0, 0, 31, 0, 0, 0, 0, 160, 194, 220, 0, 0, 0, 64, 0, 0, 0, 0, 0, 128, 6, 128, 191, 0, 12, 0, 254, 15, 32, 0, 0, 0, 14, - 0, 0, 224, 159, 0, 0, 16, 0, 16, 0, 0, 0, 0, 248, 15, 0, - 0, 12, 0, 0, 0, 0, 192, 0, 0, 0, 0, 63, 255, 33, 16, 0, - 0, 240, 255, 255, 240, 255, 0, 0, 0, 0, 0, 224, 0, 0, 0, 160, - 3, 224, 0, 224, 0, 224, 0, 96, 0, 128, 3, 0, 0, 128, 0, 0, - 0, 252, 0, 0, 0, 0, 0, 30, 0, 128, 0, 176, 0, 0, 3, 0, - 0, 0, 128, 255, 3, 0, 0, 0, 0, 1, 0, 0, 255, 255, 3, 0, - 0, 120, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 7, 0, 0, 0, - 0, 0, 64, 0, 0, 48, 0, 0, 127, 0, 0, 0, 0, 0, 1, 0, - 0, 0, 0, 192, 8, 0, 0, 0, 0, 0, 0, 6, 0, 0, 24, 0, - 0, 128, 255, 255, 128, 227, 7, 248, 231, 15, 0, 0, 0, 60, 0, 0, + 0, 0, 224, 159, 0, 0, 255, 63, 0, 0, 16, 0, 16, 0, 0, 0, + 0, 248, 15, 0, 0, 12, 0, 0, 0, 0, 192, 0, 0, 0, 0, 63, + 255, 33, 16, 3, 0, 240, 255, 255, 240, 255, 0, 0, 0, 0, 32, 224, + 0, 0, 0, 160, 3, 224, 0, 224, 0, 224, 0, 96, 0, 128, 3, 0, + 0, 128, 0, 0, 0, 252, 0, 0, 0, 0, 0, 30, 0, 128, 0, 176, + 0, 0, 0, 48, 0, 0, 3, 0, 0, 0, 128, 255, 3, 0, 0, 0, + 0, 1, 0, 0, 255, 255, 3, 0, 0, 120, 0, 0, 0, 0, 8, 0, + 32, 0, 0, 0, 0, 0, 0, 56, 7, 0, 0, 0, 0, 0, 64, 0, + 0, 0, 0, 248, 0, 48, 0, 0, 255, 63, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 192, 8, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 6, + 0, 0, 24, 0, 0, 0, 96, 0, 0, 6, 0, 0, 192, 31, 31, 0, + 12, 0, 0, 0, 0, 0, 31, 0, 0, 128, 255, 255, 128, 227, 7, 248, + 231, 15, 0, 0, 0, 60, 0, 0, 0, 0, 127, 0, }; -/* Diacritic: 849 bytes. */ +/* Diacritic: 981 bytes. */ RE_UINT32 re_get_diacritic(RE_UINT32 ch) { RE_UINT32 code; @@ -7609,14 +8109,15 @@ RE_UINT32 re_get_diacritic(RE_UINT32 ch) { /* Extender. 
*/ static RE_UINT8 re_extender_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, }; static RE_UINT8 re_extender_stage_2[] = { 0, 1, 2, 3, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, }; @@ -7625,6 +8126,7 @@ static RE_UINT8 re_extender_stage_3[] = { 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 7, 1, 8, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, 11, 1, 1, 12, 13, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 14, + 1, 1, 1, 15, 1, 16, 1, 1, 1, 1, 1, 17, 1, 1, 1, 1, }; static RE_UINT8 re_extender_stage_4[] = { @@ -7634,8 +8136,9 @@ static RE_UINT8 re_extender_stage_4[] = { 6, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 9, 0, 10, 0, 0, 0, 0, 11, 12, 0, 0, 13, 0, 0, 14, 15, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 0, 18, 0, 0, 19, 20, - 0, 0, 0, 18, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 17, 5, 0, 0, 0, 18, 0, 0, 19, 20, + 0, 0, 0, 18, 0, 0, 0, 0, 0, 0, 19, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 21, 0, 0, 0, 22, 0, 0, 0, 0, 0, }; static RE_UINT8 re_extender_stage_5[] = { @@ -7644,10 +8147,10 @@ static RE_UINT8 re_extender_stage_5[] = { 128, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 8, 32, 0, 0, 0, 0, 0, 62, 0, 0, 0, 0, 96, 0, 0, 0, 112, 0, 0, 32, 0, 0, 16, 0, 0, 0, 128, 0, 0, 0, 0, 1, 0, 0, 0, 0, 32, - 0, 0, 24, 0, + 0, 0, 24, 0, 192, 1, 0, 0, 12, 0, 0, 0, }; -/* Extender: 349 bytes. */ +/* Extender: 414 bytes. 
*/ RE_UINT32 re_get_extender(RE_UINT32 ch) { RE_UINT32 code; @@ -7655,9 +8158,9 @@ RE_UINT32 re_get_extender(RE_UINT32 ch) { RE_UINT32 pos; RE_UINT32 value; - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_extender_stage_1[f] << 5; + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_extender_stage_1[f] << 4; f = code >> 11; code ^= f << 11; pos = (RE_UINT32)re_extender_stage_2[pos + f] << 3; @@ -7688,14 +8191,15 @@ static RE_UINT8 re_other_lowercase_stage_2[] = { static RE_UINT8 re_other_lowercase_stage_3[] = { 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 4, 2, 5, 2, 2, 2, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 7, 2, 2, 2, 2, + 2, 2, 2, 7, 2, 8, 2, 2, }; static RE_UINT8 re_other_lowercase_stage_4[] = { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, 0, 0, 8, 9, 0, 0, 10, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, - 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 14, + 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 14, 0, 15, + 0, 0, 0, 0, 0, 16, 0, 0, }; static RE_UINT8 re_other_lowercase_stage_5[] = { @@ -7705,11 +8209,12 @@ static RE_UINT8 re_other_lowercase_stage_5[] = { 255, 255, 255, 255, 255, 7, 0, 1, 0, 0, 0, 248, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 2, 128, 0, 0, 255, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 255, 255, 255, 3, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 3, + 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 48, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 3, + 0, 0, 0, 240, 0, 0, 0, 0, }; -/* Other_Lowercase: 273 bytes. */ +/* Other_Lowercase: 297 bytes. */ RE_UINT32 re_get_other_lowercase(RE_UINT32 ch) { RE_UINT32 code; @@ -7738,30 +8243,33 @@ RE_UINT32 re_get_other_lowercase(RE_UINT32 ch) { /* Other_Uppercase. 
*/ static RE_UINT8 re_other_uppercase_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, + 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, }; static RE_UINT8 re_other_uppercase_stage_2[] = { - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, }; static RE_UINT8 re_other_uppercase_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, + 0, 3, 0, 0, 0, 0, 0, 0, }; static RE_UINT8 re_other_uppercase_stage_4[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 1, 0, + 0, 0, 0, 0, 0, 2, 1, 0, 0, 3, 4, 4, 5, 0, 0, 0, }; static RE_UINT8 re_other_uppercase_stage_5[] = { - 0, 0, 0, 0, 255, 255, 0, 0, 0, 0, 192, 255, + 0, 0, 0, 0, 255, 255, 0, 0, 0, 0, 192, 255, 0, 0, 255, 255, + 255, 3, 255, 255, 255, 3, 0, 0, }; -/* Other_Uppercase: 117 bytes. */ +/* Other_Uppercase: 162 bytes. 
*/ RE_UINT32 re_get_other_uppercase(RE_UINT32 ch) { RE_UINT32 code; @@ -7769,12 +8277,12 @@ RE_UINT32 re_get_other_uppercase(RE_UINT32 ch) { RE_UINT32 pos; RE_UINT32 value; - f = ch >> 16; - code = ch ^ (f << 16); + f = ch >> 15; + code = ch ^ (f << 15); pos = (RE_UINT32)re_other_uppercase_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_other_uppercase_stage_2[pos + f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_other_uppercase_stage_2[pos + f] << 3; f = code >> 8; code ^= f << 8; pos = (RE_UINT32)re_other_uppercase_stage_3[pos + f] << 3; @@ -7849,21 +8357,23 @@ static RE_UINT8 re_other_grapheme_extend_stage_1[] = { static RE_UINT8 re_other_grapheme_extend_stage_2[] = { 0, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, + 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }; static RE_UINT8 re_other_grapheme_extend_stage_3[] = { 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 6, 7, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 6, 0, 7, 8, 0, 0, 0, 0, 0, + 9, 0, 0, 0, 0, 0, 0, 0, }; static RE_UINT8 re_other_grapheme_extend_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, - 0, 0, 0, 0, 1, 2, 1, 2, 0, 0, 0, 3, 1, 2, 0, 4, - 5, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 8, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, + 0, 0, 0, 0, 1, 2, 1, 2, 0, 0, 0, 3, 1, 2, 0, 4, + 5, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 1, 2, 0, 0, + 0, 0, 8, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 10, 0, 0, }; static RE_UINT8 re_other_grapheme_extend_stage_5[] = { @@ -7871,10 +8381,11 @@ static RE_UINT8 re_other_grapheme_extend_stage_5[] = { 0, 0, 128, 0, 0, 0, 0, 0, 4, 0, 96, 0, 0, 0, 0, 0, 0, 128, 0, 128, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 192, 0, 0, 0, 0, 0, 192, 0, 0, 
0, 0, + 0, 0, 0, 0, 0, 0, 1, 32, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 32, 192, 7, 0, }; -/* Other_Grapheme_Extend: 249 bytes. */ +/* Other_Grapheme_Extend: 289 bytes. */ RE_UINT32 re_get_other_grapheme_extend(RE_UINT32 ch) { RE_UINT32 code; @@ -8462,7 +8973,7 @@ RE_UINT32 re_get_other_id_continue(RE_UINT32 ch) { /* STerm. */ static RE_UINT8 re_sterm_stage_1[] = { - 0, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 2, 3, 4, 5, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -8474,46 +8985,53 @@ static RE_UINT8 re_sterm_stage_2[] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 11, 12, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 13, - 3, 3, 14, 3, 15, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 14, 3, 15, 16, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 17, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 18, }; static RE_UINT8 re_sterm_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 7, - 1, 1, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 9, 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, 11, 1, 12, 1, - 13, 1, 14, 1, 1, 15, 16, 1, 17, 1, 1, 1, 1, 1, 1, 1, - 18, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 19, 1, 1, 1, - 20, 1, 1, 1, 1, 1, 1, 1, 1, 21, 1, 1, 22, 23, 1, 1, - 24, 25, 26, 27, 28, 29, 1, 30, 1, 1, 1, 1, 31, 1, 32, 1, - 1, 1, 1, 1, 33, 1, 1, 1, 34, 35, 36, 37, 1, 1, 1, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, + 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 8, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 10, 1, 11, 1, + 12, 1, 13, 1, 1, 14, 15, 1, 16, 1, 1, 1, 1, 1, 1, 1, + 17, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 18, 1, 1, 1, + 19, 1, 1, 1, 1, 1, 1, 1, 1, 20, 1, 1, 21, 22, 1, 1, + 23, 24, 25, 26, 27, 28, 1, 29, 1, 1, 1, 1, 30, 1, 31, 1, + 1, 1, 1, 1, 32, 1, 1, 1, 33, 34, 35, 36, 37, 1, 1, 1, + 1, 1, 1, 38, 39, 1, 1, 1, 1, 1, 1, 1, 40, 41, 42, 1, + 1, 3, 1, 1, 1, 
1, 1, 1, }; static RE_UINT8 re_sterm_stage_4[] = { - 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, - 4, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 7, - 0, 0, 0, 8, 0, 0, 9, 0, 0, 0, 0, 10, 0, 0, 0, 11, - 0, 12, 0, 0, 13, 0, 0, 0, 0, 0, 8, 0, 0, 14, 0, 0, - 0, 0, 15, 0, 0, 16, 0, 17, 0, 18, 19, 0, 0, 11, 0, 0, - 20, 0, 0, 0, 0, 0, 0, 4, 21, 0, 0, 0, 0, 0, 0, 22, - 0, 0, 0, 23, 0, 0, 21, 0, 0, 24, 0, 0, 0, 0, 25, 0, - 0, 0, 26, 0, 0, 0, 0, 27, 0, 0, 0, 28, 0, 0, 29, 0, - 1, 0, 0, 30, 0, 0, 23, 0, 0, 0, 31, 0, 0, 17, 32, 0, - 0, 0, 33, 0, 0, 0, 34, 0, + 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, + 0, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 7, + 0, 0, 8, 0, 0, 0, 0, 9, 0, 0, 0, 10, 0, 11, 0, 0, + 12, 0, 0, 0, 0, 0, 7, 0, 0, 13, 0, 0, 0, 0, 14, 0, + 0, 15, 0, 16, 0, 17, 18, 0, 0, 19, 0, 0, 20, 0, 0, 0, + 0, 0, 0, 3, 21, 0, 0, 0, 0, 0, 0, 22, 0, 0, 0, 23, + 0, 0, 21, 0, 0, 24, 0, 0, 0, 0, 25, 0, 0, 0, 26, 0, + 0, 0, 0, 27, 0, 0, 0, 28, 0, 0, 29, 0, 1, 0, 0, 30, + 0, 0, 23, 0, 0, 0, 31, 0, 0, 16, 32, 0, 0, 0, 33, 0, + 0, 0, 34, 0, 0, 35, 0, 0, 0, 0, 36, 0, 0, 0, 37, 0, + 0, 0, 0, 21, 0, 0, 0, 38, 0, 39, 40, 0, }; static RE_UINT8 re_sterm_stage_5[] = { - 0, 0, 0, 0, 2, 64, 0, 128, 0, 0, 0, 80, 0, 2, 0, 0, - 0, 0, 0, 128, 0, 0, 16, 0, 7, 0, 0, 0, 0, 0, 0, 2, - 48, 0, 0, 0, 0, 12, 0, 0, 132, 1, 0, 0, 0, 64, 0, 0, - 0, 0, 96, 0, 8, 2, 0, 0, 0, 15, 0, 0, 0, 0, 0, 204, - 0, 0, 0, 24, 0, 0, 0, 192, 0, 0, 0, 48, 128, 3, 0, 0, + 0, 0, 0, 0, 2, 64, 0, 128, 0, 2, 0, 0, 0, 0, 0, 128, + 0, 0, 16, 0, 7, 0, 0, 0, 0, 0, 0, 2, 48, 0, 0, 0, + 0, 12, 0, 0, 132, 1, 0, 0, 0, 64, 0, 0, 0, 0, 96, 0, + 8, 2, 0, 0, 0, 15, 0, 0, 0, 0, 0, 204, 0, 0, 0, 24, + 0, 0, 0, 192, 0, 0, 0, 48, 128, 3, 0, 0, 0, 64, 0, 16, 4, 0, 0, 0, 0, 192, 0, 0, 0, 0, 136, 0, 0, 0, 192, 0, 0, 128, 0, 0, 0, 3, 0, 0, 0, 0, 0, 224, 0, 0, 3, 0, 0, 8, 0, 0, 0, 0, 196, 0, 2, 0, 0, 0, 128, 1, 0, 0, - 3, 0, 0, 0, 14, 0, 0, 0, 96, 0, 0, 0, + 3, 0, 0, 0, 14, 0, 0, 0, 96, 32, 0, 0, 0, 0, 0, 27, + 12, 2, 0, 0, 6, 0, 0, 0, 0, 
0, 32, 0, 0, 0, 128, 1, + 16, 0, 0, 0, }; -/* STerm: 568 bytes. */ +/* STerm: 668 bytes. */ RE_UINT32 re_get_sterm(RE_UINT32 ch) { RE_UINT32 code; @@ -8787,57 +9305,59 @@ RE_UINT32 re_get_hangul_syllable_type(RE_UINT32 ch) { static RE_UINT8 re_bidi_class_stage_1[] = { 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 7, - 8, 9, 5, 5, 5, 5, 10, 5, 5, 5, 5, 5, 5, 11, 12, 13, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 15, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 8, 9, 5, 5, 5, 5, 10, 5, 5, 5, 5, 11, 5, 12, 13, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, + 16, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 15, }; static RE_UINT8 
re_bidi_class_stage_2[] = { 0, 1, 2, 2, 2, 3, 4, 5, 2, 6, 2, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 2, 2, 2, 2, 30, 31, 32, 2, 2, 2, 2, 33, 34, 35, - 36, 37, 38, 39, 40, 2, 41, 42, 43, 44, 2, 45, 2, 2, 2, 46, - 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 52, 52, 52, 57, 58, 52, - 2, 2, 52, 52, 52, 52, 59, 2, 2, 60, 61, 62, 63, 64, 52, 65, - 66, 67, 2, 68, 69, 70, 71, 72, 2, 2, 2, 2, 2, 2, 2, 2, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 2, 46, 2, 2, 2, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 53, 53, 53, 58, 53, 53, + 2, 2, 53, 53, 53, 53, 59, 60, 2, 61, 62, 63, 64, 65, 53, 66, + 67, 68, 2, 69, 70, 71, 72, 73, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 73, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 74, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 74, 2, 2, 75, 76, 77, 78, - 79, 80, 81, 82, 83, 84, 2, 85, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 75, 2, 2, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 2, 86, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 86, 87, 87, 87, 88, 89, 90, 91, 92, 93, - 2, 2, 94, 95, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 96, 96, 97, 96, 98, 96, 99, 96, 96, 96, 96, 96, 100, 96, 96, 96, - 101, 102, 103, 104, 2, 2, 2, 2, 2, 2, 2, 2, 2, 105, 2, 2, + 2, 2, 2, 2, 2, 2, 87, 88, 88, 88, 89, 90, 91, 92, 93, 94, + 2, 2, 95, 96, 2, 97, 98, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 99, 99, 100, 99, 101, 102, 103, 99, 99, 99, 99, 99, 104, 99, 99, 99, + 105, 106, 107, 108, 109, 110, 111, 2, 2, 112, 2, 113, 114, 115, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 106, - 2, 2, 107, 108, 109, 2, 110, 2, 2, 2, 2, 2, 2, 111, 112, 113, + 2, 2, 2, 2, 2, 116, 117, 2, 2, 
2, 2, 2, 2, 2, 2, 118, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 119, 2, 2, 2, 2, 2, 2, + 2, 2, 120, 121, 122, 2, 123, 2, 2, 2, 2, 2, 2, 124, 125, 126, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 87, 114, 96, 96, - 115, 116, 117, 2, 2, 2, 118, 119, 120, 121, 122, 123, 124, 125, 126, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 127, + 99, 127, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 88, 128, 99, 99, + 129, 130, 131, 2, 2, 2, 132, 133, 53, 134, 135, 136, 137, 138, 139, 140, + 141, 142, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 143, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 127, - 128, 128, 129, 130, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 143, + 144, 144, 145, 146, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, }; static RE_UINT8 re_bidi_class_stage_3[] = { @@ -8847,64 +9367,72 @@ static RE_UINT8 re_bidi_class_stage_3[] = { 21, 11, 11, 11, 11, 11, 11, 11, 22, 23, 17, 24, 25, 26, 26, 26, 27, 28, 29, 29, 30, 17, 31, 32, 29, 29, 29, 29, 29, 33, 34, 35, 29, 36, 29, 17, 28, 29, 29, 29, 29, 29, 37, 32, 26, 26, 38, 39, - 26, 40, 41, 26, 26, 42, 26, 26, 26, 26, 29, 29, 29, 29, 43, 44, - 45, 11, 11, 46, 47, 48, 49, 11, 50, 11, 11, 51, 52, 11, 49, 53, - 54, 11, 11, 51, 55, 50, 11, 56, 54, 11, 11, 51, 57, 11, 49, 58, - 50, 11, 11, 59, 52, 60, 49, 11, 61, 11, 11, 11, 62, 11, 11, 63, - 11, 11, 11, 64, 65, 66, 49, 67, 11, 11, 11, 51, 68, 11, 49, 11, - 11, 11, 11, 11, 52, 11, 49, 11, 11, 11, 11, 11, 69, 70, 11, 11, + 26, 40, 41, 26, 26, 42, 26, 26, 26, 26, 29, 29, 29, 29, 43, 17, + 44, 11, 11, 45, 46, 47, 48, 11, 49, 11, 11, 50, 51, 11, 48, 52, + 53, 
11, 11, 50, 54, 49, 11, 55, 53, 11, 11, 50, 56, 11, 48, 57, + 49, 11, 11, 58, 51, 59, 48, 11, 60, 11, 11, 11, 61, 11, 11, 62, + 63, 11, 11, 64, 65, 66, 48, 67, 49, 11, 11, 50, 68, 11, 48, 11, + 49, 11, 11, 11, 51, 11, 48, 11, 11, 11, 11, 11, 69, 70, 11, 11, 11, 11, 11, 71, 72, 11, 11, 11, 11, 11, 11, 73, 74, 11, 11, 11, - 11, 75, 11, 76, 11, 11, 11, 77, 78, 79, 17, 80, 60, 11, 11, 11, - 11, 11, 81, 82, 11, 83, 84, 85, 86, 87, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 81, 11, 11, 11, 88, 11, 11, 11, 11, 11, 11, - 4, 11, 11, 11, 11, 11, 11, 11, 89, 90, 11, 11, 11, 11, 11, 11, - 11, 91, 11, 91, 11, 49, 11, 49, 11, 11, 11, 92, 93, 94, 11, 88, - 95, 11, 11, 11, 11, 11, 11, 11, 11, 11, 96, 11, 11, 11, 11, 11, - 11, 11, 97, 98, 99, 11, 11, 11, 11, 11, 11, 11, 11, 100, 16, 16, - 11, 101, 11, 11, 11, 102, 103, 104, 105, 11, 11, 106, 61, 11, 107, 105, - 108, 11, 109, 11, 11, 11, 110, 108, 11, 11, 111, 112, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 113, 114, 115, 11, 11, 11, 11, 17, 17, 116, 111, - 11, 11, 11, 117, 118, 119, 119, 120, 121, 16, 122, 123, 124, 125, 126, 127, - 128, 11, 129, 129, 129, 17, 17, 84, 130, 131, 132, 133, 134, 16, 11, 11, - 135, 16, 16, 16, 16, 16, 16, 16, 16, 136, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 137, 11, 11, 11, 5, - 16, 138, 16, 16, 16, 16, 16, 139, 16, 16, 140, 11, 141, 11, 16, 16, - 142, 143, 11, 11, 11, 11, 144, 16, 16, 16, 145, 16, 16, 16, 16, 16, - 146, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 147, 88, 11, 11, - 11, 11, 11, 11, 11, 11, 148, 149, 11, 11, 11, 11, 11, 11, 11, 150, - 11, 11, 11, 11, 11, 11, 17, 17, 16, 16, 16, 151, 11, 11, 11, 11, - 16, 152, 16, 16, 16, 16, 16, 139, 16, 16, 16, 16, 16, 137, 11, 151, - 153, 16, 154, 155, 11, 11, 11, 11, 11, 156, 4, 11, 11, 11, 11, 157, - 11, 11, 11, 11, 16, 16, 139, 11, 11, 120, 11, 11, 11, 16, 11, 158, - 11, 11, 11, 146, 159, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 160, - 11, 11, 11, 11, 11, 100, 11, 161, 11, 11, 11, 11, 16, 16, 16, 16, - 11, 16, 16, 16, 
140, 11, 11, 11, 119, 11, 11, 11, 11, 11, 150, 162, - 11, 150, 11, 11, 11, 11, 11, 108, 16, 16, 163, 11, 11, 11, 11, 11, - 164, 11, 11, 11, 11, 11, 11, 11, 165, 11, 166, 167, 11, 11, 11, 168, - 11, 11, 11, 11, 115, 11, 17, 108, 11, 11, 169, 11, 170, 108, 11, 11, - 45, 11, 11, 171, 11, 11, 11, 11, 11, 11, 172, 173, 174, 11, 11, 11, - 11, 11, 11, 175, 50, 11, 68, 60, 11, 11, 11, 11, 11, 11, 176, 11, - 11, 177, 178, 26, 26, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, - 29, 29, 29, 179, 29, 29, 29, 29, 29, 29, 29, 29, 29, 8, 8, 180, - 17, 88, 116, 16, 16, 181, 182, 29, 29, 29, 29, 29, 29, 29, 29, 183, - 184, 3, 4, 5, 4, 5, 137, 11, 11, 11, 11, 11, 11, 11, 185, 186, - 187, 11, 11, 11, 16, 16, 16, 16, 141, 151, 11, 11, 11, 11, 11, 87, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 188, 26, 26, 26, 26, 26, 26, - 189, 26, 26, 190, 26, 26, 26, 26, 26, 26, 26, 191, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 192, 193, 50, 11, 11, 194, 116, 14, 137, 11, - 108, 11, 11, 195, 11, 11, 11, 11, 45, 11, 196, 197, 11, 11, 11, 11, - 108, 11, 11, 198, 11, 11, 11, 11, 11, 11, 199, 200, 11, 11, 11, 11, - 150, 45, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 201, 202, - 203, 11, 204, 11, 11, 11, 11, 11, 16, 16, 16, 16, 205, 11, 11, 11, - 16, 16, 16, 16, 16, 140, 11, 11, 11, 11, 11, 11, 11, 157, 11, 11, - 11, 206, 11, 11, 161, 11, 11, 11, 135, 11, 11, 11, 207, 208, 208, 208, - 29, 29, 29, 29, 29, 29, 29, 209, 16, 16, 151, 16, 16, 16, 16, 16, - 16, 139, 210, 211, 146, 146, 11, 11, 212, 11, 11, 11, 11, 11, 133, 11, - 16, 16, 4, 213, 16, 16, 16, 147, 16, 139, 16, 16, 214, 11, 16, 4, - 16, 16, 16, 210, 215, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 216, - 16, 16, 16, 217, 139, 16, 218, 11, 11, 11, 11, 11, 11, 11, 11, 5, - 16, 16, 16, 16, 219, 11, 11, 11, 16, 16, 16, 16, 137, 11, 11, 11, - 16, 16, 16, 16, 16, 16, 16, 139, 11, 11, 11, 11, 11, 11, 11, 220, + 11, 75, 11, 76, 11, 11, 11, 77, 78, 79, 17, 80, 59, 11, 11, 11, + 11, 11, 81, 82, 11, 83, 63, 84, 85, 86, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 
11, 81, 11, 11, 11, 87, 11, 11, 11, 11, 11, 11, + 4, 11, 11, 11, 11, 11, 11, 11, 88, 89, 11, 11, 11, 11, 11, 11, + 11, 90, 11, 90, 11, 48, 11, 48, 11, 11, 11, 91, 92, 93, 11, 87, + 94, 11, 11, 11, 11, 11, 11, 11, 11, 11, 95, 11, 11, 11, 11, 11, + 11, 11, 96, 97, 98, 11, 11, 11, 11, 11, 11, 11, 11, 99, 16, 16, + 11, 100, 11, 11, 11, 101, 102, 103, 11, 11, 11, 104, 11, 11, 11, 11, + 105, 11, 11, 106, 60, 11, 107, 105, 108, 11, 109, 11, 11, 11, 110, 108, + 11, 11, 111, 112, 11, 11, 11, 11, 11, 11, 11, 11, 11, 113, 114, 115, + 11, 11, 11, 11, 17, 17, 17, 116, 11, 11, 11, 117, 118, 119, 119, 120, + 121, 16, 122, 123, 124, 125, 126, 127, 128, 11, 129, 129, 129, 17, 17, 63, + 130, 131, 132, 133, 134, 16, 11, 11, 135, 16, 16, 16, 16, 16, 16, 16, + 16, 136, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 137, 11, 11, 11, 5, 16, 138, 16, 16, 16, 16, 16, 139, + 16, 16, 140, 11, 139, 11, 16, 16, 141, 142, 11, 11, 11, 11, 143, 16, + 16, 16, 144, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 145, + 16, 146, 16, 147, 148, 149, 11, 11, 11, 11, 11, 11, 11, 11, 150, 151, + 11, 11, 11, 11, 11, 11, 11, 152, 11, 11, 11, 11, 11, 11, 17, 17, + 16, 16, 16, 16, 153, 11, 11, 11, 16, 154, 16, 16, 16, 16, 16, 155, + 16, 16, 16, 16, 16, 137, 11, 156, 157, 16, 158, 159, 11, 11, 11, 11, + 11, 160, 4, 11, 11, 11, 11, 161, 11, 11, 11, 11, 16, 16, 155, 11, + 11, 120, 11, 11, 11, 16, 11, 162, 11, 11, 11, 163, 164, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 165, 11, 11, 11, 11, 11, 99, 11, 166, + 11, 11, 11, 11, 16, 16, 16, 16, 11, 16, 16, 16, 140, 11, 11, 11, + 119, 11, 11, 11, 11, 11, 152, 167, 11, 152, 11, 11, 11, 11, 11, 108, + 16, 16, 149, 11, 11, 11, 11, 11, 168, 11, 11, 11, 11, 11, 11, 11, + 169, 11, 170, 171, 11, 11, 11, 172, 11, 11, 11, 11, 173, 11, 17, 108, + 11, 11, 174, 11, 175, 108, 11, 11, 44, 11, 11, 176, 11, 11, 177, 11, + 11, 11, 178, 179, 180, 11, 11, 50, 11, 11, 11, 181, 49, 11, 68, 59, + 11, 11, 11, 11, 11, 11, 182, 11, 11, 183, 184, 26, 26, 29, 29, 29, + 
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 185, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 8, 8, 186, 17, 87, 187, 16, 16, 188, 189, 29, + 29, 29, 29, 29, 29, 29, 29, 190, 191, 3, 4, 5, 4, 5, 137, 11, + 11, 11, 11, 11, 11, 11, 192, 193, 194, 11, 11, 11, 16, 16, 16, 16, + 195, 156, 4, 11, 11, 11, 11, 86, 11, 11, 11, 11, 11, 11, 196, 142, + 11, 11, 11, 11, 11, 11, 11, 197, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 198, 26, 26, 26, 26, 26, 26, 199, 26, 26, 200, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 201, 26, 26, 26, 26, 202, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 203, 204, 49, 11, 11, 205, 206, 14, 137, 152, + 108, 11, 11, 207, 11, 11, 11, 11, 44, 11, 208, 209, 11, 11, 11, 210, + 108, 11, 11, 211, 11, 11, 11, 11, 11, 11, 152, 212, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 152, 213, 11, 49, 11, 11, 50, 63, 11, 214, 209, + 11, 11, 11, 215, 216, 11, 11, 11, 11, 11, 11, 217, 63, 11, 11, 11, + 11, 11, 11, 218, 63, 11, 11, 11, 11, 11, 219, 220, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 209, 11, 11, 11, 206, 11, 11, 11, 11, + 152, 44, 11, 11, 11, 11, 11, 11, 11, 221, 222, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 223, 224, 225, 11, 226, 11, 11, 11, 11, 11, + 16, 16, 16, 16, 227, 11, 11, 11, 16, 16, 16, 16, 16, 140, 11, 11, + 11, 11, 11, 11, 11, 161, 11, 11, 11, 228, 11, 11, 166, 11, 11, 11, + 135, 11, 11, 11, 229, 230, 230, 230, 26, 26, 26, 26, 26, 231, 26, 26, + 29, 29, 29, 29, 29, 29, 29, 232, 16, 16, 156, 16, 16, 16, 16, 16, + 16, 155, 233, 163, 163, 163, 16, 137, 234, 11, 11, 11, 11, 11, 133, 11, + 16, 16, 195, 16, 16, 16, 16, 235, 16, 16, 16, 16, 233, 236, 16, 237, + 16, 16, 16, 16, 16, 16, 16, 233, 16, 16, 16, 16, 139, 16, 16, 154, + 16, 16, 238, 16, 16, 16, 16, 16, 16, 16, 16, 16, 239, 16, 16, 16, + 16, 16, 16, 16, 16, 11, 195, 155, 16, 16, 16, 16, 16, 16, 16, 155, + 16, 16, 16, 16, 16, 240, 11, 11, 156, 16, 16, 16, 237, 87, 16, 16, + 237, 16, 235, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 241, 8, 8, 8, 8, 8, 8, 8, 8, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 
17, 17, 17, 17, 17, 8, }; @@ -8921,51 +9449,56 @@ static RE_UINT8 re_bidi_class_stage_4[] = { 44, 40, 40, 40, 40, 45, 25, 46, 25, 47, 48, 49, 8, 8, 50, 40, 51, 40, 40, 40, 40, 45, 25, 25, 34, 34, 52, 25, 25, 53, 54, 34, 34, 55, 32, 25, 25, 31, 31, 56, 34, 34, 31, 34, 40, 25, 25, 25, - 25, 25, 25, 39, 57, 12, 12, 12, 12, 12, 58, 59, 60, 25, 59, 61, - 60, 25, 12, 12, 62, 12, 12, 12, 61, 12, 12, 12, 12, 12, 12, 59, - 60, 59, 12, 61, 63, 12, 30, 12, 64, 12, 12, 12, 64, 28, 65, 29, - 29, 61, 12, 12, 60, 66, 59, 61, 67, 12, 12, 12, 12, 12, 12, 65, - 12, 58, 12, 12, 58, 12, 12, 12, 59, 12, 12, 61, 13, 10, 68, 12, - 12, 12, 12, 62, 59, 62, 69, 29, 12, 64, 12, 12, 12, 12, 10, 70, - 12, 12, 12, 29, 12, 12, 58, 12, 62, 71, 12, 12, 61, 25, 57, 30, - 12, 28, 25, 57, 61, 25, 66, 59, 12, 12, 25, 29, 12, 12, 29, 12, - 12, 72, 73, 26, 60, 25, 25, 57, 25, 69, 12, 60, 25, 25, 60, 25, - 25, 25, 25, 59, 12, 12, 12, 60, 69, 25, 64, 64, 12, 12, 29, 62, - 59, 12, 12, 12, 60, 59, 12, 12, 58, 64, 12, 61, 12, 12, 12, 61, - 10, 10, 26, 12, 74, 12, 12, 12, 12, 12, 13, 11, 62, 59, 12, 12, - 12, 66, 25, 29, 12, 58, 60, 25, 25, 12, 30, 61, 10, 10, 75, 76, - 12, 12, 61, 12, 57, 28, 59, 12, 58, 12, 60, 12, 11, 26, 12, 12, - 12, 12, 12, 23, 12, 28, 65, 12, 12, 58, 25, 57, 71, 60, 25, 59, - 28, 25, 25, 65, 25, 12, 12, 12, 12, 69, 57, 59, 12, 12, 28, 25, - 29, 12, 12, 12, 62, 29, 66, 12, 12, 58, 29, 72, 12, 12, 12, 25, - 25, 62, 12, 12, 57, 25, 25, 25, 69, 25, 59, 61, 12, 59, 12, 12, - 25, 57, 12, 12, 12, 12, 12, 77, 26, 12, 12, 24, 12, 12, 12, 24, - 12, 12, 12, 22, 78, 78, 79, 80, 10, 10, 81, 82, 83, 84, 10, 10, - 10, 85, 10, 10, 10, 10, 10, 86, 0, 87, 88, 0, 89, 8, 90, 70, - 8, 8, 90, 70, 83, 83, 83, 83, 17, 70, 26, 12, 12, 20, 11, 23, - 10, 77, 91, 92, 12, 12, 23, 12, 10, 11, 23, 26, 12, 12, 91, 12, - 93, 10, 10, 10, 10, 26, 12, 12, 10, 20, 10, 10, 10, 12, 12, 12, - 10, 70, 12, 12, 10, 10, 70, 12, 10, 10, 8, 8, 8, 8, 8, 12, - 12, 12, 23, 10, 10, 10, 10, 24, 24, 10, 10, 10, 10, 10, 10, 11, - 12, 24, 
70, 28, 29, 12, 24, 10, 12, 12, 12, 28, 10, 10, 10, 12, - 10, 10, 17, 10, 94, 11, 10, 10, 11, 12, 62, 29, 11, 23, 12, 24, - 12, 12, 95, 11, 12, 12, 13, 12, 12, 12, 12, 70, 12, 12, 12, 10, - 12, 13, 70, 12, 12, 12, 12, 13, 96, 25, 25, 97, 26, 12, 12, 12, - 12, 12, 11, 12, 58, 58, 28, 12, 12, 64, 10, 12, 12, 12, 98, 12, - 12, 10, 12, 12, 12, 62, 25, 29, 12, 28, 25, 25, 28, 62, 29, 59, - 12, 12, 60, 57, 64, 64, 12, 12, 28, 12, 12, 59, 69, 65, 59, 62, - 12, 61, 59, 61, 12, 12, 12, 99, 34, 34, 100, 34, 40, 40, 40, 101, - 40, 40, 40, 102, 103, 104, 10, 105, 106, 70, 107, 12, 40, 40, 40, 108, - 109, 5, 6, 7, 5, 110, 10, 70, 0, 0, 111, 112, 91, 12, 12, 12, - 34, 34, 34, 113, 31, 33, 34, 25, 34, 34, 114, 52, 34, 34, 115, 10, - 35, 35, 35, 35, 35, 35, 35, 116, 12, 12, 25, 25, 28, 57, 64, 12, - 12, 28, 25, 60, 25, 59, 12, 12, 12, 62, 25, 57, 12, 12, 28, 61, - 25, 66, 12, 12, 12, 28, 29, 12, 117, 0, 118, 25, 57, 60, 25, 12, - 12, 12, 62, 29, 119, 120, 12, 12, 12, 91, 12, 12, 13, 12, 12, 121, - 8, 8, 8, 8, 122, 40, 40, 40, 10, 10, 10, 70, 24, 10, 10, 70, - 8, 8, 123, 12, 10, 17, 10, 10, 10, 20, 70, 12, 20, 10, 10, 10, - 10, 10, 24, 11, 10, 10, 10, 26, 10, 10, 12, 12, 11, 24, 10, 10, - 12, 12, 12, 124, + 57, 12, 12, 12, 12, 12, 58, 59, 60, 25, 59, 61, 60, 25, 12, 12, + 62, 12, 12, 12, 61, 12, 12, 12, 12, 12, 12, 59, 60, 59, 12, 61, + 63, 12, 64, 12, 65, 12, 12, 12, 65, 28, 66, 29, 29, 61, 12, 12, + 60, 67, 59, 61, 68, 12, 12, 12, 12, 12, 12, 66, 12, 58, 12, 12, + 58, 12, 12, 12, 59, 12, 12, 61, 13, 10, 69, 12, 59, 12, 12, 12, + 12, 12, 12, 62, 59, 62, 70, 29, 12, 65, 12, 12, 12, 12, 10, 71, + 12, 12, 12, 29, 12, 12, 58, 12, 62, 72, 12, 12, 61, 25, 57, 64, + 12, 28, 25, 57, 61, 25, 67, 59, 12, 12, 25, 29, 12, 12, 29, 12, + 12, 73, 74, 26, 60, 25, 25, 57, 25, 70, 12, 60, 25, 25, 60, 25, + 25, 25, 25, 59, 12, 12, 12, 60, 70, 25, 65, 65, 12, 12, 29, 62, + 60, 59, 12, 12, 58, 65, 12, 61, 12, 12, 12, 61, 10, 10, 26, 12, + 75, 12, 12, 12, 12, 12, 13, 11, 62, 59, 12, 12, 12, 67, 25, 29, 
+ 12, 58, 60, 25, 25, 12, 64, 61, 10, 10, 76, 77, 12, 12, 61, 12, + 57, 28, 59, 12, 58, 12, 60, 12, 11, 26, 12, 12, 12, 12, 12, 23, + 12, 28, 66, 12, 12, 58, 25, 57, 72, 60, 25, 59, 28, 25, 25, 66, + 25, 25, 25, 57, 25, 12, 12, 12, 12, 70, 57, 59, 12, 12, 28, 25, + 29, 12, 12, 12, 62, 29, 67, 29, 12, 58, 29, 73, 12, 12, 12, 25, + 25, 62, 12, 12, 57, 25, 25, 25, 70, 25, 59, 61, 12, 59, 29, 12, + 25, 29, 12, 25, 12, 12, 12, 78, 26, 12, 12, 24, 12, 12, 12, 24, + 12, 12, 12, 22, 79, 79, 80, 81, 10, 10, 82, 83, 84, 85, 10, 10, + 10, 86, 10, 10, 10, 10, 10, 87, 0, 88, 89, 0, 90, 8, 91, 71, + 8, 8, 91, 71, 84, 84, 84, 84, 17, 71, 26, 12, 12, 20, 11, 23, + 10, 78, 92, 93, 12, 12, 23, 12, 10, 11, 23, 26, 12, 12, 92, 12, + 94, 10, 10, 10, 10, 26, 12, 12, 10, 20, 10, 10, 10, 10, 71, 12, + 10, 71, 12, 12, 10, 10, 8, 8, 8, 8, 8, 12, 12, 12, 23, 10, + 10, 10, 10, 24, 10, 23, 10, 10, 10, 26, 10, 10, 10, 10, 26, 24, + 10, 10, 20, 10, 26, 12, 12, 12, 12, 24, 71, 28, 29, 12, 24, 10, + 12, 12, 12, 28, 71, 12, 12, 12, 10, 10, 17, 10, 10, 12, 12, 12, + 10, 10, 10, 12, 95, 11, 10, 10, 11, 12, 62, 29, 11, 23, 12, 24, + 12, 12, 96, 11, 12, 12, 13, 12, 12, 12, 12, 71, 24, 10, 10, 10, + 12, 12, 12, 10, 12, 13, 71, 12, 12, 12, 12, 13, 97, 25, 25, 98, + 12, 12, 11, 12, 58, 58, 28, 12, 12, 65, 10, 12, 12, 12, 99, 12, + 12, 10, 12, 12, 12, 59, 12, 12, 12, 62, 25, 29, 12, 28, 25, 25, + 28, 62, 29, 59, 12, 61, 12, 12, 12, 12, 60, 57, 65, 65, 12, 12, + 28, 12, 12, 59, 70, 66, 59, 62, 12, 61, 59, 61, 12, 12, 12, 100, + 34, 34, 101, 34, 40, 40, 40, 102, 40, 40, 40, 103, 25, 25, 25, 29, + 104, 105, 10, 106, 107, 71, 108, 12, 40, 40, 40, 109, 30, 5, 6, 7, + 5, 110, 10, 71, 0, 0, 111, 112, 92, 12, 12, 12, 10, 10, 10, 11, + 113, 8, 8, 8, 12, 62, 57, 12, 34, 34, 34, 114, 31, 33, 34, 25, + 34, 34, 115, 52, 34, 33, 34, 34, 34, 34, 116, 10, 35, 35, 35, 35, + 35, 35, 35, 117, 12, 12, 25, 25, 25, 57, 12, 12, 28, 57, 65, 12, + 12, 28, 25, 60, 25, 59, 12, 12, 28, 12, 12, 12, 12, 62, 25, 57, + 29, 70, 12, 12, 
28, 25, 57, 12, 12, 62, 25, 59, 28, 25, 72, 28, + 70, 12, 12, 12, 62, 29, 12, 67, 28, 25, 57, 73, 12, 12, 28, 61, + 25, 67, 12, 12, 12, 12, 12, 65, 0, 12, 12, 12, 12, 28, 29, 12, + 118, 0, 119, 25, 57, 60, 25, 12, 12, 12, 62, 29, 120, 121, 12, 12, + 12, 92, 12, 12, 13, 12, 12, 122, 8, 8, 8, 8, 25, 115, 34, 34, + 123, 40, 40, 40, 10, 10, 10, 71, 8, 8, 124, 11, 10, 10, 10, 26, + 12, 10, 10, 10, 10, 10, 12, 12, 10, 24, 10, 10, 71, 24, 10, 10, + 10, 11, 12, 12, 12, 12, 12, 125, }; static RE_UINT8 re_bidi_class_stage_5[] = { @@ -8976,34 +9509,34 @@ static RE_UINT8 re_bidi_class_stage_5[] = { 6, 10, 4, 4, 10, 10, 0, 10, 10, 11, 10, 10, 4, 4, 2, 2, 10, 0, 10, 10, 10, 2, 0, 10, 0, 10, 10, 0, 0, 0, 10, 10, 0, 10, 10, 10, 12, 12, 12, 12, 10, 10, 0, 0, 0, 0, 10, 0, - 0, 0, 0, 12, 12, 12, 0, 0, 0, 0, 0, 4, 1, 12, 12, 12, + 0, 0, 0, 12, 12, 12, 0, 0, 0, 10, 10, 4, 1, 12, 12, 12, 12, 12, 1, 12, 1, 12, 12, 1, 1, 1, 1, 1, 5, 5, 5, 5, - 5, 13, 10, 10, 13, 4, 4, 13, 6, 13, 10, 10, 12, 12, 12, 13, + 5, 5, 10, 10, 13, 4, 4, 13, 6, 13, 10, 10, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 12, 5, 5, 4, 5, 5, 13, 13, 13, 12, 13, 13, 13, 13, 13, 12, 12, 12, 5, 10, 12, 12, 13, 13, 12, 12, 10, 12, 12, 12, 12, 13, 13, 2, 2, 13, 13, 13, 12, 13, 13, 1, 1, 1, 12, 1, 1, 10, 10, 10, 10, 1, 1, 1, 1, 12, 12, 12, 12, 1, 1, 12, 12, 12, 0, 0, 0, 12, 0, 12, 0, 0, 0, 0, 12, 12, 12, 0, 12, 0, 0, 0, 0, 12, 12, 0, 0, 4, 4, - 0, 12, 12, 0, 12, 0, 0, 12, 12, 12, 0, 12, 0, 4, 0, 0, - 10, 4, 10, 0, 12, 0, 12, 12, 10, 10, 10, 0, 12, 0, 12, 0, - 0, 12, 0, 12, 0, 12, 10, 10, 9, 0, 0, 0, 10, 10, 10, 12, - 12, 12, 11, 0, 0, 10, 0, 10, 9, 9, 9, 9, 9, 9, 9, 11, - 11, 11, 0, 1, 9, 7, 16, 17, 18, 14, 15, 6, 4, 4, 4, 4, - 4, 10, 10, 10, 6, 10, 10, 10, 10, 10, 10, 9, 11, 11, 19, 20, - 21, 22, 11, 11, 2, 0, 0, 0, 2, 2, 3, 3, 0, 10, 0, 0, - 0, 0, 4, 0, 10, 10, 3, 4, 9, 10, 10, 10, 0, 12, 12, 10, - 12, 12, 12, 10, 12, 12, 10, 10, 4, 4, 0, 0, 0, 1, 12, 1, - 1, 3, 1, 1, 13, 13, 10, 10, 13, 10, 13, 13, 6, 10, 6, 0, - 10, 6, 10, 
10, 10, 10, 10, 4, 10, 10, 3, 3, 10, 4, 4, 10, - 13, 13, 13, 11, 0, 10, 10, 4, 10, 4, 4, 0, 11, 10, 10, 10, - 10, 10, 11, 11, 1, 1, 1, 10, 12, 12, 12, 1, 1, 10, 10, 10, - 5, 5, 5, 1, 0, 0, 0, 11, 11, 11, 11, 12, 10, 10, 12, 12, - 12, 10, 0, 0, 0, 0, 2, 2, 10, 10, 13, 13, 2, 2, 2, 0, - 0, 0, 11, 11, + 0, 0, 0, 4, 0, 12, 12, 0, 12, 0, 0, 12, 12, 12, 0, 12, + 0, 4, 0, 0, 10, 4, 10, 0, 12, 0, 12, 12, 10, 10, 10, 0, + 12, 0, 12, 0, 0, 12, 0, 12, 0, 12, 10, 10, 9, 0, 0, 0, + 10, 10, 10, 12, 12, 12, 11, 0, 0, 10, 0, 10, 9, 9, 9, 9, + 9, 9, 9, 11, 11, 11, 0, 1, 9, 7, 16, 17, 18, 14, 15, 6, + 4, 4, 4, 4, 4, 10, 10, 10, 6, 10, 10, 10, 10, 10, 10, 9, + 11, 11, 19, 20, 21, 22, 11, 11, 2, 0, 0, 0, 2, 2, 3, 3, + 0, 10, 0, 0, 0, 0, 4, 0, 10, 10, 3, 4, 9, 10, 10, 10, + 0, 12, 12, 10, 12, 12, 12, 10, 12, 12, 10, 10, 4, 4, 0, 0, + 0, 1, 12, 1, 1, 3, 1, 1, 13, 13, 10, 10, 13, 10, 13, 13, + 6, 10, 6, 0, 10, 6, 10, 10, 10, 10, 10, 4, 10, 10, 3, 3, + 10, 4, 4, 10, 13, 13, 13, 11, 10, 4, 4, 0, 11, 10, 10, 10, + 10, 10, 11, 11, 12, 2, 2, 2, 1, 1, 1, 10, 12, 12, 12, 1, + 1, 10, 10, 10, 5, 5, 5, 1, 0, 0, 0, 11, 11, 11, 11, 12, + 10, 10, 12, 12, 12, 10, 0, 0, 0, 0, 2, 2, 10, 10, 13, 13, + 2, 2, 2, 10, 0, 0, 11, 11, }; -/* Bidi_Class: 3216 bytes. */ +/* Bidi_Class: 3464 bytes. */ RE_UINT32 re_get_bidi_class(RE_UINT32 ch) { RE_UINT32 code; @@ -9031,7 +9564,7 @@ RE_UINT32 re_get_bidi_class(RE_UINT32 ch) { /* Canonical_Combining_Class. 
*/ static RE_UINT8 re_canonical_combining_class_stage_1[] = { - 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 2, 2, 2, 6, 2, + 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 6, 2, 7, 8, 9, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -9043,23 +9576,20 @@ static RE_UINT8 re_canonical_combining_class_stage_1[] = { }; static RE_UINT8 re_canonical_combining_class_stage_2[] = { - 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 0, 0, 15, 0, 0, 0, 16, 17, 18, 19, 20, 21, 22, 0, 0, - 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 25, 0, 0, - 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 10, 11, 12, 13, 0, + 14, 0, 0, 0, 0, 0, 15, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 27, 0, 28, 29, 30, 31, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 33, 0, - 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 35, 0, 0, 0, 0, 0, - 36, 37, 0, 0, 0, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 39, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 17, 18, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, 21, + 22, 23, 0, 0, 0, 24, 0, 0, 25, 26, 27, 28, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 31, 32, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; static RE_UINT8 re_canonical_combining_class_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 0, 9, 0, 10, 11, 0, 0, 12, 13, 14, 15, 16, 0, 0, 0, 0, 17, @@ -9067,20 +9597,32 @@ static RE_UINT8 re_canonical_combining_class_stage_3[] = 
{ 0, 22, 24, 0, 0, 22, 24, 0, 0, 22, 24, 0, 0, 0, 24, 0, 0, 0, 25, 0, 0, 22, 24, 0, 0, 0, 24, 0, 0, 0, 26, 0, 0, 27, 28, 0, 0, 29, 30, 0, 31, 32, 0, 33, 34, 0, 35, 0, - 0, 36, 0, 0, 37, 0, 0, 0, 0, 0, 38, 0, 0, 0, 0, 0, - 39, 39, 0, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, 41, 0, 0, - 0, 42, 0, 0, 0, 0, 0, 0, 43, 0, 0, 44, 0, 0, 0, 0, - 0, 45, 46, 47, 0, 48, 0, 49, 0, 50, 0, 0, 0, 0, 51, 52, - 0, 0, 0, 0, 0, 0, 53, 54, 0, 0, 0, 0, 0, 0, 55, 56, - 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, 0, 58, 0, 0, 0, 59, - 0, 60, 0, 0, 61, 0, 0, 0, 0, 0, 0, 62, 63, 0, 0, 64, - 65, 0, 0, 0, 0, 0, 46, 66, 0, 67, 68, 0, 0, 69, 70, 0, - 0, 0, 0, 0, 0, 71, 72, 73, 0, 0, 0, 0, 0, 0, 0, 24, - 74, 0, 0, 0, 0, 0, 0, 0, 0, 75, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 76, 77, 78, 0, 0, 0, 0, 0, 0, - 0, 0, 65, 0, 0, 79, 0, 0, 80, 81, 0, 0, 0, 0, 70, 0, - 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 83, 84, 85, 0, 0, - 0, 0, 86, 0, 0, 0, 0, 0, + 0, 36, 0, 0, 37, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 39, 39, 0, 0, 0, 0, 40, 0, + 0, 0, 0, 0, 0, 41, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, + 43, 0, 0, 44, 0, 45, 0, 0, 0, 46, 47, 48, 0, 49, 0, 50, + 0, 51, 0, 0, 0, 0, 52, 53, 0, 0, 0, 0, 0, 0, 54, 55, + 0, 0, 0, 0, 0, 0, 56, 57, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 58, 0, 0, 0, 59, 0, 0, 0, 60, + 0, 61, 0, 0, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 63, 64, 0, 0, 65, 0, 0, 0, 0, 0, 0, 0, 0, + 66, 0, 0, 0, 0, 0, 47, 67, 0, 68, 69, 0, 0, 70, 71, 0, + 0, 0, 0, 0, 0, 72, 73, 74, 0, 0, 0, 0, 0, 0, 0, 24, + 0, 0, 0, 0, 0, 0, 0, 0, 75, 0, 0, 0, 0, 0, 0, 0, + 0, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, + 0, 0, 0, 0, 0, 0, 0, 78, 0, 0, 0, 79, 0, 0, 0, 0, + 80, 81, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 66, 59, 0, 83, 0, 0, 84, 85, 0, 70, 0, 0, 71, 0, + 0, 86, 0, 0, 0, 0, 0, 87, 0, 22, 24, 88, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 89, 0, 0, 0, 0, 0, 0, 59, 90, 0, + 0, 59, 0, 0, 0, 91, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 92, 0, 93, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 94, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 96, 97, 0, 0, + 0, 0, 98, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 99, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; static RE_UINT8 re_canonical_combining_class_stage_4[] = { @@ -9094,40 +9636,46 @@ static RE_UINT8 re_canonical_combining_class_stage_4[] = { 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 43, 36, 44, 45, 21, 45, 46, 0, 0, 0, 0, 0, 0, 0, 19, 1, 21, 0, 0, 0, 0, 0, 0, 0, 0, 38, 47, 1, 1, 48, 48, 49, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 50, 0, 0, 21, 43, 51, 52, 21, 35, 53, - 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 55, 56, 57, 0, 0, - 0, 0, 0, 55, 0, 0, 0, 0, 0, 0, 0, 55, 0, 58, 0, 0, - 0, 0, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 0, - 0, 0, 61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, - 0, 0, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, - 0, 0, 0, 0, 0, 65, 66, 0, 0, 0, 0, 0, 67, 68, 69, 70, - 71, 72, 0, 0, 0, 0, 0, 0, 0, 73, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 74, 75, 0, 0, 0, 0, 76, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 77, 0, 0, - 0, 0, 0, 0, 59, 0, 0, 78, 0, 0, 79, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 19, 81, 0, - 77, 0, 0, 0, 0, 48, 1, 82, 0, 0, 0, 0, 0, 54, 0, 0, - 0, 77, 0, 0, 0, 0, 0, 0, 0, 0, 19, 10, 1, 0, 0, 0, - 0, 0, 83, 0, 0, 0, 0, 0, 0, 84, 0, 0, 83, 0, 0, 0, - 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 53, 9, 12, 4, - 85, 8, 86, 76, 0, 57, 0, 0, 21, 1, 21, 87, 88, 1, 1, 1, - 1, 53, 0, 0, 0, 0, 0, 89, 0, 0, 0, 0, 90, 1, 91, 57, - 78, 92, 93, 4, 57, 0, 0, 0, 0, 0, 0, 19, 49, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 94, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 95, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97, 0, - 0, 0, 0, 19, 0, 1, 1, 49, 0, 0, 0, 0, 0, 0, 0, 19, - 0, 0, 0, 0, 49, 0, 0, 0, 0, 59, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 49, 0, 0, 0, 0, 0, 98, 64, 0, 0, 0, 0, - 0, 0, 0, 0, 94, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, - 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99, 100, 57, 38, - 78, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 59, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 101, 1, 53, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 76, 0, 0, 0, 102, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 103, 94, 0, 0, 0, 0, 0, 0, 104, 0, - 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 94, 77, 0, 0, - 0, 0, 0, 0, 0, 105, 0, 0, 0, 106, 107, 108, 109, 0, 98, 4, - 110, 48, 23, 0, 0, 0, 0, 0, 0, 0, 38, 49, 0, 0, 0, 0, - 38, 57, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 50, 0, 0, 21, 43, 51, 52, 21, 35, 1, + 0, 0, 0, 0, 0, 0, 0, 53, 0, 0, 0, 54, 55, 56, 0, 0, + 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 54, 0, 57, 0, 0, + 0, 0, 58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 0, + 0, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 61, 0, + 0, 0, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, + 0, 0, 0, 0, 0, 64, 65, 0, 0, 0, 0, 0, 66, 67, 68, 69, + 70, 71, 0, 0, 0, 0, 0, 0, 0, 72, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 73, 74, 0, 0, 0, 0, 75, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 76, 0, 0, + 0, 0, 0, 0, 58, 0, 0, 77, 0, 0, 78, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 79, 0, 0, 0, 0, 0, 0, 19, 80, 0, + 76, 0, 0, 0, 0, 48, 1, 81, 0, 0, 0, 0, 1, 51, 15, 41, + 0, 0, 0, 0, 0, 53, 0, 0, 0, 76, 0, 0, 0, 0, 0, 0, + 0, 0, 19, 10, 1, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, + 0, 83, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 0, 73, 0, 0, + 0, 0, 0, 0, 84, 9, 12, 4, 85, 8, 86, 75, 0, 56, 49, 0, + 21, 1, 21, 87, 88, 1, 1, 1, 1, 1, 1, 1, 1, 49, 0, 89, + 0, 0, 0, 0, 90, 1, 91, 56, 77, 92, 93, 4, 56, 0, 0, 0, + 0, 0, 0, 19, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 94, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 95, 96, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 97, 0, 0, 0, 0, 19, 0, 1, 1, 49, + 0, 0, 0, 0, 0, 0, 0, 19, 0, 0, 0, 0, 49, 0, 0, 0, + 0, 58, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 49, 0, 0, 0, + 0, 0, 98, 63, 0, 0, 0, 0, 0, 0, 0, 0, 94, 0, 0, 0, + 0, 0, 0, 0, 73, 0, 0, 0, 76, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 99, 100, 56, 38, 77, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 101, + 1, 14, 4, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 75, + 80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 84, 0, + 0, 0, 
0, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 103, 94, + 0, 104, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 105, 0, + 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 94, 76, 0, 0, + 0, 0, 0, 0, 0, 105, 0, 0, 0, 0, 106, 0, 0, 0, 0, 0, + 0, 38, 1, 56, 1, 56, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, + 0, 0, 0, 0, 8, 86, 0, 0, 0, 0, 0, 0, 1, 84, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 108, 0, 109, 110, 111, 112, 0, 98, 4, + 113, 48, 23, 0, 0, 0, 0, 0, 0, 0, 38, 49, 0, 0, 0, 0, + 38, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 113, 0, 0, }; static RE_UINT8 re_canonical_combining_class_stage_5[] = { @@ -9144,24 +9692,25 @@ static RE_UINT8 re_canonical_combining_class_stage_5[] = { 50, 0, 45, 50, 50, 45, 0, 0, 0, 31, 0, 0, 50, 45, 50, 50, 45, 45, 50, 45, 45, 50, 45, 50, 45, 50, 50, 0, 50, 50, 0, 50, 0, 50, 50, 50, 50, 50, 0, 0, 0, 45, 45, 45, 50, 45, 45, 45, - 22, 23, 24, 50, 50, 50, 50, 0, 2, 0, 0, 0, 0, 4, 0, 0, - 0, 50, 45, 50, 50, 0, 0, 0, 0, 32, 33, 0, 0, 0, 4, 0, - 34, 34, 4, 0, 35, 35, 35, 35, 36, 36, 0, 0, 37, 37, 37, 37, - 45, 45, 0, 0, 0, 45, 0, 45, 0, 43, 0, 0, 0, 38, 39, 0, - 40, 0, 0, 0, 0, 0, 39, 39, 39, 39, 0, 0, 39, 0, 50, 50, - 4, 0, 50, 50, 0, 0, 45, 0, 0, 0, 0, 2, 0, 4, 4, 0, - 0, 45, 0, 0, 4, 0, 0, 0, 0, 50, 0, 0, 0, 49, 0, 0, - 0, 46, 50, 45, 45, 0, 0, 0, 50, 0, 0, 45, 0, 0, 4, 4, - 0, 0, 2, 0, 50, 0, 1, 1, 1, 0, 0, 0, 50, 53, 42, 45, + 22, 23, 24, 50, 2, 0, 0, 0, 0, 4, 0, 0, 0, 50, 45, 50, + 50, 0, 0, 0, 0, 32, 33, 0, 0, 0, 4, 0, 34, 34, 4, 0, + 35, 35, 35, 35, 36, 36, 0, 0, 37, 37, 37, 37, 45, 45, 0, 0, + 0, 45, 0, 45, 0, 43, 0, 0, 0, 38, 39, 0, 40, 0, 0, 0, + 0, 0, 39, 39, 39, 39, 0, 0, 39, 0, 50, 50, 4, 0, 50, 50, + 0, 0, 45, 0, 0, 0, 0, 2, 0, 4, 4, 0, 0, 45, 0, 0, + 4, 0, 0, 0, 0, 50, 0, 0, 0, 49, 0, 0, 0, 46, 50, 45, + 45, 0, 0, 0, 50, 0, 0, 45, 0, 0, 4, 4, 0, 0, 2, 0, + 50, 50, 50, 0, 50, 0, 1, 1, 1, 0, 0, 0, 50, 53, 42, 45, 41, 50, 50, 50, 52, 45, 50, 45, 50, 50, 1, 1, 1, 1, 1, 50, 0, 1, 1, 50, 45, 50, 1, 1, 0, 0, 0, 4, 0, 0, 44, 49, 51, 46, 
47, 47, 0, 3, 3, 0, 0, 0, 0, 45, 50, 0, 50, 50, 45, 0, 0, 50, 0, 0, 21, 0, 0, 45, 0, 50, 50, 1, 45, 0, - 0, 4, 2, 0, 0, 0, 4, 2, 0, 43, 43, 1, 1, 1, 0, 0, - 0, 48, 43, 43, 43, 43, 43, 0, 45, 45, 45, 0, + 0, 50, 45, 0, 0, 4, 2, 0, 0, 2, 4, 0, 0, 0, 4, 2, + 0, 0, 1, 0, 0, 43, 43, 1, 1, 1, 0, 0, 0, 48, 43, 43, + 43, 43, 43, 0, 45, 45, 45, 0, }; -/* Canonical_Combining_Class: 1828 bytes. */ +/* Canonical_Combining_Class: 2096 bytes. */ RE_UINT32 re_get_canonical_combining_class(RE_UINT32 ch) { RE_UINT32 code; @@ -9171,10 +9720,10 @@ RE_UINT32 re_get_canonical_combining_class(RE_UINT32 ch) { f = ch >> 13; code = ch ^ (f << 13); - pos = (RE_UINT32)re_canonical_combining_class_stage_1[f] << 5; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_canonical_combining_class_stage_2[pos + f] << 3; + pos = (RE_UINT32)re_canonical_combining_class_stage_1[f] << 4; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_canonical_combining_class_stage_2[pos + f] << 4; f = code >> 5; code ^= f << 5; pos = (RE_UINT32)re_canonical_combining_class_stage_3[pos + f] << 3; @@ -9207,20 +9756,20 @@ static RE_UINT8 re_decomposition_type_stage_2[] = { 30, 31, 32, 33, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 34, 7, 7, 7, 7, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 36, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 34, 35, 7, 7, 7, 36, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 38, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 35, 37, 38, 39, 40, 41, 42, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 37, 39, 40, 41, 42, 43, 44, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
- 43, 44, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 45, 46, 7, 47, 48, 49, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 45, 7, 7, 46, 47, 48, 49, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 50, 7, - 7, 51, 52, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 50, 7, 7, 51, 52, 53, 54, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 55, 7, + 7, 56, 57, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 35, 35, 53, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 37, 37, 58, 7, 7, 7, 7, 7, }; static RE_UINT8 re_decomposition_type_stage_3[] = { @@ -9258,25 +9807,30 @@ static RE_UINT8 re_decomposition_type_stage_3[] = { 0, 0, 0, 135, 102, 102, 102, 102, 136, 137, 0, 0, 0, 0, 0, 0, 102, 136, 102, 102, 138, 139, 116, 140, 116, 116, 116, 116, 141, 116, 116, 140, 142, 142, 142, 142, 142, 143, 102, 144, 142, 142, 142, 142, 142, 142, 102, 145, - 0, 0, 0, 0, 0, 0, 0, 146, 0, 0, 0, 0, 0, 0, 0, 147, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 146, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 147, 0, 0, 0, 0, 0, 0, 0, 148, + 0, 0, 0, 0, 0, 149, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 21, 0, 0, 0, 0, 0, - 81, 148, 149, 6, 6, 6, 81, 6, 6, 6, 6, 6, 6, 78, 0, 0, - 150, 151, 152, 153, 154, 155, 156, 156, 157, 156, 158, 159, 0, 160, 161, 162, - 163, 163, 163, 163, 163, 163, 164, 165, 165, 166, 167, 167, 167, 168, 169, 170, - 163, 171, 172, 173, 0, 174, 175, 176, 177, 178, 165, 179, 180, 0, 0, 181, - 0, 182, 0, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 192, 193, 194, - 195, 196, 196, 196, 196, 196, 197, 198, 198, 198, 198, 199, 200, 201, 202, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 203, 204, 0, 0, 0, 0, 0, - 0, 0, 205, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 205, 206, 0, 0, 0, 0, 207, 14, 0, 0, 0, - 208, 208, 208, 208, 208, 209, 208, 208, 208, 210, 211, 212, 213, 208, 208, 208, - 214, 215, 
208, 216, 217, 218, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, - 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 219, 208, 208, 208, 208, 208, - 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 220, 208, 208, 208, - 213, 208, 221, 222, 223, 224, 225, 226, 227, 228, 229, 228, 0, 0, 0, 0, - 230, 102, 231, 142, 142, 0, 232, 0, 0, 233, 0, 0, 0, 0, 0, 0, - 234, 142, 142, 235, 236, 237, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 81, 150, 151, 6, 6, 6, 81, 6, 6, 6, 6, 6, 6, 78, 0, 0, + 152, 153, 154, 155, 156, 157, 158, 158, 159, 158, 160, 161, 0, 162, 163, 164, + 165, 165, 165, 165, 165, 165, 166, 167, 167, 168, 169, 169, 169, 170, 171, 172, + 165, 173, 174, 175, 0, 176, 177, 178, 179, 180, 167, 181, 182, 0, 0, 183, + 0, 184, 0, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 194, 195, 196, + 197, 198, 198, 198, 198, 198, 199, 200, 200, 200, 200, 201, 202, 203, 204, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 205, 206, 0, 0, 0, 0, 0, + 0, 0, 207, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 208, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 104, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 207, 209, 0, 0, 0, 0, 210, 14, 0, 0, 0, + 211, 211, 211, 211, 211, 212, 211, 211, 211, 213, 214, 215, 216, 211, 211, 211, + 217, 218, 211, 219, 220, 221, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, + 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 222, 211, 211, 211, 211, 211, + 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 223, 211, 211, 211, + 216, 211, 224, 225, 226, 227, 228, 229, 230, 231, 232, 231, 0, 0, 0, 0, + 233, 102, 234, 142, 142, 0, 235, 0, 0, 236, 0, 0, 0, 0, 0, 0, + 237, 142, 142, 238, 239, 240, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; @@ -9317,30 +9871,31 @@ static RE_UINT8 re_decomposition_type_stage_4[] = { 9, 22, 33, 12, 0, 19, 3, 78, 0, 37, 11, 79, 34, 20, 20, 20, 20, 20, 20, 30, 4, 24, 24, 24, 20, 73, 0, 0, 80, 73, 73, 73, 73, 73, 73, 75, 
20, 20, 20, 81, 81, 81, 81, 81, 81, 81, 20, 20, - 82, 81, 81, 81, 20, 20, 20, 83, 25, 0, 0, 0, 0, 0, 55, 0, - 36, 10, 8, 11, 36, 33, 13, 8, 20, 30, 0, 0, 3, 20, 0, 46, - 59, 59, 84, 8, 8, 11, 8, 36, 9, 22, 8, 15, 85, 86, 86, 86, - 86, 86, 86, 86, 86, 85, 85, 85, 87, 85, 86, 86, 88, 0, 0, 0, - 89, 90, 91, 92, 85, 87, 86, 85, 85, 85, 93, 87, 94, 94, 94, 94, - 94, 95, 95, 95, 95, 95, 95, 95, 95, 96, 97, 97, 97, 97, 97, 97, - 97, 97, 97, 98, 99, 99, 99, 99, 99, 100, 94, 94, 101, 95, 95, 95, - 95, 95, 95, 102, 97, 99, 99, 103, 104, 97, 105, 106, 107, 105, 108, 105, - 104, 96, 95, 105, 96, 109, 110, 97, 111, 106, 112, 105, 95, 106, 113, 95, - 96, 106, 0, 0, 94, 94, 94, 114, 115, 115, 116, 0, 115, 115, 115, 115, - 115, 117, 118, 20, 119, 120, 120, 120, 120, 119, 120, 0, 121, 122, 123, 123, - 124, 91, 125, 126, 90, 125, 127, 127, 127, 127, 126, 91, 125, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 126, 125, 126, 91, 128, 129, 130, 130, 130, - 130, 130, 130, 130, 131, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 133, - 134, 132, 134, 132, 134, 132, 134, 135, 130, 136, 132, 133, 0, 0, 27, 19, - 0, 0, 18, 0, 0, 0, 0, 13, 8, 19, 0, 0, 0, 0, 18, 8, - 59, 59, 59, 59, 59, 137, 59, 59, 59, 59, 59, 137, 138, 139, 61, 137, - 59, 59, 66, 61, 59, 61, 59, 59, 59, 66, 140, 61, 59, 137, 59, 137, - 59, 59, 66, 140, 59, 141, 142, 59, 137, 59, 59, 59, 59, 62, 59, 59, - 59, 59, 59, 142, 139, 143, 61, 59, 140, 59, 144, 0, 138, 145, 144, 61, - 139, 143, 144, 144, 139, 143, 140, 59, 140, 59, 61, 141, 59, 59, 66, 59, - 59, 59, 59, 0, 61, 61, 66, 59, 20, 20, 30, 0, 20, 20, 146, 75, - 0, 0, 4, 0, 147, 0, 0, 0, 148, 0, 0, 0, 81, 81, 148, 0, - 20, 20, 35, 0, 149, 0, 0, 0, + 82, 81, 81, 81, 20, 20, 20, 83, 0, 0, 0, 55, 25, 0, 0, 0, + 0, 0, 55, 0, 0, 0, 0, 24, 36, 10, 8, 11, 36, 33, 13, 8, + 20, 30, 0, 0, 3, 20, 0, 46, 59, 59, 84, 8, 8, 11, 8, 36, + 9, 22, 8, 15, 85, 86, 86, 86, 86, 86, 86, 86, 86, 85, 85, 85, + 87, 85, 86, 86, 88, 0, 0, 0, 89, 90, 91, 92, 85, 87, 86, 85, + 85, 85, 93, 87, 
94, 94, 94, 94, 94, 95, 95, 95, 95, 95, 95, 95, + 95, 96, 97, 97, 97, 97, 97, 97, 97, 97, 97, 98, 99, 99, 99, 99, + 99, 100, 94, 94, 101, 95, 95, 95, 95, 95, 95, 102, 97, 99, 99, 103, + 104, 97, 105, 106, 107, 105, 108, 105, 104, 96, 95, 105, 96, 109, 110, 97, + 111, 106, 112, 105, 95, 106, 113, 95, 96, 106, 0, 0, 94, 94, 94, 114, + 115, 115, 116, 0, 115, 115, 115, 115, 115, 117, 118, 20, 119, 120, 120, 120, + 120, 119, 120, 0, 121, 122, 123, 123, 124, 91, 125, 126, 90, 125, 127, 127, + 127, 127, 126, 91, 125, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 126, + 125, 126, 91, 128, 129, 130, 130, 130, 130, 130, 130, 130, 131, 132, 132, 132, + 132, 132, 132, 132, 132, 132, 132, 133, 134, 132, 134, 132, 134, 132, 134, 135, + 130, 136, 132, 133, 0, 0, 27, 19, 0, 0, 18, 0, 0, 0, 0, 13, + 0, 0, 18, 36, 8, 19, 0, 0, 0, 0, 18, 8, 59, 59, 59, 59, + 59, 137, 59, 59, 59, 59, 59, 137, 138, 139, 61, 137, 59, 59, 66, 61, + 59, 61, 59, 59, 59, 66, 140, 61, 59, 137, 59, 137, 59, 59, 66, 140, + 59, 141, 142, 59, 137, 59, 59, 59, 59, 62, 59, 59, 59, 59, 59, 142, + 139, 143, 61, 59, 140, 59, 144, 0, 138, 145, 144, 61, 139, 143, 144, 144, + 139, 143, 140, 59, 140, 59, 61, 141, 59, 59, 66, 59, 59, 59, 59, 0, + 61, 61, 66, 59, 20, 20, 30, 0, 20, 20, 146, 75, 0, 0, 4, 0, + 147, 0, 0, 0, 148, 0, 0, 0, 81, 81, 148, 0, 20, 20, 35, 0, + 149, 0, 0, 0, }; static RE_UINT8 re_decomposition_type_stage_5[] = { @@ -9384,7 +9939,7 @@ static RE_UINT8 re_decomposition_type_stage_5[] = { 16, 16, 16, 0, 9, 9, 0, 0, }; -/* Decomposition_Type: 2872 bytes. */ +/* Decomposition_Type: 2964 bytes. */ RE_UINT32 re_get_decomposition_type(RE_UINT32 ch) { RE_UINT32 code; @@ -9559,20 +10114,22 @@ RE_UINT32 re_get_east_asian_width(RE_UINT32 ch) { /* Joining_Group. 
*/ static RE_UINT8 re_joining_group_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, }; static RE_UINT8 re_joining_group_stage_2[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, }; static RE_UINT8 re_joining_group_stage_3[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; static RE_UINT8 re_joining_group_stage_4[] = { @@ -9580,7 +10137,8 @@ static RE_UINT8 re_joining_group_stage_4[] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 0, 0, 21, 0, 22, 0, 0, 23, 24, 25, 26, 0, 0, 0, 27, 28, 29, 30, 31, 32, 33, - 0, 0, 0, 0, 34, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 34, 35, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 37, 38, 39, 40, 41, 42, 0, 0, }; static RE_UINT8 re_joining_group_stage_5[] = { @@ -9601,10 +10159,14 @@ static RE_UINT8 re_joining_group_stage_5[] = { 4, 4, 4, 4, 4, 4, 4, 13, 13, 6, 6, 31, 35, 1, 1, 1, 9, 9, 11, 11, 11, 24, 24, 26, 26, 26, 22, 31, 31, 35, 13, 13, 35, 31, 13, 3, 3, 55, 55, 45, 43, 43, 54, 54, 13, 35, 35, 19, - 4, 0, 13, 39, 9, 29, 22, 24, 45, 45, 31, 43, 57, 0, 0, 0, + 4, 4, 13, 39, 9, 29, 22, 24, 45, 45, 31, 43, 57, 0, 6, 33, + 11, 58, 31, 0, 0, 0, 0, 0, 59, 61, 61, 65, 65, 62, 0, 83, + 0, 85, 85, 0, 0, 66, 80, 84, 68, 68, 68, 69, 63, 81, 70, 71, + 77, 60, 60, 73, 73, 76, 74, 74, 74, 75, 0, 0, 78, 0, 0, 0, + 0, 0, 0, 72, 64, 79, 82, 67, }; -/* Joining_Group: 481 bytes. */ +/* Joining_Group: 586 bytes. 
*/ RE_UINT32 re_get_joining_group(RE_UINT32 ch) { RE_UINT32 code; @@ -9612,12 +10174,12 @@ RE_UINT32 re_get_joining_group(RE_UINT32 ch) { RE_UINT32 pos; RE_UINT32 value; - f = ch >> 16; - code = ch ^ (f << 16); + f = ch >> 15; + code = ch ^ (f << 15); pos = (RE_UINT32)re_joining_group_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_joining_group_stage_2[pos + f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_joining_group_stage_2[pos + f] << 4; f = code >> 7; code ^= f << 7; pos = (RE_UINT32)re_joining_group_stage_3[pos + f] << 4; @@ -9632,15 +10194,15 @@ RE_UINT32 re_get_joining_group(RE_UINT32 ch) { /* Joining_Type. */ static RE_UINT8 re_joining_type_stage_1[] = { - 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 6, 2, 2, 7, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, + 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 6, 2, 7, 8, 9, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, }; static RE_UINT8 re_joining_type_stage_2[] = { @@ -9654,13 +10216,17 @@ static RE_UINT8 re_joining_type_stage_2[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 33, 1, 1, 34, 35, - 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 37, 1, 1, 1, 1, 1, - 38, 39, 1, 1, 1, 1, 40, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 41, + 
1, 36, 37, 38, 1, 1, 1, 1, 1, 1, 39, 40, 1, 1, 1, 1, + 41, 42, 43, 44, 45, 46, 47, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 48, 49, 1, 1, 1, 50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 42, 43, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 44, 45, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 51, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 52, 53, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 54, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 55, 56, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }; @@ -9672,12 +10238,12 @@ static RE_UINT8 re_joining_type_stage_3[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 5, 6, 0, 0, 0, 0, 7, 8, 9, 10, 2, 11, 12, 13, 14, 15, 15, 16, 17, 18, 19, 20, 21, 22, 2, 23, 24, 25, 26, 0, 0, 27, 28, 29, 15, 30, 31, - 0, 32, 33, 0, 34, 35, 0, 0, 0, 0, 36, 0, 0, 0, 37, 38, + 0, 32, 33, 0, 34, 35, 0, 0, 0, 0, 36, 37, 0, 0, 38, 2, 39, 0, 0, 40, 41, 42, 43, 0, 44, 0, 0, 45, 46, 0, 43, 0, 47, 0, 0, 45, 48, 44, 0, 49, 47, 0, 0, 45, 50, 0, 43, 0, 44, 0, 0, 51, 46, 52, 43, 0, 53, 0, 0, 0, 54, 0, 0, 0, - 0, 0, 0, 55, 56, 57, 43, 0, 0, 0, 0, 51, 58, 0, 43, 0, - 0, 0, 0, 0, 46, 0, 43, 0, 0, 0, 0, 0, 59, 60, 0, 0, + 28, 0, 0, 55, 56, 57, 43, 0, 44, 0, 0, 51, 58, 0, 43, 0, + 44, 0, 0, 0, 46, 0, 43, 0, 0, 0, 0, 0, 59, 60, 0, 0, 0, 0, 0, 61, 62, 0, 0, 0, 0, 0, 0, 63, 64, 0, 0, 0, 0, 65, 0, 66, 0, 0, 0, 67, 68, 69, 2, 70, 52, 0, 0, 0, 0, 0, 71, 72, 0, 73, 28, 74, 75, 1, 0, 0, 0, 0, 0, 0, @@ -9685,30 +10251,41 @@ static RE_UINT8 re_joining_type_stage_3[] = { 0, 76, 0, 76, 0, 43, 0, 43, 0, 0, 0, 77, 78, 79, 0, 0, 80, 0, 15, 15, 15, 15, 15, 81, 82, 15, 83, 0, 0, 0, 0, 0, 0, 0, 84, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 86, 0, 0, 0, 87, 88, 89, 0, 0, 0, 0, 0, 0, 0, 0, - 90, 0, 0, 91, 53, 0, 92, 90, 93, 0, 94, 0, 0, 0, 95, 93, - 0, 0, 96, 97, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 98, 99, 100, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 101, 96, - 102, 0, 103, 0, 0, 0, 104, 0, 0, 0, 0, 0, 0, 2, 2, 28, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 93, + 0, 86, 0, 0, 0, 87, 88, 89, 0, 0, 0, 90, 0, 0, 0, 0, + 91, 0, 0, 92, 53, 0, 93, 91, 94, 0, 95, 0, 0, 0, 96, 94, + 0, 0, 97, 98, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99, 100, 101, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 102, + 103, 0, 104, 0, 0, 0, 105, 0, 0, 0, 0, 0, 0, 2, 2, 28, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 94, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 2, 2, - 0, 0, 105, 0, 0, 0, 0, 0, 0, 106, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 20, 107, 0, 20, 0, 0, 0, 0, 0, 93, - 108, 0, 57, 0, 15, 15, 15, 109, 0, 0, 0, 0, 100, 0, 2, 93, - 0, 0, 110, 0, 111, 93, 0, 0, 39, 0, 0, 112, 0, 0, 0, 0, - 0, 0, 113, 114, 115, 0, 0, 0, 0, 0, 0, 116, 44, 0, 117, 52, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, - 0, 119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 0, 101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, + 0, 0, 106, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 20, 108, 0, 20, 0, 0, 0, 0, 0, 94, + 109, 0, 57, 0, 15, 15, 15, 110, 0, 0, 0, 0, 111, 0, 2, 94, + 0, 0, 112, 0, 113, 94, 0, 0, 39, 0, 0, 114, 0, 0, 115, 0, + 0, 0, 116, 117, 118, 0, 0, 45, 0, 0, 0, 119, 44, 0, 120, 52, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, + 0, 122, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 0, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 124, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 121, 0, 0, 122, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 44, 0, 0, 123, 101, 0, 0, 0, 93, 0, 0, 124, 0, 0, 0, 0, - 39, 0, 125, 126, 0, 0, 0, 0, 93, 0, 0, 127, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 129, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 0, + 0, 0, 0, 0, 0, 0, 0, 125, 0, 0, 0, 0, 0, 0, 0, 0, + 126, 0, 0, 127, 0, 0, 0, 0, 0, 0, 0, 0, 128, 
129, 130, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 131, 132, 133, 0, 0, 0, 0, 0, + 44, 0, 0, 134, 135, 0, 0, 20, 94, 0, 0, 136, 0, 0, 0, 0, + 39, 0, 137, 138, 0, 0, 0, 139, 94, 0, 0, 140, 0, 0, 0, 0, + 0, 0, 20, 141, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 142, 0, + 44, 0, 0, 45, 28, 0, 143, 138, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 144, 145, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 146, 28, 0, 0, 0, + 0, 0, 0, 147, 28, 0, 0, 0, 0, 0, 148, 149, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 138, + 0, 0, 0, 135, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 39, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 130, 131, 132, 0, 105, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 150, 91, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 151, 152, 153, 0, 106, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 135, 0, 0, 44, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, }; @@ -9723,7 +10300,7 @@ static RE_UINT8 re_joining_type_stage_4[] = { 15, 15, 32, 15, 15, 15, 13, 36, 24, 36, 22, 15, 0, 37, 2, 2, 9, 0, 0, 0, 0, 0, 18, 15, 15, 15, 38, 2, 2, 0, 39, 0, 0, 37, 6, 2, 2, 5, 5, 4, 36, 33, 12, 13, 15, 40, 5, 0, - 41, 15, 25, 42, 0, 2, 2, 2, 2, 2, 2, 8, 8, 0, 0, 0, + 15, 15, 25, 41, 42, 0, 0, 0, 0, 2, 2, 2, 8, 0, 0, 0, 0, 0, 43, 9, 5, 2, 9, 1, 5, 2, 0, 0, 37, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 9, 5, 9, 0, 1, 7, 0, 0, 0, 7, 3, 27, 4, 4, 1, 0, 0, 5, 6, 9, 1, 0, 0, 0, 27, @@ -9736,18 +10313,23 @@ static RE_UINT8 re_joining_type_stage_4[] = { 37, 9, 0, 0, 0, 6, 2, 4, 0, 43, 5, 2, 2, 0, 0, 1, 0, 46, 47, 4, 15, 15, 0, 0, 0, 46, 15, 15, 15, 15, 48, 0, 8, 3, 9, 0, 43, 0, 5, 0, 0, 3, 27, 0, 0, 43, 2, 8, - 44, 5, 2, 9, 3, 2, 2, 27, 2, 0, 0, 0, 0, 28, 8, 9, - 0, 0, 3, 2, 4, 0, 0, 0, 37, 4, 6, 0, 0, 43, 4, 45, - 0, 0, 0, 2, 2, 37, 0, 0, 8, 2, 2, 2, 28, 2, 9, 1, - 0, 9, 0, 0, 2, 8, 0, 0, 0, 0, 3, 49, 0, 0, 37, 8, - 2, 9, 37, 2, 0, 0, 37, 4, 0, 0, 7, 0, 8, 2, 2, 4, - 43, 43, 3, 0, 50, 0, 
0, 0, 0, 37, 2, 4, 0, 3, 2, 2, - 3, 37, 4, 9, 0, 0, 5, 8, 7, 7, 0, 0, 3, 0, 0, 9, - 28, 27, 9, 37, 0, 0, 0, 4, 0, 1, 9, 1, 0, 0, 0, 43, - 0, 0, 5, 0, 5, 7, 0, 2, 0, 0, 8, 3, 0, 0, 2, 2, - 3, 8, 7, 1, 0, 3, 2, 5, 2, 9, 0, 0, 0, 37, 2, 8, - 0, 0, 3, 1, 2, 6, 0, 0, 0, 3, 4, 0, 3, 2, 2, 2, - 8, 5, 2, 0, + 44, 5, 2, 9, 3, 2, 2, 27, 2, 2, 2, 8, 2, 0, 0, 0, + 0, 28, 8, 9, 0, 0, 3, 2, 4, 0, 0, 0, 37, 4, 6, 4, + 0, 43, 4, 45, 0, 0, 0, 2, 2, 37, 0, 0, 8, 2, 2, 2, + 28, 2, 9, 1, 0, 9, 4, 0, 2, 4, 0, 2, 0, 0, 3, 49, + 0, 0, 37, 8, 2, 9, 37, 2, 0, 0, 37, 4, 0, 0, 7, 0, + 8, 2, 2, 4, 43, 43, 3, 0, 50, 0, 0, 0, 0, 9, 0, 0, + 0, 37, 2, 4, 0, 3, 2, 2, 3, 37, 4, 9, 0, 1, 0, 0, + 0, 0, 5, 8, 7, 7, 0, 0, 3, 0, 0, 9, 28, 27, 9, 37, + 0, 0, 0, 4, 0, 1, 9, 1, 0, 0, 0, 43, 2, 2, 2, 4, + 0, 0, 5, 0, 0, 37, 8, 0, 5, 7, 0, 2, 0, 0, 8, 3, + 15, 51, 52, 53, 14, 54, 15, 12, 55, 56, 46, 13, 24, 22, 12, 57, + 55, 0, 0, 0, 0, 0, 20, 58, 0, 0, 2, 2, 2, 8, 0, 0, + 3, 8, 7, 1, 0, 3, 2, 5, 2, 9, 0, 0, 3, 0, 0, 0, + 0, 37, 2, 8, 4, 28, 0, 0, 3, 2, 8, 0, 0, 37, 2, 9, + 3, 2, 44, 3, 28, 0, 0, 0, 37, 4, 0, 6, 3, 2, 8, 45, + 0, 0, 3, 1, 2, 6, 0, 0, 0, 0, 0, 7, 0, 3, 4, 0, + 3, 2, 2, 2, 8, 5, 2, 0, }; static RE_UINT8 re_joining_type_stage_5[] = { @@ -9761,12 +10343,14 @@ static RE_UINT8 re_joining_type_stage_5[] = { 5, 0, 5, 5, 5, 5, 3, 3, 2, 0, 0, 2, 3, 5, 2, 2, 2, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 2, 0, 3, 2, 2, 3, 2, 2, 2, 0, 0, 5, 5, 2, 2, 2, 5, 0, 0, 1, 0, - 3, 2, 0, 0, 2, 0, 2, 2, 3, 0, 0, 0, 0, 0, 5, 0, + 3, 2, 0, 0, 3, 0, 3, 2, 2, 3, 3, 0, 0, 0, 5, 0, 5, 0, 5, 0, 0, 5, 0, 5, 0, 0, 0, 2, 0, 0, 1, 5, - 2, 5, 2, 0, 0, 1, 5, 5, 2, 2, 4, 0, + 2, 5, 2, 0, 0, 1, 5, 5, 2, 2, 4, 0, 2, 3, 0, 3, + 0, 3, 3, 0, 0, 4, 3, 3, 2, 2, 2, 4, 2, 3, 0, 0, + 3, 5, 5, 0, 3, 2, 3, 3, 3, 2, 2, 0, }; -/* Joining_Type: 1896 bytes. */ +/* Joining_Type: 2252 bytes. 
*/ RE_UINT32 re_get_joining_type(RE_UINT32 ch) { RE_UINT32 code; @@ -9834,124 +10418,135 @@ static RE_UINT8 re_line_break_stage_2[] = { 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 108, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 79, 79, 79, 79, 110, 111, 2, 2, 112, 113, 114, 115, 116, 117, - 118, 119, 120, 121, 72, 122, 123, 124, 2, 125, 72, 72, 72, 72, 72, 72, - 126, 72, 127, 128, 129, 72, 130, 72, 131, 72, 72, 72, 132, 72, 72, 72, - 133, 134, 135, 136, 72, 72, 72, 72, 72, 72, 72, 72, 72, 137, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 2, 2, 2, 2, 2, 2, 138, 72, 139, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 2, 2, 2, 2, 140, 141, 142, 2, 143, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 2, 2, 2, 2, 144, 72, 72, 72, 72, 72, 72, 72, 72, 72, 145, 146, - 147, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 2, 148, 149, 150, 151, 72, 152, 72, 153, 154, 155, 2, 2, 156, 2, 157, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 158, 159, 72, 72, - 160, 161, 162, 163, 164, 72, 165, 166, 167, 168, 169, 170, 171, 172, 173, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, 110, 
110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 79, 79, 79, 79, 111, 112, 2, 2, 113, 114, 115, 116, 117, 118, + 119, 120, 121, 122, 110, 123, 124, 125, 2, 126, 127, 110, 2, 2, 128, 110, + 129, 130, 131, 132, 133, 134, 135, 136, 137, 110, 110, 110, 138, 110, 110, 110, + 139, 140, 141, 142, 143, 144, 145, 110, 110, 146, 110, 147, 148, 149, 110, 110, + 110, 150, 110, 110, 110, 151, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, + 2, 2, 2, 2, 2, 2, 2, 152, 153, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, + 2, 2, 2, 2, 154, 155, 156, 2, 157, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, + 2, 2, 2, 2, 158, 159, 160, 161, 110, 110, 110, 110, 110, 110, 162, 163, + 164, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, 165, 166, 110, 110, 110, 110, 110, 110, + 2, 167, 168, 169, 170, 110, 171, 110, 172, 173, 174, 2, 2, 175, 2, 176, + 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, + 2, 177, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 178, 179, 110, 110, + 180, 181, 182, 183, 184, 110, 185, 186, 79, 187, 188, 189, 190, 191, 192, 193, + 194, 195, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 174, - 175, 72, 176, 177, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 196, + 197, 110, 198, 199, 110, 110, 110, 110, 110, 
110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, }; static RE_UINT16 re_line_break_stage_3[] = { 0, 1, 2, 3, 4, 5, 4, 6, 7, 1, 8, 9, 4, 10, 4, 10, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 11, 12, 4, 4, 1, 1, 1, 1, 13, 14, 15, 16, 17, 4, 18, 4, 4, 4, 4, 4, - 19, 4, 4, 4, 4, 4, 4, 4, 4, 4, 20, 21, 4, 22, 21, 4, - 23, 24, 1, 25, 26, 27, 28, 29, 30, 31, 4, 4, 32, 1, 33, 34, - 4, 4, 4, 4, 4, 35, 36, 37, 38, 39, 4, 1, 40, 4, 4, 4, - 4, 4, 41, 42, 37, 4, 32, 43, 4, 44, 45, 46, 4, 47, 48, 48, - 48, 48, 49, 48, 48, 48, 50, 51, 52, 4, 4, 53, 1, 54, 55, 56, - 57, 58, 59, 60, 61, 62, 63, 64, 65, 58, 59, 66, 67, 68, 69, 70, - 71, 18, 59, 72, 73, 74, 63, 75, 57, 58, 59, 72, 76, 77, 63, 20, - 78, 79, 80, 81, 82, 83, 69, 84, 85, 86, 59, 87, 88, 89, 63, 90, - 91, 86, 59, 92, 88, 93, 63, 94, 91, 86, 4, 95, 96, 97, 63, 98, - 99, 100, 4, 101, 102, 103, 48, 104, 105, 106, 106, 107, 108, 109, 48, 48, - 110, 111, 112, 113, 114, 115, 48, 48, 116, 117, 37, 118, 56, 4, 119, 120, - 121, 122, 1, 123, 124, 125, 48, 48, 106, 106, 106, 106, 126, 106, 106, 106, - 106, 127, 4, 4, 128, 4, 4, 4, 129, 129, 129, 129, 129, 129, 130, 130, + 19, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 20, 4, 21, 20, 4, + 22, 23, 1, 24, 25, 26, 27, 28, 29, 30, 4, 4, 31, 1, 32, 33, + 4, 4, 4, 4, 4, 34, 35, 36, 37, 38, 4, 1, 39, 4, 4, 4, + 4, 4, 40, 41, 36, 4, 31, 42, 4, 43, 44, 45, 4, 46, 47, 47, + 47, 47, 4, 48, 47, 47, 49, 1, 50, 4, 4, 51, 1, 52, 53, 4, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 55, 56, 63, 64, 65, 66, 67, + 68, 18, 56, 69, 70, 71, 60, 72, 73, 55, 56, 69, 74, 75, 60, 76, + 77, 78, 79, 80, 81, 82, 66, 83, 84, 85, 56, 86, 87, 88, 60, 89, + 90, 85, 56, 91, 87, 92, 60, 93, 90, 85, 4, 94, 95, 96, 60, 97, + 98, 99, 4, 100, 101, 102, 66, 103, 104, 105, 105, 106, 107, 108, 47, 47, + 109, 110, 111, 112, 113, 114, 47, 47, 115, 116, 36, 117, 118, 4, 119, 120, + 121, 122, 1, 123, 124, 125, 47, 47, 105, 105, 105, 105, 126, 105, 105, 105, + 105, 127, 4, 4, 128, 4, 4, 4, 
129, 129, 129, 129, 129, 129, 130, 130, 130, 130, 131, 132, 132, 132, 132, 132, 4, 4, 4, 4, 133, 134, 4, 4, 133, 4, 4, 135, 136, 137, 4, 4, 4, 136, 4, 4, 4, 138, 139, 119, 4, 140, 4, 4, 4, 4, 4, 141, 142, 4, 4, 4, 4, 4, 4, 4, - 142, 143, 4, 4, 4, 4, 144, 74, 145, 146, 4, 147, 4, 148, 145, 149, - 106, 106, 106, 106, 106, 150, 151, 140, 152, 151, 4, 4, 4, 4, 4, 20, - 4, 4, 153, 4, 4, 4, 4, 154, 4, 119, 155, 155, 156, 106, 157, 158, - 106, 106, 159, 106, 160, 161, 4, 4, 4, 162, 106, 106, 106, 163, 106, 164, - 151, 151, 157, 48, 48, 48, 48, 48, 165, 4, 4, 166, 167, 168, 169, 170, - 171, 4, 172, 37, 4, 4, 41, 173, 4, 4, 166, 174, 175, 37, 4, 176, - 48, 48, 48, 48, 20, 177, 178, 179, 4, 4, 4, 4, 1, 1, 180, 181, - 4, 182, 4, 4, 182, 183, 4, 184, 4, 4, 4, 185, 185, 186, 4, 187, - 188, 189, 190, 191, 192, 193, 194, 195, 196, 119, 197, 198, 199, 1, 1, 200, - 201, 202, 203, 4, 4, 204, 205, 206, 207, 206, 4, 4, 4, 208, 4, 4, - 209, 210, 211, 212, 213, 214, 215, 4, 216, 217, 218, 219, 4, 4, 4, 4, - 4, 220, 221, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 222, - 4, 4, 223, 48, 224, 48, 225, 225, 225, 225, 225, 225, 225, 225, 225, 226, - 225, 225, 225, 225, 205, 225, 225, 227, 225, 228, 229, 230, 231, 232, 233, 4, - 234, 235, 4, 236, 237, 4, 238, 239, 4, 240, 4, 241, 242, 243, 244, 245, - 246, 4, 4, 4, 4, 247, 248, 249, 225, 250, 4, 4, 251, 4, 252, 4, - 253, 254, 4, 4, 4, 255, 4, 256, 4, 4, 4, 4, 119, 257, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, 4, 4, 46, 4, 4, 46, 4, 4, - 4, 4, 4, 4, 4, 4, 258, 259, 4, 4, 128, 4, 4, 4, 260, 261, - 4, 223, 262, 262, 262, 262, 1, 1, 263, 264, 265, 266, 48, 48, 48, 48, - 267, 268, 267, 267, 267, 267, 267, 222, 267, 267, 267, 267, 267, 267, 267, 267, - 267, 267, 267, 267, 267, 269, 48, 270, 271, 272, 273, 274, 275, 267, 276, 267, - 277, 278, 279, 267, 276, 267, 277, 280, 281, 267, 282, 283, 267, 267, 267, 267, - 284, 267, 267, 285, 267, 267, 222, 286, 267, 284, 267, 267, 287, 267, 267, 267, - 267, 267, 267, 267, 267, 267, 267, 284, 267, 267, 267, 267, 4, 
4, 4, 4, - 267, 288, 267, 267, 267, 267, 267, 267, 289, 267, 267, 267, 290, 4, 4, 176, - 291, 4, 292, 48, 4, 4, 258, 293, 4, 294, 4, 4, 4, 4, 4, 295, - 46, 296, 224, 48, 48, 48, 48, 90, 297, 4, 298, 299, 4, 4, 4, 300, - 301, 4, 4, 166, 302, 151, 1, 303, 37, 4, 304, 4, 305, 306, 129, 307, - 52, 4, 4, 308, 309, 310, 48, 48, 4, 4, 311, 180, 312, 313, 106, 159, - 106, 106, 106, 106, 314, 315, 32, 316, 317, 318, 262, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 4, 4, 319, 151, 320, 321, 322, 323, 322, 324, 322, 320, - 321, 322, 323, 322, 324, 322, 320, 321, 322, 323, 322, 324, 322, 320, 321, 322, - 323, 322, 324, 322, 320, 321, 322, 323, 322, 324, 322, 320, 321, 322, 323, 322, - 324, 322, 320, 321, 322, 323, 322, 324, 322, 320, 321, 322, 323, 322, 324, 322, - 323, 322, 325, 130, 326, 132, 132, 327, 328, 328, 328, 328, 328, 328, 328, 328, - 223, 329, 330, 331, 332, 4, 4, 4, 4, 4, 4, 4, 333, 334, 4, 4, - 4, 4, 4, 335, 48, 4, 4, 4, 4, 336, 4, 4, 20, 48, 48, 337, - 1, 338, 180, 339, 340, 341, 342, 185, 4, 4, 4, 4, 4, 4, 4, 343, - 344, 345, 267, 346, 267, 347, 348, 349, 4, 350, 4, 46, 351, 352, 353, 354, - 355, 4, 137, 356, 184, 184, 48, 48, 4, 4, 4, 4, 4, 4, 4, 224, - 357, 4, 4, 358, 4, 4, 4, 4, 224, 359, 48, 48, 48, 4, 4, 360, - 4, 119, 4, 4, 4, 74, 48, 48, 4, 46, 296, 4, 224, 48, 48, 48, - 4, 361, 4, 4, 362, 363, 48, 48, 4, 184, 151, 48, 48, 48, 48, 48, - 364, 4, 4, 365, 4, 366, 48, 48, 4, 367, 4, 368, 48, 48, 48, 48, - 4, 4, 4, 369, 48, 48, 48, 48, 370, 371, 4, 372, 20, 373, 4, 4, - 4, 4, 4, 374, 4, 375, 4, 376, 4, 4, 4, 4, 377, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 4, 46, 171, 4, 4, 378, 379, 336, 380, 48, - 171, 4, 4, 381, 382, 4, 377, 151, 171, 4, 305, 383, 384, 48, 48, 48, - 171, 4, 4, 308, 385, 151, 48, 48, 4, 4, 32, 386, 151, 48, 48, 48, - 4, 4, 4, 4, 4, 4, 46, 48, 4, 4, 4, 4, 4, 4, 387, 384, - 4, 4, 4, 4, 4, 388, 4, 4, 389, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 390, 4, 4, 46, 48, 48, 48, 48, 48, - 4, 4, 4, 377, 48, 48, 48, 48, 4, 4, 4, 4, 141, 391, 1, 51, - 392, 
171, 48, 48, 48, 48, 48, 48, 393, 48, 48, 48, 48, 48, 48, 48, - 4, 4, 4, 4, 4, 4, 4, 154, 4, 4, 22, 4, 4, 4, 394, 1, - 395, 4, 396, 4, 4, 184, 48, 48, 4, 4, 4, 4, 397, 48, 48, 48, - 4, 4, 4, 4, 4, 223, 4, 333, 4, 4, 4, 4, 4, 185, 4, 4, - 4, 145, 398, 399, 400, 4, 4, 4, 401, 402, 4, 403, 404, 86, 4, 4, - 4, 4, 375, 4, 4, 4, 4, 4, 4, 4, 4, 4, 405, 406, 406, 406, - 400, 4, 407, 408, 409, 410, 411, 412, 413, 359, 414, 359, 48, 48, 48, 333, - 267, 267, 270, 267, 267, 267, 267, 267, 267, 222, 284, 415, 283, 283, 48, 48, - 416, 225, 417, 225, 225, 225, 418, 225, 225, 416, 48, 48, 48, 48, 419, 420, - 421, 267, 267, 285, 422, 393, 48, 48, 267, 267, 423, 424, 267, 267, 267, 289, - 267, 222, 267, 425, 426, 48, 267, 423, 267, 267, 267, 284, 427, 267, 267, 267, - 267, 267, 428, 429, 267, 267, 267, 430, 431, 432, 433, 434, 296, 267, 435, 48, - 48, 48, 48, 48, 48, 48, 48, 436, 267, 267, 267, 267, 437, 48, 48, 48, - 267, 267, 267, 267, 269, 48, 48, 48, 4, 4, 4, 4, 4, 4, 4, 296, - 267, 267, 267, 267, 267, 267, 267, 282, 438, 48, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 48, + 142, 143, 4, 4, 4, 4, 144, 145, 146, 147, 4, 148, 4, 149, 146, 150, + 105, 105, 105, 105, 105, 151, 152, 140, 153, 152, 4, 4, 4, 4, 4, 76, + 4, 4, 154, 4, 4, 4, 4, 155, 4, 45, 156, 156, 157, 105, 158, 159, + 105, 105, 160, 105, 161, 162, 4, 4, 4, 163, 105, 105, 105, 164, 105, 165, + 152, 152, 158, 166, 47, 47, 47, 47, 167, 4, 4, 168, 169, 170, 171, 172, + 173, 4, 174, 36, 4, 4, 40, 175, 4, 4, 168, 176, 177, 36, 4, 178, + 47, 47, 47, 47, 76, 179, 180, 181, 4, 4, 4, 4, 1, 1, 1, 182, + 4, 183, 4, 4, 183, 184, 4, 185, 4, 4, 4, 186, 186, 187, 4, 188, + 189, 190, 191, 192, 193, 194, 195, 196, 197, 119, 198, 199, 200, 1, 1, 201, + 202, 203, 204, 4, 4, 205, 206, 207, 208, 207, 4, 4, 4, 209, 4, 4, + 210, 211, 212, 213, 214, 215, 216, 4, 217, 218, 219, 220, 4, 4, 4, 4, + 221, 222, 223, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 224, + 4, 4, 225, 47, 226, 47, 227, 227, 227, 227, 227, 227, 227, 227, 227, 
228, + 227, 227, 227, 227, 206, 227, 227, 229, 227, 230, 231, 232, 233, 234, 235, 4, + 236, 237, 4, 238, 239, 4, 240, 241, 4, 242, 4, 243, 244, 245, 246, 247, + 248, 4, 4, 4, 4, 249, 250, 251, 227, 252, 4, 4, 253, 4, 254, 4, + 255, 256, 4, 4, 4, 221, 4, 257, 4, 4, 4, 4, 4, 258, 4, 259, + 4, 260, 4, 261, 56, 262, 47, 47, 4, 4, 45, 4, 4, 45, 4, 4, + 4, 4, 4, 4, 4, 4, 263, 264, 4, 4, 128, 4, 4, 4, 265, 266, + 4, 225, 267, 267, 267, 267, 1, 1, 268, 269, 270, 271, 272, 47, 47, 47, + 273, 274, 273, 273, 273, 273, 273, 275, 273, 273, 273, 273, 273, 273, 273, 273, + 273, 273, 273, 273, 273, 276, 47, 277, 278, 279, 280, 281, 282, 273, 283, 273, + 284, 285, 286, 273, 283, 273, 284, 287, 288, 273, 289, 290, 273, 273, 273, 273, + 291, 273, 273, 292, 273, 273, 275, 293, 273, 291, 273, 273, 294, 273, 273, 273, + 273, 273, 273, 273, 273, 273, 273, 291, 273, 273, 273, 273, 4, 4, 4, 4, + 273, 295, 273, 273, 273, 273, 273, 273, 296, 273, 273, 273, 297, 4, 4, 178, + 298, 4, 299, 47, 4, 4, 263, 300, 4, 301, 4, 4, 4, 4, 4, 302, + 45, 4, 185, 262, 47, 47, 47, 303, 304, 4, 305, 306, 4, 4, 4, 307, + 308, 4, 4, 168, 309, 152, 1, 310, 36, 4, 311, 4, 312, 313, 129, 314, + 50, 4, 4, 315, 316, 317, 105, 318, 4, 4, 319, 320, 321, 322, 105, 105, + 105, 105, 105, 105, 323, 324, 31, 325, 326, 327, 267, 4, 4, 4, 328, 47, + 47, 47, 47, 47, 4, 4, 329, 152, 330, 331, 332, 333, 332, 334, 332, 330, + 331, 332, 333, 332, 334, 332, 330, 331, 332, 333, 332, 334, 332, 330, 331, 332, + 333, 332, 334, 332, 330, 331, 332, 333, 332, 334, 332, 330, 331, 332, 333, 332, + 334, 332, 330, 331, 332, 333, 332, 334, 332, 330, 331, 332, 333, 332, 334, 332, + 333, 332, 335, 130, 336, 132, 132, 337, 338, 338, 338, 338, 338, 338, 338, 338, + 47, 47, 47, 47, 47, 47, 47, 47, 225, 339, 340, 341, 342, 4, 4, 4, + 4, 4, 4, 4, 262, 343, 4, 4, 4, 4, 4, 344, 47, 4, 4, 4, + 4, 345, 4, 4, 76, 47, 47, 346, 1, 347, 348, 349, 350, 351, 352, 186, + 4, 4, 4, 4, 4, 4, 4, 353, 354, 355, 273, 356, 273, 357, 358, 359, + 4, 360, 4, 45, 361, 
362, 363, 364, 365, 4, 137, 366, 185, 185, 47, 47, + 4, 4, 4, 4, 4, 4, 4, 226, 367, 4, 4, 368, 4, 4, 4, 4, + 119, 369, 71, 47, 47, 4, 4, 370, 4, 119, 4, 4, 4, 71, 33, 369, + 4, 4, 371, 4, 226, 4, 4, 372, 4, 373, 4, 4, 374, 375, 47, 47, + 4, 185, 152, 47, 47, 47, 47, 47, 4, 4, 76, 4, 4, 4, 376, 47, + 4, 4, 4, 225, 4, 155, 76, 47, 377, 4, 4, 378, 4, 379, 4, 4, + 4, 45, 303, 47, 47, 47, 47, 47, 4, 380, 4, 381, 47, 47, 47, 47, + 4, 4, 4, 382, 47, 47, 47, 47, 383, 384, 4, 385, 76, 386, 4, 4, + 4, 4, 47, 47, 4, 4, 387, 388, 4, 4, 4, 389, 4, 260, 4, 390, + 4, 391, 392, 47, 47, 47, 47, 47, 4, 4, 4, 4, 145, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 4, 45, 173, 4, 4, 393, 394, 345, 395, 396, + 173, 4, 4, 397, 398, 4, 145, 152, 173, 4, 312, 399, 400, 4, 4, 401, + 173, 4, 4, 315, 402, 403, 20, 141, 4, 18, 404, 405, 47, 47, 47, 47, + 47, 47, 47, 4, 4, 263, 406, 152, 73, 55, 56, 69, 74, 407, 408, 409, + 4, 4, 4, 1, 410, 152, 47, 47, 4, 4, 263, 411, 412, 47, 47, 47, + 4, 4, 4, 1, 413, 152, 47, 47, 4, 4, 31, 414, 152, 47, 47, 47, + 47, 47, 4, 4, 4, 4, 36, 415, 47, 47, 47, 47, 4, 4, 4, 145, + 4, 145, 47, 47, 47, 47, 47, 47, 4, 4, 4, 4, 4, 4, 45, 416, + 4, 4, 4, 4, 4, 417, 4, 4, 418, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 419, 4, 4, 45, 47, 47, 47, 47, 47, + 4, 4, 4, 145, 4, 45, 420, 47, 47, 47, 47, 47, 47, 4, 185, 421, + 4, 4, 4, 422, 423, 424, 18, 425, 4, 47, 47, 47, 47, 47, 47, 47, + 4, 4, 4, 4, 141, 426, 1, 166, 396, 173, 47, 47, 47, 47, 47, 47, + 427, 47, 47, 47, 47, 47, 47, 47, 4, 4, 4, 4, 4, 4, 226, 119, + 145, 428, 429, 47, 47, 47, 47, 47, 4, 4, 4, 4, 4, 4, 4, 155, + 4, 4, 21, 4, 4, 4, 430, 1, 431, 4, 432, 4, 4, 185, 47, 47, + 4, 4, 4, 4, 433, 47, 47, 47, 4, 4, 4, 4, 4, 225, 4, 262, + 4, 4, 4, 4, 4, 186, 4, 4, 4, 146, 434, 435, 436, 4, 4, 4, + 437, 438, 4, 439, 440, 85, 4, 4, 4, 4, 260, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 441, 442, 442, 442, 4, 4, 4, 4, 443, 320, 47, 47, + 436, 4, 444, 445, 446, 447, 448, 449, 450, 369, 451, 369, 47, 47, 47, 262, + 273, 273, 277, 273, 273, 273, 
273, 273, 273, 275, 291, 290, 290, 290, 273, 276, + 452, 227, 453, 227, 227, 227, 454, 227, 227, 455, 47, 47, 47, 47, 456, 457, + 458, 273, 273, 292, 459, 427, 47, 47, 273, 273, 296, 273, 273, 273, 273, 289, + 273, 460, 273, 461, 291, 462, 273, 463, 273, 273, 464, 465, 273, 273, 273, 291, + 466, 467, 468, 469, 470, 273, 273, 274, 273, 273, 471, 273, 273, 472, 273, 473, + 273, 273, 273, 273, 474, 4, 4, 475, 273, 273, 273, 273, 273, 47, 296, 275, + 4, 4, 4, 4, 4, 4, 4, 371, 4, 4, 4, 4, 4, 141, 47, 47, + 369, 4, 4, 4, 76, 140, 4, 4, 76, 4, 185, 47, 47, 47, 47, 47, + 273, 273, 273, 273, 273, 273, 273, 289, 476, 47, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 47, }; static RE_UINT8 re_line_break_stage_4[] = { @@ -9965,108 +10560,109 @@ static RE_UINT8 re_line_break_stage_4[] = { 0, 0, 0, 0, 0, 0, 34, 34, 34, 35, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 36, 14, 14, 37, 36, 36, 14, 14, 14, 38, 38, 14, 14, 39, 14, 14, 14, 14, 14, 14, 14, 19, 0, 0, 0, 14, 14, 14, - 14, 14, 14, 14, 36, 36, 36, 36, 39, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 38, 39, 14, 14, 14, 14, 14, 14, 14, 40, 41, 36, 42, - 43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, - 19, 45, 0, 46, 36, 36, 36, 36, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 48, 36, 36, 47, 49, 38, 36, 36, 36, 36, 36, - 14, 14, 38, 14, 50, 51, 13, 14, 0, 0, 0, 0, 0, 52, 53, 54, - 14, 14, 14, 14, 14, 19, 0, 0, 12, 12, 12, 12, 12, 55, 56, 14, - 45, 14, 14, 14, 14, 14, 14, 14, 14, 14, 57, 0, 0, 0, 45, 19, - 0, 0, 45, 19, 45, 0, 0, 14, 12, 12, 12, 12, 12, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 39, 19, 14, 14, 14, 14, 14, 14, 14, - 0, 0, 0, 0, 0, 53, 39, 14, 14, 14, 14, 0, 0, 0, 0, 0, - 45, 36, 36, 36, 36, 36, 36, 36, 0, 0, 14, 14, 58, 38, 36, 36, - 14, 14, 14, 0, 0, 19, 0, 0, 0, 0, 19, 0, 19, 0, 0, 36, - 14, 14, 14, 14, 14, 14, 14, 38, 14, 14, 14, 14, 19, 0, 36, 38, - 36, 36, 36, 36, 36, 36, 36, 36, 38, 14, 14, 14, 14, 14, 38, 36, - 36, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, - 0, 0, 14, 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, 0, 45, 0, - 19, 0, 0, 0, 14, 14, 14, 14, 14, 0, 59, 12, 12, 12, 12, 12, - 14, 14, 14, 14, 39, 14, 14, 14, 43, 0, 39, 14, 14, 14, 38, 39, - 38, 39, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 38, 14, 14, 14, - 38, 38, 36, 14, 14, 36, 45, 0, 0, 0, 53, 43, 53, 43, 0, 38, - 36, 36, 36, 43, 36, 36, 14, 39, 14, 0, 36, 12, 12, 12, 12, 12, - 14, 51, 14, 14, 50, 9, 36, 36, 43, 0, 39, 14, 14, 38, 36, 39, - 38, 14, 39, 38, 14, 36, 53, 0, 0, 53, 36, 43, 53, 43, 0, 36, - 43, 36, 36, 36, 39, 14, 38, 38, 36, 36, 36, 12, 12, 12, 12, 12, - 0, 14, 19, 36, 36, 36, 36, 36, 43, 0, 39, 14, 14, 14, 14, 39, - 38, 14, 39, 14, 14, 36, 45, 0, 0, 0, 0, 43, 0, 43, 0, 36, - 38, 36, 36, 36, 36, 36, 36, 36, 9, 36, 36, 36, 36, 36, 36, 36, - 0, 0, 53, 43, 53, 43, 0, 36, 36, 36, 36, 0, 36, 36, 14, 39, - 36, 45, 39, 14, 14, 38, 36, 14, 38, 14, 14, 36, 39, 38, 38, 14, - 36, 39, 38, 36, 14, 38, 36, 14, 14, 14, 14, 14, 14, 36, 36, 0, - 0, 53, 36, 0, 53, 0, 0, 36, 38, 36, 36, 43, 36, 36, 36, 36, - 14, 14, 14, 14, 9, 38, 36, 36, 43, 0, 39, 14, 14, 14, 38, 14, - 38, 14, 14, 14, 14, 14, 14, 14, 14, 14, 39, 14, 14, 36, 39, 0, - 0, 0, 53, 0, 53, 0, 0, 36, 36, 36, 43, 53, 14, 36, 36, 36, - 36, 36, 36, 36, 14, 14, 14, 14, 36, 0, 39, 14, 14, 14, 38, 14, - 14, 14, 39, 14, 14, 36, 45, 0, 36, 36, 43, 53, 36, 36, 36, 38, - 39, 38, 36, 36, 36, 36, 36, 36, 14, 14, 14, 14, 14, 38, 39, 0, - 0, 0, 53, 0, 53, 0, 0, 38, 36, 36, 36, 43, 36, 36, 36, 36, - 14, 14, 14, 36, 60, 14, 14, 14, 36, 0, 39, 14, 14, 14, 14, 14, - 14, 14, 14, 38, 36, 14, 14, 14, 14, 39, 14, 14, 14, 14, 39, 36, - 14, 14, 14, 38, 36, 53, 36, 43, 0, 0, 53, 53, 0, 0, 0, 0, - 36, 0, 38, 36, 36, 36, 36, 36, 61, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 36, 42, - 62, 62, 62, 62, 62, 62, 62, 64, 12, 12, 12, 12, 12, 59, 36, 36, - 61, 63, 63, 61, 63, 63, 61, 36, 36, 36, 62, 62, 61, 62, 62, 62, - 61, 62, 61, 61, 36, 62, 61, 62, 62, 62, 62, 62, 62, 61, 62, 36, - 62, 62, 63, 63, 62, 62, 62, 36, 12, 
12, 12, 12, 12, 36, 62, 62, - 32, 65, 29, 65, 66, 67, 68, 54, 54, 69, 57, 14, 0, 14, 14, 14, - 14, 14, 44, 19, 19, 70, 70, 0, 14, 14, 14, 14, 14, 14, 38, 36, - 43, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 14, 14, 19, 0, - 0, 0, 0, 0, 43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 59, - 14, 14, 14, 45, 14, 14, 38, 14, 65, 71, 14, 14, 72, 73, 36, 36, - 12, 12, 12, 12, 12, 59, 14, 14, 12, 12, 12, 12, 12, 62, 62, 62, + 39, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 38, 39, 14, 14, 14, + 14, 14, 14, 14, 40, 41, 39, 9, 42, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 43, 19, 44, 0, 45, 36, 36, 36, 36, + 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 36, 36, + 46, 48, 38, 36, 36, 36, 36, 36, 14, 14, 14, 14, 49, 50, 13, 14, + 0, 0, 0, 0, 0, 51, 52, 53, 14, 14, 14, 14, 14, 19, 0, 0, + 12, 12, 12, 12, 12, 54, 55, 14, 44, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 56, 0, 0, 0, 44, 19, 0, 0, 44, 19, 44, 0, 0, 14, + 12, 12, 12, 12, 12, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 39, + 19, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 52, 39, 14, + 14, 14, 14, 0, 0, 0, 0, 0, 44, 36, 36, 36, 36, 36, 36, 36, + 0, 0, 14, 14, 57, 38, 36, 36, 14, 14, 14, 0, 0, 19, 0, 0, + 0, 0, 19, 0, 19, 0, 0, 36, 14, 14, 14, 14, 14, 14, 14, 38, + 14, 14, 14, 14, 19, 0, 36, 38, 36, 36, 36, 36, 36, 36, 36, 36, + 14, 38, 36, 36, 36, 36, 36, 36, 36, 36, 0, 0, 0, 0, 0, 0, + 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 44, 0, + 19, 0, 0, 0, 14, 14, 14, 14, 14, 0, 58, 12, 12, 12, 12, 12, + 19, 0, 39, 14, 14, 14, 38, 39, 38, 39, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 38, 14, 14, 14, 38, 38, 36, 14, 14, 36, 44, 0, + 0, 0, 52, 42, 52, 42, 0, 38, 36, 36, 36, 42, 36, 36, 14, 39, + 14, 0, 36, 12, 12, 12, 12, 12, 14, 50, 14, 14, 49, 9, 36, 36, + 42, 0, 39, 14, 14, 38, 36, 39, 38, 14, 39, 38, 14, 36, 52, 0, + 0, 52, 36, 42, 52, 42, 0, 36, 42, 36, 36, 36, 39, 14, 38, 38, + 36, 36, 36, 12, 12, 12, 12, 12, 0, 14, 19, 36, 36, 36, 36, 36, + 42, 0, 39, 14, 14, 14, 14, 39, 38, 14, 39, 14, 14, 36, 44, 0, + 0, 0, 0, 42, 0, 42, 0, 36, 38, 
36, 36, 36, 36, 36, 36, 36, + 9, 36, 36, 36, 36, 36, 36, 36, 42, 0, 39, 14, 14, 14, 38, 39, + 0, 0, 52, 42, 52, 42, 0, 36, 36, 36, 36, 0, 36, 36, 14, 39, + 14, 14, 14, 14, 36, 36, 36, 36, 36, 44, 39, 14, 14, 38, 36, 14, + 38, 14, 14, 36, 39, 38, 38, 14, 36, 39, 38, 36, 14, 38, 36, 14, + 14, 14, 14, 14, 14, 36, 36, 0, 0, 52, 36, 0, 52, 0, 0, 36, + 38, 36, 36, 42, 36, 36, 36, 36, 14, 14, 14, 14, 9, 38, 36, 36, + 0, 0, 39, 14, 14, 14, 38, 14, 38, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 36, 39, 0, 0, 0, 52, 0, 52, 0, 0, 36, + 36, 36, 42, 52, 14, 36, 36, 36, 36, 36, 36, 36, 14, 14, 14, 14, + 42, 0, 39, 14, 14, 14, 38, 14, 14, 14, 39, 14, 14, 36, 44, 0, + 36, 36, 42, 52, 36, 36, 36, 38, 39, 38, 36, 36, 36, 36, 36, 36, + 14, 14, 14, 14, 14, 38, 39, 0, 0, 0, 52, 0, 52, 0, 0, 38, + 36, 36, 36, 42, 36, 36, 36, 36, 14, 14, 14, 36, 59, 14, 14, 14, + 36, 0, 39, 14, 14, 14, 14, 14, 14, 14, 14, 38, 36, 14, 14, 14, + 14, 39, 14, 14, 14, 14, 39, 36, 14, 14, 14, 38, 36, 52, 36, 42, + 0, 0, 52, 52, 0, 0, 0, 0, 36, 0, 38, 36, 36, 36, 36, 36, + 60, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, + 61, 61, 61, 61, 61, 62, 36, 63, 61, 61, 61, 61, 61, 61, 61, 64, + 12, 12, 12, 12, 12, 58, 36, 36, 60, 62, 62, 60, 62, 62, 60, 36, + 36, 36, 61, 61, 60, 61, 61, 61, 60, 61, 60, 60, 36, 61, 60, 61, + 61, 61, 61, 61, 61, 60, 61, 36, 61, 61, 62, 62, 61, 61, 61, 36, + 12, 12, 12, 12, 12, 36, 61, 61, 32, 65, 29, 65, 66, 67, 68, 53, + 53, 69, 56, 14, 0, 14, 14, 14, 14, 14, 43, 19, 19, 70, 70, 0, + 14, 14, 14, 14, 39, 14, 14, 14, 14, 14, 14, 14, 14, 14, 38, 36, + 42, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 14, 14, 19, 0, + 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 58, + 14, 14, 14, 44, 14, 14, 38, 14, 65, 71, 14, 14, 72, 73, 36, 36, + 12, 12, 12, 12, 12, 58, 14, 14, 12, 12, 12, 12, 12, 61, 61, 61, 14, 14, 14, 39, 36, 36, 39, 36, 74, 74, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 14, 14, 14, 14, 38, 14, 
14, 36, 14, 14, 14, 38, 38, 14, 14, 36, 38, 14, 14, 36, 14, 14, 14, 38, 38, 14, 14, 36, 14, 14, 14, 14, 14, 14, 14, 38, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 38, 43, 0, 27, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 38, 42, 0, 27, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 36, 36, 36, 14, 14, 38, 36, 36, 36, 36, 36, 77, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 16, 78, 36, - 14, 14, 14, 14, 14, 27, 59, 14, 14, 14, 14, 14, 14, 14, 38, 14, - 14, 0, 53, 36, 36, 36, 36, 36, 14, 0, 1, 41, 36, 36, 36, 36, - 14, 0, 36, 36, 36, 36, 36, 36, 38, 0, 36, 36, 36, 36, 36, 36, - 62, 62, 59, 79, 77, 80, 62, 36, 12, 12, 12, 12, 12, 36, 36, 36, - 14, 54, 59, 29, 54, 19, 0, 73, 14, 14, 14, 14, 19, 38, 36, 36, - 14, 14, 14, 36, 36, 36, 36, 36, 0, 0, 0, 0, 0, 0, 36, 36, - 38, 36, 54, 12, 12, 12, 12, 12, 62, 62, 62, 62, 62, 62, 62, 36, - 62, 62, 63, 36, 36, 36, 36, 36, 62, 62, 62, 62, 62, 62, 36, 36, - 62, 62, 62, 62, 62, 36, 36, 36, 12, 12, 12, 12, 12, 63, 36, 62, - 14, 14, 14, 19, 0, 0, 36, 14, 62, 62, 62, 62, 62, 62, 62, 63, - 62, 62, 62, 62, 62, 62, 63, 43, 0, 0, 45, 14, 14, 14, 14, 14, - 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 45, 14, 14, 14, 36, 36, - 12, 12, 12, 12, 12, 59, 27, 59, 77, 14, 14, 14, 14, 19, 0, 0, - 0, 0, 14, 14, 14, 14, 38, 36, 0, 45, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 27, 58, 14, 14, 14, 14, 14, 38, 36, 36, 36, + 14, 14, 14, 14, 14, 14, 38, 14, 14, 0, 52, 36, 36, 36, 36, 36, + 14, 0, 1, 41, 36, 36, 36, 36, 14, 0, 36, 36, 36, 36, 36, 36, + 38, 0, 36, 36, 36, 36, 36, 36, 61, 61, 58, 79, 77, 80, 61, 36, + 12, 12, 12, 12, 12, 36, 36, 36, 14, 53, 58, 29, 53, 19, 0, 73, + 14, 14, 14, 14, 19, 38, 36, 36, 14, 14, 14, 36, 36, 36, 36, 36, + 0, 0, 0, 0, 0, 0, 36, 36, 38, 36, 53, 12, 12, 12, 12, 12, + 61, 61, 61, 61, 61, 61, 61, 36, 61, 61, 62, 36, 36, 36, 36, 36, + 61, 61, 61, 61, 61, 61, 36, 36, 61, 61, 61, 61, 61, 36, 36, 36, + 12, 12, 12, 12, 12, 62, 36, 61, 14, 14, 14, 19, 0, 0, 36, 14, + 61, 61, 61, 61, 61, 61, 61, 62, 61, 61, 61, 61, 61, 61, 62, 
42, + 0, 0, 0, 0, 0, 0, 0, 52, 0, 0, 44, 14, 14, 14, 14, 14, + 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 44, 14, 14, 14, 36, 36, + 12, 12, 12, 12, 12, 58, 27, 58, 77, 14, 14, 14, 14, 19, 0, 0, + 0, 0, 14, 14, 14, 14, 38, 36, 0, 44, 14, 14, 14, 14, 14, 14, 19, 0, 0, 0, 0, 0, 0, 14, 0, 0, 36, 36, 36, 36, 14, 14, - 0, 0, 0, 0, 36, 81, 59, 59, 12, 12, 12, 12, 12, 36, 39, 14, - 14, 14, 14, 14, 14, 14, 14, 59, 0, 45, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 45, 14, 19, 14, 14, 0, 45, 38, 36, 36, 36, 36, - 0, 0, 0, 53, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 0, - 14, 14, 14, 36, 14, 14, 14, 36, 14, 14, 14, 14, 39, 39, 39, 39, - 14, 14, 14, 14, 14, 14, 14, 36, 14, 14, 38, 14, 14, 14, 14, 14, - 14, 14, 36, 14, 14, 14, 39, 14, 36, 14, 38, 14, 14, 14, 32, 38, - 59, 59, 59, 82, 59, 83, 0, 0, 82, 59, 84, 25, 85, 86, 85, 86, - 28, 14, 87, 88, 89, 0, 0, 33, 51, 51, 51, 51, 7, 90, 91, 14, - 14, 14, 92, 93, 91, 14, 14, 14, 14, 14, 14, 77, 59, 59, 27, 59, - 94, 14, 38, 0, 0, 0, 0, 0, 14, 36, 25, 14, 14, 14, 16, 95, - 24, 28, 25, 14, 14, 14, 16, 78, 23, 23, 23, 6, 23, 23, 23, 23, - 23, 23, 23, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 53, 36, 36, 36, 36, 36, 36, 36, 14, 50, 24, 14, 50, 14, 14, 14, - 14, 24, 14, 96, 14, 14, 14, 14, 24, 25, 14, 14, 14, 24, 14, 14, - 14, 14, 28, 14, 14, 24, 14, 25, 28, 28, 28, 28, 28, 28, 14, 14, - 28, 28, 28, 28, 28, 14, 14, 14, 14, 14, 14, 14, 24, 36, 36, 36, - 14, 25, 25, 14, 14, 14, 14, 14, 25, 28, 14, 24, 25, 24, 14, 24, - 24, 23, 24, 14, 14, 25, 24, 28, 25, 24, 24, 24, 28, 28, 25, 25, - 14, 14, 28, 28, 14, 14, 28, 14, 14, 14, 14, 14, 25, 14, 25, 14, - 14, 25, 14, 14, 14, 14, 14, 14, 28, 14, 28, 28, 14, 28, 14, 28, - 14, 28, 14, 28, 14, 14, 14, 14, 14, 14, 24, 14, 24, 14, 14, 14, - 14, 14, 24, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 24, + 0, 0, 0, 0, 36, 81, 58, 58, 12, 12, 12, 12, 12, 36, 39, 14, + 14, 14, 14, 14, 14, 14, 14, 58, 0, 44, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 44, 14, 19, 14, 14, 0, 44, 38, 0, 36, 36, 36, + 0, 0, 0, 36, 36, 36, 0, 0, 
14, 14, 14, 36, 14, 14, 14, 36, + 14, 14, 14, 14, 39, 39, 39, 39, 14, 14, 14, 14, 14, 14, 14, 36, + 14, 14, 38, 14, 14, 14, 14, 14, 14, 14, 36, 14, 14, 14, 39, 14, + 36, 14, 38, 14, 14, 14, 32, 38, 58, 58, 58, 82, 58, 83, 0, 0, + 82, 58, 84, 25, 85, 86, 85, 86, 28, 14, 87, 88, 89, 0, 0, 33, + 50, 50, 50, 50, 7, 90, 91, 14, 14, 14, 92, 93, 91, 14, 14, 14, + 14, 14, 14, 77, 58, 58, 27, 58, 94, 14, 38, 0, 0, 0, 0, 0, + 14, 36, 25, 14, 14, 14, 16, 95, 24, 28, 25, 14, 14, 14, 16, 78, + 23, 23, 23, 6, 23, 23, 23, 23, 23, 23, 23, 22, 23, 6, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 52, 36, 36, 36, 36, 36, 36, 36, + 14, 49, 24, 14, 49, 14, 14, 14, 14, 24, 14, 96, 14, 14, 14, 14, + 24, 25, 14, 14, 14, 24, 14, 14, 14, 14, 28, 14, 14, 24, 14, 25, + 28, 28, 28, 28, 28, 28, 14, 14, 28, 28, 28, 28, 28, 14, 14, 14, + 14, 14, 14, 14, 24, 36, 36, 36, 14, 25, 25, 14, 14, 14, 14, 14, + 25, 28, 14, 24, 25, 24, 14, 24, 24, 23, 24, 14, 14, 25, 24, 28, + 25, 24, 24, 24, 28, 28, 25, 25, 14, 14, 28, 28, 14, 14, 28, 14, + 14, 14, 14, 14, 25, 14, 25, 14, 14, 25, 14, 14, 14, 14, 14, 14, + 28, 14, 28, 28, 14, 28, 14, 28, 14, 28, 14, 28, 14, 14, 14, 14, + 14, 14, 24, 14, 24, 14, 14, 14, 14, 14, 24, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 24, 14, 14, 14, 14, 70, 70, 14, 14, 14, 25, 14, 14, 14, 97, 14, 14, 14, 14, 14, 14, 16, 98, 14, 14, - 97, 97, 36, 36, 36, 36, 36, 36, 14, 14, 14, 38, 36, 36, 36, 36, + 97, 97, 14, 14, 14, 38, 36, 36, 14, 14, 14, 38, 36, 36, 36, 36, 14, 14, 14, 14, 14, 38, 36, 36, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 25, 28, 28, 25, 14, 14, 14, 14, 14, 14, 28, 28, 14, 14, 14, 14, 14, 28, 24, 28, 28, 28, 14, 14, 14, @@ -10078,103 +10674,121 @@ static RE_UINT8 re_line_break_stage_4[] = { 14, 14, 14, 14, 14, 14, 14, 28, 14, 14, 14, 14, 14, 14, 100, 97, 97, 97, 97, 97, 101, 28, 102, 100, 97, 102, 101, 28, 97, 28, 101, 102, 97, 24, 14, 14, 28, 101, 28, 28, 102, 97, 97, 102, 97, 101, 102, 97, - 103, 97, 99, 14, 97, 97, 97, 14, 14, 14, 14, 24, 14, 7, 
85, 5, - 14, 54, 14, 14, 70, 70, 70, 70, 70, 70, 70, 28, 28, 28, 28, 28, + 97, 97, 99, 14, 97, 97, 97, 14, 14, 14, 14, 24, 14, 7, 85, 85, + 5, 53, 14, 14, 70, 70, 70, 70, 70, 70, 70, 28, 28, 28, 28, 28, 28, 28, 14, 14, 14, 14, 14, 14, 14, 14, 16, 98, 14, 14, 14, 14, - 14, 14, 14, 70, 70, 70, 70, 70, 14, 16, 104, 104, 104, 104, 104, 104, - 104, 104, 104, 104, 98, 14, 14, 14, 14, 14, 14, 14, 70, 70, 14, 14, - 14, 14, 14, 14, 14, 14, 70, 14, 14, 14, 24, 28, 28, 36, 36, 36, - 14, 14, 14, 14, 14, 14, 14, 19, 0, 14, 36, 36, 105, 59, 77, 106, - 14, 14, 14, 14, 36, 36, 36, 39, 41, 36, 36, 36, 36, 36, 36, 43, - 14, 14, 14, 38, 14, 14, 14, 38, 85, 85, 85, 85, 85, 85, 85, 59, - 59, 59, 59, 27, 107, 14, 85, 14, 85, 70, 70, 70, 70, 59, 59, 57, - 59, 27, 77, 14, 14, 108, 36, 36, 97, 97, 97, 97, 97, 97, 97, 97, - 97, 97, 97, 97, 97, 103, 97, 97, 97, 97, 97, 36, 36, 36, 36, 36, - 97, 97, 97, 97, 97, 97, 36, 36, 18, 109, 110, 97, 70, 70, 70, 70, - 70, 97, 70, 70, 70, 70, 111, 112, 97, 97, 97, 97, 97, 0, 0, 0, - 97, 97, 113, 97, 97, 110, 114, 97, 115, 116, 116, 116, 116, 97, 97, 97, - 97, 116, 97, 97, 97, 97, 97, 97, 97, 116, 116, 116, 97, 97, 97, 117, - 97, 97, 116, 118, 43, 119, 91, 114, 120, 116, 116, 116, 116, 97, 97, 97, - 97, 97, 116, 117, 97, 110, 121, 114, 36, 36, 103, 97, 97, 97, 97, 97, - 97, 97, 97, 97, 97, 97, 97, 36, 103, 97, 97, 97, 97, 97, 97, 97, - 97, 97, 97, 97, 97, 97, 97, 122, 97, 97, 97, 97, 97, 122, 36, 36, - 123, 123, 123, 123, 123, 123, 123, 123, 97, 97, 97, 97, 28, 28, 28, 28, - 97, 97, 110, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 122, 36, - 97, 97, 97, 122, 36, 36, 36, 36, 14, 14, 14, 14, 14, 14, 27, 106, - 12, 12, 12, 12, 12, 14, 36, 36, 0, 45, 0, 0, 0, 0, 0, 14, - 14, 14, 14, 14, 36, 36, 36, 43, 0, 27, 59, 59, 36, 36, 36, 36, - 14, 14, 36, 36, 36, 36, 36, 36, 14, 45, 14, 45, 14, 19, 14, 14, - 14, 19, 0, 0, 14, 14, 36, 36, 14, 14, 14, 14, 124, 36, 36, 36, - 14, 14, 65, 54, 36, 36, 36, 36, 0, 14, 14, 14, 14, 14, 14, 14, - 0, 0, 53, 36, 36, 36, 36, 59, 
0, 14, 14, 14, 14, 14, 36, 36, - 14, 14, 14, 0, 0, 0, 0, 59, 14, 14, 14, 19, 0, 0, 0, 0, - 0, 0, 36, 36, 36, 36, 36, 39, 74, 74, 74, 74, 74, 74, 125, 36, - 14, 19, 0, 0, 0, 0, 0, 0, 45, 14, 14, 27, 59, 14, 14, 39, - 12, 12, 12, 12, 12, 36, 36, 14, 14, 14, 14, 14, 19, 0, 0, 0, - 14, 19, 14, 14, 14, 14, 0, 36, 12, 12, 12, 12, 12, 36, 27, 59, - 62, 63, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 61, 62, 62, - 59, 14, 19, 53, 36, 36, 36, 36, 39, 14, 14, 38, 39, 14, 14, 38, - 39, 14, 14, 38, 36, 36, 36, 36, 14, 19, 0, 0, 0, 1, 0, 36, - 126, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 126, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 126, 127, 127, 127, - 127, 127, 126, 127, 127, 127, 127, 127, 127, 127, 36, 36, 36, 36, 36, 36, - 75, 75, 75, 128, 36, 129, 76, 76, 76, 76, 76, 76, 76, 76, 36, 36, - 130, 130, 130, 130, 130, 130, 130, 130, 36, 39, 14, 14, 36, 36, 131, 132, - 47, 47, 47, 47, 49, 47, 47, 47, 47, 47, 47, 48, 47, 47, 48, 48, - 47, 131, 48, 47, 47, 47, 47, 47, 14, 36, 36, 36, 36, 36, 36, 36, - 36, 39, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 70, - 36, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 124, 36, - 133, 134, 58, 135, 136, 36, 36, 36, 97, 97, 137, 104, 104, 104, 104, 104, - 104, 104, 109, 137, 109, 97, 97, 97, 109, 78, 91, 54, 137, 104, 104, 109, - 97, 97, 97, 122, 138, 139, 36, 36, 14, 14, 14, 14, 14, 14, 38, 140, - 105, 97, 6, 97, 70, 97, 109, 109, 97, 97, 97, 97, 97, 91, 97, 141, - 97, 97, 97, 97, 97, 137, 142, 97, 97, 97, 97, 97, 97, 137, 142, 137, - 112, 70, 93, 143, 123, 123, 123, 123, 144, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 70, 70, 70, 70, 70, 14, 16, 103, 103, 103, 103, 103, 103, + 103, 103, 103, 103, 98, 14, 14, 14, 14, 14, 14, 14, 14, 14, 70, 14, + 14, 14, 24, 28, 28, 14, 14, 14, 14, 14, 36, 14, 14, 14, 14, 14, + 14, 14, 14, 36, 14, 14, 14, 14, 14, 14, 14, 14, 14, 36, 39, 14, + 14, 36, 36, 36, 36, 36, 36, 36, 14, 14, 14, 14, 14, 14, 14, 19, + 0, 14, 36, 36, 104, 58, 77, 105, 14, 
14, 14, 14, 36, 36, 36, 39, + 41, 36, 36, 36, 36, 36, 36, 42, 14, 14, 14, 38, 14, 14, 14, 38, + 85, 85, 85, 85, 85, 85, 85, 58, 58, 58, 58, 27, 106, 14, 85, 14, + 85, 70, 70, 70, 70, 58, 58, 56, 58, 27, 77, 14, 14, 107, 58, 77, + 58, 108, 36, 36, 36, 36, 36, 36, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 109, 97, 97, 97, 97, 36, 36, 36, 36, 36, 36, + 97, 97, 97, 36, 36, 36, 36, 36, 97, 97, 97, 97, 97, 97, 36, 36, + 18, 110, 111, 97, 70, 70, 70, 70, 70, 97, 70, 70, 70, 70, 112, 113, + 97, 97, 97, 97, 97, 0, 0, 0, 97, 97, 114, 97, 97, 111, 115, 97, + 116, 117, 117, 117, 117, 97, 97, 97, 97, 117, 97, 97, 97, 97, 97, 97, + 97, 117, 117, 117, 97, 97, 97, 118, 97, 97, 117, 119, 42, 120, 91, 115, + 121, 117, 117, 117, 117, 97, 97, 97, 97, 97, 117, 118, 97, 111, 122, 115, + 36, 36, 109, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 36, + 109, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 123, + 97, 97, 97, 97, 97, 123, 36, 36, 124, 124, 124, 124, 124, 124, 124, 124, + 97, 97, 97, 97, 28, 28, 28, 28, 97, 97, 111, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 123, 36, 97, 97, 97, 123, 36, 36, 36, 36, + 14, 14, 14, 14, 14, 14, 27, 105, 12, 12, 12, 12, 12, 14, 36, 36, + 0, 44, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 42, + 0, 27, 58, 58, 36, 36, 36, 36, 36, 36, 36, 39, 14, 14, 14, 14, + 14, 44, 14, 44, 14, 19, 14, 14, 14, 19, 0, 0, 14, 14, 36, 36, + 14, 14, 14, 14, 125, 36, 36, 36, 14, 14, 65, 53, 36, 36, 36, 36, + 0, 14, 14, 14, 14, 14, 14, 14, 0, 0, 52, 36, 36, 36, 36, 58, + 0, 14, 14, 14, 14, 14, 36, 36, 14, 14, 14, 0, 0, 0, 0, 58, + 14, 14, 14, 19, 0, 0, 0, 0, 0, 0, 36, 36, 36, 36, 36, 39, + 74, 74, 74, 74, 74, 74, 126, 36, 14, 19, 0, 0, 0, 0, 0, 0, + 44, 14, 14, 27, 58, 14, 14, 39, 12, 12, 12, 12, 12, 36, 36, 14, + 12, 12, 12, 12, 12, 61, 61, 62, 14, 14, 14, 14, 19, 0, 0, 0, + 0, 0, 0, 52, 36, 36, 36, 36, 14, 19, 14, 14, 14, 14, 0, 36, + 12, 12, 12, 12, 12, 36, 27, 58, 61, 62, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 60, 61, 61, 58, 14, 
19, 52, 36, 36, 36, 36, + 39, 14, 14, 38, 39, 14, 14, 38, 39, 14, 14, 38, 36, 36, 36, 36, + 36, 36, 14, 36, 36, 36, 36, 36, 14, 19, 0, 0, 0, 1, 0, 36, + 127, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 127, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 127, 128, 128, 128, + 128, 128, 127, 128, 128, 128, 128, 128, 128, 128, 36, 36, 36, 36, 36, 36, + 75, 75, 75, 129, 36, 130, 76, 76, 76, 76, 76, 76, 76, 76, 36, 36, + 131, 131, 131, 131, 131, 131, 131, 131, 36, 39, 14, 14, 36, 36, 132, 133, + 46, 46, 46, 46, 48, 46, 46, 46, 46, 46, 46, 47, 46, 46, 47, 47, + 46, 132, 47, 46, 46, 46, 46, 46, 36, 39, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 103, 36, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 125, 36, 134, 135, 57, 136, 137, 36, 36, 36, + 0, 0, 0, 0, 0, 0, 0, 36, 97, 97, 138, 103, 103, 103, 103, 103, + 103, 103, 110, 138, 110, 97, 97, 97, 110, 78, 91, 53, 138, 103, 103, 110, + 97, 97, 97, 123, 139, 140, 36, 36, 14, 14, 14, 14, 14, 14, 38, 141, + 104, 97, 6, 97, 70, 97, 110, 110, 97, 97, 97, 97, 97, 91, 97, 142, + 97, 97, 97, 97, 97, 138, 143, 97, 97, 97, 97, 97, 97, 138, 143, 138, + 113, 70, 93, 144, 124, 124, 124, 124, 145, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 91, 36, 14, 14, 14, 36, 14, 14, 14, - 36, 14, 14, 14, 36, 14, 38, 36, 22, 97, 138, 145, 14, 14, 14, 38, - 36, 36, 36, 36, 43, 0, 146, 36, 14, 14, 14, 14, 14, 14, 39, 14, - 14, 14, 14, 14, 14, 38, 14, 39, 59, 41, 36, 39, 14, 14, 14, 14, + 36, 14, 14, 14, 36, 14, 38, 36, 22, 97, 139, 146, 14, 14, 14, 38, + 36, 36, 36, 36, 42, 0, 147, 36, 14, 14, 14, 14, 14, 14, 39, 14, + 14, 14, 14, 14, 14, 38, 14, 39, 58, 41, 36, 39, 14, 14, 14, 14, 14, 14, 36, 39, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 36, 36, - 14, 14, 14, 14, 14, 14, 19, 36, 14, 14, 14, 14, 14, 14, 14, 81, + 14, 14, 14, 14, 14, 14, 19, 36, 14, 14, 36, 36, 36, 36, 36, 36, + 14, 14, 14, 0, 0, 52, 36, 36, 14, 14, 14, 14, 14, 14, 14, 81, 14, 14, 36, 36, 14, 14, 14, 14, 77, 14, 14, 
36, 36, 36, 36, 36, - 14, 14, 14, 36, 38, 14, 14, 14, 14, 14, 14, 39, 38, 36, 38, 39, - 14, 14, 14, 81, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 36, 81, - 14, 14, 14, 14, 14, 36, 36, 39, 14, 14, 14, 14, 36, 36, 36, 14, - 19, 0, 43, 53, 36, 36, 0, 0, 14, 14, 39, 14, 39, 14, 14, 14, - 14, 14, 36, 36, 0, 53, 36, 43, 59, 59, 59, 59, 38, 36, 36, 36, - 14, 14, 14, 36, 81, 59, 59, 59, 14, 14, 14, 36, 14, 14, 14, 14, - 14, 38, 36, 36, 14, 14, 14, 14, 14, 14, 14, 14, 38, 36, 36, 36, - 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 1, 77, 14, 14, 36, - 14, 14, 14, 12, 12, 12, 12, 12, 0, 0, 0, 0, 0, 45, 14, 59, - 59, 36, 36, 36, 36, 36, 36, 36, 0, 0, 53, 12, 12, 12, 12, 12, - 59, 59, 36, 36, 36, 36, 36, 36, 45, 14, 27, 77, 41, 36, 36, 36, - 0, 0, 0, 0, 36, 36, 36, 36, 14, 38, 36, 36, 36, 36, 36, 36, - 14, 14, 14, 14, 147, 70, 112, 14, 14, 98, 14, 70, 70, 14, 14, 14, - 14, 14, 14, 14, 16, 112, 14, 14, 19, 0, 0, 0, 0, 0, 0, 0, - 36, 36, 36, 36, 36, 36, 36, 43, 97, 36, 36, 36, 36, 36, 36, 36, - 14, 14, 19, 0, 0, 14, 19, 0, 0, 45, 19, 0, 0, 0, 14, 14, - 14, 14, 14, 14, 14, 0, 0, 14, 14, 0, 45, 36, 36, 36, 36, 36, + 14, 14, 36, 36, 36, 36, 36, 39, 14, 14, 14, 36, 38, 14, 14, 14, + 14, 14, 14, 39, 38, 36, 38, 39, 14, 14, 14, 81, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 36, 81, 14, 14, 14, 14, 14, 36, 36, 39, + 14, 14, 14, 14, 36, 36, 36, 14, 19, 0, 42, 52, 36, 36, 0, 0, + 14, 14, 39, 14, 39, 14, 14, 14, 14, 14, 36, 36, 0, 52, 36, 42, + 58, 58, 58, 58, 38, 36, 36, 36, 14, 14, 19, 52, 36, 39, 14, 14, + 58, 58, 58, 148, 36, 36, 36, 36, 14, 14, 14, 36, 81, 58, 58, 58, + 14, 38, 36, 36, 14, 14, 14, 14, 14, 36, 36, 36, 39, 14, 38, 36, + 36, 36, 36, 36, 39, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, + 0, 0, 0, 1, 77, 14, 14, 36, 14, 14, 14, 12, 12, 12, 12, 12, + 36, 36, 36, 36, 36, 36, 36, 42, 0, 0, 0, 0, 0, 44, 14, 58, + 58, 36, 36, 36, 36, 36, 36, 36, 0, 0, 52, 12, 12, 12, 12, 12, + 58, 58, 36, 36, 36, 36, 36, 36, 14, 19, 32, 38, 36, 36, 36, 36, + 44, 14, 27, 77, 41, 36, 39, 36, 12, 12, 12, 12, 12, 
38, 36, 36, + 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 58, 27, 77, 36, + 0, 0, 0, 0, 0, 52, 36, 36, 36, 36, 36, 42, 36, 36, 39, 14, + 14, 0, 36, 0, 0, 0, 52, 36, 0, 0, 52, 36, 36, 36, 36, 36, + 0, 0, 14, 14, 36, 36, 36, 36, 0, 0, 0, 36, 0, 0, 0, 0, + 149, 58, 53, 14, 27, 36, 36, 36, 1, 77, 38, 36, 36, 36, 36, 36, + 0, 0, 0, 0, 36, 36, 36, 36, 14, 38, 36, 36, 36, 36, 36, 39, + 58, 58, 41, 36, 36, 36, 36, 36, 14, 14, 14, 14, 150, 70, 113, 14, + 14, 98, 14, 70, 70, 14, 14, 14, 14, 14, 14, 14, 16, 113, 14, 14, + 12, 12, 12, 12, 12, 36, 36, 58, 0, 0, 1, 36, 36, 36, 36, 36, + 0, 0, 0, 1, 58, 14, 14, 14, 14, 14, 77, 36, 36, 36, 36, 36, + 12, 12, 12, 12, 12, 39, 14, 14, 14, 14, 14, 14, 36, 36, 39, 14, + 19, 0, 0, 0, 0, 0, 0, 0, 97, 36, 36, 36, 36, 36, 36, 36, + 14, 14, 14, 14, 14, 36, 19, 1, 0, 0, 36, 36, 36, 36, 36, 36, + 14, 14, 19, 0, 0, 14, 19, 0, 0, 44, 19, 0, 0, 0, 14, 14, + 14, 14, 14, 14, 14, 0, 0, 14, 14, 0, 44, 36, 36, 36, 36, 36, 36, 38, 39, 38, 39, 14, 38, 14, 14, 14, 14, 14, 14, 39, 39, 14, 14, 14, 39, 14, 14, 14, 14, 14, 14, 14, 14, 39, 14, 38, 39, 14, 14, 14, 38, 14, 14, 14, 38, 14, 14, 14, 14, 14, 14, 39, 14, 38, 14, 14, 38, 38, 36, 14, 14, 14, 14, 14, 14, 14, 14, 14, 36, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 39, 38, 38, 39, 39, 14, 14, 14, - 14, 38, 14, 14, 39, 39, 36, 36, 36, 38, 36, 39, 39, 39, 39, 14, - 39, 38, 38, 39, 39, 39, 39, 39, 39, 38, 38, 39, 14, 38, 14, 14, - 14, 38, 14, 14, 39, 14, 38, 38, 14, 14, 14, 14, 14, 39, 14, 14, - 39, 14, 39, 14, 14, 39, 14, 14, 103, 97, 97, 97, 97, 97, 97, 122, - 28, 28, 28, 28, 28, 148, 36, 36, 28, 28, 28, 28, 28, 28, 28, 38, - 28, 28, 28, 28, 28, 14, 36, 36, 36, 36, 36, 149, 149, 149, 149, 149, - 149, 149, 149, 149, 149, 149, 149, 149, 97, 122, 36, 36, 36, 36, 36, 36, - 97, 97, 97, 97, 122, 36, 36, 36, 122, 36, 36, 36, 36, 36, 36, 36, - 97, 97, 97, 103, 97, 97, 97, 97, 97, 97, 99, 100, 97, 97, 100, 97, - 97, 97, 122, 97, 97, 122, 36, 36, 122, 97, 97, 97, 97, 97, 97, 97, + 12, 12, 12, 12, 12, 12, 12, 12, 14, 14, 
38, 39, 14, 14, 14, 14, + 39, 38, 38, 39, 39, 14, 14, 14, 14, 38, 14, 14, 39, 39, 36, 36, + 36, 38, 36, 39, 39, 39, 39, 14, 39, 38, 38, 39, 39, 39, 39, 39, + 39, 38, 38, 39, 14, 38, 14, 14, 14, 38, 14, 14, 39, 14, 38, 38, + 14, 14, 14, 14, 14, 39, 14, 14, 39, 14, 39, 14, 14, 39, 14, 14, + 28, 28, 28, 28, 28, 28, 151, 36, 28, 28, 28, 28, 28, 28, 28, 38, + 28, 28, 28, 28, 28, 14, 36, 36, 28, 28, 28, 28, 28, 151, 36, 36, + 36, 36, 36, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, + 97, 123, 36, 36, 36, 36, 36, 36, 97, 97, 97, 97, 123, 36, 36, 36, + 97, 97, 97, 97, 97, 97, 14, 97, 97, 97, 99, 100, 97, 97, 100, 97, + 36, 36, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 36, 36, 36, 36, 100, 100, 100, 97, 97, 97, 97, 99, 99, 100, 97, 97, 97, 97, 97, 97, - 97, 97, 97, 97, 103, 97, 122, 36, 14, 14, 14, 100, 97, 97, 97, 97, - 97, 97, 97, 99, 14, 14, 14, 14, 14, 14, 100, 97, 97, 97, 97, 97, - 97, 14, 14, 14, 14, 14, 14, 36, 97, 97, 97, 97, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 103, 97, 97, 122, 36, 103, 97, 97, 97, 97, 97, - 43, 36, 36, 36, 36, 36, 36, 36, + 14, 14, 14, 100, 97, 97, 97, 97, 97, 97, 97, 99, 14, 14, 14, 14, + 14, 14, 100, 97, 97, 97, 97, 97, 97, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 123, 36, 36, 97, 97, 109, 97, 97, 97, 97, 97, + 97, 97, 14, 14, 14, 14, 97, 97, 97, 97, 14, 14, 14, 97, 97, 97, + 97, 123, 109, 97, 97, 97, 97, 97, 14, 14, 14, 85, 153, 91, 14, 14, + 42, 36, 36, 36, 36, 36, 36, 36, }; static RE_UINT8 re_line_break_stage_5[] = { @@ -10182,24 +10796,25 @@ static RE_UINT8 re_line_break_stage_5[] = { 1, 36, 12, 9, 8, 15, 8, 7, 11, 11, 8, 8, 12, 12, 12, 6, 12, 1, 9, 36, 18, 2, 12, 16, 16, 29, 4, 1, 10, 9, 9, 9, 12, 25, 25, 12, 25, 3, 12, 18, 25, 25, 17, 12, 25, 1, 17, 25, - 12, 17, 16, 4, 4, 4, 4, 16, 0, 0, 8, 0, 12, 0, 0, 12, - 0, 8, 18, 0, 0, 9, 0, 16, 18, 16, 16, 12, 6, 16, 37, 37, - 37, 0, 37, 12, 12, 10, 10, 10, 16, 6, 16, 0, 6, 6, 10, 11, - 11, 12, 6, 12, 8, 6, 18, 18, 0, 10, 0, 24, 24, 24, 24, 0, + 12, 17, 16, 4, 4, 4, 4, 
16, 0, 0, 8, 12, 12, 0, 0, 12, + 0, 8, 18, 0, 0, 16, 18, 16, 16, 12, 6, 16, 37, 37, 37, 0, + 37, 12, 12, 10, 10, 10, 16, 6, 16, 0, 6, 6, 10, 11, 11, 12, + 6, 12, 8, 6, 18, 18, 0, 10, 0, 24, 24, 24, 24, 0, 0, 9, 24, 12, 17, 17, 4, 17, 17, 18, 4, 6, 4, 12, 1, 2, 18, 17, 12, 4, 4, 0, 31, 31, 32, 32, 33, 33, 18, 12, 2, 0, 5, 24, 18, 9, 0, 18, 18, 4, 18, 28, 26, 25, 3, 3, 1, 3, 14, 14, 14, 18, 20, 20, 3, 25, 5, 5, 8, 1, 2, 5, 30, 12, 2, 25, - 9, 12, 13, 13, 2, 12, 13, 12, 12, 13, 13, 25, 25, 13, 0, 13, - 2, 1, 0, 6, 6, 18, 1, 18, 26, 26, 2, 13, 13, 5, 5, 1, - 2, 2, 13, 16, 5, 13, 0, 38, 13, 38, 38, 13, 38, 0, 16, 5, - 5, 38, 38, 5, 13, 0, 38, 38, 10, 12, 31, 0, 34, 35, 35, 35, - 32, 0, 0, 33, 27, 27, 0, 37, 16, 37, 8, 2, 2, 8, 6, 1, - 2, 14, 13, 1, 13, 9, 10, 13, 0, 30, 13, 6, 13, 2, 12, 38, - 38, 12, 9, 0, 23, 25, 1, 1, 25, 0, 39, 39, + 9, 12, 13, 13, 2, 12, 13, 12, 12, 13, 13, 25, 25, 13, 2, 1, + 0, 6, 6, 18, 1, 18, 26, 26, 1, 0, 0, 13, 2, 13, 13, 5, + 5, 1, 2, 2, 13, 16, 5, 13, 0, 38, 13, 38, 38, 13, 38, 0, + 16, 5, 5, 38, 38, 5, 13, 0, 38, 38, 10, 12, 31, 0, 34, 35, + 35, 35, 32, 0, 0, 33, 27, 27, 0, 37, 16, 37, 8, 2, 2, 8, + 6, 1, 2, 14, 13, 1, 13, 9, 10, 13, 0, 30, 13, 6, 13, 2, + 12, 38, 38, 12, 9, 0, 23, 25, 14, 0, 16, 17, 1, 1, 25, 0, + 39, 39, 3, 5, }; -/* Line_Break: 7668 bytes. */ +/* Line_Break: 8332 bytes. 
*/ RE_UINT32 re_get_line_break(RE_UINT32 ch) { RE_UINT32 code; @@ -10228,8 +10843,8 @@ RE_UINT32 re_get_line_break(RE_UINT32 ch) { static RE_UINT8 re_numeric_type_stage_1[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11, 12, - 13, 14, 15, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 16, 11, 17, - 18, 11, 19, 20, 11, 11, 21, 11, 11, 11, 11, 11, 11, 11, 11, 22, + 13, 14, 15, 11, 11, 11, 16, 11, 11, 11, 11, 11, 11, 17, 18, 19, + 20, 11, 21, 22, 11, 11, 23, 11, 11, 11, 11, 11, 11, 11, 11, 24, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, @@ -10260,16 +10875,18 @@ static RE_UINT8 re_numeric_type_stage_2[] = { 1, 1, 1, 1, 1, 1, 50, 1, 51, 52, 53, 54, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 55, 1, 1, 1, 1, 1, 15, - 1, 56, 1, 57, 58, 1, 1, 1, 59, 60, 61, 62, 1, 1, 63, 1, - 64, 65, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 66, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 67, 1, 1, 1, 68, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 69, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 70, 71, 1, 1, 1, 1, 1, 1, 1, 72, 73, 74, 1, 1, 1, 1, - 1, 1, 1, 75, 1, 1, 1, 1, 1, 76, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 77, 1, 1, 1, 1, - 1, 1, 78, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 56, 57, 58, 59, 1, 1, 1, 60, 61, 62, 63, 1, 1, 64, 1, + 65, 66, 54, 1, 67, 1, 68, 1, 69, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 71, 72, 1, 1, 1, 1, + 1, 1, 1, 73, 1, 1, 1, 74, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 75, 1, 1, 1, 1, 1, 1, 1, + 1, 76, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 77, 78, 1, 1, 1, 1, 1, 1, 1, 79, 80, 81, 1, 1, 1, 1, + 1, 1, 1, 82, 1, 1, 1, 1, 1, 83, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 84, 1, 1, 1, 1, + 1, 1, 85, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 82, 1, 1, 
1, 1, 1, 1, 1, }; static RE_UINT8 re_numeric_type_stage_3[] = { @@ -10277,7 +10894,7 @@ static RE_UINT8 re_numeric_type_stage_3[] = { 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 8, 0, 0, 0, 4, - 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, + 0, 0, 0, 9, 0, 0, 0, 4, 0, 0, 1, 0, 0, 0, 1, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 13, 1, 0, 0, 0, 0, 0, 0, 0, @@ -10299,20 +10916,23 @@ static RE_UINT8 re_numeric_type_stage_3[] = { 0, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 58, 59, 60, 0, 0, 0, 56, 0, 3, 0, 0, 0, 0, 0, 61, 0, 62, 0, 0, 0, 0, 1, 0, - 3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 63, 0, 55, 64, 26, - 65, 66, 19, 67, 35, 0, 0, 0, 0, 68, 69, 0, 0, 0, 70, 0, - 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 71, 0, 0, 0, 0, 0, - 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 73, 74, 0, 0, 0, 0, - 0, 0, 71, 71, 0, 0, 0, 0, 0, 0, 0, 75, 0, 0, 0, 0, - 0, 0, 76, 77, 0, 0, 0, 1, 0, 78, 0, 0, 0, 0, 1, 0, - 19, 19, 19, 79, 0, 0, 0, 0, 0, 0, 0, 80, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 81, 82, 83, 0, 0, 0, 0, 0, 0, 0, - 58, 0, 0, 43, 0, 0, 0, 84, 0, 58, 0, 0, 0, 0, 0, 0, - 0, 35, 0, 0, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 86, - 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, - 0, 0, 0, 0, 60, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 36, 0, 0, 0, 0, + 65, 66, 19, 67, 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 69, + 0, 70, 71, 0, 0, 0, 72, 0, 0, 0, 0, 0, 0, 3, 0, 0, + 0, 0, 73, 74, 0, 75, 0, 0, 76, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 77, 78, 79, 0, 0, 80, 0, 0, 73, 73, 0, 81, 0, 0, + 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 83, 84, 0, 0, 0, 1, + 0, 85, 0, 0, 0, 0, 1, 86, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 87, + 19, 19, 19, 88, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, + 0, 0, 89, 90, 0, 0, 0, 0, 0, 0, 0, 91, 0, 0, 0, 0, 
+ 0, 0, 0, 0, 0, 0, 92, 93, 0, 0, 0, 0, 0, 0, 75, 0, + 94, 0, 0, 0, 0, 0, 0, 0, 58, 0, 0, 43, 0, 0, 0, 95, + 0, 58, 0, 0, 0, 0, 0, 0, 0, 35, 0, 0, 96, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 97, 98, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 60, 0, 0, 0, + 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 0, }; static RE_UINT8 re_numeric_type_stage_4[] = { @@ -10333,11 +10953,14 @@ static RE_UINT8 re_numeric_type_stage_4[] = { 0, 0, 45, 0, 0, 45, 39, 0, 42, 0, 0, 0, 45, 43, 0, 0, 0, 0, 0, 18, 17, 19, 0, 0, 0, 0, 11, 0, 0, 39, 39, 18, 0, 0, 50, 0, 36, 19, 19, 19, 19, 19, 13, 0, 19, 19, 19, 18, - 13, 0, 0, 0, 42, 40, 0, 0, 0, 0, 51, 0, 0, 0, 0, 19, - 0, 0, 17, 13, 52, 0, 0, 0, 0, 0, 0, 53, 23, 25, 19, 10, - 0, 0, 54, 55, 56, 1, 0, 0, 0, 0, 5, 1, 9, 0, 0, 0, - 19, 19, 7, 0, 0, 5, 1, 1, 1, 1, 1, 1, 23, 57, 0, 0, - 40, 0, 0, 0, 39, 43, 0, 43, 0, 40, 0, 35, 0, 0, 0, 42, + 0, 51, 0, 0, 37, 19, 19, 13, 13, 0, 0, 0, 42, 40, 0, 0, + 0, 0, 52, 0, 0, 0, 0, 19, 0, 0, 0, 37, 36, 19, 0, 0, + 0, 0, 17, 13, 53, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 55, + 0, 56, 0, 0, 0, 37, 0, 0, 23, 25, 19, 10, 0, 0, 57, 58, + 59, 1, 0, 0, 0, 0, 5, 1, 37, 19, 16, 0, 1, 12, 9, 0, + 19, 10, 0, 0, 0, 0, 1, 60, 7, 0, 0, 0, 19, 19, 7, 0, + 0, 5, 1, 1, 1, 1, 1, 1, 23, 61, 0, 0, 40, 0, 0, 0, + 39, 43, 0, 43, 0, 40, 0, 35, 0, 0, 0, 42, }; static RE_UINT8 re_numeric_type_stage_5[] = { @@ -10366,13 +10989,15 @@ static RE_UINT8 re_numeric_type_stage_5[] = { 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, - 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, - 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, - 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 3, 3, 2, 2, 2, 0, 0, 0, 0, 0, + 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, + 3, 3, 0, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 0, 0, 0, }; -/* Numeric_Type: 2088 bytes. */ +/* Numeric_Type: 2252 bytes. */ RE_UINT32 re_get_numeric_type(RE_UINT32 ch) { RE_UINT32 code; @@ -10401,8 +11026,8 @@ RE_UINT32 re_get_numeric_type(RE_UINT32 ch) { static RE_UINT8 re_numeric_value_stage_1[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11, 12, - 13, 14, 15, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 16, 11, 17, - 18, 11, 19, 20, 11, 11, 21, 11, 11, 11, 11, 11, 11, 11, 11, 22, + 13, 14, 15, 11, 11, 11, 16, 11, 11, 11, 11, 11, 11, 17, 18, 19, + 20, 11, 21, 22, 11, 11, 23, 11, 11, 11, 11, 11, 11, 11, 11, 24, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, @@ -10433,16 +11058,18 @@ static RE_UINT8 re_numeric_value_stage_2[] = { 1, 1, 1, 1, 1, 1, 50, 1, 51, 52, 53, 54, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 55, 1, 1, 1, 1, 1, 15, - 1, 56, 1, 57, 58, 1, 1, 1, 59, 60, 61, 62, 1, 1, 63, 1, - 64, 65, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 66, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 67, 1, 1, 1, 68, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 69, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 70, 71, 1, 1, 1, 1, 1, 1, 1, 72, 73, 74, 1, 1, 1, 1, - 1, 1, 1, 75, 1, 1, 1, 1, 1, 76, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 77, 1, 1, 1, 1, - 1, 1, 78, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 79, 1, 1, 1, 1, 1, 1, 1, + 1, 56, 57, 58, 59, 1, 1, 1, 60, 61, 62, 63, 1, 1, 64, 1, + 65, 66, 54, 1, 67, 1, 68, 1, 69, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 71, 72, 1, 1, 1, 1, + 1, 1, 1, 73, 1, 1, 1, 74, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 75, 1, 1, 1, 1, 1, 1, 1, + 1, 76, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 77, 78, 1, 1, 1, 1, 1, 1, 1, 79, 80, 
81, 1, 1, 1, 1, + 1, 1, 1, 82, 1, 1, 1, 1, 1, 83, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 84, 1, 1, 1, 1, + 1, 1, 85, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 86, 1, 1, 1, 1, 1, 1, 1, }; static RE_UINT8 re_numeric_value_stage_3[] = { @@ -10450,7 +11077,7 @@ static RE_UINT8 re_numeric_value_stage_3[] = { 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 8, 0, 0, 0, 4, - 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, + 0, 0, 0, 9, 0, 0, 0, 4, 0, 0, 1, 0, 0, 0, 1, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 13, 1, 0, 0, 0, 0, 0, 0, 0, @@ -10472,20 +11099,24 @@ static RE_UINT8 re_numeric_value_stage_3[] = { 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 61, 0, 0, 0, 62, 0, 0, 0, 0, 0, 0, 0, 63, 64, 65, 0, 0, 0, 66, 0, 3, 0, 0, 0, 0, 0, 67, 0, 68, 0, 0, 0, 0, 1, 0, - 3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 69, 0, 70, 71, 72, - 73, 74, 75, 76, 77, 0, 0, 0, 0, 78, 79, 0, 0, 0, 80, 0, - 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 81, 0, 0, 0, 0, 0, - 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, 83, 84, 0, 0, 0, 0, - 0, 0, 85, 85, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, - 0, 0, 87, 88, 0, 0, 0, 1, 0, 89, 0, 0, 0, 0, 1, 0, - 90, 91, 92, 93, 0, 0, 0, 0, 0, 0, 0, 94, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 95, 96, 97, 0, 0, 0, 0, 0, 0, 0, - 98, 0, 0, 99, 0, 0, 0, 100, 0, 101, 0, 0, 0, 0, 0, 0, - 0, 102, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 104, - 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, - 0, 0, 0, 0, 106, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 108, 0, 0, 0, 0, 0, 0, 0, 0, 109, 0, 0, 0, + 73, 74, 75, 76, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 78, + 0, 79, 80, 0, 0, 0, 81, 0, 0, 0, 0, 0, 0, 3, 0, 0, + 0, 0, 82, 83, 0, 84, 0, 0, 85, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 86, 87, 88, 0, 0, 89, 0, 0, 90, 90, 0, 91, 0, 0, + 0, 0, 0, 92, 0, 
0, 0, 0, 0, 0, 93, 94, 0, 0, 0, 1, + 0, 95, 0, 0, 0, 0, 1, 96, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 97, + 98, 99, 100, 101, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, + 0, 0, 102, 103, 0, 0, 0, 0, 0, 0, 0, 104, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 105, 106, 0, 0, 0, 0, 0, 0, 107, 0, + 108, 0, 0, 0, 0, 0, 0, 0, 109, 0, 0, 110, 0, 0, 0, 111, + 0, 112, 0, 0, 0, 0, 0, 0, 0, 113, 0, 0, 114, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 115, 116, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 62, 0, 0, 0, 0, 0, 0, 0, 117, 0, 0, 0, + 118, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 0, 0, 0, + 0, 0, 0, 0, 120, 0, 0, 0, }; static RE_UINT8 re_numeric_value_stage_4[] = { @@ -10527,23 +11158,29 @@ static RE_UINT8 re_numeric_value_stage_4[] = { 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 110, 0, 111, 8, 9, 57, 58, 112, 113, 114, 115, 116, 117, 118, 0, 0, 0, 119, 120, 121, 122, 123, 124, 125, 126, - 127, 128, 129, 130, 122, 131, 132, 0, 0, 0, 103, 0, 0, 0, 0, 0, - 133, 0, 0, 0, 0, 0, 0, 0, 134, 0, 135, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 136, 137, 0, 0, 0, 0, 0, 0, 0, 0, 138, 139, - 0, 0, 0, 0, 0, 140, 141, 0, 34, 142, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 143, 0, 0, 0, 0, 0, 0, 34, 142, - 34, 35, 144, 145, 146, 147, 148, 149, 0, 0, 0, 0, 48, 49, 50, 150, - 151, 152, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 9, - 8, 9, 49, 153, 35, 154, 2, 155, 156, 157, 9, 158, 159, 158, 160, 161, - 162, 163, 164, 165, 166, 167, 168, 169, 170, 0, 0, 0, 0, 0, 0, 0, - 34, 35, 144, 145, 171, 0, 0, 0, 0, 0, 0, 7, 8, 9, 1, 2, - 172, 8, 9, 1, 2, 172, 8, 9, 173, 49, 174, 0, 0, 0, 0, 0, - 70, 0, 0, 0, 0, 0, 0, 0, 0, 175, 0, 0, 0, 0, 0, 0, - 98, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 91, 0, 0, 0, 0, 0, 176, 0, 0, 88, 0, 0, 0, 88, - 0, 0, 101, 0, 0, 0, 0, 73, 0, 0, 0, 0, 0, 0, 73, 0, - 0, 0, 0, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 0, 107, 0, - 0, 0, 0, 177, 0, 0, 0, 0, 0, 0, 0, 0, 178, 0, 0, 0, + 127, 128, 129, 130, 122, 131, 132, 0, 0, 0, 133, 0, 0, 0, 0, 0, + 21, 2, 22, 23, 24, 
134, 135, 0, 136, 0, 0, 0, 0, 0, 0, 0, + 137, 0, 138, 0, 0, 0, 0, 0, 0, 0, 0, 0, 139, 140, 0, 0, + 0, 0, 0, 0, 0, 0, 141, 142, 0, 0, 0, 0, 0, 0, 21, 143, + 0, 111, 144, 145, 0, 0, 0, 0, 0, 0, 0, 0, 0, 146, 147, 0, + 34, 148, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 149, + 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 111, 145, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 34, 148, 0, 0, 21, 151, 0, 0, 0, 0, + 34, 35, 152, 153, 154, 155, 156, 157, 0, 0, 0, 0, 48, 49, 50, 158, + 159, 160, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 9, + 21, 2, 22, 23, 24, 161, 0, 0, 1, 2, 22, 23, 162, 0, 0, 0, + 8, 9, 49, 163, 35, 164, 2, 165, 166, 167, 9, 168, 169, 168, 170, 171, + 172, 173, 174, 175, 144, 176, 177, 178, 179, 180, 181, 182, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 2, 183, 184, 185, 0, 0, 0, 0, 0, 0, 0, + 34, 35, 152, 153, 186, 0, 0, 0, 0, 0, 0, 7, 8, 9, 1, 2, + 187, 8, 9, 1, 2, 187, 8, 9, 0, 111, 8, 9, 0, 0, 0, 0, + 188, 49, 104, 29, 0, 0, 0, 0, 70, 0, 0, 0, 0, 0, 0, 0, + 0, 189, 0, 0, 0, 0, 0, 0, 98, 0, 0, 0, 0, 0, 0, 0, + 67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 91, 0, 0, 0, 0, 0, + 190, 0, 0, 88, 0, 0, 0, 88, 0, 0, 101, 0, 0, 0, 0, 73, + 0, 0, 0, 0, 0, 0, 73, 0, 0, 0, 0, 0, 0, 0, 80, 0, + 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 191, 0, 0, 0, 0, + 0, 0, 0, 0, 192, 0, 0, 0, }; static RE_UINT8 re_numeric_value_stage_5[] = { @@ -10566,8 +11203,8 @@ static RE_UINT8 re_numeric_value_stage_5[] = { 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 0, 31, 0, 0, 0, 0, 0, 25, 0, 0, 31, 0, 0, 35, 0, 0, 23, 0, 0, 35, 0, 0, 0, 103, 0, 27, 0, 0, 0, 39, 0, 0, 25, 0, 0, 0, - 31, 0, 29, 0, 0, 0, 0, 115, 40, 0, 0, 0, 0, 0, 0, 94, - 27, 0, 0, 0, 85, 0, 0, 0, 115, 0, 0, 0, 0, 0, 116, 0, + 31, 0, 29, 0, 0, 0, 0, 116, 40, 0, 0, 0, 0, 0, 0, 94, + 27, 0, 0, 0, 85, 0, 0, 0, 116, 0, 0, 0, 0, 0, 118, 0, 0, 25, 0, 37, 0, 33, 0, 0, 0, 40, 0, 94, 50, 60, 0, 0, 70, 0, 0, 0, 0, 27, 27, 27, 0, 0, 0, 29, 0, 0, 23, 0, 0, 0, 39, 50, 0, 0, 40, 0, 37, 0, 0, 0, 0, 0, 35, 0, @@ -10580,21 +11217,25 @@ static RE_UINT8 re_numeric_value_stage_5[] = { 80, 89, 98, 107, 
31, 40, 80, 85, 89, 94, 98, 31, 40, 80, 85, 89, 94, 103, 107, 40, 23, 23, 23, 25, 25, 25, 25, 31, 40, 40, 40, 40, 40, 60, 80, 80, 80, 80, 85, 87, 89, 89, 89, 89, 80, 15, 15, 18, - 19, 0, 0, 0, 23, 31, 40, 80, 0, 84, 0, 0, 0, 0, 93, 0, - 0, 23, 25, 40, 50, 85, 0, 0, 23, 25, 27, 40, 50, 85, 94, 103, - 0, 0, 23, 40, 50, 85, 25, 27, 40, 50, 85, 94, 0, 23, 80, 0, + 19, 0, 0, 0, 0, 0, 2, 11, 86, 87, 88, 89, 90, 91, 92, 93, + 23, 31, 40, 80, 0, 84, 0, 0, 0, 0, 93, 0, 0, 23, 25, 40, + 50, 85, 0, 0, 23, 25, 27, 40, 50, 85, 94, 103, 29, 31, 40, 50, + 25, 27, 29, 29, 31, 40, 50, 85, 0, 0, 23, 40, 50, 85, 25, 27, + 40, 50, 85, 94, 0, 23, 80, 0, 0, 23, 40, 50, 29, 40, 50, 85, 39, 40, 50, 60, 70, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 15, 11, 12, 18, 0, 50, 60, 70, 80, 81, 82, 83, 84, - 85, 94, 2, 23, 35, 37, 39, 29, 39, 23, 25, 27, 37, 39, 23, 25, - 27, 29, 31, 25, 27, 27, 29, 31, 23, 25, 27, 27, 29, 31, 113, 114, - 29, 31, 27, 27, 29, 29, 29, 29, 33, 35, 35, 35, 37, 37, 39, 39, - 39, 39, 25, 27, 29, 31, 33, 23, 25, 27, 29, 29, 31, 31, 25, 27, - 23, 25, 12, 18, 21, 12, 18, 6, 11, 8, 11, 0, 83, 84, 0, 0, - 37, 39, 2, 23, 2, 2, 23, 25, 35, 37, 39, 0, 29, 0, 0, 0, - 0, 0, 0, 60, 0, 29, 0, 0, 39, 0, 0, 0, + 85, 94, 2, 23, 94, 0, 0, 0, 82, 83, 84, 0, 35, 37, 39, 29, + 39, 23, 25, 27, 37, 39, 23, 25, 27, 29, 31, 25, 27, 27, 29, 31, + 23, 25, 27, 27, 29, 31, 113, 114, 29, 31, 27, 27, 29, 29, 29, 29, + 33, 35, 35, 35, 37, 37, 39, 39, 39, 39, 25, 27, 29, 31, 33, 23, + 31, 31, 25, 27, 23, 25, 12, 18, 21, 12, 18, 6, 11, 8, 11, 11, + 15, 12, 18, 70, 80, 29, 31, 33, 35, 37, 39, 0, 37, 39, 0, 40, + 85, 103, 115, 116, 117, 118, 0, 0, 83, 84, 0, 0, 37, 39, 2, 23, + 2, 2, 23, 25, 29, 0, 0, 0, 0, 0, 0, 60, 0, 29, 0, 0, + 39, 0, 0, 0, }; -/* Numeric_Value: 2876 bytes. */ +/* Numeric_Value: 3108 bytes. 
*/ RE_UINT32 re_get_numeric_value(RE_UINT32 ch) { RE_UINT32 code; @@ -10717,32 +11358,36 @@ static RE_UINT8 re_indic_matra_category_stage_2[] = { 0, 0, 0, 0, 0, 0, 0, 0, 14, 15, 16, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0, 0, 0, 0, 0, - 19, 20, 0, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; static RE_UINT8 re_indic_matra_category_stage_3[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0, 5, 6, 7, 4, 0, - 0, 0, 0, 5, 8, 0, 0, 0, 0, 0, 0, 5, 9, 0, 4, 0, - 0, 0, 0, 10, 11, 12, 4, 0, 0, 0, 0, 13, 14, 7, 0, 0, - 0, 0, 0, 15, 16, 17, 4, 0, 0, 0, 0, 10, 18, 19, 4, 0, - 0, 0, 0, 13, 20, 7, 4, 0, 0, 0, 0, 0, 21, 22, 0, 23, - 0, 0, 0, 24, 25, 0, 0, 0, 0, 0, 0, 26, 27, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 28, 29, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 30, 31, 0, 32, 33, 34, 35, 36, 0, 0, 0, 0, 0, 0, - 0, 37, 0, 37, 0, 38, 0, 38, 0, 0, 0, 39, 40, 41, 0, 0, - 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 43, 44, 0, 0, 0, - 0, 45, 0, 0, 0, 0, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 48, 49, 0, 0, 0, 0, 0, 50, 0, 0, 0, 0, 23, - 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 52, 0, 53, 0, 0, 0, 0, 0, 0, 0, 0, 54, 55, 0, 0, 0, - 0, 0, 0, 0, 56, 57, 0, 0, 0, 0, 0, 58, 59, 0, 0, 0, - 0, 0, 60, 61, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 63, 64, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, 0, - 66, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 68, 69, 0, 0, 0, 0, 0, 0, 70, 0, 0, 0, 0, - 0, 0, 71, 72, 0, 0, 0, 0, 0, 0, 0, 73, 44, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 74, 69, 0, 0, 0, 0, + 0, 0, 0, 5, 8, 0, 0, 9, 0, 0, 0, 5, 10, 0, 4, 0, + 0, 0, 0, 11, 12, 13, 4, 0, 0, 0, 0, 14, 15, 7, 0, 0, + 0, 0, 0, 16, 17, 18, 4, 0, 0, 0, 0, 11, 19, 20, 4, 0, + 0, 0, 0, 14, 21, 7, 4, 0, 0, 0, 0, 0, 22, 23, 0, 24, + 0, 0, 0, 25, 26, 0, 0, 0, 0, 0, 0, 27, 28, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 29, 30, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 31, 32, 0, 33, 34, 35, 36, 37, 0, 0, 
0, 0, 0, 0, + 0, 38, 0, 38, 0, 39, 0, 39, 0, 0, 0, 40, 41, 42, 0, 0, + 0, 0, 43, 0, 0, 0, 0, 0, 0, 0, 0, 44, 45, 0, 0, 0, + 0, 46, 0, 0, 0, 47, 48, 49, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 50, 51, 0, 0, 0, 0, 0, 52, 0, 0, 0, 53, 24, + 0, 0, 54, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 56, 0, 57, 0, 0, 0, 0, 0, 0, 0, 0, 58, 59, 0, 0, 0, + 0, 0, 0, 0, 60, 61, 0, 0, 0, 0, 0, 62, 63, 0, 0, 0, + 0, 0, 64, 65, 0, 0, 0, 0, 0, 0, 0, 66, 0, 0, 67, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68, 0, + 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 70, 71, 0, 0, 0, 0, 0, 0, 72, 0, 0, 0, 0, + 0, 0, 73, 74, 0, 0, 0, 0, 0, 0, 0, 75, 45, 0, 0, 0, + 0, 0, 76, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 78, 0, + 0, 0, 0, 14, 79, 7, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, 81, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 83, 0, 0, 0, 0, + 0, 0, 0, 84, 0, 0, 0, 0, 0, 0, 85, 71, 0, 0, 0, 0, }; static RE_UINT8 re_indic_matra_category_stage_4[] = { @@ -10750,54 +11395,59 @@ static RE_UINT8 re_indic_matra_category_stage_4[] = { 3, 4, 5, 6, 1, 7, 3, 8, 0, 0, 9, 4, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 4, 10, 11, 12, 13, 14, 0, 0, 0, 0, 15, 0, 0, 0, 0, - 3, 10, 0, 9, 16, 9, 17, 0, 3, 4, 5, 9, 18, 15, 3, 0, - 0, 0, 0, 0, 0, 0, 0, 19, 3, 4, 10, 11, 20, 13, 21, 0, - 0, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, - 17, 10, 0, 22, 12, 23, 24, 0, 0, 0, 0, 0, 0, 0, 0, 6, - 1, 7, 25, 6, 26, 6, 6, 0, 0, 0, 9, 10, 0, 0, 0, 0, - 27, 7, 25, 18, 28, 29, 6, 0, 0, 0, 15, 25, 0, 0, 0, 0, - 7, 3, 10, 22, 12, 23, 24, 0, 0, 0, 0, 0, 0, 16, 0, 15, - 7, 6, 10, 10, 2, 30, 23, 31, 0, 7, 0, 0, 0, 0, 0, 0, - 19, 7, 6, 6, 4, 10, 0, 0, 32, 32, 33, 9, 0, 0, 0, 16, - 19, 7, 6, 6, 4, 9, 0, 0, 32, 32, 34, 0, 0, 0, 0, 0, - 35, 36, 4, 37, 37, 6, 6, 0, 36, 0, 10, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 15, 19, 17, 38, 6, 6, 0, 39, 16, 0, 0, - 0, 0, 0, 7, 4, 0, 0, 0, 0, 25, 0, 15, 25, 0, 0, 0, - 9, 6, 16, 0, 0, 0, 0, 0, 0, 15, 40, 16, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 19, 0, 
0, 17, 10, 0, 0, 0, 0, 0, - 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 6, 17, 4, 41, - 42, 22, 23, 0, 25, 0, 0, 0, 9, 43, 0, 0, 0, 0, 0, 0, - 6, 44, 45, 46, 16, 0, 0, 0, 7, 7, 2, 22, 7, 8, 7, 7, - 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 38, 2, 0, 0, - 47, 1, 19, 6, 17, 5, 44, 22, 22, 40, 16, 0, 0, 0, 0, 0, - 0, 0, 15, 6, 4, 48, 49, 22, 23, 18, 25, 0, 0, 0, 0, 0, - 0, 0, 17, 8, 6, 25, 0, 0, 0, 0, 0, 2, 50, 7, 10, 0, + 3, 10, 0, 9, 16, 9, 17, 0, 9, 0, 0, 0, 0, 0, 0, 0, + 3, 4, 5, 9, 18, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0, 19, + 3, 4, 10, 11, 20, 13, 21, 0, 0, 0, 0, 18, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 7, 17, 10, 0, 22, 12, 23, 24, 0, + 0, 0, 0, 0, 0, 0, 0, 6, 1, 7, 25, 6, 26, 6, 6, 0, + 0, 0, 9, 10, 0, 0, 0, 0, 27, 7, 25, 18, 28, 29, 6, 0, + 0, 0, 15, 25, 0, 0, 0, 0, 7, 3, 10, 22, 12, 23, 24, 0, + 0, 0, 0, 0, 0, 16, 0, 15, 7, 6, 10, 10, 2, 30, 31, 32, + 0, 7, 0, 0, 0, 0, 0, 0, 19, 7, 6, 6, 4, 10, 0, 0, + 33, 33, 34, 9, 0, 0, 0, 16, 19, 7, 6, 6, 4, 9, 0, 0, + 33, 33, 35, 0, 0, 0, 0, 0, 36, 37, 4, 38, 38, 6, 6, 0, + 37, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 19, 17, + 39, 6, 6, 0, 0, 16, 0, 0, 0, 0, 0, 7, 4, 0, 0, 0, + 0, 25, 0, 15, 25, 0, 0, 0, 9, 6, 16, 0, 0, 0, 0, 0, + 0, 15, 40, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 0, + 0, 17, 10, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 19, 6, 17, 4, 41, 42, 22, 23, 0, 19, 6, 6, 6, + 6, 9, 0, 0, 0, 0, 0, 0, 6, 43, 44, 45, 16, 0, 0, 0, + 7, 7, 2, 22, 7, 8, 7, 7, 25, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 9, 39, 19, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, + 15, 1, 19, 6, 17, 5, 43, 22, 22, 40, 16, 0, 0, 0, 0, 0, + 0, 0, 15, 6, 4, 46, 47, 22, 23, 18, 25, 0, 0, 0, 0, 0, + 0, 0, 17, 8, 6, 25, 0, 0, 0, 0, 0, 15, 6, 7, 19, 19, + 0, 0, 0, 2, 48, 7, 10, 0, 0, 0, 22, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 15, 3, 1, 0, 0, 0, 0, 0, 0, 15, 7, 7, 7, 7, 7, 7, 7, 10, 0, 0, 0, 0, 0, - 0, 0, 0, 35, 4, 17, 4, 10, 0, 15, 0, 0, 0, 0, 0, 0, - 0, 0, 7, 6, 4, 22, 16, 0, 51, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 9, 6, 17, 52, 40, 10, 0, 0, 0, 0, 0, 0, - 1, 6, 53, 54, 
55, 56, 33, 16, 0, 0, 0, 0, 0, 11, 5, 8, - 0, 0, 0, 43, 0, 0, 0, 0, 0, 15, 19, 7, 44, 25, 35, 0, - 57, 4, 9, 58, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 39, + 0, 0, 0, 36, 4, 17, 4, 10, 0, 15, 0, 0, 0, 0, 0, 0, + 0, 0, 7, 6, 4, 22, 16, 0, 49, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 9, 6, 17, 50, 40, 10, 12, 0, 0, 0, 0, 0, + 1, 6, 51, 52, 53, 54, 34, 16, 0, 0, 0, 0, 0, 11, 5, 8, + 0, 15, 19, 7, 43, 25, 36, 0, 55, 4, 9, 56, 0, 0, 10, 0, 0, 0, 0, 0, 6, 6, 4, 4, 4, 6, 6, 16, 0, 0, 0, 0, - 2, 3, 5, 1, 3, 0, 0, 0, 0, 0, 0, 9, 6, 4, 40, 37, - 17, 59, 16, 0, 0, 0, 0, 0, 0, 15, 8, 4, 4, 4, 6, 18, - 0, 0, 0, 0, 0, 0, 9, 8, + 2, 3, 5, 1, 3, 0, 0, 0, 0, 0, 0, 9, 6, 4, 40, 38, + 17, 10, 16, 0, 0, 0, 0, 0, 0, 15, 8, 4, 4, 4, 6, 18, + 0, 0, 0, 0, 0, 0, 7, 3, 6, 29, 15, 9, 0, 0, 0, 0, + 2, 3, 5, 6, 16, 10, 0, 0, 1, 7, 25, 11, 12, 13, 32, 0, + 2, 3, 4, 4, 39, 57, 32, 58, 0, 10, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 15, 8, 4, 4, 0, 48, 31, 0, 36, + 7, 3, 4, 4, 5, 1, 25, 36, 0, 0, 0, 0, 0, 0, 9, 8, }; static RE_UINT8 re_indic_matra_category_stage_5[] = { 0, 0, 5, 1, 1, 2, 1, 6, 6, 6, 6, 5, 5, 5, 1, 1, 2, 1, 0, 5, 6, 0, 0, 2, 2, 0, 0, 4, 4, 6, 0, 1, 5, 0, 5, 6, 5, 8, 1, 5, 9, 0, 10, 6, 2, 2, 4, 4, - 4, 5, 1, 0, 7, 0, 8, 1, 8, 0, 8, 8, 9, 2, 4, 1, - 3, 3, 3, 1, 3, 0, 0, 6, 5, 7, 7, 7, 6, 2, 0, 14, - 2, 5, 9, 10, 4, 2, 14, 0, 6, 1, 1, 8, 8, 5, 14, 1, - 6, 11, 7, 12, 2, 9, 11, 0, 5, 2, 6, 3, 3, 5, 5, 3, - 1, 3, 0, 13, 13, 0, 6, 14, + 4, 5, 1, 0, 7, 0, 8, 1, 8, 0, 8, 8, 9, 2, 4, 10, + 4, 1, 3, 3, 3, 1, 3, 0, 0, 6, 5, 7, 7, 7, 6, 2, + 2, 5, 9, 10, 4, 2, 6, 1, 1, 8, 8, 5, 6, 11, 7, 12, + 2, 9, 11, 0, 5, 2, 6, 3, 3, 5, 5, 3, 1, 3, 0, 13, + 13, 0, 5, 9, 4, 0, }; -/* Indic_Matra_Category: 1336 bytes. */ +/* Indic_Matra_Category: 1486 bytes. 
*/ RE_UINT32 re_get_indic_matra_category(RE_UINT32 ch) { RE_UINT32 code; @@ -10839,131 +11489,153 @@ static RE_UINT8 re_indic_syllabic_category_stage_1[] = { static RE_UINT8 re_indic_syllabic_category_stage_2[] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 1, 1, 1, 1, 1, 10, 1, 11, 12, 13, 14, 1, 1, 1, - 1, 1, 1, 1, 1, 15, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 15, 1, 1, 1, 1, 16, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 16, 17, 18, 19, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 17, 18, 19, 20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20, 1, 1, 1, 1, 1, - 21, 22, 1, 1, 1, 1, 23, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 21, 1, 1, 1, 1, 1, + 22, 23, 24, 25, 26, 27, 28, 1, 1, 1, 1, 1, 1, 1, 1, 1, }; static RE_UINT8 re_indic_syllabic_category_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 3, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 8, 16, - 17, 11, 12, 18, 19, 20, 0, 21, 22, 23, 12, 24, 25, 0, 8, 0, - 10, 11, 12, 24, 26, 27, 8, 28, 29, 30, 31, 32, 33, 34, 0, 0, - 35, 36, 12, 37, 38, 39, 8, 0, 40, 36, 12, 41, 38, 42, 8, 0, - 40, 36, 4, 43, 44, 34, 8, 45, 46, 47, 4, 48, 49, 50, 0, 51, - 52, 4, 53, 54, 55, 0, 0, 0, 56, 57, 58, 59, 60, 61, 0, 0, - 0, 0, 0, 0, 62, 4, 63, 64, 65, 66, 67, 68, 0, 0, 0, 0, - 4, 4, 69, 70, 0, 71, 72, 73, 74, 75, 0, 0, 0, 0, 0, 0, - 76, 77, 78, 77, 78, 79, 76, 80, 4, 4, 81, 82, 83, 84, 0, 0, - 85, 63, 86, 87, 0, 4, 88, 89, 4, 4, 90, 91, 92, 0, 0, 0, - 4, 93, 4, 4, 94, 95, 96, 97, 0, 0, 0, 0, 0, 0, 0, 0, - 98, 78, 4, 99, 100, 0, 0, 0, 101, 4, 102, 103, 4, 4, 104, 105, - 4, 4, 106, 107, 108, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 109, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, - 111, 4, 112, 0, 4, 113, 
114, 115, 116, 117, 4, 118, 119, 0, 0, 0, - 120, 4, 121, 4, 122, 123, 0, 0, 124, 4, 4, 125, 126, 0, 0, 0, - 127, 4, 128, 129, 130, 0, 4, 131, 4, 4, 4, 132, 133, 0, 134, 135, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 136, 137, 138, 0, - 139, 140, 4, 141, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 142, 78, 4, 143, 144, 0, 0, 0, 145, 4, 4, 146, 0, 0, 0, 0, - 147, 4, 148, 149, 0, 0, 0, 0, 150, 151, 4, 152, 153, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 154, 4, 155, 156, 0, 0, 0, 0, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 11, 19, + 20, 14, 15, 21, 22, 23, 24, 25, 26, 27, 15, 28, 29, 0, 11, 0, + 13, 14, 15, 28, 30, 31, 11, 32, 33, 34, 35, 36, 37, 38, 24, 0, + 39, 40, 15, 41, 42, 43, 11, 0, 44, 40, 15, 45, 42, 46, 11, 0, + 44, 40, 7, 47, 48, 38, 11, 49, 50, 51, 7, 52, 53, 54, 24, 55, + 56, 7, 57, 58, 59, 2, 0, 0, 60, 61, 62, 63, 64, 65, 0, 0, + 0, 0, 66, 67, 68, 7, 69, 70, 71, 72, 73, 74, 0, 0, 0, 0, + 7, 7, 75, 76, 77, 78, 79, 80, 81, 82, 0, 0, 0, 0, 0, 0, + 83, 84, 85, 84, 85, 86, 83, 87, 7, 7, 88, 89, 90, 91, 2, 0, + 92, 57, 93, 94, 24, 7, 95, 96, 7, 7, 97, 98, 99, 2, 0, 0, + 7, 100, 7, 7, 101, 102, 103, 104, 2, 2, 0, 0, 0, 0, 0, 0, + 105, 85, 7, 106, 107, 2, 0, 0, 108, 7, 109, 110, 7, 7, 111, 112, + 7, 7, 113, 114, 115, 0, 0, 0, 0, 0, 0, 0, 0, 116, 117, 118, + 119, 120, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 0, 0, + 122, 7, 123, 0, 7, 124, 125, 126, 127, 128, 7, 129, 130, 2, 131, 132, + 133, 7, 134, 7, 135, 136, 0, 0, 137, 7, 7, 138, 139, 2, 140, 141, + 142, 7, 143, 144, 145, 2, 7, 146, 7, 7, 7, 147, 148, 0, 149, 150, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 151, 152, 153, 2, + 154, 155, 7, 156, 157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 158, 85, 7, 159, 160, 161, 162, 163, 164, 7, 7, 165, 0, 0, 0, 0, + 166, 7, 167, 168, 0, 169, 7, 170, 171, 172, 7, 173, 174, 2, 175, 176, + 177, 178, 179, 180, 0, 0, 0, 0, 0, 0, 0, 181, 7, 182, 183, 2, + 13, 14, 15, 28, 30, 38, 184, 185, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 186, 7, 
7, 187, 188, 2, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 189, 7, 190, 191, 192, 0, 0, 0, + 189, 7, 7, 193, 0, 2, 0, 0, 181, 7, 194, 195, 2, 0, 0, 0, }; static RE_UINT8 re_indic_syllabic_category_stage_4[] = { - 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 3, 3, 3, 4, 5, 5, - 5, 5, 5, 5, 5, 5, 6, 7, 8, 8, 8, 9, 0, 10, 5, 5, - 11, 0, 0, 0, 12, 3, 13, 5, 14, 15, 3, 16, 16, 4, 5, 5, - 5, 5, 17, 5, 18, 19, 20, 7, 8, 21, 21, 22, 0, 23, 0, 24, - 20, 0, 0, 0, 14, 15, 25, 26, 17, 27, 20, 28, 29, 23, 21, 30, - 0, 0, 13, 18, 31, 32, 0, 0, 14, 15, 3, 33, 33, 4, 5, 5, - 17, 13, 20, 7, 8, 34, 34, 30, 8, 21, 21, 30, 0, 35, 0, 24, - 36, 0, 0, 0, 37, 15, 25, 12, 38, 39, 27, 17, 40, 41, 42, 19, - 5, 5, 20, 35, 29, 35, 43, 30, 0, 23, 0, 0, 14, 15, 3, 38, - 38, 4, 5, 5, 5, 13, 20, 44, 8, 43, 43, 30, 0, 45, 20, 0, - 46, 15, 3, 38, 5, 13, 20, 7, 0, 45, 0, 47, 5, 5, 42, 44, - 8, 43, 43, 48, 0, 0, 49, 50, 46, 15, 3, 3, 3, 25, 19, 5, - 24, 5, 5, 36, 5, 42, 51, 23, 8, 52, 8, 8, 35, 0, 0, 0, - 13, 5, 5, 5, 5, 5, 5, 42, 8, 8, 53, 0, 8, 34, 54, 55, - 27, 56, 18, 36, 0, 5, 13, 5, 13, 57, 19, 27, 8, 8, 34, 58, - 8, 59, 54, 60, 0, 0, 0, 20, 5, 5, 13, 5, 5, 5, 5, 41, - 10, 8, 8, 61, 62, 63, 64, 65, 66, 66, 67, 66, 66, 66, 66, 66, - 66, 66, 66, 68, 69, 3, 70, 8, 8, 71, 72, 73, 74, 11, 75, 76, - 77, 78, 79, 80, 81, 82, 5, 5, 83, 84, 54, 85, 0, 0, 86, 87, - 88, 5, 5, 17, 6, 89, 0, 0, 88, 5, 5, 5, 6, 0, 0, 0, - 90, 0, 0, 0, 91, 3, 3, 3, 3, 35, 8, 8, 8, 61, 92, 93, - 94, 0, 0, 95, 96, 5, 5, 5, 8, 8, 97, 0, 98, 99, 100, 0, - 101, 102, 102, 103, 104, 105, 0, 0, 5, 5, 5, 0, 8, 8, 8, 8, - 106, 99, 107, 0, 5, 108, 8, 0, 5, 5, 5, 69, 88, 109, 99, 110, - 111, 8, 8, 8, 8, 79, 107, 0, 112, 113, 3, 3, 5, 114, 8, 8, - 8, 115, 5, 0, 116, 3, 117, 5, 118, 8, 119, 120, 0, 0, 121, 122, - 5, 123, 8, 8, 124, 0, 0, 0, 5, 125, 8, 106, 99, 126, 0, 0, - 0, 0, 0, 13, 127, 0, 0, 0, 0, 0, 0, 1, 33, 128, 129, 5, - 108, 8, 0, 0, 5, 5, 5, 130, 131, 132, 133, 5, 134, 0, 0, 0, - 135, 3, 3, 3, 117, 5, 5, 5, 5, 136, 8, 8, 8, 89, 0, 0, - 0, 0, 19, 5, 130, 
102, 137, 107, 5, 108, 8, 138, 139, 0, 0, 0, - 140, 3, 4, 88, 141, 8, 8, 142, 89, 0, 0, 0, 3, 117, 5, 5, - 5, 5, 81, 8, 143, 144, 0, 0, 99, 99, 99, 145, 13, 0, 146, 0, - 8, 8, 8, 84, 147, 0, 0, 0, 117, 5, 108, 8, 0, 148, 0, 0, - 5, 5, 5, 74, 149, 5, 150, 99, 151, 8, 29, 152, 81, 45, 0, 153, - 5, 13, 13, 5, 5, 0, 0, 154, 155, 15, 3, 3, 5, 5, 8, 8, - 8, 53, 0, 0, 156, 3, 3, 4, 8, 8, 157, 0, 156, 88, 5, 5, - 5, 108, 8, 8, 158, 89, 0, 0, 156, 3, 3, 3, 4, 5, 5, 5, - 108, 8, 8, 8, 63, 0, 0, 0, 3, 3, 117, 5, 5, 5, 129, 159, - 8, 160, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 3, 0, 4, 0, 0, 0, + 0, 5, 0, 0, 6, 7, 7, 7, 7, 8, 9, 9, 9, 9, 9, 9, + 9, 9, 10, 11, 12, 12, 12, 13, 14, 15, 9, 9, 16, 17, 2, 2, + 18, 7, 9, 9, 19, 20, 7, 21, 21, 8, 9, 9, 9, 9, 22, 9, + 23, 24, 25, 11, 12, 26, 26, 27, 0, 28, 0, 29, 25, 0, 0, 0, + 19, 20, 30, 31, 22, 32, 25, 33, 34, 28, 26, 35, 0, 0, 36, 23, + 0, 17, 2, 2, 37, 38, 0, 0, 19, 20, 7, 39, 39, 8, 9, 9, + 22, 36, 25, 11, 12, 40, 40, 35, 12, 26, 26, 35, 0, 41, 0, 29, + 42, 0, 0, 0, 43, 20, 30, 18, 44, 45, 32, 22, 46, 47, 48, 24, + 9, 9, 25, 41, 34, 41, 49, 35, 0, 28, 0, 0, 6, 20, 7, 44, + 44, 8, 9, 9, 9, 9, 25, 50, 12, 49, 49, 35, 0, 51, 25, 0, + 19, 20, 7, 44, 9, 36, 25, 11, 0, 51, 0, 52, 9, 9, 48, 50, + 12, 49, 49, 53, 0, 0, 54, 55, 56, 20, 7, 7, 7, 30, 24, 9, + 29, 9, 9, 42, 9, 48, 57, 28, 12, 58, 12, 12, 41, 0, 0, 0, + 36, 9, 9, 9, 9, 9, 9, 48, 12, 12, 59, 0, 12, 40, 60, 61, + 32, 62, 23, 42, 0, 9, 36, 9, 36, 63, 24, 32, 12, 12, 40, 64, + 12, 65, 60, 66, 2, 2, 3, 9, 2, 2, 2, 2, 2, 0, 0, 0, + 9, 9, 36, 9, 9, 9, 9, 47, 15, 12, 12, 67, 68, 69, 70, 71, + 72, 72, 73, 72, 72, 72, 72, 72, 72, 72, 72, 74, 75, 7, 76, 12, + 12, 77, 78, 79, 2, 2, 3, 80, 81, 16, 82, 83, 84, 85, 86, 87, + 88, 89, 9, 9, 90, 91, 60, 92, 2, 2, 93, 94, 95, 9, 9, 22, + 10, 96, 0, 0, 95, 9, 9, 9, 10, 0, 0, 0, 97, 0, 0, 0, + 98, 7, 7, 7, 7, 41, 12, 12, 12, 67, 99, 100, 101, 0, 0, 102, + 103, 9, 9, 9, 12, 12, 104, 0, 105, 106, 107, 0, 108, 109, 109, 110, + 111, 112, 0, 0, 9, 9, 
9, 0, 12, 12, 12, 12, 113, 106, 114, 0, + 9, 115, 12, 0, 9, 9, 9, 75, 95, 116, 106, 117, 118, 12, 12, 12, + 12, 86, 114, 0, 119, 120, 7, 7, 9, 121, 12, 12, 12, 122, 9, 0, + 123, 7, 124, 9, 125, 12, 126, 127, 2, 2, 128, 129, 9, 130, 12, 12, + 131, 0, 0, 0, 9, 132, 12, 113, 106, 133, 0, 0, 2, 2, 3, 36, + 134, 60, 60, 60, 114, 0, 0, 0, 135, 136, 0, 0, 0, 0, 0, 137, + 138, 4, 0, 0, 0, 0, 0, 4, 39, 139, 140, 9, 115, 12, 0, 0, + 9, 9, 9, 141, 142, 143, 144, 9, 145, 0, 0, 0, 146, 7, 7, 7, + 124, 9, 9, 9, 9, 147, 12, 12, 12, 148, 0, 0, 149, 149, 149, 149, + 150, 0, 0, 0, 2, 2, 151, 9, 141, 109, 152, 114, 9, 115, 12, 153, + 154, 0, 0, 0, 155, 7, 8, 95, 156, 12, 12, 157, 148, 0, 0, 0, + 9, 62, 9, 9, 2, 2, 151, 48, 7, 124, 9, 9, 9, 9, 88, 12, + 158, 159, 0, 0, 106, 106, 106, 160, 36, 0, 161, 87, 12, 12, 12, 91, + 162, 0, 0, 0, 124, 9, 115, 12, 0, 163, 0, 0, 9, 9, 9, 81, + 164, 9, 165, 106, 166, 12, 34, 167, 88, 51, 0, 168, 9, 36, 36, 9, + 9, 0, 0, 169, 2, 2, 0, 0, 170, 20, 7, 7, 9, 9, 12, 12, + 12, 171, 0, 0, 172, 173, 173, 173, 173, 174, 2, 2, 0, 0, 0, 175, + 176, 7, 7, 8, 12, 12, 177, 0, 176, 95, 9, 9, 9, 115, 12, 12, + 178, 179, 2, 2, 109, 180, 9, 9, 156, 0, 0, 0, 176, 7, 7, 7, + 8, 9, 9, 9, 115, 12, 12, 12, 181, 0, 0, 0, 182, 2, 2, 2, + 2, 183, 0, 0, 7, 7, 9, 9, 29, 9, 9, 9, 9, 9, 9, 12, + 12, 184, 0, 0, 7, 7, 124, 9, 9, 9, 9, 140, 12, 12, 185, 0, + 16, 186, 149, 187, 149, 187, 0, 0, 20, 7, 7, 95, 12, 12, 12, 188, + 189, 102, 0, 0, 7, 7, 7, 124, 9, 9, 9, 115, 12, 94, 12, 190, + 191, 0, 0, 0, 12, 12, 12, 192, 9, 9, 140, 193, 12, 194, 0, 0, }; static RE_UINT8 re_indic_syllabic_category_stage_5[] = { - 0, 0, 0, 0, 9, 0, 0, 0, 1, 1, 1, 2, 6, 6, 6, 6, - 6, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 4, 3, 7, 7, - 7, 7, 7, 7, 7, 5, 7, 7, 0, 7, 7, 7, 6, 6, 7, 7, - 0, 0, 6, 6, 0, 10, 10, 10, 0, 1, 1, 2, 0, 6, 6, 6, - 6, 0, 0, 6, 10, 0, 10, 10, 10, 0, 10, 0, 0, 0, 10, 10, - 10, 10, 0, 0, 7, 0, 0, 7, 7, 5, 11, 0, 0, 0, 0, 7, - 10, 10, 0, 10, 6, 6, 6, 0, 0, 0, 0, 6, 0, 10, 10, 0, - 4, 
0, 7, 7, 7, 7, 7, 0, 7, 5, 0, 0, 1, 0, 9, 9, - 0, 14, 0, 0, 6, 6, 0, 6, 7, 7, 0, 7, 0, 0, 7, 7, - 0, 10, 0, 0, 0, 0, 1, 17, 6, 0, 6, 6, 6, 10, 0, 0, - 0, 0, 0, 10, 10, 0, 0, 0, 10, 10, 10, 0, 7, 0, 7, 7, - 0, 3, 7, 7, 0, 7, 7, 0, 0, 0, 1, 2, 0, 0, 10, 0, - 7, 5, 12, 0, 0, 0, 11, 11, 11, 11, 11, 11, 0, 0, 5, 0, - 7, 0, 7, 0, 7, 7, 5, 0, 19, 19, 19, 19, 0, 1, 5, 0, - 10, 0, 0, 10, 0, 10, 0, 10, 14, 14, 0, 0, 7, 0, 0, 0, - 0, 1, 0, 0, 7, 7, 1, 2, 7, 7, 1, 1, 5, 3, 0, 0, - 16, 16, 16, 16, 16, 13, 13, 13, 13, 13, 13, 13, 0, 13, 13, 13, - 13, 0, 0, 0, 10, 6, 6, 6, 6, 6, 6, 7, 7, 7, 1, 19, - 2, 5, 5, 14, 14, 14, 14, 10, 10, 10, 6, 6, 7, 7, 10, 10, - 10, 10, 14, 14, 14, 10, 7, 19, 19, 10, 10, 7, 7, 19, 19, 19, - 19, 19, 10, 10, 10, 7, 7, 7, 7, 10, 10, 10, 10, 10, 14, 7, - 7, 7, 7, 19, 19, 19, 10, 19, 0, 0, 19, 19, 7, 7, 0, 0, - 6, 6, 6, 10, 5, 0, 0, 0, 10, 0, 7, 7, 10, 10, 10, 6, - 7, 20, 20, 0, 12, 0, 0, 0, 0, 5, 5, 0, 3, 0, 0, 0, - 9, 10, 10, 10, 7, 13, 13, 13, 15, 15, 1, 15, 15, 15, 15, 15, - 15, 0, 0, 0, 10, 10, 10, 8, 8, 8, 8, 8, 8, 8, 0, 0, - 18, 18, 18, 18, 18, 0, 0, 0, 7, 15, 15, 15, 19, 19, 0, 0, - 10, 10, 10, 7, 10, 14, 14, 15, 15, 15, 15, 0, 5, 7, 7, 7, - 1, 1, 1, 12, 2, 6, 6, 6, 4, 7, 7, 7, 5, 10, 10, 10, - 1, 12, 2, 6, 6, 6, 10, 10, 10, 13, 13, 13, 7, 7, 5, 5, - 13, 13, 10, 10, 0, 0, 3, 10, 10, 10, 15, 15, 6, 6, 4, 7, - 15, 15, 5, 5, 13, 13, 7, 7, 1, 1, 0, 4, 0, 0, 2, 2, - 6, 6, 5, 10, 10, 10, 10, 1, 10, 10, 8, 8, 8, 8, 10, 10, - 10, 10, 8, 13, 13, 10, 10, 10, 10, 13, 10, 1, 1, 2, 6, 6, - 15, 7, 7, 7, 8, 8, 8, 19, 7, 7, 7, 15, 15, 15, 15, 5, - 1, 1, 12, 2, 10, 10, 10, 4, 7, 13, 14, 14, 7, 7, 7, 14, - 14, 14, 14, 0, 15, 15, 0, 0, 0, 0, 10, 19, 18, 19, 18, 0, - 0, 2, 5, 0, 10, 6, 10, 10, 10, 10, 10, 15, 15, 15, 15, 7, - 19, 5, 0, 0, 7, 0, 1, 2, 0, 0, 0, 5, 1, 1, 2, 0, - 1, 1, 2, 6, 7, 5, 4, 0, 7, 7, 7, 5, 2, 7, 7, 7, - 7, 7, 5, 4, + 0, 0, 0, 0, 0, 11, 0, 0, 29, 29, 29, 29, 29, 29, 0, 0, + 11, 0, 0, 0, 0, 0, 0, 11, 1, 1, 1, 2, 8, 8, 8, 8, + 8, 12, 12, 12, 12, 12, 
12, 12, 12, 12, 9, 9, 4, 3, 9, 9, + 9, 9, 9, 9, 9, 5, 9, 9, 0, 22, 22, 0, 0, 9, 9, 9, + 8, 8, 9, 9, 0, 0, 29, 29, 0, 0, 8, 8, 0, 1, 1, 2, + 0, 8, 8, 8, 8, 0, 0, 8, 12, 0, 12, 12, 12, 0, 12, 0, + 0, 0, 12, 12, 12, 12, 0, 0, 9, 0, 0, 9, 9, 5, 13, 0, + 0, 0, 0, 9, 12, 12, 0, 12, 8, 8, 8, 0, 0, 0, 0, 8, + 0, 12, 12, 0, 4, 0, 9, 9, 9, 9, 9, 0, 9, 5, 0, 0, + 0, 12, 12, 12, 1, 23, 11, 11, 0, 17, 0, 0, 8, 8, 0, 8, + 9, 9, 0, 9, 0, 0, 9, 9, 0, 12, 0, 0, 0, 0, 1, 20, + 8, 0, 8, 8, 8, 12, 0, 0, 0, 0, 0, 12, 12, 0, 0, 0, + 12, 12, 12, 0, 9, 0, 9, 9, 0, 3, 9, 9, 0, 9, 9, 0, + 0, 0, 12, 0, 9, 5, 14, 0, 0, 0, 13, 13, 13, 13, 13, 13, + 0, 0, 1, 2, 0, 0, 5, 0, 9, 0, 9, 0, 9, 9, 6, 0, + 22, 22, 22, 22, 0, 1, 6, 0, 12, 0, 0, 12, 0, 12, 0, 12, + 17, 17, 0, 0, 9, 0, 0, 0, 0, 1, 0, 0, 9, 9, 1, 2, + 9, 9, 1, 1, 6, 3, 0, 0, 19, 19, 19, 19, 19, 16, 16, 16, + 16, 16, 16, 16, 0, 16, 16, 16, 16, 0, 0, 0, 12, 8, 8, 8, + 8, 8, 8, 9, 9, 9, 1, 22, 2, 7, 6, 17, 17, 17, 17, 12, + 0, 0, 11, 0, 12, 12, 8, 8, 9, 9, 12, 12, 12, 12, 17, 17, + 17, 12, 9, 22, 22, 12, 12, 9, 9, 22, 22, 22, 22, 22, 12, 12, + 12, 9, 9, 9, 9, 12, 12, 12, 12, 12, 17, 9, 9, 9, 9, 22, + 22, 22, 12, 22, 29, 29, 22, 22, 9, 9, 0, 0, 8, 8, 8, 12, + 6, 0, 0, 0, 12, 0, 9, 9, 12, 12, 12, 8, 9, 25, 25, 25, + 15, 0, 0, 0, 0, 6, 7, 0, 3, 0, 0, 0, 11, 12, 12, 12, + 9, 16, 16, 16, 18, 18, 1, 18, 18, 18, 18, 18, 18, 0, 0, 0, + 12, 12, 12, 10, 10, 10, 10, 10, 10, 10, 0, 0, 21, 21, 21, 21, + 21, 0, 0, 0, 9, 18, 18, 18, 22, 22, 0, 0, 12, 12, 12, 9, + 12, 17, 17, 18, 18, 18, 18, 0, 7, 9, 9, 9, 1, 1, 1, 15, + 2, 8, 8, 8, 4, 9, 9, 9, 5, 12, 12, 12, 1, 15, 2, 8, + 8, 8, 12, 12, 12, 16, 16, 16, 9, 9, 6, 7, 16, 16, 12, 12, + 29, 29, 3, 12, 12, 12, 18, 18, 8, 8, 4, 9, 18, 18, 6, 6, + 16, 16, 9, 9, 1, 1, 0, 4, 22, 22, 22, 0, 0, 0, 2, 2, + 22, 0, 0, 0, 26, 27, 0, 0, 0, 0, 11, 11, 8, 8, 6, 12, + 12, 12, 12, 1, 12, 12, 10, 10, 10, 10, 12, 12, 12, 12, 10, 16, + 16, 12, 12, 12, 12, 16, 12, 1, 1, 2, 8, 8, 18, 9, 9, 9, + 5, 0, 0, 0, 24, 24, 24, 24, 24, 24, 0, 
0, 29, 29, 12, 12, + 10, 10, 10, 22, 9, 9, 9, 18, 18, 18, 18, 6, 1, 1, 15, 2, + 12, 12, 12, 4, 9, 16, 17, 17, 9, 9, 9, 17, 17, 17, 17, 0, + 18, 18, 0, 0, 0, 0, 12, 22, 21, 22, 21, 0, 0, 2, 7, 0, + 12, 8, 12, 12, 12, 12, 12, 18, 18, 18, 18, 9, 22, 6, 0, 0, + 9, 0, 1, 2, 0, 0, 0, 7, 1, 1, 2, 0, 9, 9, 5, 0, + 0, 0, 30, 30, 30, 30, 30, 30, 30, 30, 29, 29, 0, 0, 0, 28, + 1, 1, 2, 8, 9, 5, 4, 0, 9, 9, 9, 7, 6, 0, 29, 29, + 10, 12, 12, 12, 5, 3, 0, 0, 0, 29, 29, 29, 29, 0, 0, 0, + 1, 5, 4, 23, 9, 4, 6, 0, 0, 0, 24, 24, 24, 0, 0, 0, + 9, 9, 9, 1, 1, 2, 5, 4, 1, 1, 2, 5, 4, 0, 0, 0, + 9, 1, 2, 5, 2, 9, 9, 9, 9, 9, 5, 4, }; -/* Indic_Syllabic_Category: 1952 bytes. */ +/* Indic_Syllabic_Category: 2324 bytes. */ RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 ch) { RE_UINT32 code; @@ -10990,8 +11662,174 @@ RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 ch) { /* Alphanumeric. */ +static RE_UINT8 re_alphanumeric_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, +}; + +static RE_UINT8 re_alphanumeric_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, + 15, 16, 17, 18, 19, 13, 20, 13, 13, 13, 13, 13, 13, 21, 13, 13, + 13, 13, 13, 13, 13, 13, 22, 23, 13, 13, 24, 13, 13, 25, 26, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 27, 7, 28, 29, 13, 13, 13, 13, 13, 13, 13, 30, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +}; + +static RE_UINT8 re_alphanumeric_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, + 29, 30, 31, 31, 32, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 31, + 36, 37, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39, + 1, 1, 1, 1, 40, 1, 41, 42, 43, 44, 45, 46, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 47, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 1, 48, 
49, 1, 50, 51, 52, 53, 54, 55, 56, 57, 58, 1, 59, + 60, 61, 62, 63, 64, 31, 31, 31, 65, 66, 67, 68, 69, 70, 71, 31, + 72, 31, 73, 31, 31, 31, 31, 31, 1, 1, 1, 74, 75, 31, 31, 31, + 1, 1, 1, 1, 76, 31, 31, 31, 1, 1, 77, 78, 31, 31, 31, 79, + 80, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 81, 31, 31, 31, + 31, 31, 31, 31, 82, 83, 84, 85, 86, 31, 31, 31, 31, 31, 87, 31, + 31, 88, 31, 31, 31, 31, 31, 31, 1, 1, 1, 1, 1, 1, 89, 1, + 1, 1, 1, 1, 1, 1, 1, 90, 91, 31, 31, 31, 31, 31, 31, 31, + 1, 1, 91, 31, 31, 31, 31, 31, +}; + +static RE_UINT8 re_alphanumeric_stage_4[] = { + 0, 1, 2, 2, 0, 3, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 6, 7, 0, 0, 8, 9, 10, 11, 5, 12, + 5, 5, 5, 5, 13, 5, 5, 5, 5, 14, 15, 16, 17, 18, 19, 20, + 21, 5, 22, 23, 5, 5, 24, 25, 26, 5, 27, 5, 5, 28, 5, 29, + 30, 31, 32, 0, 0, 33, 0, 34, 5, 35, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 50, 47, 51, 52, 53, 54, 55, 56, + 57, 58, 59, 49, 60, 61, 62, 63, 60, 64, 65, 66, 67, 68, 69, 70, + 16, 71, 72, 0, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 0, 0, + 5, 82, 83, 84, 85, 5, 86, 87, 5, 5, 88, 5, 89, 90, 91, 5, + 92, 5, 93, 0, 94, 5, 5, 95, 16, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 96, 2, 5, 5, 97, 98, 99, 99, 100, 5, 101, 102, 77, + 1, 5, 5, 103, 5, 104, 5, 105, 106, 107, 108, 109, 5, 110, 111, 0, + 112, 5, 106, 113, 111, 114, 0, 0, 5, 115, 116, 0, 5, 117, 5, 118, + 5, 105, 119, 120, 0, 0, 0, 121, 5, 5, 5, 5, 5, 5, 0, 122, + 123, 5, 124, 120, 5, 125, 126, 127, 0, 0, 0, 128, 129, 0, 0, 0, + 130, 131, 132, 5, 133, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 134, 5, 77, 5, 135, 106, 5, 5, 5, 5, 136, + 5, 86, 5, 137, 138, 139, 139, 5, 0, 140, 0, 0, 0, 0, 0, 0, + 141, 142, 16, 5, 143, 16, 5, 87, 144, 145, 5, 5, 146, 71, 0, 26, + 5, 5, 5, 5, 5, 105, 0, 0, 5, 5, 5, 5, 5, 5, 31, 0, + 5, 5, 5, 5, 31, 0, 26, 120, 147, 148, 5, 149, 150, 5, 5, 94, + 151, 152, 5, 5, 153, 154, 0, 151, 155, 17, 5, 99, 5, 5, 156, 157, + 5, 104, 33, 81, 5, 158, 159, 160, 5, 138, 161, 162, 5, 106, 163, 164, + 165, 
166, 87, 167, 0, 0, 5, 168, 5, 5, 5, 5, 5, 169, 170, 112, + 5, 5, 5, 171, 5, 5, 172, 0, 173, 174, 175, 5, 5, 28, 176, 5, + 5, 120, 26, 5, 177, 5, 17, 178, 0, 0, 0, 179, 5, 5, 5, 81, + 1, 2, 2, 108, 5, 106, 180, 0, 181, 182, 183, 0, 5, 5, 5, 71, + 0, 0, 5, 95, 0, 0, 0, 0, 0, 0, 0, 0, 81, 5, 184, 0, + 5, 26, 104, 71, 120, 5, 185, 0, 5, 5, 5, 5, 120, 77, 0, 0, + 5, 186, 5, 187, 0, 0, 0, 0, 5, 138, 105, 17, 0, 0, 0, 0, + 188, 189, 105, 138, 106, 0, 0, 0, 105, 172, 0, 0, 5, 190, 0, 0, + 191, 99, 0, 81, 81, 0, 78, 192, 5, 105, 105, 33, 28, 0, 0, 0, + 5, 5, 133, 0, 0, 0, 0, 0, 5, 5, 193, 56, 152, 32, 26, 194, + 5, 195, 26, 196, 5, 5, 197, 0, 198, 199, 0, 0, 0, 26, 5, 194, + 50, 47, 200, 187, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 201, 0, + 0, 0, 0, 0, 5, 202, 0, 0, 5, 106, 203, 0, 5, 105, 77, 0, + 0, 0, 0, 0, 0, 5, 5, 204, 0, 0, 0, 0, 0, 0, 5, 32, + 5, 5, 5, 5, 32, 0, 0, 0, 5, 5, 5, 146, 0, 0, 0, 0, + 5, 146, 0, 0, 0, 0, 0, 0, 5, 32, 106, 77, 0, 0, 26, 205, + 5, 138, 156, 206, 94, 0, 0, 0, 5, 5, 207, 106, 176, 0, 0, 0, + 208, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 209, 210, 0, 0, 0, + 5, 5, 211, 5, 212, 213, 214, 5, 215, 216, 217, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 218, 219, 87, 211, 211, 135, 135, 220, 220, 221, 5, + 5, 5, 5, 5, 5, 5, 192, 0, 214, 222, 223, 224, 225, 226, 0, 0, + 0, 26, 83, 83, 77, 0, 0, 0, 5, 5, 5, 5, 5, 5, 138, 0, + 5, 95, 5, 5, 5, 5, 5, 5, 120, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_alphanumeric_stage_5[] = { + 0, 0, 0, 0, 0, 0, 255, 3, 254, 255, 255, 7, 0, 4, 32, 4, + 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, + 32, 0, 0, 0, 0, 0, 223, 188, 64, 215, 255, 255, 251, 255, 255, 255, + 255, 255, 191, 255, 3, 252, 255, 255, 255, 255, 254, 255, 255, 255, 127, 2, + 254, 255, 255, 255, 255, 0, 0, 0, 0, 0, 255, 191, 182, 0, 255, 255, + 255, 7, 7, 0, 0, 0, 255, 7, 255, 255, 255, 254, 255, 195, 255, 255, + 255, 255, 239, 31, 254, 225, 255, 159, 0, 0, 255, 255, 0, 224, 255, 255, + 255, 255, 3, 0, 255, 7, 48, 4, 255, 255, 255, 252, 255, 31, 0, 0, + 
255, 255, 255, 1, 255, 255, 7, 0, 240, 3, 255, 255, 255, 255, 255, 239, + 255, 223, 225, 255, 207, 255, 254, 255, 239, 159, 249, 255, 255, 253, 197, 227, + 159, 89, 128, 176, 207, 255, 3, 0, 238, 135, 249, 255, 255, 253, 109, 195, + 135, 25, 2, 94, 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 227, + 191, 27, 1, 0, 207, 255, 0, 0, 238, 159, 249, 255, 159, 25, 192, 176, + 207, 255, 2, 0, 236, 199, 61, 214, 24, 199, 255, 195, 199, 29, 129, 0, + 192, 255, 0, 0, 239, 223, 253, 255, 255, 253, 255, 227, 223, 29, 96, 3, + 238, 223, 253, 255, 255, 253, 239, 227, 223, 29, 96, 64, 207, 255, 6, 0, + 255, 255, 255, 231, 223, 93, 128, 0, 207, 255, 0, 252, 236, 255, 127, 252, + 255, 255, 251, 47, 127, 128, 95, 255, 192, 255, 12, 0, 255, 255, 255, 7, + 127, 32, 255, 3, 150, 37, 240, 254, 174, 236, 255, 59, 95, 32, 255, 243, + 1, 0, 0, 0, 255, 3, 0, 0, 255, 254, 255, 255, 255, 31, 254, 255, + 3, 255, 255, 254, 255, 255, 255, 31, 255, 255, 127, 249, 255, 3, 255, 255, + 231, 193, 255, 255, 127, 64, 255, 51, 191, 32, 255, 255, 255, 255, 255, 247, + 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, + 255, 255, 61, 255, 255, 255, 255, 135, 255, 255, 0, 0, 255, 255, 31, 0, + 255, 159, 255, 255, 255, 199, 255, 1, 255, 223, 15, 0, 255, 255, 15, 0, + 255, 223, 13, 0, 255, 255, 207, 255, 255, 1, 128, 16, 255, 255, 255, 0, + 255, 7, 255, 255, 255, 255, 63, 0, 255, 255, 255, 127, 255, 15, 255, 1, + 192, 255, 255, 255, 255, 63, 31, 0, 255, 15, 255, 255, 255, 3, 255, 3, + 255, 255, 255, 15, 254, 255, 31, 0, 128, 0, 0, 0, 255, 255, 239, 255, + 239, 15, 255, 3, 255, 243, 255, 255, 191, 255, 3, 0, 255, 227, 255, 255, + 255, 255, 255, 63, 0, 222, 111, 0, 128, 255, 31, 0, 255, 255, 63, 63, + 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, + 0, 0, 2, 128, 0, 0, 255, 31, 132, 252, 47, 62, 80, 189, 255, 243, + 224, 67, 0, 0, 255, 1, 0, 0, 0, 0, 192, 255, 255, 127, 255, 255, + 31, 120, 12, 0, 255, 128, 0, 0, 255, 255, 127, 0, 127, 127, 127, 127, + 
0, 128, 0, 0, 224, 0, 0, 0, 254, 3, 62, 31, 255, 255, 127, 224, + 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 255, 255, + 255, 15, 0, 0, 255, 127, 240, 143, 255, 255, 255, 191, 0, 0, 128, 255, + 252, 255, 255, 255, 255, 121, 255, 255, 255, 63, 3, 0, 187, 247, 255, 255, + 15, 0, 255, 3, 0, 0, 252, 8, 255, 255, 247, 255, 0, 128, 255, 3, + 223, 255, 255, 127, 255, 63, 255, 3, 255, 255, 127, 196, 5, 0, 0, 56, + 255, 255, 60, 0, 126, 126, 126, 0, 127, 127, 255, 255, 48, 0, 0, 0, + 255, 7, 255, 3, 15, 0, 255, 255, 127, 248, 255, 255, 255, 63, 255, 255, + 255, 255, 255, 3, 127, 0, 248, 224, 255, 253, 127, 95, 219, 255, 255, 255, + 0, 0, 248, 255, 255, 255, 252, 255, 0, 0, 255, 15, 0, 0, 223, 255, + 252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, + 255, 255, 1, 0, 15, 255, 62, 0, 255, 0, 255, 255, 15, 0, 0, 0, + 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, 111, 240, 239, 254, + 31, 0, 0, 0, 63, 0, 0, 0, 255, 1, 255, 3, 255, 255, 199, 255, + 255, 255, 71, 0, 30, 0, 255, 7, 255, 255, 251, 255, 255, 255, 159, 0, + 159, 25, 128, 224, 179, 0, 255, 3, 255, 255, 63, 127, 17, 0, 255, 3, + 255, 3, 0, 128, 255, 63, 0, 0, 248, 255, 255, 224, 31, 0, 255, 255, + 3, 0, 0, 0, 255, 7, 255, 31, 255, 1, 255, 67, 255, 255, 223, 255, + 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, + 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, + 255, 253, 255, 255, 247, 207, 255, 255, 150, 254, 247, 10, 132, 234, 150, 170, + 150, 247, 247, 94, 255, 251, 255, 15, 238, 251, 255, 15, +}; + +/* Alphanumeric: 2037 bytes. 
*/ + RE_UINT32 re_get_alphanumeric(RE_UINT32 ch) { - return re_get_alphabetic(ch) || re_get_general_category(ch) == RE_PROP_ND; + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_alphanumeric_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_alphanumeric_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_alphanumeric_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_alphanumeric_stage_4[pos + f] << 5; + pos += code; + value = (re_alphanumeric_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; } /* Any. */ @@ -11000,52 +11838,674 @@ RE_UINT32 re_get_any(RE_UINT32 ch) { return 1; } -/* ASCII. */ - -RE_UINT32 re_get_ascii(RE_UINT32 ch) { - if (ch <= RE_ASCII_MAX) - return 1; - - return 0; -} - -/* Assigned. */ - -RE_UINT32 re_get_assigned(RE_UINT32 ch) { - return re_get_general_category(ch) != RE_PROP_CN; -} - /* Blank. */ +static RE_UINT8 re_blank_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_blank_stage_2[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_blank_stage_3[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, + 3, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_blank_stage_4[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 3, 1, 1, 1, 1, 1, 4, 5, 1, 1, 1, 1, 1, 1, + 3, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_blank_stage_5[] = { + 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + 255, 7, 0, 0, 0, 128, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, +}; + +/* Blank: 169 bytes. 
*/ + RE_UINT32 re_get_blank(RE_UINT32 ch) { - return ch == 0x09 || re_get_general_category(ch) == RE_PROP_ZS; + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_blank_stage_1[f] << 3; + f = code >> 13; + code ^= f << 13; + pos = (RE_UINT32)re_blank_stage_2[pos + f] << 4; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_blank_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_blank_stage_4[pos + f] << 6; + pos += code; + value = (re_blank_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; } /* Graph. */ +static RE_UINT8 re_graph_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 10, 12, 13, 14, + 3, 3, 3, 3, 3, 15, 10, 16, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 17, 10, 10, 10, 10, 10, 10, 10, 3, 3, 3, 3, 3, 3, 3, 18, + 3, 3, 3, 3, 3, 3, 3, 18, +}; + +static RE_UINT8 re_graph_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 10, 10, 19, 20, 21, 22, 23, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 24, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 25, + 10, 10, 26, 27, 28, 29, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 30, 31, 31, 31, 31, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 31, 31, + 10, 49, 50, 31, 31, 31, 31, 31, 10, 10, 51, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 10, 52, 
31, 53, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, 54, 31, 31, 31, 31, 31, 55, 31, + 31, 31, 31, 31, 31, 31, 31, 31, 56, 57, 58, 59, 31, 31, 31, 31, + 31, 31, 31, 31, 60, 31, 31, 61, 62, 63, 64, 65, 66, 31, 31, 31, + 10, 10, 10, 67, 10, 10, 10, 10, 10, 10, 10, 68, 69, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 10, 69, 31, 31, + 70, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 71, +}; + +static RE_UINT8 re_graph_stage_3[] = { + 0, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4, 2, + 2, 2, 2, 2, 5, 6, 7, 8, 9, 2, 2, 2, 10, 11, 12, 13, + 14, 15, 16, 17, 2, 2, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, + 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 2, 40, 41, 42, + 2, 2, 2, 43, 2, 2, 2, 2, 2, 44, 45, 46, 47, 48, 49, 50, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 51, 52, 53, 54, 2, 55, + 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 2, 68, 2, 69, + 70, 71, 67, 72, 2, 2, 2, 73, 2, 2, 2, 2, 74, 75, 76, 77, + 78, 79, 80, 81, 2, 2, 82, 2, 2, 2, 2, 2, 2, 2, 2, 13, + 83, 84, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 85, 86, 87, + 88, 89, 2, 90, 91, 92, 93, 94, 2, 95, 96, 97, 2, 2, 2, 98, + 99, 99, 100, 2, 101, 2, 102, 103, 89, 2, 2, 1, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 59, 2, 2, 2, 2, 2, 2, 2, 2, 104, + 2, 2, 105, 106, 2, 2, 2, 2, 107, 2, 108, 57, 2, 2, 109, 110, + 111, 57, 2, 112, 2, 113, 2, 114, 115, 116, 2, 117, 118, 119, 67, 120, + 2, 2, 2, 2, 2, 2, 103, 121, 67, 67, 67, 67, 67, 67, 67, 67, + 2, 122, 2, 123, 124, 125, 2, 126, 2, 2, 2, 2, 2, 127, 128, 129, + 130, 131, 2, 132, 99, 2, 1, 133, 134, 135, 2, 13, 136, 2, 137, 138, + 67, 67, 139, 140, 103, 141, 108, 142, 2, 2, 143, 67, 144, 145, 67, 67, + 2, 2, 2, 2, 115, 146, 67, 67, 147, 148, 149, 67, 150, 67, 151, 67, + 152, 153, 154, 155, 156, 157, 158, 67, 2, 159, 67, 67, 67, 67, 67, 67, + 67, 160, 67, 67, 67, 67, 67, 67, 2, 161, 2, 162, 76, 163, 2, 164, + 165, 67, 166, 167, 24, 168, 67, 67, 67, 67, 2, 169, 67, 67, 170, 171, + 2, 
172, 57, 171, 67, 67, 67, 67, 67, 67, 173, 174, 67, 67, 67, 67, + 67, 67, 67, 52, 67, 67, 67, 67, 2, 2, 2, 2, 2, 2, 175, 67, + 2, 176, 67, 67, 67, 67, 67, 67, 177, 67, 67, 67, 67, 67, 67, 67, + 52, 178, 67, 179, 2, 180, 181, 67, 67, 67, 67, 67, 2, 182, 183, 67, + 184, 67, 67, 67, 67, 67, 67, 67, 2, 185, 186, 67, 67, 67, 67, 67, + 2, 2, 2, 59, 187, 2, 2, 188, 2, 189, 67, 67, 2, 190, 67, 67, + 2, 191, 192, 193, 194, 195, 2, 2, 2, 2, 196, 2, 2, 2, 2, 197, + 2, 2, 2, 198, 67, 67, 67, 67, 199, 200, 201, 202, 67, 67, 67, 67, + 62, 2, 203, 204, 205, 62, 206, 207, 208, 209, 67, 67, 210, 211, 2, 212, + 2, 2, 2, 1, 2, 213, 214, 2, 2, 215, 2, 216, 2, 97, 2, 217, + 218, 219, 220, 67, 67, 67, 67, 67, 2, 2, 2, 221, 2, 2, 2, 2, + 2, 2, 2, 2, 50, 2, 2, 2, 188, 67, 67, 67, 67, 67, 67, 67, + 222, 2, 67, 67, 2, 2, 2, 223, 2, 2, 2, 2, 2, 2, 2, 211, +}; + +static RE_UINT8 re_graph_stage_4[] = { + 0, 0, 1, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 4, + 5, 2, 6, 2, 2, 2, 2, 1, 2, 7, 1, 2, 8, 1, 2, 2, + 9, 2, 10, 11, 2, 12, 2, 2, 13, 2, 2, 2, 14, 2, 2, 2, + 2, 2, 2, 15, 2, 2, 2, 10, 2, 2, 16, 3, 2, 17, 0, 0, + 0, 0, 2, 18, 0, 0, 19, 2, 20, 21, 22, 23, 24, 25, 26, 27, + 28, 21, 22, 29, 30, 31, 32, 33, 34, 6, 22, 35, 36, 37, 26, 15, + 38, 21, 22, 35, 39, 40, 26, 9, 41, 42, 43, 44, 45, 46, 32, 10, + 47, 48, 22, 49, 50, 51, 26, 52, 53, 48, 22, 54, 50, 55, 26, 56, + 53, 48, 2, 14, 57, 58, 26, 59, 60, 61, 2, 62, 63, 64, 32, 65, + 1, 2, 2, 66, 2, 27, 0, 0, 67, 68, 69, 70, 71, 72, 0, 0, + 73, 2, 74, 1, 2, 73, 2, 12, 12, 10, 0, 0, 75, 2, 2, 2, + 76, 77, 2, 2, 76, 2, 2, 78, 79, 80, 2, 2, 2, 79, 2, 2, + 2, 14, 2, 74, 2, 81, 2, 2, 2, 2, 2, 11, 1, 74, 2, 2, + 2, 2, 2, 82, 12, 11, 2, 83, 2, 84, 12, 85, 2, 16, 81, 81, + 3, 81, 2, 2, 2, 2, 2, 9, 2, 2, 10, 2, 2, 2, 2, 33, + 2, 3, 27, 27, 86, 2, 16, 11, 2, 2, 27, 2, 81, 87, 2, 2, + 2, 88, 2, 2, 2, 3, 2, 89, 81, 81, 16, 3, 0, 0, 0, 0, + 27, 2, 2, 74, 2, 2, 2, 90, 2, 2, 2, 91, 49, 2, 2, 2, + 9, 2, 2, 92, 2, 2, 2, 93, 2, 94, 2, 2, 94, 95, 2, 16, + 2, 2, 2, 96, 96, 
97, 2, 98, 99, 2, 100, 2, 2, 3, 96, 101, + 3, 74, 2, 16, 0, 2, 2, 37, 81, 2, 2, 2, 2, 2, 83, 0, + 10, 0, 2, 2, 2, 2, 2, 26, 2, 102, 2, 49, 22, 15, 0, 0, + 2, 2, 3, 2, 2, 3, 2, 2, 2, 2, 2, 103, 2, 2, 75, 2, + 2, 2, 104, 105, 2, 83, 106, 106, 106, 106, 2, 2, 18, 0, 0, 0, + 2, 107, 2, 2, 2, 2, 2, 84, 2, 33, 0, 27, 1, 2, 2, 2, + 2, 7, 2, 2, 108, 2, 16, 1, 3, 2, 2, 10, 2, 2, 84, 2, + 74, 0, 0, 0, 74, 2, 2, 2, 83, 2, 2, 2, 2, 2, 27, 0, + 2, 13, 2, 2, 3, 2, 16, 15, 0, 0, 0, 109, 2, 2, 27, 81, + 110, 81, 2, 27, 2, 111, 2, 74, 13, 44, 2, 3, 2, 2, 2, 83, + 16, 72, 2, 2, 18, 99, 2, 83, 112, 113, 106, 2, 2, 2, 114, 0, + 2, 2, 16, 81, 115, 2, 2, 27, 2, 2, 16, 2, 2, 81, 0, 0, + 83, 116, 2, 117, 118, 2, 2, 2, 15, 119, 2, 2, 0, 2, 2, 2, + 2, 120, 2, 2, 9, 0, 0, 16, 2, 81, 16, 2, 2, 121, 122, 96, + 2, 2, 2, 89, 123, 124, 106, 125, 126, 2, 80, 127, 16, 16, 0, 0, + 128, 2, 2, 129, 74, 27, 37, 0, 0, 2, 2, 16, 2, 74, 2, 2, + 2, 37, 2, 27, 10, 2, 2, 10, 130, 33, 0, 0, 2, 16, 81, 0, + 2, 2, 9, 2, 2, 2, 111, 0, 2, 33, 9, 0, 131, 2, 2, 132, + 2, 133, 2, 2, 2, 3, 109, 0, 2, 134, 2, 135, 2, 2, 2, 136, + 137, 138, 2, 139, 9, 82, 2, 2, 2, 2, 0, 0, 2, 2, 115, 83, + 2, 2, 2, 59, 2, 102, 2, 140, 2, 141, 142, 0, 82, 0, 0, 0, + 0, 0, 2, 3, 16, 120, 2, 143, 15, 2, 82, 81, 84, 2, 2, 83, + 144, 10, 1, 11, 2, 6, 2, 16, 0, 0, 0, 2, 2, 2, 10, 81, + 39, 145, 146, 11, 9, 81, 0, 0, 2, 2, 2, 102, 81, 0, 0, 0, + 11, 81, 0, 0, 0, 0, 2, 2, 2, 2, 2, 147, 2, 82, 0, 0, + 2, 2, 3, 11, 2, 2, 3, 0, 2, 3, 44, 0, 0, 2, 16, 33, + 33, 107, 6, 148, 2, 0, 0, 0, 11, 2, 2, 3, 143, 2, 0, 0, + 15, 0, 0, 0, 2, 2, 10, 74, 82, 72, 84, 0, 2, 2, 7, 2, + 2, 16, 0, 0, 33, 0, 0, 0, 2, 83, 2, 15, 2, 96, 2, 2, + 2, 12, 149, 150, 151, 2, 2, 2, 152, 153, 2, 154, 155, 48, 2, 2, + 2, 2, 102, 2, 88, 2, 2, 2, 156, 83, 0, 0, 151, 2, 157, 158, + 159, 160, 161, 162, 107, 27, 163, 27, 0, 0, 0, 15, 2, 84, 3, 1, + 1, 1, 2, 33, 74, 2, 3, 2, 2, 10, 0, 0, 0, 0, 32, 2, + 18, 2, 2, 10, 82, 15, 0, 0, 2, 2, 74, 2, 2, 2, 2, 16, + 3, 19, 2, 9, 10, 2, 2, 
107, 2, 2, 151, 2, 164, 2, 2, 2, + 2, 0, 74, 84, 2, 11, 0, 0, 27, 2, 2, 2, 9, 81, 2, 2, + 9, 2, 16, 0, 2, 83, 0, 0, 165, 0, 2, 2, 2, 2, 2, 0, +}; + +static RE_UINT8 re_graph_stage_5[] = { + 0, 0, 254, 255, 255, 255, 255, 127, 255, 252, 240, 215, 251, 255, 127, 254, + 255, 230, 255, 0, 255, 7, 31, 0, 255, 223, 255, 191, 255, 231, 3, 0, + 255, 63, 255, 79, 7, 0, 240, 255, 239, 159, 249, 255, 255, 253, 197, 243, + 159, 121, 128, 176, 207, 255, 255, 15, 238, 135, 109, 211, 135, 57, 2, 94, + 192, 255, 63, 0, 238, 191, 237, 243, 191, 59, 1, 0, 238, 159, 159, 57, + 192, 176, 236, 199, 61, 214, 24, 199, 255, 195, 199, 61, 129, 0, 239, 223, + 253, 255, 255, 227, 223, 61, 96, 3, 0, 255, 238, 223, 239, 243, 96, 64, + 6, 0, 223, 125, 128, 0, 63, 254, 236, 255, 127, 252, 251, 47, 127, 132, + 95, 255, 28, 0, 255, 135, 150, 37, 240, 254, 174, 236, 255, 59, 95, 63, + 255, 243, 255, 254, 255, 31, 191, 32, 255, 61, 127, 61, 61, 127, 61, 255, + 127, 255, 255, 3, 255, 1, 127, 0, 15, 0, 13, 0, 241, 255, 255, 199, + 255, 207, 255, 159, 15, 240, 255, 248, 127, 3, 63, 240, 63, 63, 255, 170, + 223, 255, 207, 239, 220, 127, 0, 248, 255, 124, 243, 255, 63, 255, 15, 254, + 255, 128, 1, 128, 127, 127, 255, 251, 224, 255, 128, 255, 31, 192, 15, 128, + 126, 126, 126, 0, 48, 0, 127, 248, 248, 224, 127, 95, 219, 255, 248, 255, + 252, 255, 247, 255, 127, 15, 252, 252, 252, 28, 0, 62, 255, 239, 255, 183, + 135, 255, 143, 255, 15, 255, 63, 253, 191, 145, 191, 255, 255, 143, 255, 131, + 255, 192, 111, 240, 239, 254, 15, 135, 7, 255, 3, 30, 0, 254, 0, 128, + 255, 33, 128, 224, 207, 31, 7, 128, 255, 224, 100, 222, 255, 235, 239, 255, + 191, 231, 223, 223, 255, 123, 95, 252, 159, 255, 150, 254, 247, 10, 132, 234, + 150, 170, 150, 247, 247, 94, 238, 251, 231, 255, 2, 0, +}; + +/* Graph: 2244 bytes. 
*/ + RE_UINT32 re_get_graph(RE_UINT32 ch) { - return !re_get_white_space(ch) && - (RE_GRAPH_MASK & (1 << re_get_general_category(ch))) == 0; + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_graph_stage_1[f] << 4; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_graph_stage_2[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_graph_stage_3[pos + f] << 2; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_graph_stage_4[pos + f] << 4; + pos += code; + value = (re_graph_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; } /* Print. */ +static RE_UINT8 re_print_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 10, 12, 13, 14, + 3, 3, 3, 3, 3, 15, 10, 16, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 17, 10, 10, 10, 10, 10, 10, 10, 3, 3, 3, 3, 3, 3, 3, 18, + 3, 3, 3, 3, 3, 3, 3, 18, +}; + +static RE_UINT8 re_print_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 10, 10, 19, 20, 21, 22, 23, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 24, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 25, + 10, 10, 26, 27, 28, 29, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 30, 31, 31, 31, 31, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 31, 31, + 10, 49, 50, 31, 31, 31, 31, 31, 10, 10, 51, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 
+ 31, 31, 31, 31, 10, 52, 31, 53, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, 54, 31, 31, 31, 31, 31, 55, 31, + 31, 31, 31, 31, 31, 31, 31, 31, 56, 57, 58, 59, 31, 31, 31, 31, + 31, 31, 31, 31, 60, 31, 31, 61, 62, 63, 64, 65, 66, 31, 31, 31, + 10, 10, 10, 67, 10, 10, 10, 10, 10, 10, 10, 68, 69, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 10, 69, 31, 31, + 70, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 71, +}; + +static RE_UINT8 re_print_stage_3[] = { + 0, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4, 2, + 2, 2, 2, 2, 5, 6, 7, 8, 9, 2, 2, 2, 10, 11, 12, 13, + 14, 15, 16, 17, 2, 2, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, + 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 2, 40, 41, 42, + 2, 2, 2, 43, 2, 2, 2, 2, 2, 44, 45, 46, 47, 48, 49, 50, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 51, 52, 53, 54, 2, 55, + 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 2, 68, 2, 69, + 70, 71, 67, 72, 2, 2, 2, 73, 2, 2, 2, 2, 74, 75, 76, 77, + 78, 79, 80, 81, 2, 2, 82, 2, 2, 2, 2, 2, 2, 2, 2, 13, + 83, 84, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 85, 86, 87, + 88, 89, 2, 90, 91, 92, 93, 94, 2, 95, 96, 97, 2, 2, 2, 98, + 2, 99, 100, 2, 101, 2, 102, 103, 89, 2, 2, 1, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 59, 2, 2, 2, 2, 2, 2, 2, 2, 104, + 2, 2, 105, 106, 2, 2, 2, 2, 107, 2, 108, 57, 2, 2, 109, 110, + 111, 57, 2, 112, 2, 113, 2, 114, 115, 116, 2, 117, 118, 119, 67, 120, + 2, 2, 2, 2, 2, 2, 103, 121, 67, 67, 67, 67, 67, 67, 67, 67, + 2, 122, 2, 123, 124, 125, 2, 126, 2, 2, 2, 2, 2, 127, 128, 129, + 130, 131, 2, 132, 99, 2, 1, 133, 134, 135, 2, 13, 136, 2, 137, 138, + 67, 67, 51, 139, 103, 140, 108, 141, 2, 2, 142, 67, 143, 144, 67, 67, + 2, 2, 2, 2, 115, 145, 67, 67, 146, 147, 148, 67, 149, 67, 150, 67, + 151, 152, 153, 154, 155, 156, 157, 67, 2, 158, 67, 67, 67, 67, 67, 67, + 67, 159, 67, 67, 67, 67, 67, 67, 2, 160, 2, 161, 76, 162, 2, 163, + 164, 67, 165, 166, 24, 167, 67, 67, 67, 67, 2, 168, 
67, 67, 169, 170, + 2, 171, 57, 170, 67, 67, 67, 67, 67, 67, 0, 172, 67, 67, 67, 67, + 67, 67, 67, 52, 67, 67, 67, 67, 2, 2, 2, 2, 2, 2, 173, 67, + 2, 174, 67, 67, 67, 67, 67, 67, 175, 67, 67, 67, 67, 67, 67, 67, + 52, 176, 67, 177, 2, 178, 179, 67, 67, 67, 67, 67, 2, 180, 181, 67, + 182, 67, 67, 67, 67, 67, 67, 67, 2, 183, 184, 67, 67, 67, 67, 67, + 2, 2, 2, 59, 185, 2, 2, 186, 2, 187, 67, 67, 2, 188, 67, 67, + 2, 189, 190, 191, 192, 193, 2, 2, 2, 2, 194, 2, 2, 2, 2, 195, + 2, 2, 2, 196, 67, 67, 67, 67, 197, 198, 199, 200, 67, 67, 67, 67, + 62, 2, 201, 202, 203, 62, 204, 205, 206, 207, 67, 67, 208, 209, 2, 210, + 2, 2, 2, 1, 2, 211, 212, 2, 2, 213, 2, 214, 2, 97, 2, 215, + 216, 217, 218, 67, 67, 67, 67, 67, 2, 2, 2, 219, 2, 2, 2, 2, + 2, 2, 2, 2, 50, 2, 2, 2, 186, 67, 67, 67, 67, 67, 67, 67, + 220, 2, 67, 67, 2, 2, 2, 221, 2, 2, 2, 2, 2, 2, 2, 209, +}; + +static RE_UINT8 re_print_stage_4[] = { + 0, 0, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 3, + 4, 1, 5, 1, 1, 1, 1, 6, 1, 7, 6, 1, 8, 6, 1, 1, + 9, 1, 10, 11, 1, 12, 1, 1, 13, 1, 1, 1, 14, 1, 1, 1, + 1, 1, 1, 15, 1, 1, 1, 10, 1, 1, 16, 2, 1, 17, 0, 0, + 0, 0, 1, 18, 0, 0, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, + 28, 21, 22, 29, 30, 31, 32, 33, 34, 5, 22, 35, 36, 37, 26, 15, + 38, 21, 22, 35, 39, 40, 26, 9, 41, 42, 43, 44, 45, 46, 32, 10, + 47, 48, 22, 49, 50, 51, 26, 52, 53, 48, 22, 54, 50, 55, 26, 56, + 53, 48, 1, 14, 57, 58, 26, 59, 60, 61, 1, 62, 63, 64, 32, 65, + 6, 1, 1, 66, 1, 27, 0, 0, 67, 68, 69, 70, 71, 72, 0, 0, + 73, 1, 74, 6, 1, 73, 1, 12, 12, 10, 0, 0, 75, 1, 1, 1, + 76, 77, 1, 1, 76, 1, 1, 78, 79, 80, 1, 1, 1, 79, 1, 1, + 1, 14, 1, 74, 1, 81, 1, 1, 1, 1, 1, 11, 1, 74, 1, 1, + 1, 1, 1, 82, 12, 11, 1, 83, 1, 84, 12, 85, 1, 16, 81, 81, + 2, 81, 1, 1, 1, 1, 1, 9, 1, 1, 10, 1, 1, 1, 1, 33, + 1, 2, 27, 27, 86, 1, 16, 11, 1, 1, 27, 1, 81, 87, 1, 1, + 1, 88, 1, 1, 1, 2, 1, 89, 81, 81, 16, 2, 0, 0, 0, 0, + 27, 1, 1, 74, 1, 1, 1, 90, 1, 1, 1, 91, 49, 1, 1, 1, + 9, 1, 1, 92, 1, 1, 1, 93, 1, 94, 1, 1, 94, 95, 1, 
16, + 1, 1, 1, 96, 96, 97, 1, 98, 1, 1, 3, 1, 1, 1, 96, 99, + 2, 74, 1, 16, 0, 1, 1, 37, 81, 1, 1, 1, 1, 1, 83, 0, + 10, 0, 1, 1, 1, 1, 1, 26, 1, 100, 1, 49, 22, 15, 0, 0, + 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 101, 1, 1, 75, 1, + 1, 1, 102, 103, 1, 83, 104, 104, 104, 104, 1, 1, 18, 0, 0, 0, + 1, 105, 1, 1, 1, 1, 1, 84, 1, 33, 0, 27, 6, 1, 1, 1, + 1, 7, 1, 1, 106, 1, 16, 6, 2, 1, 1, 10, 1, 1, 84, 1, + 74, 0, 0, 0, 74, 1, 1, 1, 83, 1, 1, 1, 1, 1, 27, 0, + 1, 13, 1, 1, 2, 1, 16, 15, 0, 0, 0, 107, 1, 1, 27, 81, + 108, 81, 1, 27, 1, 109, 1, 74, 13, 44, 1, 2, 1, 1, 1, 83, + 16, 72, 1, 1, 18, 110, 1, 83, 111, 112, 104, 1, 1, 1, 113, 0, + 1, 1, 16, 81, 114, 1, 1, 27, 1, 1, 16, 1, 1, 81, 0, 0, + 83, 115, 1, 116, 117, 1, 1, 1, 15, 118, 1, 1, 0, 1, 1, 1, + 1, 119, 1, 1, 9, 0, 0, 16, 1, 81, 16, 1, 1, 120, 121, 96, + 1, 1, 1, 89, 122, 123, 104, 124, 125, 1, 80, 126, 16, 16, 0, 0, + 127, 1, 1, 128, 74, 27, 37, 0, 0, 1, 1, 16, 1, 37, 1, 27, + 10, 1, 1, 10, 129, 33, 0, 0, 1, 16, 81, 0, 1, 1, 9, 1, + 1, 1, 109, 0, 1, 33, 9, 0, 130, 1, 1, 131, 1, 132, 1, 1, + 1, 2, 107, 0, 1, 133, 1, 134, 1, 1, 1, 135, 136, 137, 1, 138, + 9, 82, 1, 1, 1, 1, 0, 0, 1, 1, 114, 83, 1, 1, 1, 59, + 1, 100, 1, 139, 1, 140, 141, 0, 82, 0, 0, 0, 0, 0, 1, 2, + 16, 119, 1, 142, 15, 1, 82, 81, 84, 1, 1, 83, 143, 10, 6, 11, + 1, 5, 1, 16, 0, 0, 0, 1, 1, 1, 10, 81, 39, 144, 145, 11, + 9, 81, 0, 0, 1, 1, 1, 100, 81, 0, 0, 0, 11, 81, 0, 0, + 1, 1, 1, 146, 1, 82, 0, 0, 1, 1, 2, 11, 1, 1, 2, 0, + 1, 2, 44, 0, 0, 1, 16, 33, 33, 105, 5, 147, 1, 0, 0, 0, + 11, 1, 1, 2, 142, 1, 0, 0, 15, 0, 0, 0, 1, 1, 10, 74, + 82, 72, 84, 0, 1, 1, 7, 1, 1, 16, 0, 0, 33, 0, 0, 0, + 1, 83, 1, 15, 1, 96, 1, 1, 1, 12, 148, 149, 150, 1, 1, 1, + 151, 152, 1, 153, 154, 48, 1, 1, 1, 1, 100, 1, 88, 1, 1, 1, + 155, 83, 0, 0, 150, 1, 156, 157, 158, 159, 160, 161, 105, 27, 162, 27, + 0, 0, 0, 15, 1, 84, 2, 6, 6, 6, 1, 33, 74, 1, 2, 1, + 1, 10, 0, 0, 0, 0, 32, 1, 18, 1, 1, 10, 82, 15, 0, 0, + 1, 1, 74, 1, 1, 1, 1, 16, 2, 19, 1, 9, 10, 1, 1, 105, + 1, 
1, 150, 1, 163, 1, 1, 1, 1, 0, 74, 84, 1, 11, 0, 0, + 27, 1, 1, 1, 9, 81, 1, 1, 9, 1, 16, 0, 1, 83, 0, 0, + 164, 0, 1, 1, 1, 1, 1, 0, +}; + +static RE_UINT8 re_print_stage_5[] = { + 0, 0, 255, 255, 255, 127, 255, 252, 240, 215, 251, 255, 254, 255, 127, 254, + 255, 230, 255, 0, 255, 7, 31, 0, 255, 223, 255, 191, 255, 231, 3, 0, + 255, 63, 255, 79, 7, 0, 240, 255, 239, 159, 249, 255, 255, 253, 197, 243, + 159, 121, 128, 176, 207, 255, 255, 15, 238, 135, 109, 211, 135, 57, 2, 94, + 192, 255, 63, 0, 238, 191, 237, 243, 191, 59, 1, 0, 238, 159, 159, 57, + 192, 176, 236, 199, 61, 214, 24, 199, 255, 195, 199, 61, 129, 0, 239, 223, + 253, 255, 255, 227, 223, 61, 96, 3, 0, 255, 238, 223, 239, 243, 96, 64, + 6, 0, 223, 125, 128, 0, 63, 254, 236, 255, 127, 252, 251, 47, 127, 132, + 95, 255, 28, 0, 255, 135, 150, 37, 240, 254, 174, 236, 255, 59, 95, 63, + 255, 243, 255, 254, 255, 31, 191, 32, 255, 61, 127, 61, 61, 127, 61, 255, + 127, 255, 255, 3, 255, 1, 127, 0, 15, 0, 13, 0, 241, 255, 255, 199, + 255, 207, 255, 159, 15, 240, 255, 248, 127, 3, 63, 240, 63, 63, 255, 170, + 223, 255, 207, 239, 220, 127, 243, 255, 63, 255, 15, 254, 255, 128, 1, 128, + 127, 127, 255, 251, 224, 255, 128, 255, 31, 192, 15, 128, 0, 248, 126, 126, + 126, 0, 48, 0, 127, 248, 248, 224, 127, 95, 219, 255, 248, 255, 252, 255, + 247, 255, 127, 15, 252, 252, 252, 28, 0, 62, 255, 239, 255, 183, 135, 255, + 143, 255, 15, 255, 63, 253, 191, 145, 191, 255, 255, 143, 255, 131, 255, 192, + 111, 240, 239, 254, 15, 135, 7, 255, 3, 30, 0, 254, 0, 128, 255, 33, + 128, 224, 207, 31, 7, 128, 255, 224, 100, 222, 255, 235, 239, 255, 191, 231, + 223, 223, 255, 123, 95, 252, 159, 255, 150, 254, 247, 10, 132, 234, 150, 170, + 150, 247, 247, 94, 238, 251, 231, 255, 2, 0, +}; + +/* Print: 2234 bytes. 
*/ + RE_UINT32 re_get_print(RE_UINT32 ch) { - return (re_get_graph(ch) || re_get_blank(ch)) && - re_get_general_category(ch) != RE_PROP_CC; + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_print_stage_1[f] << 4; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_print_stage_2[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_print_stage_3[pos + f] << 2; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_print_stage_4[pos + f] << 4; + pos += code; + value = (re_print_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; } /* Word. */ +static RE_UINT8 re_word_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, + 6, 6, +}; + +static RE_UINT8 re_word_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, + 15, 16, 17, 18, 19, 13, 20, 13, 13, 13, 13, 13, 13, 21, 13, 13, + 13, 13, 13, 13, 13, 13, 22, 23, 13, 13, 24, 13, 13, 25, 26, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 27, 7, 28, 29, 13, 13, 13, 13, 13, 13, 13, 30, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 31, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +}; + +static RE_UINT8 re_word_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, + 29, 30, 31, 31, 32, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 31, + 36, 37, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39, + 1, 1, 1, 1, 40, 1, 41, 42, 43, 44, 45, 46, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 47, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 1, 48, 49, 1, 50, 51, 52, 53, 54, 55, 56, 57, 58, 1, 59, + 60, 61, 62, 63, 64, 31, 31, 31, 65, 66, 67, 68, 69, 70, 71, 31, + 72, 31, 73, 31, 31, 31, 31, 31, 1, 1, 1, 74, 75, 31, 31, 31, + 1, 1, 1, 1, 76, 
31, 31, 31, 1, 1, 77, 78, 31, 31, 31, 79, + 80, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 81, 31, 31, 31, + 31, 82, 83, 31, 84, 85, 86, 87, 88, 31, 31, 31, 31, 31, 89, 31, + 31, 90, 31, 31, 31, 31, 31, 31, 1, 1, 1, 1, 1, 1, 91, 1, + 1, 1, 1, 1, 1, 1, 1, 92, 93, 31, 31, 31, 31, 31, 31, 31, + 1, 1, 93, 31, 31, 31, 31, 31, 31, 94, 31, 31, 31, 31, 31, 31, +}; + +static RE_UINT8 re_word_stage_4[] = { + 0, 1, 2, 3, 0, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 7, 8, 6, 6, 6, 9, 10, 11, 6, 12, + 6, 6, 6, 6, 11, 6, 6, 6, 6, 13, 14, 15, 16, 17, 18, 19, + 20, 6, 6, 21, 6, 6, 22, 23, 24, 6, 25, 6, 6, 26, 6, 27, + 6, 28, 29, 0, 0, 30, 0, 31, 6, 6, 6, 32, 33, 34, 35, 36, + 37, 38, 39, 40, 41, 42, 43, 44, 45, 42, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 44, 55, 56, 57, 58, 55, 59, 60, 61, 62, 63, 64, 65, + 15, 66, 67, 0, 68, 69, 70, 0, 71, 72, 73, 74, 75, 76, 77, 0, + 6, 6, 78, 6, 79, 6, 80, 81, 6, 6, 82, 6, 83, 84, 85, 6, + 86, 6, 59, 0, 87, 6, 6, 88, 15, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 89, 3, 6, 6, 90, 91, 88, 92, 93, 6, 6, 94, 95, + 96, 6, 6, 97, 6, 98, 6, 99, 100, 101, 102, 103, 6, 104, 105, 0, + 29, 6, 100, 106, 105, 107, 0, 0, 6, 6, 108, 109, 6, 6, 6, 92, + 6, 97, 110, 79, 0, 0, 111, 112, 6, 6, 6, 6, 6, 6, 6, 113, + 114, 6, 115, 79, 6, 116, 117, 118, 119, 120, 121, 122, 123, 0, 24, 124, + 125, 126, 127, 6, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 129, 6, 95, 6, 130, 100, 6, 6, 6, 6, 131, + 6, 80, 6, 132, 133, 134, 134, 6, 0, 135, 0, 0, 0, 0, 0, 0, + 136, 137, 15, 6, 138, 15, 6, 81, 139, 140, 6, 6, 141, 66, 0, 24, + 6, 6, 6, 6, 6, 99, 0, 0, 6, 6, 6, 6, 6, 6, 142, 0, + 6, 6, 6, 6, 142, 0, 24, 79, 143, 144, 6, 145, 17, 6, 6, 26, + 146, 147, 6, 6, 148, 149, 0, 146, 6, 150, 6, 92, 6, 6, 151, 152, + 6, 153, 92, 76, 6, 6, 154, 100, 6, 133, 155, 156, 6, 6, 157, 158, + 159, 160, 81, 161, 0, 0, 6, 162, 6, 6, 6, 6, 6, 163, 164, 29, + 6, 6, 6, 153, 6, 6, 165, 0, 166, 167, 168, 6, 6, 26, 169, 6, + 6, 79, 24, 6, 170, 6, 150, 171, 87, 172, 173, 174, 6, 6, 6, 76, + 
1, 2, 3, 102, 6, 100, 175, 0, 176, 177, 178, 0, 6, 6, 6, 66, + 0, 0, 6, 88, 0, 0, 0, 179, 0, 0, 0, 0, 76, 6, 124, 180, + 6, 24, 98, 66, 79, 6, 181, 0, 6, 6, 6, 6, 79, 95, 0, 0, + 6, 182, 6, 183, 0, 0, 0, 0, 6, 133, 99, 150, 0, 0, 0, 0, + 184, 185, 99, 133, 100, 0, 0, 0, 99, 165, 0, 0, 6, 186, 0, 0, + 187, 188, 0, 76, 76, 0, 73, 189, 6, 99, 99, 30, 26, 0, 0, 0, + 6, 6, 128, 0, 0, 0, 0, 0, 6, 6, 189, 190, 6, 66, 24, 191, + 6, 192, 24, 193, 6, 6, 194, 0, 195, 97, 0, 0, 0, 24, 6, 196, + 45, 42, 197, 198, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 199, 0, + 0, 0, 0, 0, 6, 200, 180, 0, 6, 6, 201, 0, 6, 97, 95, 0, + 0, 0, 0, 0, 0, 6, 6, 202, 0, 0, 0, 0, 0, 0, 6, 203, + 6, 6, 6, 6, 203, 0, 0, 0, 6, 6, 6, 141, 0, 0, 0, 0, + 6, 141, 0, 0, 0, 0, 0, 0, 6, 203, 100, 95, 0, 0, 24, 103, + 6, 133, 204, 205, 87, 0, 0, 0, 6, 6, 206, 100, 207, 0, 0, 0, + 208, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 209, 210, 0, 0, 0, + 0, 0, 0, 211, 212, 213, 0, 0, 0, 0, 214, 0, 0, 0, 0, 0, + 6, 6, 192, 6, 215, 216, 217, 6, 218, 219, 220, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 221, 222, 81, 192, 192, 130, 130, 223, 223, 224, 6, + 6, 6, 6, 6, 6, 6, 225, 0, 217, 226, 227, 228, 229, 230, 0, 0, + 0, 24, 78, 78, 95, 0, 0, 0, 6, 6, 6, 6, 6, 6, 133, 0, + 6, 88, 6, 6, 6, 6, 6, 6, 79, 0, 0, 0, 0, 0, 0, 0, + 6, 6, 6, 6, 6, 6, 6, 87, +}; + +static RE_UINT8 re_word_stage_5[] = { + 0, 0, 0, 0, 0, 0, 255, 3, 254, 255, 255, 135, 254, 255, 255, 7, + 0, 4, 32, 4, 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, + 31, 80, 0, 0, 255, 255, 223, 188, 64, 215, 255, 255, 251, 255, 255, 255, + 255, 255, 191, 255, 255, 255, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, + 255, 0, 254, 255, 255, 255, 255, 191, 182, 0, 255, 255, 255, 7, 7, 0, + 0, 0, 255, 7, 255, 195, 255, 255, 255, 255, 239, 159, 255, 253, 255, 159, + 0, 0, 255, 255, 255, 231, 255, 255, 255, 255, 3, 0, 255, 255, 63, 4, + 255, 63, 0, 0, 255, 255, 255, 15, 255, 255, 7, 0, 240, 255, 255, 255, + 207, 255, 254, 255, 239, 159, 249, 255, 255, 253, 197, 243, 159, 121, 128, 176, + 207, 
255, 3, 0, 238, 135, 249, 255, 255, 253, 109, 211, 135, 57, 2, 94, + 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 243, 191, 59, 1, 0, + 207, 255, 0, 0, 238, 159, 249, 255, 159, 57, 192, 176, 207, 255, 2, 0, + 236, 199, 61, 214, 24, 199, 255, 195, 199, 61, 129, 0, 192, 255, 0, 0, + 239, 223, 253, 255, 255, 253, 255, 227, 223, 61, 96, 3, 238, 223, 253, 255, + 255, 253, 239, 243, 223, 61, 96, 64, 207, 255, 6, 0, 255, 255, 255, 231, + 223, 125, 128, 0, 207, 255, 0, 252, 236, 255, 127, 252, 255, 255, 251, 47, + 127, 132, 95, 255, 192, 255, 12, 0, 255, 255, 255, 7, 255, 127, 255, 3, + 150, 37, 240, 254, 174, 236, 255, 59, 95, 63, 255, 243, 1, 0, 0, 3, + 255, 3, 160, 194, 255, 254, 255, 255, 255, 31, 254, 255, 223, 255, 255, 254, + 255, 255, 255, 31, 64, 0, 0, 0, 255, 3, 255, 255, 255, 255, 255, 63, + 191, 32, 255, 255, 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, + 255, 255, 61, 127, 61, 255, 127, 255, 255, 255, 61, 255, 255, 255, 0, 0, + 255, 255, 31, 0, 255, 159, 255, 255, 255, 199, 255, 1, 255, 223, 31, 0, + 255, 255, 15, 0, 255, 223, 13, 0, 255, 255, 143, 48, 255, 3, 0, 0, + 0, 56, 255, 3, 255, 255, 255, 0, 255, 7, 255, 255, 255, 255, 63, 0, + 255, 255, 255, 127, 255, 15, 255, 15, 192, 255, 255, 255, 255, 63, 31, 0, + 255, 15, 255, 255, 255, 3, 255, 3, 255, 255, 255, 159, 128, 0, 255, 127, + 255, 15, 255, 3, 0, 248, 15, 0, 255, 227, 255, 255, 0, 0, 247, 255, + 255, 255, 127, 3, 255, 255, 63, 240, 255, 255, 63, 63, 63, 63, 255, 170, + 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, 0, 48, 0, 0, + 0, 0, 0, 128, 1, 0, 16, 0, 0, 0, 2, 128, 0, 0, 255, 31, + 255, 255, 1, 0, 132, 252, 47, 62, 80, 189, 255, 243, 224, 67, 0, 0, + 255, 1, 0, 0, 0, 0, 192, 255, 255, 127, 255, 255, 31, 248, 15, 0, + 255, 128, 0, 128, 255, 255, 127, 0, 127, 127, 127, 127, 0, 128, 0, 0, + 224, 0, 0, 0, 254, 255, 62, 31, 255, 255, 127, 230, 224, 255, 255, 255, + 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 0, 0, 255, 31, 255, 255, + 255, 15, 0, 0, 255, 255, 247, 191, 0, 0, 
128, 255, 252, 255, 255, 255, + 255, 121, 255, 255, 255, 63, 3, 0, 255, 0, 0, 0, 31, 0, 255, 3, + 255, 255, 255, 8, 255, 63, 255, 255, 1, 128, 255, 3, 255, 63, 255, 3, + 255, 255, 127, 252, 7, 0, 0, 56, 255, 255, 124, 0, 126, 126, 126, 0, + 127, 127, 255, 255, 48, 0, 0, 0, 255, 55, 255, 3, 15, 0, 255, 255, + 127, 248, 255, 255, 255, 255, 255, 3, 127, 0, 248, 224, 255, 253, 127, 95, + 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, 0, 0, 255, 15, + 255, 63, 24, 0, 0, 224, 0, 0, 0, 0, 223, 255, 252, 252, 252, 28, + 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, 0, 0, 0, 32, + 1, 0, 0, 0, 15, 255, 62, 0, 255, 0, 255, 255, 15, 0, 0, 0, + 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, 111, 240, 239, 254, + 255, 255, 15, 135, 127, 0, 0, 0, 192, 255, 0, 128, 255, 1, 255, 3, + 255, 255, 223, 255, 255, 255, 79, 0, 31, 0, 255, 7, 255, 255, 251, 255, + 255, 7, 255, 3, 159, 57, 128, 224, 207, 31, 31, 0, 191, 0, 255, 3, + 255, 255, 63, 255, 17, 0, 255, 3, 255, 3, 0, 128, 255, 255, 255, 1, + 15, 0, 255, 3, 248, 255, 255, 224, 31, 0, 255, 255, 0, 128, 255, 255, + 3, 0, 0, 0, 255, 7, 255, 31, 255, 1, 255, 99, 224, 227, 7, 248, + 231, 15, 0, 0, 0, 60, 0, 0, 28, 0, 0, 0, 255, 255, 255, 223, + 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, + 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, 255, 253, 255, 255, + 247, 207, 255, 255, 31, 0, 127, 0, 150, 254, 247, 10, 132, 234, 150, 170, + 150, 247, 247, 94, 255, 251, 255, 15, 238, 251, 255, 15, +}; + +/* Word: 2102 bytes. 
*/ + RE_UINT32 re_get_word(RE_UINT32 ch) { - return re_get_alphabetic(ch) || (RE_WORD_MASK & (1 << - re_get_general_category(ch))) != 0 || re_get_join_control(ch); + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_word_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_word_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_word_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_word_stage_4[pos + f] << 5; + pos += code; + value = (re_word_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; } /* XDigit. */ +static RE_UINT8 re_xdigit_stage_1[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, +}; + +static RE_UINT8 re_xdigit_stage_2[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 4, + 5, 6, 2, 2, 2, 2, 7, 2, 2, 2, 2, 2, 2, 8, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_xdigit_stage_3[] = { + 0, 1, 1, 1, 1, 1, 2, 3, 1, 4, 4, 4, 4, 4, 5, 6, + 7, 1, 1, 1, 1, 1, 1, 8, 9, 10, 11, 12, 13, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 6, 1, 14, 15, 16, 17, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 18, + 1, 1, 1, 1, 19, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 20, 21, 17, 1, 14, 1, 22, 1, 8, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 23, 16, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 24, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_xdigit_stage_4[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 3, 2, 0, 2, 2, 2, 4, + 2, 5, 2, 5, 2, 6, 2, 6, 3, 2, 2, 2, 2, 4, 6, 2, + 2, 2, 2, 3, 6, 2, 2, 2, 2, 7, 2, 6, 2, 2, 8, 2, + 2, 6, 0, 2, 2, 8, 2, 2, 2, 2, 2, 6, 4, 2, 2, 9, + 2, 6, 2, 2, 2, 2, 2, 0, 10, 11, 2, 2, 2, 2, 3, 2, + 2, 5, 2, 0, 12, 2, 2, 6, 2, 6, 2, 4, 2, 3, 2, 2, + 2, 2, 2, 13, +}; + +static RE_UINT8 re_xdigit_stage_5[] = { + 0, 0, 0, 0, 0, 0, 255, 3, 126, 0, 0, 0, 126, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
255, 3, 0, 0, + 255, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 192, 255, 0, 0, + 0, 0, 255, 3, 0, 0, 0, 0, 192, 255, 0, 0, 0, 0, 0, 0, + 255, 3, 255, 3, 0, 0, 0, 0, 0, 0, 255, 3, 0, 0, 255, 3, + 0, 0, 255, 3, 126, 0, 0, 0, 126, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 192, 255, 0, 192, 255, 255, 255, 255, 255, 255, +}; + +/* XDigit: 421 bytes. */ + RE_UINT32 re_get_xdigit(RE_UINT32 ch) { - return re_get_general_category(ch) == RE_PROP_ND || re_get_hex_digit(ch); + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_xdigit_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_xdigit_stage_2[pos + f] << 4; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_xdigit_stage_3[pos + f] << 2; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_xdigit_stage_4[pos + f] << 6; + pos += code; + value = (re_xdigit_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; } /* All_Cases. */ @@ -11190,186 +12650,186 @@ static RE_UINT8 re_all_cases_stage_4[] = { /* All_Cases: 1984 bytes. 
*/ static RE_AllCases re_all_cases_table[] = { - { 0, 0, 0}, - { 32, 0, 0}, - { 32, 232, 0}, - { 32, 8415, 0}, - { 32, 300, 0}, - { -32, 0, 0}, - { -32, 199, 0}, - { -32, 8383, 0}, - { -32, 268, 0}, - { 743, 775, 0}, - { 32, 8294, 0}, - { 7615, 0, 0}, - { -32, 8262, 0}, - { 121, 0, 0}, - { 1, 0, 0}, - { -1, 0, 0}, - { -199, 0, 0}, - { -232, 0, 0}, - { -121, 0, 0}, - { -300, -268, 0}, - { 195, 0, 0}, - { 210, 0, 0}, - { 206, 0, 0}, - { 205, 0, 0}, - { 79, 0, 0}, - { 202, 0, 0}, - { 203, 0, 0}, - { 207, 0, 0}, - { 97, 0, 0}, - { 211, 0, 0}, - { 209, 0, 0}, - { 163, 0, 0}, - { 213, 0, 0}, - { 130, 0, 0}, - { 214, 0, 0}, - { 218, 0, 0}, - { 217, 0, 0}, - { 219, 0, 0}, - { 56, 0, 0}, - { 1, 2, 0}, - { -1, 1, 0}, - { -2, -1, 0}, - { -79, 0, 0}, - { -97, 0, 0}, - { -56, 0, 0}, - { -130, 0, 0}, - { 10795, 0, 0}, - { -163, 0, 0}, - { 10792, 0, 0}, - { 10815, 0, 0}, - { -195, 0, 0}, - { 69, 0, 0}, - { 71, 0, 0}, - { 10783, 0, 0}, - { 10780, 0, 0}, - { 10782, 0, 0}, - { -210, 0, 0}, - { -206, 0, 0}, - { -205, 0, 0}, - { -202, 0, 0}, - { -203, 0, 0}, - { -207, 0, 0}, - { 42280, 0, 0}, - { 42308, 0, 0}, - { -209, 0, 0}, - { -211, 0, 0}, - { 10743, 0, 0}, - { 10749, 0, 0}, - { -213, 0, 0}, - { -214, 0, 0}, - { 10727, 0, 0}, - { -218, 0, 0}, - { -69, 0, 0}, - { -217, 0, 0}, - { -71, 0, 0}, - { -219, 0, 0}, - { 84, 116, 7289}, - { 38, 0, 0}, - { 37, 0, 0}, - { 64, 0, 0}, - { 63, 0, 0}, - { 7235, 0, 0}, - { 32, 62, 0}, - { 32, 96, 0}, - { 32, 57, 92}, - { -84, 32, 7205}, - { 32, 86, 0}, - { -743, 32, 0}, - { 32, 54, 0}, - { 32, 80, 0}, - { 31, 32, 0}, - { 32, 47, 0}, - { 32, 7549, 0}, - { -38, 0, 0}, - { -37, 0, 0}, - { 7219, 0, 0}, - { -32, 30, 0}, - { -32, 64, 0}, - { -32, 25, 60}, - { -116, -32, 7173}, - { -32, 54, 0}, - { -775, -32, 0}, - { -32, 22, 0}, - { -32, 48, 0}, - { -31, 1, 0}, - { -32, -1, 0}, - { -32, 15, 0}, - { -32, 7517, 0}, - { -64, 0, 0}, - { -63, 0, 0}, - { 8, 0, 0}, - { -62, -30, 0}, - { -57, -25, 35}, - { -47, -15, 0}, - { -54, -22, 0}, - { -8, 0, 0}, - { -86, 
-54, 0}, - { -80, -48, 0}, - { 7, 0, 0}, - { -92, -60, -35}, - { -96, -64, 0}, - { -7, 0, 0}, - { 80, 0, 0}, - { -80, 0, 0}, - { 15, 0, 0}, - { -15, 0, 0}, - { 48, 0, 0}, - { -48, 0, 0}, - { 7264, 0, 0}, - { 35332, 0, 0}, - { 3814, 0, 0}, - { 1, 59, 0}, - { -1, 58, 0}, - { -59, -58, 0}, - { -7615, 0, 0}, - { 74, 0, 0}, - { 86, 0, 0}, - { 100, 0, 0}, - { 128, 0, 0}, - { 112, 0, 0}, - { 126, 0, 0}, - { 9, 0, 0}, - { -74, 0, 0}, - { -9, 0, 0}, - { -7289, -7205, -7173}, - { -86, 0, 0}, - { -7235, 0, 0}, - { -100, 0, 0}, - { -7219, 0, 0}, - { -112, 0, 0}, - { -128, 0, 0}, - { -126, 0, 0}, - { -7549, -7517, 0}, - { -8415, -8383, 0}, - { -8294, -8262, 0}, - { 28, 0, 0}, - { -28, 0, 0}, - { 16, 0, 0}, - { -16, 0, 0}, - { 26, 0, 0}, - { -26, 0, 0}, - {-10743, 0, 0}, - { -3814, 0, 0}, - {-10727, 0, 0}, - {-10795, 0, 0}, - {-10792, 0, 0}, - {-10780, 0, 0}, - {-10749, 0, 0}, - {-10783, 0, 0}, - {-10782, 0, 0}, - {-10815, 0, 0}, - { -7264, 0, 0}, - {-35332, 0, 0}, - {-42280, 0, 0}, - {-42308, 0, 0}, - { 40, 0, 0}, - { -40, 0, 0}, + {{ 0, 0, 0}}, + {{ 32, 0, 0}}, + {{ 32, 232, 0}}, + {{ 32, 8415, 0}}, + {{ 32, 300, 0}}, + {{ -32, 0, 0}}, + {{ -32, 199, 0}}, + {{ -32, 8383, 0}}, + {{ -32, 268, 0}}, + {{ 743, 775, 0}}, + {{ 32, 8294, 0}}, + {{ 7615, 0, 0}}, + {{ -32, 8262, 0}}, + {{ 121, 0, 0}}, + {{ 1, 0, 0}}, + {{ -1, 0, 0}}, + {{ -199, 0, 0}}, + {{ -232, 0, 0}}, + {{ -121, 0, 0}}, + {{ -300, -268, 0}}, + {{ 195, 0, 0}}, + {{ 210, 0, 0}}, + {{ 206, 0, 0}}, + {{ 205, 0, 0}}, + {{ 79, 0, 0}}, + {{ 202, 0, 0}}, + {{ 203, 0, 0}}, + {{ 207, 0, 0}}, + {{ 97, 0, 0}}, + {{ 211, 0, 0}}, + {{ 209, 0, 0}}, + {{ 163, 0, 0}}, + {{ 213, 0, 0}}, + {{ 130, 0, 0}}, + {{ 214, 0, 0}}, + {{ 218, 0, 0}}, + {{ 217, 0, 0}}, + {{ 219, 0, 0}}, + {{ 56, 0, 0}}, + {{ 1, 2, 0}}, + {{ -1, 1, 0}}, + {{ -2, -1, 0}}, + {{ -79, 0, 0}}, + {{ -97, 0, 0}}, + {{ -56, 0, 0}}, + {{ -130, 0, 0}}, + {{ 10795, 0, 0}}, + {{ -163, 0, 0}}, + {{ 10792, 0, 0}}, + {{ 10815, 0, 0}}, + {{ -195, 0, 0}}, + {{ 69, 0, 0}}, + {{ 71, 
0, 0}}, + {{ 10783, 0, 0}}, + {{ 10780, 0, 0}}, + {{ 10782, 0, 0}}, + {{ -210, 0, 0}}, + {{ -206, 0, 0}}, + {{ -205, 0, 0}}, + {{ -202, 0, 0}}, + {{ -203, 0, 0}}, + {{ -207, 0, 0}}, + {{ 42280, 0, 0}}, + {{ 42308, 0, 0}}, + {{ -209, 0, 0}}, + {{ -211, 0, 0}}, + {{ 10743, 0, 0}}, + {{ 10749, 0, 0}}, + {{ -213, 0, 0}}, + {{ -214, 0, 0}}, + {{ 10727, 0, 0}}, + {{ -218, 0, 0}}, + {{ -69, 0, 0}}, + {{ -217, 0, 0}}, + {{ -71, 0, 0}}, + {{ -219, 0, 0}}, + {{ 84, 116, 7289}}, + {{ 38, 0, 0}}, + {{ 37, 0, 0}}, + {{ 64, 0, 0}}, + {{ 63, 0, 0}}, + {{ 7235, 0, 0}}, + {{ 32, 62, 0}}, + {{ 32, 96, 0}}, + {{ 32, 57, 92}}, + {{ -84, 32, 7205}}, + {{ 32, 86, 0}}, + {{ -743, 32, 0}}, + {{ 32, 54, 0}}, + {{ 32, 80, 0}}, + {{ 31, 32, 0}}, + {{ 32, 47, 0}}, + {{ 32, 7549, 0}}, + {{ -38, 0, 0}}, + {{ -37, 0, 0}}, + {{ 7219, 0, 0}}, + {{ -32, 30, 0}}, + {{ -32, 64, 0}}, + {{ -32, 25, 60}}, + {{ -116, -32, 7173}}, + {{ -32, 54, 0}}, + {{ -775, -32, 0}}, + {{ -32, 22, 0}}, + {{ -32, 48, 0}}, + {{ -31, 1, 0}}, + {{ -32, -1, 0}}, + {{ -32, 15, 0}}, + {{ -32, 7517, 0}}, + {{ -64, 0, 0}}, + {{ -63, 0, 0}}, + {{ 8, 0, 0}}, + {{ -62, -30, 0}}, + {{ -57, -25, 35}}, + {{ -47, -15, 0}}, + {{ -54, -22, 0}}, + {{ -8, 0, 0}}, + {{ -86, -54, 0}}, + {{ -80, -48, 0}}, + {{ 7, 0, 0}}, + {{ -92, -60, -35}}, + {{ -96, -64, 0}}, + {{ -7, 0, 0}}, + {{ 80, 0, 0}}, + {{ -80, 0, 0}}, + {{ 15, 0, 0}}, + {{ -15, 0, 0}}, + {{ 48, 0, 0}}, + {{ -48, 0, 0}}, + {{ 7264, 0, 0}}, + {{ 35332, 0, 0}}, + {{ 3814, 0, 0}}, + {{ 1, 59, 0}}, + {{ -1, 58, 0}}, + {{ -59, -58, 0}}, + {{ -7615, 0, 0}}, + {{ 74, 0, 0}}, + {{ 86, 0, 0}}, + {{ 100, 0, 0}}, + {{ 128, 0, 0}}, + {{ 112, 0, 0}}, + {{ 126, 0, 0}}, + {{ 9, 0, 0}}, + {{ -74, 0, 0}}, + {{ -9, 0, 0}}, + {{ -7289, -7205, -7173}}, + {{ -86, 0, 0}}, + {{ -7235, 0, 0}}, + {{ -100, 0, 0}}, + {{ -7219, 0, 0}}, + {{ -112, 0, 0}}, + {{ -128, 0, 0}}, + {{ -126, 0, 0}}, + {{ -7549, -7517, 0}}, + {{ -8415, -8383, 0}}, + {{ -8294, -8262, 0}}, + {{ 28, 0, 0}}, + {{ -28, 0, 0}}, + {{ 16, 0, 
0}}, + {{ -16, 0, 0}}, + {{ 26, 0, 0}}, + {{ -26, 0, 0}}, + {{-10743, 0, 0}}, + {{ -3814, 0, 0}}, + {{-10727, 0, 0}}, + {{-10795, 0, 0}}, + {{-10792, 0, 0}}, + {{-10780, 0, 0}}, + {{-10749, 0, 0}}, + {{-10783, 0, 0}}, + {{-10782, 0, 0}}, + {{-10815, 0, 0}}, + {{ -7264, 0, 0}}, + {{-35332, 0, 0}}, + {{-42280, 0, 0}}, + {{-42308, 0, 0}}, + {{ 40, 0, 0}}, + {{ -40, 0, 0}}, }; -/* All_Cases: 1062 bytes. */ +/* All_Cases: 2124 bytes. */ int re_get_all_cases(RE_UINT32 ch, RE_UINT32* codepoints) { RE_UINT32 code; @@ -11396,7 +12856,8 @@ int re_get_all_cases(RE_UINT32 ch, RE_UINT32* codepoints) { count = 1; while (count < RE_MAX_CASES && all_cases->diffs[count - 1] != 0) { - codepoints[count] = ch + all_cases->diffs[count - 1]; + codepoints[count] = (RE_UINT32)((RE_INT32)ch + all_cases->diffs[count - + 1]); ++count; } @@ -11614,7 +13075,7 @@ RE_UINT32 re_get_simple_case_folding(RE_UINT32 ch) { diff = re_simple_case_folding_table[value]; - return ch + diff; + return (RE_UINT32)((RE_INT32)ch + diff); } /* Full_Case_Folding. */ @@ -11738,144 +13199,144 @@ static RE_UINT8 re_full_case_folding_stage_4[] = { /* Full_Case_Folding: 1656 bytes. 
*/ static RE_FullCaseFolding re_full_case_folding_table[] = { - { 0, 0, 0}, - { 32, 0, 0}, - { 775, 0, 0}, - { -108, 115, 0}, - { 1, 0, 0}, - { -199, 775, 0}, - { 371, 110, 0}, - { -121, 0, 0}, - { -268, 0, 0}, - { 210, 0, 0}, - { 206, 0, 0}, - { 205, 0, 0}, - { 79, 0, 0}, - { 202, 0, 0}, - { 203, 0, 0}, - { 207, 0, 0}, - { 211, 0, 0}, - { 209, 0, 0}, - { 213, 0, 0}, - { 214, 0, 0}, - { 218, 0, 0}, - { 217, 0, 0}, - { 219, 0, 0}, - { 2, 0, 0}, - { -390, 780, 0}, - { -97, 0, 0}, - { -56, 0, 0}, - { -130, 0, 0}, - { 10795, 0, 0}, - { -163, 0, 0}, - { 10792, 0, 0}, - { -195, 0, 0}, - { 69, 0, 0}, - { 71, 0, 0}, - { 116, 0, 0}, - { 38, 0, 0}, - { 37, 0, 0}, - { 64, 0, 0}, - { 63, 0, 0}, - { 41, 776, 769}, - { 21, 776, 769}, - { 8, 0, 0}, - { -30, 0, 0}, - { -25, 0, 0}, - { -15, 0, 0}, - { -22, 0, 0}, - { -54, 0, 0}, - { -48, 0, 0}, - { -60, 0, 0}, - { -64, 0, 0}, - { -7, 0, 0}, - { 80, 0, 0}, - { 15, 0, 0}, - { 48, 0, 0}, - { -34, 1410, 0}, - { 7264, 0, 0}, - { -7726, 817, 0}, - { -7715, 776, 0}, - { -7713, 778, 0}, - { -7712, 778, 0}, - { -7737, 702, 0}, - { -58, 0, 0}, - { -7723, 115, 0}, - { -8, 0, 0}, - { -7051, 787, 0}, - { -7053, 787, 768}, - { -7055, 787, 769}, - { -7057, 787, 834}, - { -128, 953, 0}, - { -136, 953, 0}, - { -112, 953, 0}, - { -120, 953, 0}, - { -64, 953, 0}, - { -72, 953, 0}, - { -66, 953, 0}, - { -7170, 953, 0}, - { -7176, 953, 0}, - { -7173, 834, 0}, - { -7174, 834, 953}, - { -74, 0, 0}, - { -7179, 953, 0}, - { -7173, 0, 0}, - { -78, 953, 0}, - { -7180, 953, 0}, - { -7190, 953, 0}, - { -7183, 834, 0}, - { -7184, 834, 953}, - { -86, 0, 0}, - { -7189, 953, 0}, - { -7193, 776, 768}, - { -7194, 776, 769}, - { -7197, 834, 0}, - { -7198, 776, 834}, - { -100, 0, 0}, - { -7197, 776, 768}, - { -7198, 776, 769}, - { -7203, 787, 0}, - { -7201, 834, 0}, - { -7202, 776, 834}, - { -112, 0, 0}, - { -118, 953, 0}, - { -7210, 953, 0}, - { -7206, 953, 0}, - { -7213, 834, 0}, - { -7214, 834, 953}, - { -128, 0, 0}, - { -126, 0, 0}, - { -7219, 953, 0}, - { -7517, 
0, 0}, - { -8383, 0, 0}, - { -8262, 0, 0}, - { 28, 0, 0}, - { 16, 0, 0}, - { 26, 0, 0}, - {-10743, 0, 0}, - { -3814, 0, 0}, - {-10727, 0, 0}, - {-10780, 0, 0}, - {-10749, 0, 0}, - {-10783, 0, 0}, - {-10782, 0, 0}, - {-10815, 0, 0}, - {-35332, 0, 0}, - {-42280, 0, 0}, - {-42308, 0, 0}, - {-64154, 102, 0}, - {-64155, 105, 0}, - {-64156, 108, 0}, - {-64157, 102, 105}, - {-64158, 102, 108}, - {-64146, 116, 0}, - {-64147, 116, 0}, - {-62879, 1398, 0}, - {-62880, 1381, 0}, - {-62881, 1387, 0}, - {-62872, 1398, 0}, - {-62883, 1389, 0}, - { 40, 0, 0}, + { 0, { 0, 0}}, + { 32, { 0, 0}}, + { 775, { 0, 0}}, + { -108, { 115, 0}}, + { 1, { 0, 0}}, + { -199, { 775, 0}}, + { 371, { 110, 0}}, + { -121, { 0, 0}}, + { -268, { 0, 0}}, + { 210, { 0, 0}}, + { 206, { 0, 0}}, + { 205, { 0, 0}}, + { 79, { 0, 0}}, + { 202, { 0, 0}}, + { 203, { 0, 0}}, + { 207, { 0, 0}}, + { 211, { 0, 0}}, + { 209, { 0, 0}}, + { 213, { 0, 0}}, + { 214, { 0, 0}}, + { 218, { 0, 0}}, + { 217, { 0, 0}}, + { 219, { 0, 0}}, + { 2, { 0, 0}}, + { -390, { 780, 0}}, + { -97, { 0, 0}}, + { -56, { 0, 0}}, + { -130, { 0, 0}}, + { 10795, { 0, 0}}, + { -163, { 0, 0}}, + { 10792, { 0, 0}}, + { -195, { 0, 0}}, + { 69, { 0, 0}}, + { 71, { 0, 0}}, + { 116, { 0, 0}}, + { 38, { 0, 0}}, + { 37, { 0, 0}}, + { 64, { 0, 0}}, + { 63, { 0, 0}}, + { 41, { 776, 769}}, + { 21, { 776, 769}}, + { 8, { 0, 0}}, + { -30, { 0, 0}}, + { -25, { 0, 0}}, + { -15, { 0, 0}}, + { -22, { 0, 0}}, + { -54, { 0, 0}}, + { -48, { 0, 0}}, + { -60, { 0, 0}}, + { -64, { 0, 0}}, + { -7, { 0, 0}}, + { 80, { 0, 0}}, + { 15, { 0, 0}}, + { 48, { 0, 0}}, + { -34, {1410, 0}}, + { 7264, { 0, 0}}, + { -7726, { 817, 0}}, + { -7715, { 776, 0}}, + { -7713, { 778, 0}}, + { -7712, { 778, 0}}, + { -7737, { 702, 0}}, + { -58, { 0, 0}}, + { -7723, { 115, 0}}, + { -8, { 0, 0}}, + { -7051, { 787, 0}}, + { -7053, { 787, 768}}, + { -7055, { 787, 769}}, + { -7057, { 787, 834}}, + { -128, { 953, 0}}, + { -136, { 953, 0}}, + { -112, { 953, 0}}, + { -120, { 953, 0}}, + { -64, { 953, 
0}}, + { -72, { 953, 0}}, + { -66, { 953, 0}}, + { -7170, { 953, 0}}, + { -7176, { 953, 0}}, + { -7173, { 834, 0}}, + { -7174, { 834, 953}}, + { -74, { 0, 0}}, + { -7179, { 953, 0}}, + { -7173, { 0, 0}}, + { -78, { 953, 0}}, + { -7180, { 953, 0}}, + { -7190, { 953, 0}}, + { -7183, { 834, 0}}, + { -7184, { 834, 953}}, + { -86, { 0, 0}}, + { -7189, { 953, 0}}, + { -7193, { 776, 768}}, + { -7194, { 776, 769}}, + { -7197, { 834, 0}}, + { -7198, { 776, 834}}, + { -100, { 0, 0}}, + { -7197, { 776, 768}}, + { -7198, { 776, 769}}, + { -7203, { 787, 0}}, + { -7201, { 834, 0}}, + { -7202, { 776, 834}}, + { -112, { 0, 0}}, + { -118, { 953, 0}}, + { -7210, { 953, 0}}, + { -7206, { 953, 0}}, + { -7213, { 834, 0}}, + { -7214, { 834, 953}}, + { -128, { 0, 0}}, + { -126, { 0, 0}}, + { -7219, { 953, 0}}, + { -7517, { 0, 0}}, + { -8383, { 0, 0}}, + { -8262, { 0, 0}}, + { 28, { 0, 0}}, + { 16, { 0, 0}}, + { 26, { 0, 0}}, + {-10743, { 0, 0}}, + { -3814, { 0, 0}}, + {-10727, { 0, 0}}, + {-10780, { 0, 0}}, + {-10749, { 0, 0}}, + {-10783, { 0, 0}}, + {-10782, { 0, 0}}, + {-10815, { 0, 0}}, + {-35332, { 0, 0}}, + {-42280, { 0, 0}}, + {-42308, { 0, 0}}, + {-64154, { 102, 0}}, + {-64155, { 105, 0}}, + {-64156, { 108, 0}}, + {-64157, { 102, 105}}, + {-64158, { 102, 108}}, + {-64146, { 116, 0}}, + {-64147, { 116, 0}}, + {-62879, {1398, 0}}, + {-62880, {1381, 0}}, + {-62881, {1387, 0}}, + {-62872, {1398, 0}}, + {-62883, {1389, 0}}, + { 40, { 0, 0}}, }; /* Full_Case_Folding: 1104 bytes. 
*/ @@ -11901,7 +13362,7 @@ int re_get_full_case_folding(RE_UINT32 ch, RE_UINT32* codepoints) { case_folding = &re_full_case_folding_table[value]; - codepoints[0] = ch + case_folding->diff; + codepoints[0] = (RE_UINT32)((RE_INT32)ch + case_folding->diff); count = 1; while (count < RE_MAX_FOLDED && case_folding->codepoints[count - 1] != 0) { @@ -11987,8 +13448,6 @@ RE_GetPropertyFunc re_get_property[] = { re_get_indic_syllabic_category, re_get_alphanumeric, re_get_any, - re_get_ascii, - re_get_assigned, re_get_blank, re_get_graph, re_get_print, diff --git a/src/regex/_regex_unicode.h b/src/regex/_regex_unicode.h index 950d02e161..f0d49b4d39 100644 --- a/src/regex/_regex_unicode.h +++ b/src/regex/_regex_unicode.h @@ -41,6 +41,8 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch); #define RE_PROP_P 34 #define RE_PROP_S 35 #define RE_PROP_Z 36 +#define RE_PROP_ASSIGNED 38 +#define RE_PROP_CASEDLETTER 37 #define RE_PROP_CN 0 #define RE_PROP_LU 1 @@ -84,19 +86,17 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch); #define RE_PROP_ALNUM 0x460001 #define RE_PROP_ALPHA 0x070001 #define RE_PROP_ANY 0x470001 -#define RE_PROP_ASCII 0x480001 -#define RE_PROP_ASSIGNED 0x490001 -#define RE_PROP_BLANK 0x4A0001 +#define RE_PROP_ASCII 0x010001 +#define RE_PROP_BLANK 0x480001 #define RE_PROP_CNTRL 0x00000F #define RE_PROP_DIGIT 0x000009 -#define RE_PROP_GRAPH 0x4B0001 +#define RE_PROP_GRAPH 0x490001 #define RE_PROP_LOWER 0x080001 -#define RE_PROP_PRINT 0x4C0001 -#define RE_PROP_PUNCT 0x000022 +#define RE_PROP_PRINT 0x4A0001 #define RE_PROP_SPACE 0x190001 #define RE_PROP_UPPER 0x090001 -#define RE_PROP_WORD 0x4D0001 -#define RE_PROP_XDIGIT 0x4E0001 +#define RE_PROP_WORD 0x4B0001 +#define RE_PROP_XDIGIT 0x4C0001 #define RE_BREAK_OTHER 0 #define RE_BREAK_DOUBLEQUOTE 1 @@ -130,11 +130,11 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch); #define RE_GBREAK_LVT 11 #define RE_GBREAK_PREPEND 12 -extern char* re_strings[1155]; -extern RE_Property re_properties[145]; -extern 
RE_PropertyValue re_property_values[1244]; +extern char* re_strings[1257]; +extern RE_Property re_properties[143]; +extern RE_PropertyValue re_property_values[1372]; extern RE_UINT16 re_expand_on_folding[104]; -extern RE_GetPropertyFunc re_get_property[79]; +extern RE_GetPropertyFunc re_get_property[77]; RE_UINT32 re_get_general_category(RE_UINT32 ch); RE_UINT32 re_get_block(RE_UINT32 ch); @@ -208,8 +208,6 @@ RE_UINT32 re_get_indic_matra_category(RE_UINT32 ch); RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 ch); RE_UINT32 re_get_alphanumeric(RE_UINT32 ch); RE_UINT32 re_get_any(RE_UINT32 ch); -RE_UINT32 re_get_ascii(RE_UINT32 ch); -RE_UINT32 re_get_assigned(RE_UINT32 ch); RE_UINT32 re_get_blank(RE_UINT32 ch); RE_UINT32 re_get_graph(RE_UINT32 ch); RE_UINT32 re_get_print(RE_UINT32 ch);