mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Edit Book: Update regex engine to support Unicode 9.0
This commit is contained in:
parent
a99da9be69
commit
24c7756df1
@ -76,9 +76,21 @@ The special characters are:
|
|||||||
(?<!...) Matches if not preceded by ....
|
(?<!...) Matches if not preceded by ....
|
||||||
(?(id)yes|no) Matches yes pattern if group id matched, the (optional)
|
(?(id)yes|no) Matches yes pattern if group id matched, the (optional)
|
||||||
no pattern otherwise.
|
no pattern otherwise.
|
||||||
|
(?(DEFINE)...) If there's no group called "DEFINE", then ... will be
|
||||||
|
ignored, but any group definitions will be available.
|
||||||
(?|...|...) (?|A|B), creates an RE that will match either A or B,
|
(?|...|...) (?|A|B), creates an RE that will match either A or B,
|
||||||
but reuses capture group numbers across the
|
but reuses capture group numbers across the
|
||||||
alternatives.
|
alternatives.
|
||||||
|
(*FAIL) Forces matching to fail, which means immediate
|
||||||
|
backtracking.
|
||||||
|
(*F) Abbreviation for (*FAIL).
|
||||||
|
(*PRUNE) Discards the current backtracking information. Its
|
||||||
|
effect doesn't extend outside an atomic group or a
|
||||||
|
lookaround.
|
||||||
|
(*SKIP) Similar to (*PRUNE), except that it also sets where in
|
||||||
|
the text the next attempt at matching the entire
|
||||||
|
pattern will start. Its effect doesn't extend outside
|
||||||
|
an atomic group or a lookaround.
|
||||||
|
|
||||||
The fuzzy matching constraints are: "i" to permit insertions, "d" to permit
|
The fuzzy matching constraints are: "i" to permit insertions, "d" to permit
|
||||||
deletions, "s" to permit substitutions, "e" to permit any of these. Limits are
|
deletions, "s" to permit substitutions, "e" to permit any of these. Limits are
|
||||||
@ -124,6 +136,7 @@ second character.
|
|||||||
\g<name> Matches the text matched by the group named name.
|
\g<name> Matches the text matched by the group named name.
|
||||||
\G Matches the empty string, but only at the position where
|
\G Matches the empty string, but only at the position where
|
||||||
the search started.
|
the search started.
|
||||||
|
\K Keeps only what follows for the entire match.
|
||||||
\L<name> Named list. The list is provided as a keyword argument.
|
\L<name> Named list. The list is provided as a keyword argument.
|
||||||
\m Matches the empty string, but only at the start of a word.
|
\m Matches the empty string, but only at the start of a word.
|
||||||
\M Matches the empty string, but only at the end of a word.
|
\M Matches the empty string, but only at the end of a word.
|
||||||
@ -188,6 +201,8 @@ these flags can also be set within an RE:
|
|||||||
when matching a bytestring.
|
when matching a bytestring.
|
||||||
B b BESTMATCH Find the best fuzzy match (default is first).
|
B b BESTMATCH Find the best fuzzy match (default is first).
|
||||||
D DEBUG Print the parsed pattern.
|
D DEBUG Print the parsed pattern.
|
||||||
|
E e ENHANCEMATCH Attempt to improve the fit after finding the first
|
||||||
|
fuzzy match.
|
||||||
F f FULLCASE Use full case-folding when performing
|
F f FULLCASE Use full case-folding when performing
|
||||||
case-insensitive matching in Unicode.
|
case-insensitive matching in Unicode.
|
||||||
I i IGNORECASE Perform case-insensitive matching.
|
I i IGNORECASE Perform case-insensitive matching.
|
||||||
@ -196,8 +211,7 @@ these flags can also be set within an RE:
|
|||||||
M m MULTILINE "^" matches the beginning of lines (after a newline)
|
M m MULTILINE "^" matches the beginning of lines (after a newline)
|
||||||
as well as the string. "$" matches the end of lines
|
as well as the string. "$" matches the end of lines
|
||||||
(before a newline) as well as the end of the string.
|
(before a newline) as well as the end of the string.
|
||||||
E e ENHANCEMATCH Attempt to improve the fit after finding the first
|
P p POSIX Perform POSIX-standard matching (leftmost longest).
|
||||||
fuzzy match.
|
|
||||||
R r REVERSE Searches backwards.
|
R r REVERSE Searches backwards.
|
||||||
S s DOTALL "." matches any character at all, including the
|
S s DOTALL "." matches any character at all, including the
|
||||||
newline.
|
newline.
|
||||||
@ -221,11 +235,11 @@ __all__ = ["compile", "escape", "findall", "finditer", "fullmatch", "match",
|
|||||||
"purge", "search", "split", "splititer", "sub", "subf", "subfn", "subn",
|
"purge", "search", "split", "splititer", "sub", "subf", "subfn", "subn",
|
||||||
"template", "Scanner", "A", "ASCII", "B", "BESTMATCH", "D", "DEBUG", "E",
|
"template", "Scanner", "A", "ASCII", "B", "BESTMATCH", "D", "DEBUG", "E",
|
||||||
"ENHANCEMATCH", "S", "DOTALL", "F", "FULLCASE", "I", "IGNORECASE", "L",
|
"ENHANCEMATCH", "S", "DOTALL", "F", "FULLCASE", "I", "IGNORECASE", "L",
|
||||||
"LOCALE", "M", "MULTILINE", "R", "REVERSE", "T", "TEMPLATE", "U", "UNICODE",
|
"LOCALE", "M", "MULTILINE", "P", "POSIX", "R", "REVERSE", "T", "TEMPLATE",
|
||||||
"V0", "VERSION0", "V1", "VERSION1", "X", "VERBOSE", "W", "WORD", "error",
|
"U", "UNICODE", "V0", "VERSION0", "V1", "VERSION1", "X", "VERBOSE", "W",
|
||||||
"Regex"]
|
"WORD", "error", "Regex"]
|
||||||
|
|
||||||
__version__ = "2.4.66"
|
__version__ = "2.4.105"
|
||||||
|
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
# Public interface.
|
# Public interface.
|
||||||
@ -341,50 +355,27 @@ def template(pattern, flags=0):
|
|||||||
|
|
||||||
def escape(pattern, special_only=False):
|
def escape(pattern, special_only=False):
|
||||||
"Escape all non-alphanumeric characters or special characters in pattern."
|
"Escape all non-alphanumeric characters or special characters in pattern."
|
||||||
if isinstance(pattern, unicode):
|
s = []
|
||||||
s = []
|
if special_only:
|
||||||
if special_only:
|
for c in pattern:
|
||||||
for c in pattern:
|
if c in _METACHARS:
|
||||||
if c in _METACHARS:
|
s.append("\\")
|
||||||
s.append(u"\\")
|
s.append(c)
|
||||||
s.append(c)
|
elif c == "\x00":
|
||||||
elif c == u"\x00":
|
s.append("\\000")
|
||||||
s.append(u"\\000")
|
else:
|
||||||
else:
|
s.append(c)
|
||||||
s.append(c)
|
|
||||||
else:
|
|
||||||
for c in pattern:
|
|
||||||
if c in _ALNUM:
|
|
||||||
s.append(c)
|
|
||||||
elif c == u"\x00":
|
|
||||||
s.append(u"\\000")
|
|
||||||
else:
|
|
||||||
s.append(u"\\")
|
|
||||||
s.append(c)
|
|
||||||
|
|
||||||
return u"".join(s)
|
|
||||||
else:
|
else:
|
||||||
s = []
|
for c in pattern:
|
||||||
if special_only:
|
if c in _ALNUM:
|
||||||
for c in pattern:
|
s.append(c)
|
||||||
if c in _METACHARS:
|
elif c == "\x00":
|
||||||
s.append("\\")
|
s.append("\\000")
|
||||||
s.append(c)
|
else:
|
||||||
elif c == "\x00":
|
s.append("\\")
|
||||||
s.append("\\000")
|
s.append(c)
|
||||||
else:
|
|
||||||
s.append(c)
|
|
||||||
else:
|
|
||||||
for c in pattern:
|
|
||||||
if c in _ALNUM:
|
|
||||||
s.append(c)
|
|
||||||
elif c == "\x00":
|
|
||||||
s.append("\\000")
|
|
||||||
else:
|
|
||||||
s.append("\\")
|
|
||||||
s.append(c)
|
|
||||||
|
|
||||||
return "".join(s)
|
return pattern[ : 0].join(s)
|
||||||
|
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
# Internals.
|
# Internals.
|
||||||
@ -478,10 +469,10 @@ def _compile(pattern, flags=0, kwargs={}):
|
|||||||
# Set the default version in the core code in case it has been changed.
|
# Set the default version in the core code in case it has been changed.
|
||||||
_regex_core.DEFAULT_VERSION = DEFAULT_VERSION
|
_regex_core.DEFAULT_VERSION = DEFAULT_VERSION
|
||||||
|
|
||||||
caught_exception = None
|
|
||||||
global_flags = flags
|
global_flags = flags
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
caught_exception = None
|
||||||
try:
|
try:
|
||||||
source = _Source(pattern)
|
source = _Source(pattern)
|
||||||
info = _Info(global_flags, source.char_type, kwargs)
|
info = _Info(global_flags, source.char_type, kwargs)
|
||||||
@ -522,15 +513,23 @@ def _compile(pattern, flags=0, kwargs={}):
|
|||||||
# Remember whether this pattern has an inline locale flag.
|
# Remember whether this pattern has an inline locale flag.
|
||||||
_locale_sensitive[locale_key] = info.inline_locale
|
_locale_sensitive[locale_key] = info.inline_locale
|
||||||
|
|
||||||
|
# Fix the group references.
|
||||||
|
caught_exception = None
|
||||||
|
try:
|
||||||
|
parsed.fix_groups(pattern, reverse, False)
|
||||||
|
except error, e:
|
||||||
|
caught_exception = e
|
||||||
|
|
||||||
|
if caught_exception:
|
||||||
|
raise error(caught_exception.msg, caught_exception.pattern,
|
||||||
|
caught_exception.pos)
|
||||||
|
|
||||||
# Should we print the parsed pattern?
|
# Should we print the parsed pattern?
|
||||||
if flags & DEBUG:
|
if flags & DEBUG:
|
||||||
parsed.dump(indent=0, reverse=reverse)
|
parsed.dump(indent=0, reverse=reverse)
|
||||||
|
|
||||||
# Fix the group references.
|
|
||||||
parsed.fix_groups(pattern, reverse, False)
|
|
||||||
|
|
||||||
# Optimise the parsed pattern.
|
# Optimise the parsed pattern.
|
||||||
parsed = parsed.optimise(info)
|
parsed = parsed.optimise(info, reverse)
|
||||||
parsed = parsed.pack_characters(info)
|
parsed = parsed.pack_characters(info)
|
||||||
|
|
||||||
# Get the required string.
|
# Get the required string.
|
||||||
@ -680,10 +679,10 @@ Regex = compile
|
|||||||
# Register myself for pickling.
|
# Register myself for pickling.
|
||||||
import copy_reg as _copy_reg
|
import copy_reg as _copy_reg
|
||||||
|
|
||||||
def _pickle(p):
|
def _pickle(pattern):
|
||||||
return _compile, (p.pattern, p.flags)
|
return _regex.compile, pattern._pickled_data
|
||||||
|
|
||||||
_copy_reg.pickle(_pattern_type, _pickle, _compile)
|
_copy_reg.pickle(_pattern_type, _pickle)
|
||||||
|
|
||||||
if not hasattr(str, "format"):
|
if not hasattr(str, "format"):
|
||||||
# Strings don't have the .format method (below Python 2.6).
|
# Strings don't have the .format method (below Python 2.6).
|
||||||
|
3468
src/regex/_regex.c
3468
src/regex/_regex.c
File diff suppressed because it is too large
Load Diff
@ -11,7 +11,7 @@
|
|||||||
* 2010-01-16 mrab Re-written
|
* 2010-01-16 mrab Re-written
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* Supports Unicode version 7.0.0. */
|
/* Supports Unicode version 9.0.0. */
|
||||||
|
|
||||||
#define RE_MAGIC 20100116
|
#define RE_MAGIC 20100116
|
||||||
|
|
||||||
@ -34,84 +34,91 @@
|
|||||||
#define RE_OP_CHARACTER_IGN 13
|
#define RE_OP_CHARACTER_IGN 13
|
||||||
#define RE_OP_CHARACTER_IGN_REV 14
|
#define RE_OP_CHARACTER_IGN_REV 14
|
||||||
#define RE_OP_CHARACTER_REV 15
|
#define RE_OP_CHARACTER_REV 15
|
||||||
#define RE_OP_DEFAULT_BOUNDARY 16
|
#define RE_OP_CONDITIONAL 16
|
||||||
#define RE_OP_DEFAULT_END_OF_WORD 17
|
#define RE_OP_DEFAULT_BOUNDARY 17
|
||||||
#define RE_OP_DEFAULT_START_OF_WORD 18
|
#define RE_OP_DEFAULT_END_OF_WORD 18
|
||||||
#define RE_OP_END 19
|
#define RE_OP_DEFAULT_START_OF_WORD 19
|
||||||
#define RE_OP_END_OF_LINE 20
|
#define RE_OP_END 20
|
||||||
#define RE_OP_END_OF_LINE_U 21
|
#define RE_OP_END_OF_LINE 21
|
||||||
#define RE_OP_END_OF_STRING 22
|
#define RE_OP_END_OF_LINE_U 22
|
||||||
#define RE_OP_END_OF_STRING_LINE 23
|
#define RE_OP_END_OF_STRING 23
|
||||||
#define RE_OP_END_OF_STRING_LINE_U 24
|
#define RE_OP_END_OF_STRING_LINE 24
|
||||||
#define RE_OP_END_OF_WORD 25
|
#define RE_OP_END_OF_STRING_LINE_U 25
|
||||||
#define RE_OP_FUZZY 26
|
#define RE_OP_END_OF_WORD 26
|
||||||
#define RE_OP_GRAPHEME_BOUNDARY 27
|
#define RE_OP_FUZZY 27
|
||||||
#define RE_OP_GREEDY_REPEAT 28
|
#define RE_OP_GRAPHEME_BOUNDARY 28
|
||||||
#define RE_OP_GROUP 29
|
#define RE_OP_GREEDY_REPEAT 29
|
||||||
#define RE_OP_GROUP_CALL 30
|
#define RE_OP_GROUP 30
|
||||||
#define RE_OP_GROUP_EXISTS 31
|
#define RE_OP_GROUP_CALL 31
|
||||||
#define RE_OP_LAZY_REPEAT 32
|
#define RE_OP_GROUP_EXISTS 32
|
||||||
#define RE_OP_LOOKAROUND 33
|
#define RE_OP_KEEP 33
|
||||||
#define RE_OP_NEXT 34
|
#define RE_OP_LAZY_REPEAT 34
|
||||||
#define RE_OP_PROPERTY 35
|
#define RE_OP_LOOKAROUND 35
|
||||||
#define RE_OP_PROPERTY_IGN 36
|
#define RE_OP_NEXT 36
|
||||||
#define RE_OP_PROPERTY_IGN_REV 37
|
#define RE_OP_PROPERTY 37
|
||||||
#define RE_OP_PROPERTY_REV 38
|
#define RE_OP_PROPERTY_IGN 38
|
||||||
#define RE_OP_RANGE 39
|
#define RE_OP_PROPERTY_IGN_REV 39
|
||||||
#define RE_OP_RANGE_IGN 40
|
#define RE_OP_PROPERTY_REV 40
|
||||||
#define RE_OP_RANGE_IGN_REV 41
|
#define RE_OP_PRUNE 41
|
||||||
#define RE_OP_RANGE_REV 42
|
#define RE_OP_RANGE 42
|
||||||
#define RE_OP_REF_GROUP 43
|
#define RE_OP_RANGE_IGN 43
|
||||||
#define RE_OP_REF_GROUP_FLD 44
|
#define RE_OP_RANGE_IGN_REV 44
|
||||||
#define RE_OP_REF_GROUP_FLD_REV 45
|
#define RE_OP_RANGE_REV 45
|
||||||
#define RE_OP_REF_GROUP_IGN 46
|
#define RE_OP_REF_GROUP 46
|
||||||
#define RE_OP_REF_GROUP_IGN_REV 47
|
#define RE_OP_REF_GROUP_FLD 47
|
||||||
#define RE_OP_REF_GROUP_REV 48
|
#define RE_OP_REF_GROUP_FLD_REV 48
|
||||||
#define RE_OP_SEARCH_ANCHOR 49
|
#define RE_OP_REF_GROUP_IGN 49
|
||||||
#define RE_OP_SET_DIFF 50
|
#define RE_OP_REF_GROUP_IGN_REV 50
|
||||||
#define RE_OP_SET_DIFF_IGN 51
|
#define RE_OP_REF_GROUP_REV 51
|
||||||
#define RE_OP_SET_DIFF_IGN_REV 52
|
#define RE_OP_SEARCH_ANCHOR 52
|
||||||
#define RE_OP_SET_DIFF_REV 53
|
#define RE_OP_SET_DIFF 53
|
||||||
#define RE_OP_SET_INTER 54
|
#define RE_OP_SET_DIFF_IGN 54
|
||||||
#define RE_OP_SET_INTER_IGN 55
|
#define RE_OP_SET_DIFF_IGN_REV 55
|
||||||
#define RE_OP_SET_INTER_IGN_REV 56
|
#define RE_OP_SET_DIFF_REV 56
|
||||||
#define RE_OP_SET_INTER_REV 57
|
#define RE_OP_SET_INTER 57
|
||||||
#define RE_OP_SET_SYM_DIFF 58
|
#define RE_OP_SET_INTER_IGN 58
|
||||||
#define RE_OP_SET_SYM_DIFF_IGN 59
|
#define RE_OP_SET_INTER_IGN_REV 59
|
||||||
#define RE_OP_SET_SYM_DIFF_IGN_REV 60
|
#define RE_OP_SET_INTER_REV 60
|
||||||
#define RE_OP_SET_SYM_DIFF_REV 61
|
#define RE_OP_SET_SYM_DIFF 61
|
||||||
#define RE_OP_SET_UNION 62
|
#define RE_OP_SET_SYM_DIFF_IGN 62
|
||||||
#define RE_OP_SET_UNION_IGN 63
|
#define RE_OP_SET_SYM_DIFF_IGN_REV 63
|
||||||
#define RE_OP_SET_UNION_IGN_REV 64
|
#define RE_OP_SET_SYM_DIFF_REV 64
|
||||||
#define RE_OP_SET_UNION_REV 65
|
#define RE_OP_SET_UNION 65
|
||||||
#define RE_OP_START_OF_LINE 66
|
#define RE_OP_SET_UNION_IGN 66
|
||||||
#define RE_OP_START_OF_LINE_U 67
|
#define RE_OP_SET_UNION_IGN_REV 67
|
||||||
#define RE_OP_START_OF_STRING 68
|
#define RE_OP_SET_UNION_REV 68
|
||||||
#define RE_OP_START_OF_WORD 69
|
#define RE_OP_SKIP 69
|
||||||
#define RE_OP_STRING 70
|
#define RE_OP_START_OF_LINE 70
|
||||||
#define RE_OP_STRING_FLD 71
|
#define RE_OP_START_OF_LINE_U 71
|
||||||
#define RE_OP_STRING_FLD_REV 72
|
#define RE_OP_START_OF_STRING 72
|
||||||
#define RE_OP_STRING_IGN 73
|
#define RE_OP_START_OF_WORD 73
|
||||||
#define RE_OP_STRING_IGN_REV 74
|
#define RE_OP_STRING 74
|
||||||
#define RE_OP_STRING_REV 75
|
#define RE_OP_STRING_FLD 75
|
||||||
#define RE_OP_STRING_SET 76
|
#define RE_OP_STRING_FLD_REV 76
|
||||||
#define RE_OP_STRING_SET_FLD 77
|
#define RE_OP_STRING_IGN 77
|
||||||
#define RE_OP_STRING_SET_FLD_REV 78
|
#define RE_OP_STRING_IGN_REV 78
|
||||||
#define RE_OP_STRING_SET_IGN 79
|
#define RE_OP_STRING_REV 79
|
||||||
#define RE_OP_STRING_SET_IGN_REV 80
|
#define RE_OP_STRING_SET 80
|
||||||
#define RE_OP_STRING_SET_REV 81
|
#define RE_OP_STRING_SET_FLD 81
|
||||||
#define RE_OP_BODY_END 82
|
#define RE_OP_STRING_SET_FLD_REV 82
|
||||||
#define RE_OP_BODY_START 83
|
#define RE_OP_STRING_SET_IGN 83
|
||||||
#define RE_OP_END_FUZZY 84
|
#define RE_OP_STRING_SET_IGN_REV 84
|
||||||
#define RE_OP_END_GREEDY_REPEAT 85
|
#define RE_OP_STRING_SET_REV 85
|
||||||
#define RE_OP_END_GROUP 86
|
#define RE_OP_BODY_END 86
|
||||||
#define RE_OP_END_LAZY_REPEAT 87
|
#define RE_OP_BODY_START 87
|
||||||
#define RE_OP_GREEDY_REPEAT_ONE 88
|
#define RE_OP_END_ATOMIC 88
|
||||||
#define RE_OP_GROUP_RETURN 89
|
#define RE_OP_END_CONDITIONAL 89
|
||||||
#define RE_OP_LAZY_REPEAT_ONE 90
|
#define RE_OP_END_FUZZY 90
|
||||||
#define RE_OP_MATCH_BODY 91
|
#define RE_OP_END_GREEDY_REPEAT 91
|
||||||
#define RE_OP_MATCH_TAIL 92
|
#define RE_OP_END_GROUP 92
|
||||||
#define RE_OP_START_GROUP 93
|
#define RE_OP_END_LAZY_REPEAT 93
|
||||||
|
#define RE_OP_END_LOOKAROUND 94
|
||||||
|
#define RE_OP_GREEDY_REPEAT_ONE 95
|
||||||
|
#define RE_OP_GROUP_RETURN 96
|
||||||
|
#define RE_OP_LAZY_REPEAT_ONE 97
|
||||||
|
#define RE_OP_MATCH_BODY 98
|
||||||
|
#define RE_OP_MATCH_TAIL 99
|
||||||
|
#define RE_OP_START_GROUP 100
|
||||||
|
|
||||||
char* re_op_text[] = {
|
char* re_op_text[] = {
|
||||||
"RE_OP_FAILURE",
|
"RE_OP_FAILURE",
|
||||||
@ -130,6 +137,7 @@ char* re_op_text[] = {
|
|||||||
"RE_OP_CHARACTER_IGN",
|
"RE_OP_CHARACTER_IGN",
|
||||||
"RE_OP_CHARACTER_IGN_REV",
|
"RE_OP_CHARACTER_IGN_REV",
|
||||||
"RE_OP_CHARACTER_REV",
|
"RE_OP_CHARACTER_REV",
|
||||||
|
"RE_OP_CONDITIONAL",
|
||||||
"RE_OP_DEFAULT_BOUNDARY",
|
"RE_OP_DEFAULT_BOUNDARY",
|
||||||
"RE_OP_DEFAULT_END_OF_WORD",
|
"RE_OP_DEFAULT_END_OF_WORD",
|
||||||
"RE_OP_DEFAULT_START_OF_WORD",
|
"RE_OP_DEFAULT_START_OF_WORD",
|
||||||
@ -146,6 +154,7 @@ char* re_op_text[] = {
|
|||||||
"RE_OP_GROUP",
|
"RE_OP_GROUP",
|
||||||
"RE_OP_GROUP_CALL",
|
"RE_OP_GROUP_CALL",
|
||||||
"RE_OP_GROUP_EXISTS",
|
"RE_OP_GROUP_EXISTS",
|
||||||
|
"RE_OP_KEEP",
|
||||||
"RE_OP_LAZY_REPEAT",
|
"RE_OP_LAZY_REPEAT",
|
||||||
"RE_OP_LOOKAROUND",
|
"RE_OP_LOOKAROUND",
|
||||||
"RE_OP_NEXT",
|
"RE_OP_NEXT",
|
||||||
@ -153,6 +162,7 @@ char* re_op_text[] = {
|
|||||||
"RE_OP_PROPERTY_IGN",
|
"RE_OP_PROPERTY_IGN",
|
||||||
"RE_OP_PROPERTY_IGN_REV",
|
"RE_OP_PROPERTY_IGN_REV",
|
||||||
"RE_OP_PROPERTY_REV",
|
"RE_OP_PROPERTY_REV",
|
||||||
|
"RE_OP_PRUNE",
|
||||||
"RE_OP_RANGE",
|
"RE_OP_RANGE",
|
||||||
"RE_OP_RANGE_IGN",
|
"RE_OP_RANGE_IGN",
|
||||||
"RE_OP_RANGE_IGN_REV",
|
"RE_OP_RANGE_IGN_REV",
|
||||||
@ -180,6 +190,7 @@ char* re_op_text[] = {
|
|||||||
"RE_OP_SET_UNION_IGN",
|
"RE_OP_SET_UNION_IGN",
|
||||||
"RE_OP_SET_UNION_IGN_REV",
|
"RE_OP_SET_UNION_IGN_REV",
|
||||||
"RE_OP_SET_UNION_REV",
|
"RE_OP_SET_UNION_REV",
|
||||||
|
"RE_OP_SKIP",
|
||||||
"RE_OP_START_OF_LINE",
|
"RE_OP_START_OF_LINE",
|
||||||
"RE_OP_START_OF_LINE_U",
|
"RE_OP_START_OF_LINE_U",
|
||||||
"RE_OP_START_OF_STRING",
|
"RE_OP_START_OF_STRING",
|
||||||
@ -198,10 +209,13 @@ char* re_op_text[] = {
|
|||||||
"RE_OP_STRING_SET_REV",
|
"RE_OP_STRING_SET_REV",
|
||||||
"RE_OP_BODY_END",
|
"RE_OP_BODY_END",
|
||||||
"RE_OP_BODY_START",
|
"RE_OP_BODY_START",
|
||||||
|
"RE_OP_END_ATOMIC",
|
||||||
|
"RE_OP_END_CONDITIONAL",
|
||||||
"RE_OP_END_FUZZY",
|
"RE_OP_END_FUZZY",
|
||||||
"RE_OP_END_GREEDY_REPEAT",
|
"RE_OP_END_GREEDY_REPEAT",
|
||||||
"RE_OP_END_GROUP",
|
"RE_OP_END_GROUP",
|
||||||
"RE_OP_END_LAZY_REPEAT",
|
"RE_OP_END_LAZY_REPEAT",
|
||||||
|
"RE_OP_END_LOOKAROUND",
|
||||||
"RE_OP_GREEDY_REPEAT_ONE",
|
"RE_OP_GREEDY_REPEAT_ONE",
|
||||||
"RE_OP_GROUP_RETURN",
|
"RE_OP_GROUP_RETURN",
|
||||||
"RE_OP_LAZY_REPEAT_ONE",
|
"RE_OP_LAZY_REPEAT_ONE",
|
||||||
@ -219,6 +233,7 @@ char* re_op_text[] = {
|
|||||||
#define RE_FLAG_IGNORECASE 0x2
|
#define RE_FLAG_IGNORECASE 0x2
|
||||||
#define RE_FLAG_LOCALE 0x4
|
#define RE_FLAG_LOCALE 0x4
|
||||||
#define RE_FLAG_MULTILINE 0x8
|
#define RE_FLAG_MULTILINE 0x8
|
||||||
|
#define RE_FLAG_POSIX 0x10000
|
||||||
#define RE_FLAG_REVERSE 0x400
|
#define RE_FLAG_REVERSE 0x400
|
||||||
#define RE_FLAG_TEMPLATE 0x1
|
#define RE_FLAG_TEMPLATE 0x1
|
||||||
#define RE_FLAG_UNICODE 0x20
|
#define RE_FLAG_UNICODE 0x20
|
||||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -24,7 +24,7 @@ typedef struct RE_Property {
|
|||||||
typedef struct RE_PropertyValue {
|
typedef struct RE_PropertyValue {
|
||||||
RE_UINT16 name;
|
RE_UINT16 name;
|
||||||
RE_UINT8 value_set;
|
RE_UINT8 value_set;
|
||||||
RE_UINT8 id;
|
RE_UINT16 id;
|
||||||
} RE_PropertyValue;
|
} RE_PropertyValue;
|
||||||
|
|
||||||
typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch);
|
typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch);
|
||||||
@ -83,24 +83,24 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch);
|
|||||||
#define RE_PROP_S_MASK 0x0F000000
|
#define RE_PROP_S_MASK 0x0F000000
|
||||||
#define RE_PROP_Z_MASK 0x00007000
|
#define RE_PROP_Z_MASK 0x00007000
|
||||||
|
|
||||||
#define RE_PROP_ALNUM 0x460001
|
#define RE_PROP_ALNUM 0x470001
|
||||||
#define RE_PROP_ALPHA 0x070001
|
#define RE_PROP_ALPHA 0x070001
|
||||||
#define RE_PROP_ANY 0x470001
|
#define RE_PROP_ANY 0x480001
|
||||||
#define RE_PROP_ASCII 0x010001
|
#define RE_PROP_ASCII 0x010001
|
||||||
#define RE_PROP_BLANK 0x480001
|
#define RE_PROP_BLANK 0x490001
|
||||||
#define RE_PROP_CNTRL 0x00000F
|
#define RE_PROP_CNTRL 0x00000F
|
||||||
#define RE_PROP_DIGIT 0x000009
|
#define RE_PROP_DIGIT 0x000009
|
||||||
#define RE_PROP_GRAPH 0x490001
|
#define RE_PROP_GRAPH 0x4A0001
|
||||||
#define RE_PROP_LOWER 0x080001
|
#define RE_PROP_LOWER 0x080001
|
||||||
#define RE_PROP_PRINT 0x4A0001
|
#define RE_PROP_PRINT 0x4B0001
|
||||||
#define RE_PROP_SPACE 0x190001
|
#define RE_PROP_SPACE 0x190001
|
||||||
#define RE_PROP_UPPER 0x090001
|
#define RE_PROP_UPPER 0x090001
|
||||||
#define RE_PROP_WORD 0x4B0001
|
#define RE_PROP_WORD 0x4C0001
|
||||||
#define RE_PROP_XDIGIT 0x4C0001
|
#define RE_PROP_XDIGIT 0x4D0001
|
||||||
#define RE_PROP_POSIX_ALNUM 0x4E0001
|
#define RE_PROP_POSIX_ALNUM 0x4F0001
|
||||||
#define RE_PROP_POSIX_DIGIT 0x4D0001
|
#define RE_PROP_POSIX_DIGIT 0x4E0001
|
||||||
#define RE_PROP_POSIX_PUNCT 0x4F0001
|
#define RE_PROP_POSIX_PUNCT 0x500001
|
||||||
#define RE_PROP_POSIX_XDIGIT 0x500001
|
#define RE_PROP_POSIX_XDIGIT 0x510001
|
||||||
|
|
||||||
#define RE_BREAK_OTHER 0
|
#define RE_BREAK_OTHER 0
|
||||||
#define RE_BREAK_DOUBLEQUOTE 1
|
#define RE_BREAK_DOUBLEQUOTE 1
|
||||||
@ -119,26 +119,36 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch);
|
|||||||
#define RE_BREAK_MIDNUMLET 14
|
#define RE_BREAK_MIDNUMLET 14
|
||||||
#define RE_BREAK_NUMERIC 15
|
#define RE_BREAK_NUMERIC 15
|
||||||
#define RE_BREAK_EXTENDNUMLET 16
|
#define RE_BREAK_EXTENDNUMLET 16
|
||||||
|
#define RE_BREAK_EBASE 17
|
||||||
|
#define RE_BREAK_EMODIFIER 18
|
||||||
|
#define RE_BREAK_ZWJ 19
|
||||||
|
#define RE_BREAK_GLUEAFTERZWJ 20
|
||||||
|
#define RE_BREAK_EBASEGAZ 21
|
||||||
|
|
||||||
#define RE_GBREAK_OTHER 0
|
#define RE_GBREAK_OTHER 0
|
||||||
#define RE_GBREAK_CR 1
|
#define RE_GBREAK_PREPEND 1
|
||||||
#define RE_GBREAK_LF 2
|
#define RE_GBREAK_CR 2
|
||||||
#define RE_GBREAK_CONTROL 3
|
#define RE_GBREAK_LF 3
|
||||||
#define RE_GBREAK_EXTEND 4
|
#define RE_GBREAK_CONTROL 4
|
||||||
#define RE_GBREAK_REGIONALINDICATOR 5
|
#define RE_GBREAK_EXTEND 5
|
||||||
#define RE_GBREAK_SPACINGMARK 6
|
#define RE_GBREAK_REGIONALINDICATOR 6
|
||||||
#define RE_GBREAK_L 7
|
#define RE_GBREAK_SPACINGMARK 7
|
||||||
#define RE_GBREAK_V 8
|
#define RE_GBREAK_L 8
|
||||||
#define RE_GBREAK_T 9
|
#define RE_GBREAK_V 9
|
||||||
#define RE_GBREAK_LV 10
|
#define RE_GBREAK_T 10
|
||||||
#define RE_GBREAK_LVT 11
|
#define RE_GBREAK_LV 11
|
||||||
#define RE_GBREAK_PREPEND 12
|
#define RE_GBREAK_LVT 12
|
||||||
|
#define RE_GBREAK_EBASE 13
|
||||||
|
#define RE_GBREAK_EMODIFIER 14
|
||||||
|
#define RE_GBREAK_ZWJ 15
|
||||||
|
#define RE_GBREAK_GLUEAFTERZWJ 16
|
||||||
|
#define RE_GBREAK_EBASEGAZ 17
|
||||||
|
|
||||||
extern char* re_strings[1261];
|
extern char* re_strings[1336];
|
||||||
extern RE_Property re_properties[147];
|
extern RE_Property re_properties[150];
|
||||||
extern RE_PropertyValue re_property_values[1372];
|
extern RE_PropertyValue re_property_values[1469];
|
||||||
extern RE_UINT16 re_expand_on_folding[104];
|
extern RE_UINT16 re_expand_on_folding[104];
|
||||||
extern RE_GetPropertyFunc re_get_property[81];
|
extern RE_GetPropertyFunc re_get_property[82];
|
||||||
|
|
||||||
RE_UINT32 re_get_general_category(RE_UINT32 ch);
|
RE_UINT32 re_get_general_category(RE_UINT32 ch);
|
||||||
RE_UINT32 re_get_block(RE_UINT32 ch);
|
RE_UINT32 re_get_block(RE_UINT32 ch);
|
||||||
@ -193,10 +203,11 @@ RE_UINT32 re_get_soft_dotted(RE_UINT32 ch);
|
|||||||
RE_UINT32 re_get_logical_order_exception(RE_UINT32 ch);
|
RE_UINT32 re_get_logical_order_exception(RE_UINT32 ch);
|
||||||
RE_UINT32 re_get_other_id_start(RE_UINT32 ch);
|
RE_UINT32 re_get_other_id_start(RE_UINT32 ch);
|
||||||
RE_UINT32 re_get_other_id_continue(RE_UINT32 ch);
|
RE_UINT32 re_get_other_id_continue(RE_UINT32 ch);
|
||||||
RE_UINT32 re_get_sterm(RE_UINT32 ch);
|
RE_UINT32 re_get_sentence_terminal(RE_UINT32 ch);
|
||||||
RE_UINT32 re_get_variation_selector(RE_UINT32 ch);
|
RE_UINT32 re_get_variation_selector(RE_UINT32 ch);
|
||||||
RE_UINT32 re_get_pattern_white_space(RE_UINT32 ch);
|
RE_UINT32 re_get_pattern_white_space(RE_UINT32 ch);
|
||||||
RE_UINT32 re_get_pattern_syntax(RE_UINT32 ch);
|
RE_UINT32 re_get_pattern_syntax(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_prepended_concatenation_mark(RE_UINT32 ch);
|
||||||
RE_UINT32 re_get_hangul_syllable_type(RE_UINT32 ch);
|
RE_UINT32 re_get_hangul_syllable_type(RE_UINT32 ch);
|
||||||
RE_UINT32 re_get_bidi_class(RE_UINT32 ch);
|
RE_UINT32 re_get_bidi_class(RE_UINT32 ch);
|
||||||
RE_UINT32 re_get_canonical_combining_class(RE_UINT32 ch);
|
RE_UINT32 re_get_canonical_combining_class(RE_UINT32 ch);
|
||||||
@ -208,7 +219,7 @@ RE_UINT32 re_get_line_break(RE_UINT32 ch);
|
|||||||
RE_UINT32 re_get_numeric_type(RE_UINT32 ch);
|
RE_UINT32 re_get_numeric_type(RE_UINT32 ch);
|
||||||
RE_UINT32 re_get_numeric_value(RE_UINT32 ch);
|
RE_UINT32 re_get_numeric_value(RE_UINT32 ch);
|
||||||
RE_UINT32 re_get_bidi_mirrored(RE_UINT32 ch);
|
RE_UINT32 re_get_bidi_mirrored(RE_UINT32 ch);
|
||||||
RE_UINT32 re_get_indic_matra_category(RE_UINT32 ch);
|
RE_UINT32 re_get_indic_positional_category(RE_UINT32 ch);
|
||||||
RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 ch);
|
RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 ch);
|
||||||
RE_UINT32 re_get_alphanumeric(RE_UINT32 ch);
|
RE_UINT32 re_get_alphanumeric(RE_UINT32 ch);
|
||||||
RE_UINT32 re_get_any(RE_UINT32 ch);
|
RE_UINT32 re_get_any(RE_UINT32 ch);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user