mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Edit Book: Update regex engine to support Unicode 9.0
This commit is contained in:
parent
a99da9be69
commit
24c7756df1
@ -76,9 +76,21 @@ The special characters are:
|
||||
(?<!...) Matches if not preceded by ....
|
||||
(?(id)yes|no) Matches yes pattern if group id matched, the (optional)
|
||||
no pattern otherwise.
|
||||
(?(DEFINE)...) If there's no group called "DEFINE", then ... will be
|
||||
ignored, but any group definitions will be available.
|
||||
(?|...|...) (?|A|B), creates an RE that will match either A or B,
|
||||
but reuses capture group numbers across the
|
||||
alternatives.
|
||||
(*FAIL) Forces matching to fail, which means immediate
|
||||
backtracking.
|
||||
(*F) Abbreviation for (*FAIL).
|
||||
(*PRUNE) Discards the current backtracking information. Its
|
||||
effect doesn't extend outside an atomic group or a
|
||||
lookaround.
|
||||
(*SKIP) Similar to (*PRUNE), except that it also sets where in
|
||||
the text the next attempt at matching the entire
|
||||
pattern will start. Its effect doesn't extend outside
|
||||
an atomic group or a lookaround.
|
||||
|
||||
The fuzzy matching constraints are: "i" to permit insertions, "d" to permit
|
||||
deletions, "s" to permit substitutions, "e" to permit any of these. Limits are
|
||||
@ -124,6 +136,7 @@ second character.
|
||||
\g<name> Matches the text matched by the group named name.
|
||||
\G Matches the empty string, but only at the position where
|
||||
the search started.
|
||||
\K Keeps only what follows for the entire match.
|
||||
\L<name> Named list. The list is provided as a keyword argument.
|
||||
\m Matches the empty string, but only at the start of a word.
|
||||
\M Matches the empty string, but only at the end of a word.
|
||||
@ -188,6 +201,8 @@ these flags can also be set within an RE:
|
||||
when matching a bytestring.
|
||||
B b BESTMATCH Find the best fuzzy match (default is first).
|
||||
D DEBUG Print the parsed pattern.
|
||||
E e ENHANCEMATCH Attempt to improve the fit after finding the first
|
||||
fuzzy match.
|
||||
F f FULLCASE Use full case-folding when performing
|
||||
case-insensitive matching in Unicode.
|
||||
I i IGNORECASE Perform case-insensitive matching.
|
||||
@ -196,8 +211,7 @@ these flags can also be set within an RE:
|
||||
M m MULTILINE "^" matches the beginning of lines (after a newline)
|
||||
as well as the string. "$" matches the end of lines
|
||||
(before a newline) as well as the end of the string.
|
||||
E e ENHANCEMATCH Attempt to improve the fit after finding the first
|
||||
fuzzy match.
|
||||
P p POSIX Perform POSIX-standard matching (leftmost longest).
|
||||
R r REVERSE Searches backwards.
|
||||
S s DOTALL "." matches any character at all, including the
|
||||
newline.
|
||||
@ -221,11 +235,11 @@ __all__ = ["compile", "escape", "findall", "finditer", "fullmatch", "match",
|
||||
"purge", "search", "split", "splititer", "sub", "subf", "subfn", "subn",
|
||||
"template", "Scanner", "A", "ASCII", "B", "BESTMATCH", "D", "DEBUG", "E",
|
||||
"ENHANCEMATCH", "S", "DOTALL", "F", "FULLCASE", "I", "IGNORECASE", "L",
|
||||
"LOCALE", "M", "MULTILINE", "R", "REVERSE", "T", "TEMPLATE", "U", "UNICODE",
|
||||
"V0", "VERSION0", "V1", "VERSION1", "X", "VERBOSE", "W", "WORD", "error",
|
||||
"Regex"]
|
||||
"LOCALE", "M", "MULTILINE", "P", "POSIX", "R", "REVERSE", "T", "TEMPLATE",
|
||||
"U", "UNICODE", "V0", "VERSION0", "V1", "VERSION1", "X", "VERBOSE", "W",
|
||||
"WORD", "error", "Regex"]
|
||||
|
||||
__version__ = "2.4.66"
|
||||
__version__ = "2.4.105"
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# Public interface.
|
||||
@ -341,50 +355,27 @@ def template(pattern, flags=0):
|
||||
|
||||
def escape(pattern, special_only=False):
|
||||
"Escape all non-alphanumeric characters or special characters in pattern."
|
||||
if isinstance(pattern, unicode):
|
||||
s = []
|
||||
if special_only:
|
||||
for c in pattern:
|
||||
if c in _METACHARS:
|
||||
s.append(u"\\")
|
||||
s.append(c)
|
||||
elif c == u"\x00":
|
||||
s.append(u"\\000")
|
||||
else:
|
||||
s.append(c)
|
||||
else:
|
||||
for c in pattern:
|
||||
if c in _ALNUM:
|
||||
s.append(c)
|
||||
elif c == u"\x00":
|
||||
s.append(u"\\000")
|
||||
else:
|
||||
s.append(u"\\")
|
||||
s.append(c)
|
||||
|
||||
return u"".join(s)
|
||||
s = []
|
||||
if special_only:
|
||||
for c in pattern:
|
||||
if c in _METACHARS:
|
||||
s.append("\\")
|
||||
s.append(c)
|
||||
elif c == "\x00":
|
||||
s.append("\\000")
|
||||
else:
|
||||
s.append(c)
|
||||
else:
|
||||
s = []
|
||||
if special_only:
|
||||
for c in pattern:
|
||||
if c in _METACHARS:
|
||||
s.append("\\")
|
||||
s.append(c)
|
||||
elif c == "\x00":
|
||||
s.append("\\000")
|
||||
else:
|
||||
s.append(c)
|
||||
else:
|
||||
for c in pattern:
|
||||
if c in _ALNUM:
|
||||
s.append(c)
|
||||
elif c == "\x00":
|
||||
s.append("\\000")
|
||||
else:
|
||||
s.append("\\")
|
||||
s.append(c)
|
||||
for c in pattern:
|
||||
if c in _ALNUM:
|
||||
s.append(c)
|
||||
elif c == "\x00":
|
||||
s.append("\\000")
|
||||
else:
|
||||
s.append("\\")
|
||||
s.append(c)
|
||||
|
||||
return "".join(s)
|
||||
return pattern[ : 0].join(s)
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# Internals.
|
||||
@ -478,10 +469,10 @@ def _compile(pattern, flags=0, kwargs={}):
|
||||
# Set the default version in the core code in case it has been changed.
|
||||
_regex_core.DEFAULT_VERSION = DEFAULT_VERSION
|
||||
|
||||
caught_exception = None
|
||||
global_flags = flags
|
||||
|
||||
while True:
|
||||
caught_exception = None
|
||||
try:
|
||||
source = _Source(pattern)
|
||||
info = _Info(global_flags, source.char_type, kwargs)
|
||||
@ -522,15 +513,23 @@ def _compile(pattern, flags=0, kwargs={}):
|
||||
# Remember whether this pattern has an inline locale flag.
|
||||
_locale_sensitive[locale_key] = info.inline_locale
|
||||
|
||||
# Fix the group references.
|
||||
caught_exception = None
|
||||
try:
|
||||
parsed.fix_groups(pattern, reverse, False)
|
||||
except error, e:
|
||||
caught_exception = e
|
||||
|
||||
if caught_exception:
|
||||
raise error(caught_exception.msg, caught_exception.pattern,
|
||||
caught_exception.pos)
|
||||
|
||||
# Should we print the parsed pattern?
|
||||
if flags & DEBUG:
|
||||
parsed.dump(indent=0, reverse=reverse)
|
||||
|
||||
# Fix the group references.
|
||||
parsed.fix_groups(pattern, reverse, False)
|
||||
|
||||
# Optimise the parsed pattern.
|
||||
parsed = parsed.optimise(info)
|
||||
parsed = parsed.optimise(info, reverse)
|
||||
parsed = parsed.pack_characters(info)
|
||||
|
||||
# Get the required string.
|
||||
@ -680,10 +679,10 @@ Regex = compile
|
||||
# Register myself for pickling.
|
||||
import copy_reg as _copy_reg
|
||||
|
||||
def _pickle(p):
|
||||
return _compile, (p.pattern, p.flags)
|
||||
def _pickle(pattern):
|
||||
return _regex.compile, pattern._pickled_data
|
||||
|
||||
_copy_reg.pickle(_pattern_type, _pickle, _compile)
|
||||
_copy_reg.pickle(_pattern_type, _pickle)
|
||||
|
||||
if not hasattr(str, "format"):
|
||||
# Strings don't have the .format method (below Python 2.6).
|
||||
|
3468
src/regex/_regex.c
3468
src/regex/_regex.c
File diff suppressed because it is too large
Load Diff
@ -11,7 +11,7 @@
|
||||
* 2010-01-16 mrab Re-written
|
||||
*/
|
||||
|
||||
/* Supports Unicode version 7.0.0. */
|
||||
/* Supports Unicode version 9.0.0. */
|
||||
|
||||
#define RE_MAGIC 20100116
|
||||
|
||||
@ -34,84 +34,91 @@
|
||||
#define RE_OP_CHARACTER_IGN 13
|
||||
#define RE_OP_CHARACTER_IGN_REV 14
|
||||
#define RE_OP_CHARACTER_REV 15
|
||||
#define RE_OP_DEFAULT_BOUNDARY 16
|
||||
#define RE_OP_DEFAULT_END_OF_WORD 17
|
||||
#define RE_OP_DEFAULT_START_OF_WORD 18
|
||||
#define RE_OP_END 19
|
||||
#define RE_OP_END_OF_LINE 20
|
||||
#define RE_OP_END_OF_LINE_U 21
|
||||
#define RE_OP_END_OF_STRING 22
|
||||
#define RE_OP_END_OF_STRING_LINE 23
|
||||
#define RE_OP_END_OF_STRING_LINE_U 24
|
||||
#define RE_OP_END_OF_WORD 25
|
||||
#define RE_OP_FUZZY 26
|
||||
#define RE_OP_GRAPHEME_BOUNDARY 27
|
||||
#define RE_OP_GREEDY_REPEAT 28
|
||||
#define RE_OP_GROUP 29
|
||||
#define RE_OP_GROUP_CALL 30
|
||||
#define RE_OP_GROUP_EXISTS 31
|
||||
#define RE_OP_LAZY_REPEAT 32
|
||||
#define RE_OP_LOOKAROUND 33
|
||||
#define RE_OP_NEXT 34
|
||||
#define RE_OP_PROPERTY 35
|
||||
#define RE_OP_PROPERTY_IGN 36
|
||||
#define RE_OP_PROPERTY_IGN_REV 37
|
||||
#define RE_OP_PROPERTY_REV 38
|
||||
#define RE_OP_RANGE 39
|
||||
#define RE_OP_RANGE_IGN 40
|
||||
#define RE_OP_RANGE_IGN_REV 41
|
||||
#define RE_OP_RANGE_REV 42
|
||||
#define RE_OP_REF_GROUP 43
|
||||
#define RE_OP_REF_GROUP_FLD 44
|
||||
#define RE_OP_REF_GROUP_FLD_REV 45
|
||||
#define RE_OP_REF_GROUP_IGN 46
|
||||
#define RE_OP_REF_GROUP_IGN_REV 47
|
||||
#define RE_OP_REF_GROUP_REV 48
|
||||
#define RE_OP_SEARCH_ANCHOR 49
|
||||
#define RE_OP_SET_DIFF 50
|
||||
#define RE_OP_SET_DIFF_IGN 51
|
||||
#define RE_OP_SET_DIFF_IGN_REV 52
|
||||
#define RE_OP_SET_DIFF_REV 53
|
||||
#define RE_OP_SET_INTER 54
|
||||
#define RE_OP_SET_INTER_IGN 55
|
||||
#define RE_OP_SET_INTER_IGN_REV 56
|
||||
#define RE_OP_SET_INTER_REV 57
|
||||
#define RE_OP_SET_SYM_DIFF 58
|
||||
#define RE_OP_SET_SYM_DIFF_IGN 59
|
||||
#define RE_OP_SET_SYM_DIFF_IGN_REV 60
|
||||
#define RE_OP_SET_SYM_DIFF_REV 61
|
||||
#define RE_OP_SET_UNION 62
|
||||
#define RE_OP_SET_UNION_IGN 63
|
||||
#define RE_OP_SET_UNION_IGN_REV 64
|
||||
#define RE_OP_SET_UNION_REV 65
|
||||
#define RE_OP_START_OF_LINE 66
|
||||
#define RE_OP_START_OF_LINE_U 67
|
||||
#define RE_OP_START_OF_STRING 68
|
||||
#define RE_OP_START_OF_WORD 69
|
||||
#define RE_OP_STRING 70
|
||||
#define RE_OP_STRING_FLD 71
|
||||
#define RE_OP_STRING_FLD_REV 72
|
||||
#define RE_OP_STRING_IGN 73
|
||||
#define RE_OP_STRING_IGN_REV 74
|
||||
#define RE_OP_STRING_REV 75
|
||||
#define RE_OP_STRING_SET 76
|
||||
#define RE_OP_STRING_SET_FLD 77
|
||||
#define RE_OP_STRING_SET_FLD_REV 78
|
||||
#define RE_OP_STRING_SET_IGN 79
|
||||
#define RE_OP_STRING_SET_IGN_REV 80
|
||||
#define RE_OP_STRING_SET_REV 81
|
||||
#define RE_OP_BODY_END 82
|
||||
#define RE_OP_BODY_START 83
|
||||
#define RE_OP_END_FUZZY 84
|
||||
#define RE_OP_END_GREEDY_REPEAT 85
|
||||
#define RE_OP_END_GROUP 86
|
||||
#define RE_OP_END_LAZY_REPEAT 87
|
||||
#define RE_OP_GREEDY_REPEAT_ONE 88
|
||||
#define RE_OP_GROUP_RETURN 89
|
||||
#define RE_OP_LAZY_REPEAT_ONE 90
|
||||
#define RE_OP_MATCH_BODY 91
|
||||
#define RE_OP_MATCH_TAIL 92
|
||||
#define RE_OP_START_GROUP 93
|
||||
#define RE_OP_CONDITIONAL 16
|
||||
#define RE_OP_DEFAULT_BOUNDARY 17
|
||||
#define RE_OP_DEFAULT_END_OF_WORD 18
|
||||
#define RE_OP_DEFAULT_START_OF_WORD 19
|
||||
#define RE_OP_END 20
|
||||
#define RE_OP_END_OF_LINE 21
|
||||
#define RE_OP_END_OF_LINE_U 22
|
||||
#define RE_OP_END_OF_STRING 23
|
||||
#define RE_OP_END_OF_STRING_LINE 24
|
||||
#define RE_OP_END_OF_STRING_LINE_U 25
|
||||
#define RE_OP_END_OF_WORD 26
|
||||
#define RE_OP_FUZZY 27
|
||||
#define RE_OP_GRAPHEME_BOUNDARY 28
|
||||
#define RE_OP_GREEDY_REPEAT 29
|
||||
#define RE_OP_GROUP 30
|
||||
#define RE_OP_GROUP_CALL 31
|
||||
#define RE_OP_GROUP_EXISTS 32
|
||||
#define RE_OP_KEEP 33
|
||||
#define RE_OP_LAZY_REPEAT 34
|
||||
#define RE_OP_LOOKAROUND 35
|
||||
#define RE_OP_NEXT 36
|
||||
#define RE_OP_PROPERTY 37
|
||||
#define RE_OP_PROPERTY_IGN 38
|
||||
#define RE_OP_PROPERTY_IGN_REV 39
|
||||
#define RE_OP_PROPERTY_REV 40
|
||||
#define RE_OP_PRUNE 41
|
||||
#define RE_OP_RANGE 42
|
||||
#define RE_OP_RANGE_IGN 43
|
||||
#define RE_OP_RANGE_IGN_REV 44
|
||||
#define RE_OP_RANGE_REV 45
|
||||
#define RE_OP_REF_GROUP 46
|
||||
#define RE_OP_REF_GROUP_FLD 47
|
||||
#define RE_OP_REF_GROUP_FLD_REV 48
|
||||
#define RE_OP_REF_GROUP_IGN 49
|
||||
#define RE_OP_REF_GROUP_IGN_REV 50
|
||||
#define RE_OP_REF_GROUP_REV 51
|
||||
#define RE_OP_SEARCH_ANCHOR 52
|
||||
#define RE_OP_SET_DIFF 53
|
||||
#define RE_OP_SET_DIFF_IGN 54
|
||||
#define RE_OP_SET_DIFF_IGN_REV 55
|
||||
#define RE_OP_SET_DIFF_REV 56
|
||||
#define RE_OP_SET_INTER 57
|
||||
#define RE_OP_SET_INTER_IGN 58
|
||||
#define RE_OP_SET_INTER_IGN_REV 59
|
||||
#define RE_OP_SET_INTER_REV 60
|
||||
#define RE_OP_SET_SYM_DIFF 61
|
||||
#define RE_OP_SET_SYM_DIFF_IGN 62
|
||||
#define RE_OP_SET_SYM_DIFF_IGN_REV 63
|
||||
#define RE_OP_SET_SYM_DIFF_REV 64
|
||||
#define RE_OP_SET_UNION 65
|
||||
#define RE_OP_SET_UNION_IGN 66
|
||||
#define RE_OP_SET_UNION_IGN_REV 67
|
||||
#define RE_OP_SET_UNION_REV 68
|
||||
#define RE_OP_SKIP 69
|
||||
#define RE_OP_START_OF_LINE 70
|
||||
#define RE_OP_START_OF_LINE_U 71
|
||||
#define RE_OP_START_OF_STRING 72
|
||||
#define RE_OP_START_OF_WORD 73
|
||||
#define RE_OP_STRING 74
|
||||
#define RE_OP_STRING_FLD 75
|
||||
#define RE_OP_STRING_FLD_REV 76
|
||||
#define RE_OP_STRING_IGN 77
|
||||
#define RE_OP_STRING_IGN_REV 78
|
||||
#define RE_OP_STRING_REV 79
|
||||
#define RE_OP_STRING_SET 80
|
||||
#define RE_OP_STRING_SET_FLD 81
|
||||
#define RE_OP_STRING_SET_FLD_REV 82
|
||||
#define RE_OP_STRING_SET_IGN 83
|
||||
#define RE_OP_STRING_SET_IGN_REV 84
|
||||
#define RE_OP_STRING_SET_REV 85
|
||||
#define RE_OP_BODY_END 86
|
||||
#define RE_OP_BODY_START 87
|
||||
#define RE_OP_END_ATOMIC 88
|
||||
#define RE_OP_END_CONDITIONAL 89
|
||||
#define RE_OP_END_FUZZY 90
|
||||
#define RE_OP_END_GREEDY_REPEAT 91
|
||||
#define RE_OP_END_GROUP 92
|
||||
#define RE_OP_END_LAZY_REPEAT 93
|
||||
#define RE_OP_END_LOOKAROUND 94
|
||||
#define RE_OP_GREEDY_REPEAT_ONE 95
|
||||
#define RE_OP_GROUP_RETURN 96
|
||||
#define RE_OP_LAZY_REPEAT_ONE 97
|
||||
#define RE_OP_MATCH_BODY 98
|
||||
#define RE_OP_MATCH_TAIL 99
|
||||
#define RE_OP_START_GROUP 100
|
||||
|
||||
char* re_op_text[] = {
|
||||
"RE_OP_FAILURE",
|
||||
@ -130,6 +137,7 @@ char* re_op_text[] = {
|
||||
"RE_OP_CHARACTER_IGN",
|
||||
"RE_OP_CHARACTER_IGN_REV",
|
||||
"RE_OP_CHARACTER_REV",
|
||||
"RE_OP_CONDITIONAL",
|
||||
"RE_OP_DEFAULT_BOUNDARY",
|
||||
"RE_OP_DEFAULT_END_OF_WORD",
|
||||
"RE_OP_DEFAULT_START_OF_WORD",
|
||||
@ -146,6 +154,7 @@ char* re_op_text[] = {
|
||||
"RE_OP_GROUP",
|
||||
"RE_OP_GROUP_CALL",
|
||||
"RE_OP_GROUP_EXISTS",
|
||||
"RE_OP_KEEP",
|
||||
"RE_OP_LAZY_REPEAT",
|
||||
"RE_OP_LOOKAROUND",
|
||||
"RE_OP_NEXT",
|
||||
@ -153,6 +162,7 @@ char* re_op_text[] = {
|
||||
"RE_OP_PROPERTY_IGN",
|
||||
"RE_OP_PROPERTY_IGN_REV",
|
||||
"RE_OP_PROPERTY_REV",
|
||||
"RE_OP_PRUNE",
|
||||
"RE_OP_RANGE",
|
||||
"RE_OP_RANGE_IGN",
|
||||
"RE_OP_RANGE_IGN_REV",
|
||||
@ -180,6 +190,7 @@ char* re_op_text[] = {
|
||||
"RE_OP_SET_UNION_IGN",
|
||||
"RE_OP_SET_UNION_IGN_REV",
|
||||
"RE_OP_SET_UNION_REV",
|
||||
"RE_OP_SKIP",
|
||||
"RE_OP_START_OF_LINE",
|
||||
"RE_OP_START_OF_LINE_U",
|
||||
"RE_OP_START_OF_STRING",
|
||||
@ -198,10 +209,13 @@ char* re_op_text[] = {
|
||||
"RE_OP_STRING_SET_REV",
|
||||
"RE_OP_BODY_END",
|
||||
"RE_OP_BODY_START",
|
||||
"RE_OP_END_ATOMIC",
|
||||
"RE_OP_END_CONDITIONAL",
|
||||
"RE_OP_END_FUZZY",
|
||||
"RE_OP_END_GREEDY_REPEAT",
|
||||
"RE_OP_END_GROUP",
|
||||
"RE_OP_END_LAZY_REPEAT",
|
||||
"RE_OP_END_LOOKAROUND",
|
||||
"RE_OP_GREEDY_REPEAT_ONE",
|
||||
"RE_OP_GROUP_RETURN",
|
||||
"RE_OP_LAZY_REPEAT_ONE",
|
||||
@ -219,6 +233,7 @@ char* re_op_text[] = {
|
||||
#define RE_FLAG_IGNORECASE 0x2
|
||||
#define RE_FLAG_LOCALE 0x4
|
||||
#define RE_FLAG_MULTILINE 0x8
|
||||
#define RE_FLAG_POSIX 0x10000
|
||||
#define RE_FLAG_REVERSE 0x400
|
||||
#define RE_FLAG_TEMPLATE 0x1
|
||||
#define RE_FLAG_UNICODE 0x20
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -24,7 +24,7 @@ typedef struct RE_Property {
|
||||
typedef struct RE_PropertyValue {
|
||||
RE_UINT16 name;
|
||||
RE_UINT8 value_set;
|
||||
RE_UINT8 id;
|
||||
RE_UINT16 id;
|
||||
} RE_PropertyValue;
|
||||
|
||||
typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch);
|
||||
@ -83,24 +83,24 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch);
|
||||
#define RE_PROP_S_MASK 0x0F000000
|
||||
#define RE_PROP_Z_MASK 0x00007000
|
||||
|
||||
#define RE_PROP_ALNUM 0x460001
|
||||
#define RE_PROP_ALNUM 0x470001
|
||||
#define RE_PROP_ALPHA 0x070001
|
||||
#define RE_PROP_ANY 0x470001
|
||||
#define RE_PROP_ANY 0x480001
|
||||
#define RE_PROP_ASCII 0x010001
|
||||
#define RE_PROP_BLANK 0x480001
|
||||
#define RE_PROP_BLANK 0x490001
|
||||
#define RE_PROP_CNTRL 0x00000F
|
||||
#define RE_PROP_DIGIT 0x000009
|
||||
#define RE_PROP_GRAPH 0x490001
|
||||
#define RE_PROP_GRAPH 0x4A0001
|
||||
#define RE_PROP_LOWER 0x080001
|
||||
#define RE_PROP_PRINT 0x4A0001
|
||||
#define RE_PROP_PRINT 0x4B0001
|
||||
#define RE_PROP_SPACE 0x190001
|
||||
#define RE_PROP_UPPER 0x090001
|
||||
#define RE_PROP_WORD 0x4B0001
|
||||
#define RE_PROP_XDIGIT 0x4C0001
|
||||
#define RE_PROP_POSIX_ALNUM 0x4E0001
|
||||
#define RE_PROP_POSIX_DIGIT 0x4D0001
|
||||
#define RE_PROP_POSIX_PUNCT 0x4F0001
|
||||
#define RE_PROP_POSIX_XDIGIT 0x500001
|
||||
#define RE_PROP_WORD 0x4C0001
|
||||
#define RE_PROP_XDIGIT 0x4D0001
|
||||
#define RE_PROP_POSIX_ALNUM 0x4F0001
|
||||
#define RE_PROP_POSIX_DIGIT 0x4E0001
|
||||
#define RE_PROP_POSIX_PUNCT 0x500001
|
||||
#define RE_PROP_POSIX_XDIGIT 0x510001
|
||||
|
||||
#define RE_BREAK_OTHER 0
|
||||
#define RE_BREAK_DOUBLEQUOTE 1
|
||||
@ -119,26 +119,36 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch);
|
||||
#define RE_BREAK_MIDNUMLET 14
|
||||
#define RE_BREAK_NUMERIC 15
|
||||
#define RE_BREAK_EXTENDNUMLET 16
|
||||
#define RE_BREAK_EBASE 17
|
||||
#define RE_BREAK_EMODIFIER 18
|
||||
#define RE_BREAK_ZWJ 19
|
||||
#define RE_BREAK_GLUEAFTERZWJ 20
|
||||
#define RE_BREAK_EBASEGAZ 21
|
||||
|
||||
#define RE_GBREAK_OTHER 0
|
||||
#define RE_GBREAK_CR 1
|
||||
#define RE_GBREAK_LF 2
|
||||
#define RE_GBREAK_CONTROL 3
|
||||
#define RE_GBREAK_EXTEND 4
|
||||
#define RE_GBREAK_REGIONALINDICATOR 5
|
||||
#define RE_GBREAK_SPACINGMARK 6
|
||||
#define RE_GBREAK_L 7
|
||||
#define RE_GBREAK_V 8
|
||||
#define RE_GBREAK_T 9
|
||||
#define RE_GBREAK_LV 10
|
||||
#define RE_GBREAK_LVT 11
|
||||
#define RE_GBREAK_PREPEND 12
|
||||
#define RE_GBREAK_PREPEND 1
|
||||
#define RE_GBREAK_CR 2
|
||||
#define RE_GBREAK_LF 3
|
||||
#define RE_GBREAK_CONTROL 4
|
||||
#define RE_GBREAK_EXTEND 5
|
||||
#define RE_GBREAK_REGIONALINDICATOR 6
|
||||
#define RE_GBREAK_SPACINGMARK 7
|
||||
#define RE_GBREAK_L 8
|
||||
#define RE_GBREAK_V 9
|
||||
#define RE_GBREAK_T 10
|
||||
#define RE_GBREAK_LV 11
|
||||
#define RE_GBREAK_LVT 12
|
||||
#define RE_GBREAK_EBASE 13
|
||||
#define RE_GBREAK_EMODIFIER 14
|
||||
#define RE_GBREAK_ZWJ 15
|
||||
#define RE_GBREAK_GLUEAFTERZWJ 16
|
||||
#define RE_GBREAK_EBASEGAZ 17
|
||||
|
||||
extern char* re_strings[1261];
|
||||
extern RE_Property re_properties[147];
|
||||
extern RE_PropertyValue re_property_values[1372];
|
||||
extern char* re_strings[1336];
|
||||
extern RE_Property re_properties[150];
|
||||
extern RE_PropertyValue re_property_values[1469];
|
||||
extern RE_UINT16 re_expand_on_folding[104];
|
||||
extern RE_GetPropertyFunc re_get_property[81];
|
||||
extern RE_GetPropertyFunc re_get_property[82];
|
||||
|
||||
RE_UINT32 re_get_general_category(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_block(RE_UINT32 ch);
|
||||
@ -193,10 +203,11 @@ RE_UINT32 re_get_soft_dotted(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_logical_order_exception(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_other_id_start(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_other_id_continue(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_sterm(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_sentence_terminal(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_variation_selector(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_pattern_white_space(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_pattern_syntax(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_prepended_concatenation_mark(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_hangul_syllable_type(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_bidi_class(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_canonical_combining_class(RE_UINT32 ch);
|
||||
@ -208,7 +219,7 @@ RE_UINT32 re_get_line_break(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_numeric_type(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_numeric_value(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_bidi_mirrored(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_indic_matra_category(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_indic_positional_category(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_alphanumeric(RE_UINT32 ch);
|
||||
RE_UINT32 re_get_any(RE_UINT32 ch);
|
||||
|
Loading…
x
Reference in New Issue
Block a user