Update regex module from upstream
This commit is contained in: commit 4ffaba8e82 (parent 2e42bfa374)
@@ -225,31 +225,31 @@ __all__ = ["compile", "escape", "findall", "finditer", "fullmatch", "match",
    "V0", "VERSION0", "V1", "VERSION1", "X", "VERBOSE", "W", "WORD", "error",
    "Regex"]

__version__ = "2.4.39"
__version__ = "2.4.48"

# --------------------------------------------------------------------
# Public interface.

def match(pattern, string, flags=0, pos=None, endpos=None, concurrent=None,
  **kwargs):
def match(pattern, string, flags=0, pos=None, endpos=None, partial=False,
  concurrent=None, **kwargs):
    """Try to apply the pattern at the start of the string, returning a match
    object, or None if no match was found."""
    return _compile(pattern, flags, kwargs).match(string, pos, endpos,
      concurrent)
      concurrent, partial)

def fullmatch(pattern, string, flags=0, pos=None, endpos=None, concurrent=None,
  **kwargs):
def fullmatch(pattern, string, flags=0, pos=None, endpos=None, partial=False,
  concurrent=None, **kwargs):
    """Try to apply the pattern against all of the string, returning a match
    object, or None if no match was found."""
    return _compile(pattern, flags, kwargs).fullmatch(string, pos, endpos,
      concurrent)
      concurrent, partial)

def search(pattern, string, flags=0, pos=None, endpos=None, concurrent=None,
  **kwargs):
def search(pattern, string, flags=0, pos=None, endpos=None, partial=False,
  concurrent=None, **kwargs):
    """Search through string looking for a match to the pattern, returning a
    match object, or None if no match was found."""
    return _compile(pattern, flags, kwargs).search(string, pos, endpos,
      concurrent)
      concurrent, partial)

def sub(pattern, repl, string, count=0, flags=0, pos=None, endpos=None,
  concurrent=None, **kwargs):
@@ -319,12 +319,12 @@ def findall(pattern, string, flags=0, pos=None, endpos=None, overlapped=False,
      overlapped, concurrent)

def finditer(pattern, string, flags=0, pos=None, endpos=None, overlapped=False,
  concurrent=None, **kwargs):
  partial=False, concurrent=None, **kwargs):
    """Return an iterator over all matches in the string. The matches may be
    overlapped if overlapped is True. For each match, the iterator returns a
    match object. Empty matches are included in the result."""
    return _compile(pattern, flags, kwargs).finditer(string, pos, endpos,
      overlapped, concurrent)
      overlapped, concurrent, partial)

def compile(pattern, flags=0, **kwargs):
    "Compile a regular expression pattern, returning a pattern object."
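
The hunks above thread a new partial keyword through match(), fullmatch(), search() and finditer(). A minimal usage sketch, assuming the upstream regex module's documented partial-match behaviour (a partial match is reported when the end of the input is reached while the pattern could still complete):

    import regex

    # With partial=True, running out of input while the pattern could still
    # match is reported as a (partial) match instead of a failure.
    m = regex.match(r"\d{4}-\d{2}-\d{2}", "2014-07", partial=True)
    if m is not None and m.partial:
        print("partial match:", m.group())   # covers "2014-07"
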
@@ -392,6 +392,7 @@ from . import _regex_core
from calibre.constants import plugins
_regex = plugins['_regex'][0]
from threading import RLock as _RLock
from locale import getlocale as _getlocale
from ._regex_core import *
from ._regex_core import (_ALL_VERSIONS, _ALL_ENCODINGS, _FirstSetError,
    _UnscopedFlagSet, _check_group_features, _compile_firstset,
@@ -414,6 +415,7 @@ _cache = {}
_cache_lock = _RLock()
_named_args = {}
_replacement_cache = {}
_locale_sensitive = {}

# Maximum size of the cache.
_MAXCACHE = 500
@@ -421,6 +423,15 @@ _MAXREPCACHE = 500

def _compile(pattern, flags=0, kwargs={}):
    "Compiles a regular expression to a PatternObject."
    # What locale is this pattern using?
    locale_key = (type(pattern), pattern)
    if _locale_sensitive.get(locale_key, True) or (flags & LOCALE) != 0:
        # This pattern is, or might be, locale-sensitive.
        pattern_locale = _getlocale()
    else:
        # This pattern is definitely not locale-sensitive.
        pattern_locale = None

    try:
        # Do we know what keyword arguments are needed?
        args_key = pattern, type(pattern), flags
@@ -433,13 +444,13 @@ def _compile(pattern, flags=0, kwargs={}):
            try:
                args_supplied.add((k, frozenset(kwargs[k])))
            except KeyError:
                raise error("missing named list")
                raise error("missing named list: {!r}".format(k))

        args_supplied = frozenset(args_supplied)

        # Have we already seen this regular expression and named list?
        pattern_key = (pattern, type(pattern), flags, args_supplied,
            DEFAULT_VERSION)
            DEFAULT_VERSION, pattern_locale)
        return _cache[pattern_key]
    except KeyError:
        # It's a new pattern, or new named list for a known pattern.
@@ -462,18 +473,19 @@ def _compile(pattern, flags=0, kwargs={}):
    _regex_core.DEFAULT_VERSION = DEFAULT_VERSION

    caught_exception = None
    global_flags = flags

    while True:
        try:
            source = _Source(pattern)
            info = _Info(flags, source.char_type, kwargs)
            info = _Info(global_flags, source.char_type, kwargs)
            info.guess_encoding = guess_encoding
            source.ignore_space = bool(info.flags & VERBOSE)
            parsed = _parse_pattern(source, info)
            break
        except _UnscopedFlagSet:
            # Remember the global flags for the next attempt.
            flags = info.global_flags
            global_flags = info.global_flags
        except error, e:
            caught_exception = e

@@ -500,6 +512,9 @@ def _compile(pattern, flags=0, kwargs={}):
    reverse = bool(info.flags & REVERSE)
    fuzzy = isinstance(parsed, _Fuzzy)

    # Remember whether this pattern as an inline locale flag.
    _locale_sensitive[locale_key] = info.inline_locale

    # Should we print the parsed pattern?
    if flags & DEBUG:
        parsed.dump(indent=0, reverse=reverse)
@@ -583,7 +598,8 @@ def _compile(pattern, flags=0, kwargs={}):
    args_needed = frozenset(args_needed)

    # Store this regular expression and named list.
    pattern_key = (pattern, type(pattern), flags, args_needed, DEFAULT_VERSION)
    pattern_key = (pattern, type(pattern), flags, args_needed, DEFAULT_VERSION,
        pattern_locale)
    _cache[pattern_key] = compiled_pattern

    # Store what keyword arguments are needed.
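
The locale handling added to _compile() above keys the pattern cache on the locale that was active at compile time. An illustrative sketch of why that matters (behaviour hedged to what the hunks show, not taken from the diff itself):

    import locale
    import regex

    # Compile a locale-sensitive (bytes + LOCALE) pattern under the "C" locale...
    locale.setlocale(locale.LC_CTYPE, "C")
    pat_c = regex.compile(br"\w+", regex.LOCALE)

    # ...then switch locale; the cache key now differs in pattern_locale, so a
    # fresh compilation is performed rather than reusing the stale entry.
    locale.setlocale(locale.LC_CTYPE, "")
    pat_user = regex.compile(br"\w+", regex.LOCALE)
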
src/regex/_regex.c (10289): File diff suppressed because it is too large.
@@ -11,7 +11,7 @@
 * 2010-01-16 mrab Re-written
 */

/* Supports Unicode version 6.3.0. */
/* Supports Unicode version 7.0.0. */

#define RE_MAGIC 20100116
@@ -14,7 +14,6 @@
# 2010-01-16 mrab Python front-end re-written and extended

import string
import sys
import unicodedata
from collections import defaultdict

@@ -23,6 +22,7 @@ _regex = plugins['_regex'][0]
if _regex is None:
    raise RuntimeError('Failed to load regex module with error: ' + plugins['_regex'][1])


__all__ = ["A", "ASCII", "B", "BESTMATCH", "D", "DEBUG", "E", "ENHANCEMATCH",
    "F", "FULLCASE", "I", "IGNORECASE", "L", "LOCALE", "M", "MULTILINE", "R",
    "REVERSE", "S", "DOTALL", "T", "TEMPLATE", "U", "UNICODE", "V0", "VERSION0",
@@ -114,6 +114,7 @@ HEX_ESCAPES = {"x": 2, "u": 4, "U": 8}

# A singleton which indicates a comment within a pattern.
COMMENT = object()
FLAGS = object()

# The names of the opcodes.
OPCODES = """
@@ -248,8 +249,8 @@ def _shrink_cache(cache_dict, args_dict, max_length, divisor=5):

    # Rebuild the arguments dictionary.
    args_dict.clear()
    for pattern, pattern_type, flags, args, default_version in cache_dict:
        args_dict[pattern, pattern_type, flags, default_version] = args
    for pattern, pattern_type, flags, args, default_version, locale in cache_dict:
        args_dict[pattern, pattern_type, flags, default_version, locale] = args

def _fold_case(info, string):
    "Folds the case of a string."
@@ -331,81 +332,129 @@ def _parse_pattern(source, info):
def parse_sequence(source, info):
    "Parses a sequence, eg. 'abc'."
    sequence = []
    item = parse_item(source, info)
    while item:
        sequence.append(item)
        item = parse_item(source, info)
    applied = False
    while True:
        # Get literal characters followed by an element.
        characters, case_flags, element = parse_literal_and_element(source,
            info)
        if not element:
            # No element, just a literal. We've also reached the end of the
            # sequence.
            append_literal(characters, case_flags, sequence)
            break

        if element is COMMENT or element is FLAGS:
            append_literal(characters, case_flags, sequence)
        elif type(element) is tuple:
            # It looks like we've found a quantifier.
            ch, saved_pos = element

            counts = parse_quantifier(source, info, ch)
            if counts:
                # It _is_ a quantifier.
                apply_quantifier(source, info, counts, characters, case_flags,
                    ch, saved_pos, applied, sequence)
                applied = True
            else:
                # It's not a quantifier. Maybe it's a fuzzy constraint.
                constraints = parse_fuzzy(source, ch)
                if constraints:
                    # It _is_ a fuzzy constraint.
                    apply_constraint(source, info, constraints, characters,
                        case_flags, saved_pos, applied, sequence)
                    applied = True
                else:
                    # The element was just a literal.
                    characters.append(ord(ch))
                    append_literal(characters, case_flags, sequence)
                    applied = False
        else:
            # We have a literal followed by something else.
            append_literal(characters, case_flags, sequence)
            sequence.append(element)
            applied = False

    return make_sequence(sequence)

def apply_quantifier(source, info, counts, characters, case_flags, ch,
    saved_pos, applied, sequence):
    if characters:
        # The quantifier applies to the last character.
        append_literal(characters[ : -1], case_flags, sequence)
        element = Character(characters[-1], case_flags=case_flags)
    else:
        # The quantifier applies to the last item in the sequence.
        if applied or not sequence:
            raise error("nothing to repeat at position %d" % saved_pos)

        element = sequence.pop()

    min_count, max_count = counts
    saved_pos = source.pos
    ch = source.get()
    if ch == "?":
        # The "?" suffix that means it's a lazy repeat.
        repeated = LazyRepeat
    elif ch == "+":
        # The "+" suffix that means it's a possessive repeat.
        repeated = PossessiveRepeat
    else:
        # No suffix means that it's a greedy repeat.
        source.pos = saved_pos
        repeated = GreedyRepeat

    # Ignore the quantifier if it applies to a zero-width item or the number of
    # repeats is fixed at 1.
    if not element.is_empty() and (min_count != 1 or max_count != 1):
        element = repeated(element, min_count, max_count)

    sequence.append(element)

def apply_constraint(source, info, constraints, characters, case_flags,
    saved_pos, applied, sequence):
    if characters:
        # The constraint applies to the last character.
        append_literal(characters[ : -1], case_flags, sequence)
        element = Character(characters[-1], case_flags=case_flags)
        sequence.append(Fuzzy(element, constraints))
    else:
        # The constraint applies to the last item in the sequence.
        if applied or not sequence:
            raise error("nothing for fuzzy constraint at position %d" % saved_pos)

        element = sequence.pop()

        # If a group is marked as fuzzy then put all of the fuzzy part in the
        # group.
        if isinstance(element, Group):
            element.subpattern = Fuzzy(element.subpattern, constraints)
            sequence.append(element)
        else:
            sequence.append(Fuzzy(element, constraints))

def append_literal(characters, case_flags, sequence):
    if characters:
        sequence.append(Literal(characters, case_flags=case_flags))

def PossessiveRepeat(element, min_count, max_count):
    "Builds a possessive repeat."
    return Atomic(GreedyRepeat(element, min_count, max_count))

def parse_item(source, info):
    "Parses an item, which might be repeated. Returns None if there's no item."
    element = parse_element(source, info)
    counts = parse_quantifier(source, info)
    if counts:
        min_count, max_count = counts
        saved_pos = source.pos
        ch = source.get()
        if ch == "?":
            # The "?" suffix that means it's a lazy repeat.
            repeated = LazyRepeat
        elif ch == "+":
            # The "+" suffix that means it's a possessive repeat.
            repeated = PossessiveRepeat
        else:
            # No suffix means that it's a greedy repeat.
            source.pos = saved_pos
            repeated = GreedyRepeat

        if element.is_empty() or min_count == max_count == 1:
            return element

        return repeated(element, min_count, max_count)

    # No quantifier, but maybe there's a fuzzy constraint.
    constraints = parse_fuzzy(source)
    if not constraints:
        # No fuzzy constraint.
        return element

    # If a group is marked as fuzzy then put all of the fuzzy part in the
    # group.
    if isinstance(element, Group):
        element.subpattern = Fuzzy(element.subpattern, constraints)
        return element

    return Fuzzy(element, constraints)

_QUANTIFIERS = {"?": (0, 1), "*": (0, None), "+": (1, None)}

def parse_quantifier(source, info):
def parse_quantifier(source, info, ch):
    "Parses a quantifier."
    while True:
        saved_pos = source.pos
        ch = source.get()
        q = _QUANTIFIERS.get(ch)
        if q:
            # It's a quantifier.
            return q
        if ch == "{":
            # Looks like a limited repeated element, eg. 'a{2,3}'.
            counts = parse_limited_quantifier(source)
            if counts:
                return counts
        elif ch == "(" and source.match("?#"):
            # A comment.
            parse_comment(source)
            continue
    q = _QUANTIFIERS.get(ch)
    if q:
        # It's a quantifier.
        return q

        # Neither a quantifier nor a comment.
        break
    if ch == "{":
        # Looks like a limited repeated element, eg. 'a{2,3}'.
        counts = parse_limited_quantifier(source)
        if counts:
            return counts

    # Parse it later, perhaps as a literal.
    source.pos = saved_pos
    return None

def is_above_limit(count):
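
The rewritten parse_sequence()/parse_quantifier()/parse_fuzzy() code above is what recognises lazy ("?"), possessive ("+") and fuzzy ("{...}") suffixes on a repeated item. A short usage sketch of the constructs involved, using the upstream regex module:

    import regex

    print(regex.match(r"a+?b", "aaab").group())          # lazy repeat
    print(regex.match(r"a++b", "aaab").group())          # possessive repeat (atomic)
    print(regex.match(r"(?:cat){e<=1}", "cot").group())  # fuzzy: allow up to one error
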
@@ -441,13 +490,13 @@ def parse_limited_quantifier(source):

    return min_count, max_count

def parse_fuzzy(source):
def parse_fuzzy(source, ch):
    "Parses a fuzzy setting, if present."
    saved_pos = source.pos
    if not source.match("{"):
        source.pos = saved_pos
    if ch != "{":
        return None

    saved_pos = source.pos

    constraints = {}
    try:
        parse_fuzzy_item(source, constraints)
@@ -455,7 +504,6 @@ def parse_fuzzy(source):
            parse_fuzzy_item(source, constraints)
    except ParseError:
        source.pos = saved_pos

        return None

    if not source.match("}"):
@@ -597,10 +645,12 @@ def parse_count(source):
    "Parses a quantifier's count, which can be empty."
    return source.get_while(DIGITS)

def parse_element(source, info):
    """Parses an element. An element might actually be a flag, eg. '(?i)', in
    which case it returns None.
def parse_literal_and_element(source, info):
    """Parses a literal followed by an element. The element is FLAGS if it's an
    inline flag or None if it has reached the end of a sequence.
    """
    characters = []
    case_flags = info.flags & CASE_FLAGS
    while True:
        saved_pos = source.pos
        ch = source.get()
@@ -608,71 +658,69 @@ def parse_element(source, info):
        if ch in ")|":
            # The end of a sequence. At the end of the pattern ch is "".
            source.pos = saved_pos
            return None
            return characters, case_flags, None
        elif ch == "\\":
            # An escape sequence outside a set.
            return parse_escape(source, info, False)
            element = parse_escape(source, info, False)
            return characters, case_flags, element
        elif ch == "(":
            # A parenthesised subpattern or a flag.
            element = parse_paren(source, info)
            if element and element is not COMMENT:
                return element
                return characters, case_flags, element
        elif ch == ".":
            # Any character.
            if info.flags & DOTALL:
                return AnyAll()
                element = AnyAll()
            elif info.flags & WORD:
                return AnyU()
                element = AnyU()
            else:
                return Any()
                element = Any()

            return characters, case_flags, element
        elif ch == "[":
            # A character set.
            return parse_set(source, info)
            element = parse_set(source, info)
            return characters, case_flags, element
        elif ch == "^":
            # The start of a line or the string.
            if info.flags & MULTILINE:
                if info.flags & WORD:
                    return StartOfLineU()
                    element = StartOfLineU()
                else:
                    return StartOfLine()
                    element = StartOfLine()
            else:
                return StartOfString()
                element = StartOfString()

            return characters, case_flags, element
        elif ch == "$":
            # The end of a line or the string.
            if info.flags & MULTILINE:
                if info.flags & WORD:
                    return EndOfLineU()
                    element = EndOfLineU()
                else:
                    return EndOfLine()
                    element = EndOfLine()
            else:
                if info.flags & WORD:
                    return EndOfStringLineU()
                    element = EndOfStringLineU()
                else:
                    return EndOfStringLine()
        elif ch == "{":
            # Looks like a limited quantifier.
            saved_pos_2 = source.pos
            source.pos = saved_pos
            counts = parse_quantifier(source, info)
            if counts:
                # A quantifier where we expected an element.
                raise error("nothing to repeat at position %d" % saved_pos_2)
                    element = EndOfStringLine()

            # Not a quantifier, so it's a literal.
            source.pos = saved_pos_2
            return make_character(info, ord(ch))
        elif ch in "?*+":
            # A quantifier where we expected an element.
            raise error("nothing to repeat at position %d" % saved_pos)
            return characters, case_flags, element
        elif ch in "?*+{":
            # Looks like a quantifier.
            return characters, case_flags, (ch, saved_pos)
        else:
            # A literal.
            return make_character(info, ord(ch))
            characters.append(ord(ch))
    else:
        # A literal.
        return make_character(info, ord(ch))
        characters.append(ord(ch))

def parse_paren(source, info):
    "Parses a parenthesised subpattern or a flag."
    """Parses a parenthesised subpattern or a flag. Returns FLAGS if it's an
    inline flag.
    """
    saved_pos = source.pos
    ch = source.get()
    if ch == "?":
@@ -897,6 +945,10 @@ def parse_flags(source, info):
    else:
        flags_off = 0

    if flags_on & LOCALE:
        # Remember that this pattern as an inline locale flag.
        info.inline_locale = True

    return flags_on, flags_off

def parse_subpattern(source, info, flags_on, flags_off):
@@ -913,30 +965,10 @@ def parse_subpattern(source, info, flags_on, flags_off):

    return subpattern

def parse_positional_flags(source, info, flags_on, flags_off):
    "Parses positional flags."
    version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION
    if version == VERSION0:
        # Positional flags are global and can only be turned on.
        if flags_off:
            raise error("bad inline flags: can't turn flags off at position %d" % source.pos)

        new_global_flags = flags_on & ~info.global_flags
        if new_global_flags:
            info.global_flags |= new_global_flags

            # A global has been turned on, so reparse the pattern.
            raise _UnscopedFlagSet(info.global_flags)
    else:
        info.flags = (info.flags | flags_on) & ~flags_off

    source.ignore_space = bool(info.flags & VERBOSE)

    return None

def parse_flags_subpattern(source, info):
    """Parses a flags subpattern. It could be inline flags or a subpattern
    possibly with local flags.
    possibly with local flags. If it's a subpattern, then that's returned;
    if it's a inline flags, then FLAGS is returned.
    """
    flags_on, flags_off = parse_flags(source, info)

@@ -961,10 +993,30 @@ def parse_flags_subpattern(source, info):
        return parse_subpattern(source, info, flags_on, flags_off)

    if source.match(")"):
        return parse_positional_flags(source, info, flags_on, flags_off)
        parse_positional_flags(source, info, flags_on, flags_off)
        return FLAGS

    raise error("unknown extension at position %d" % source.pos)

def parse_positional_flags(source, info, flags_on, flags_off):
    "Parses positional flags."
    version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION
    if version == VERSION0:
        # Positional flags are global and can only be turned on.
        if flags_off:
            raise error("bad inline flags: can't turn flags off at position %d" % source.pos)

        new_global_flags = flags_on & ~info.global_flags
        if new_global_flags:
            info.global_flags |= new_global_flags

            # A global has been turned on, so reparse the pattern.
            raise _UnscopedFlagSet(info.global_flags)
    else:
        info.flags = (info.flags | flags_on) & ~flags_off

    source.ignore_space = bool(info.flags & VERBOSE)

def parse_name(source, allow_numeric=False):
    "Parses a name."
    name = source.get_while(set(")>"), include=False)
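
parse_positional_flags() above implements the split between VERSION0 (inline flags are global, and turning one on triggers a reparse via _UnscopedFlagSet) and VERSION1 (they apply from the point where they occur). A behavioural sketch, assuming the upstream semantics:

    import regex

    print(regex.search(r"(?V0)foo(?i)bar", "FOOBAR"))  # matches: V0 makes (?i) global
    print(regex.search(r"(?V1)foo(?i)bar", "FOOBAR"))  # None: V1 scopes (?i) to what follows
    print(regex.search(r"(?V1)foo(?i)bar", "fooBAR"))  # matches
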
@@ -1175,12 +1227,12 @@ def parse_property(source, info, positive, in_set):
    prop_name, name = parse_property_name(source)
    if source.match("}"):
        # It's correctly delimited.
        prop = lookup_property(prop_name, name, positive != negate)
        prop = lookup_property(prop_name, name, positive != negate, source_pos=source.pos)
        return make_property(info, prop, in_set)
    elif ch and ch in "CLMNPSZ":
        # An abbreviated property, eg \pL.
        prop = lookup_property(None, ch, positive)
        return make_property(info, prop, in_set)
        return make_property(info, prop, in_set, source_pos=source.pos)

    # Not a property, so treat as a literal "p" or "P".
    source.pos = saved_pos
@@ -1375,7 +1427,7 @@ def parse_posix_class(source, info):
    if not source.match(":]"):
        raise ParseError()

    return lookup_property(prop_name, name, positive=not negate)
    return lookup_property(prop_name, name, positive=not negate, source_pos=source.pos)

def float_to_rational(flt):
    "Converts a float to a rational pair."
@@ -1416,21 +1468,25 @@ def standardise_name(name):
    except (ValueError, ZeroDivisionError):
        return "".join(ch for ch in name if ch not in "_- ").upper()

def lookup_property(property, value, positive):
def lookup_property(property, value, positive, source_pos=None):
    "Looks up a property."
    # Normalise the names (which may still be lists).
    property = standardise_name(property) if property else None
    value = standardise_name(value)

    if (property, value) == ("GENERALCATEGORY", "ASSIGNED"):
        property, value, positive = "GENERALCATEGORY", "UNASSIGNED", not positive

    if property:
        # Both the property and the value are provided.
        prop = PROPERTIES.get(property)
        if not prop:
            raise error("unknown property at position %d" % source.pos)
            raise error("unknown property at position %d" % source_pos)

        prop_id, value_dict = prop
        val_id = value_dict.get(value)
        if val_id is None:
            raise error("unknown property value at position %d" % source.pos)
            raise error("unknown property value at position %d" % source_pos)

        if "YES" in value_dict and val_id == 0:
            positive, val_id = not positive, 1
@@ -1470,7 +1526,7 @@ def lookup_property(property, value, positive):
        return Property((prop_id << 16) | val_id, positive)

    # Unknown property.
    raise error("unknown property at position %d" % source.pos)
    raise error("unknown property at position %d" % source_pos)

def _compile_replacement(source, pattern, is_unicode):
    "Compiles a replacement template escape sequence."
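
The old lookup_property() formatted its errors with source.pos even though it receives no source object; the hunks above pass an explicit source_pos instead. A usage sketch of the property escapes that go through this path:

    import regex

    print(regex.findall(r"\p{Lu}", "Calibre eBook"))   # ['C', 'B']
    try:
        regex.compile(r"\p{NoSuchProperty}")
    except regex.error as e:
        print(e)   # reports something like "unknown property at position ..."
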
@@ -1660,6 +1716,12 @@ class RegexBase(object):
    def has_simple_start(self):
        return False

    def compile(self, reverse=False, fuzzy=False):
        return self._compile(reverse, fuzzy)

    def dump(self, indent, reverse):
        self._dump(indent, reverse)

    def is_empty(self):
        return False

@@ -1686,7 +1748,7 @@ class ZeroWidthBase(RegexBase):
    def get_firstset(self, reverse):
        return set([None])

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        flags = 0
        if self.positive:
            flags |= POSITIVE_OP
@@ -1696,7 +1758,7 @@ class ZeroWidthBase(RegexBase):
            flags |= REVERSE_OP
        return [(self._opcode, flags)]

    def dump(self, indent=0, reverse=False):
    def _dump(self, indent, reverse):
        print "%s%s %s" % (INDENT * indent, self._op_name,
            POS_TEXT[self.positive])

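
The compile()/dump() methods on the node classes are renamed to _compile()/_dump() throughout this file, with RegexBase providing the public wrappers so the default arguments live in one place. A minimal sketch of the pattern, using hypothetical node names rather than the library's:

    class Node(object):
        # Public entry point with the defaults, as RegexBase now provides.
        def compile(self, reverse=False, fuzzy=False):
            return self._compile(reverse, fuzzy)

        def _compile(self, reverse, fuzzy):
            raise NotImplementedError

    class Dot(Node):
        # Subclasses only implement the underscore hook.
        def _compile(self, reverse, fuzzy):
            return [("ANY", reverse, fuzzy)]

    print(Dot().compile())   # [('ANY', False, False)]
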
@@ -1710,13 +1772,13 @@ class Any(RegexBase):
    def has_simple_start(self):
        return True

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        flags = 0
        if fuzzy:
            flags |= FUZZY_OP
        return [(self._opcode[reverse], flags)]

    def dump(self, indent=0, reverse=False):
    def _dump(self, indent, reverse):
        print "%s%s" % (INDENT * indent, self._op_name)

    def max_width(self):
@@ -1765,11 +1827,11 @@ class Atomic(RegexBase):
    def has_simple_start(self):
        return self.subpattern.has_simple_start()

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        return ([(OP.ATOMIC, )] + self.subpattern.compile(reverse, fuzzy) +
            [(OP.END, )])

    def dump(self, indent=0, reverse=False):
    def _dump(self, indent, reverse):
        print "%sATOMIC" % (INDENT * indent)
        self.subpattern.dump(indent + 1, reverse)

@@ -1822,6 +1884,20 @@ class Branch(RegexBase):

        return make_sequence(sequence)

    def optimise(self, info):
        # Flatten branches within branches.
        branches = Branch._flatten_branches(info, self.branches)

        # Try to reduce adjacent single-character branches to sets.
        branches = Branch._reduce_to_set(info, branches)

        if len(branches) > 1:
            sequence = [Branch(branches)]
        else:
            sequence = branches

        return make_sequence(sequence)

    def pack_characters(self, info):
        self.branches = [b.pack_characters(info) for b in self.branches]
        return self
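
Branch.optimise() above flattens nested alternations and folds adjacent single-character branches into a set. The effect is internal, but the intent can be seen from the outside: these two patterns should find the same matches.

    import regex

    print(regex.findall(r"a|b|c", "cabbage"))   # ['c', 'a', 'b', 'b', 'a']
    print(regex.findall(r"[abc]", "cabbage"))   # ['c', 'a', 'b', 'b', 'a']
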
@@ -1846,7 +1922,7 @@ class Branch(RegexBase):

        return fs or set([None])

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        code = [(OP.BRANCH, )]
        for b in self.branches:
            code.extend(b.compile(reverse, fuzzy))
@@ -1856,7 +1932,7 @@ class Branch(RegexBase):

        return code

    def dump(self, indent=0, reverse=False):
    def _dump(self, indent, reverse):
        print "%sBRANCH" % (INDENT * indent)
        self.branches[0].dump(indent + 1, reverse)
        for b in self.branches[1 : ]:
@@ -2181,10 +2257,10 @@ class CallGroup(RegexBase):
    def remove_captures(self):
        raise error("group reference not allowed at position %d" % self.position)

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        return [(OP.GROUP_CALL, self.call_ref)]

    def dump(self, indent=0, reverse=False):
    def _dump(self, indent, reverse):
        print "%sGROUP_CALL %s" % (INDENT * indent, self.group)

    def __eq__(self, other):
@@ -2229,7 +2305,7 @@ class Character(RegexBase):
    def has_simple_start(self):
        return True

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        flags = 0
        if self.positive:
            flags |= POSITIVE_OP
@@ -2248,7 +2324,7 @@ class Character(RegexBase):

        return code.compile(reverse, fuzzy)

    def dump(self, indent=0, reverse=False):
    def _dump(self, indent, reverse):
        display = repr(unichr(self.value)).lstrip("bu")
        print "%sCHARACTER %s %s%s" % (INDENT * indent,
            POS_TEXT[self.positive], display, CASE_TEXT[self.case_flags])
@@ -2319,7 +2395,7 @@ class Conditional(RegexBase):
        return (self.yes_item.get_firstset(reverse) |
            self.no_item.get_firstset(reverse))

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        code = [(OP.GROUP_EXISTS, self.group)]
        code.extend(self.yes_item.compile(reverse, fuzzy))
        add_code = self.no_item.compile(reverse, fuzzy)
@@ -2331,7 +2407,7 @@ class Conditional(RegexBase):

        return code

    def dump(self, indent=0, reverse=False):
    def _dump(self, indent, reverse):
        print "%sGROUP_EXISTS %s" % (INDENT * indent, self.group)
        self.yes_item.dump(indent + 1, reverse)
        if self.no_item:
@@ -2437,7 +2513,7 @@ class Fuzzy(RegexBase):
    def contains_group(self):
        return self.subpattern.contains_group()

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        # The individual limits.
        arguments = []
        for e in "dise":
@@ -2460,7 +2536,7 @@ class Fuzzy(RegexBase):
        return ([(OP.FUZZY, flags) + tuple(arguments)] +
            self.subpattern.compile(reverse, True) + [(OP.END,)])

    def dump(self, indent=0, reverse=False):
    def _dump(self, indent, reverse):
        constraints = self._constraints_to_string()
        if constraints:
            constraints = " " + constraints
@@ -2511,7 +2587,7 @@ class Fuzzy(RegexBase):
        return ",".join(constraints)

class Grapheme(RegexBase):
    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        # Match at least 1 character until a grapheme boundary is reached. Note
        # that this is the same whether matching forwards or backwards.
        character_matcher = LazyRepeat(AnyAll(), 1, None).compile(reverse,
@@ -2520,7 +2596,7 @@ class Grapheme(RegexBase):

        return character_matcher + boundary_matcher

    def dump(self, indent=0, reverse=False):
    def _dump(self, indent, reverse):
        print "%sGRAPHEME" % (INDENT * indent)

    def max_width(self):
@@ -2565,7 +2641,7 @@ class GreedyRepeat(RegexBase):

        return fs

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        repeat = [self._opcode, self.min_count]
        if self.max_count is None:
            repeat.append(UNLIMITED)
@@ -2578,7 +2654,7 @@ class GreedyRepeat(RegexBase):

        return ([tuple(repeat)] + subpattern + [(OP.END, )])

    def dump(self, indent=0, reverse=False):
    def _dump(self, indent, reverse):
        if self.max_count is None:
            limit = "INF"
        else:
@@ -2655,7 +2731,7 @@ class Group(RegexBase):
    def has_simple_start(self):
        return self.subpattern.has_simple_start()

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        code = []

        key = self.group, reverse, fuzzy
@@ -2676,7 +2752,7 @@ class Group(RegexBase):

        return code

    def dump(self, indent=0, reverse=False):
    def _dump(self, indent, reverse):
        group = self.group
        if group < 0:
            group = private_groups[group]
@@ -2736,11 +2812,11 @@ class LookAround(RegexBase):
    def contains_group(self):
        return self.subpattern.contains_group()

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        return ([(OP.LOOKAROUND, int(self.positive), int(not self.behind))] +
            self.subpattern.compile(self.behind) + [(OP.END, )])

    def dump(self, indent=0, reverse=False):
    def _dump(self, indent, reverse):
        print "%sLOOK%s %s" % (INDENT * indent, self._dir_text[self.behind],
            POS_TEXT[self.positive])
        self.subpattern.dump(indent + 1, self.behind)
@@ -2759,7 +2835,7 @@ class PrecompiledCode(RegexBase):
    def __init__(self, code):
        self.code = code

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        return [tuple(self.code)]

class Property(RegexBase):
@@ -2792,7 +2868,7 @@ class Property(RegexBase):
    def has_simple_start(self):
        return True

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        flags = 0
        if self.positive:
            flags |= POSITIVE_OP
@@ -2802,7 +2878,7 @@ class Property(RegexBase):
            flags |= FUZZY_OP
        return [(self._opcode[self.case_flags, reverse], flags, self.value)]

    def dump(self, indent=0, reverse=False):
    def _dump(self, indent, reverse):
        prop = PROPERTY_NAMES[self.value >> 16]
        name, value = prop[0], prop[1][self.value & 0xFFFF]
        print "%sPROPERTY %s %s:%s%s" % (INDENT * indent,
@@ -2867,7 +2943,7 @@ class Range(RegexBase):

        return Branch(items)

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        flags = 0
        if self.positive:
            flags |= POSITIVE_OP
@@ -2878,7 +2954,7 @@ class Range(RegexBase):
        return [(self._opcode[self.case_flags, reverse], flags, self.lower,
            self.upper)]

    def dump(self, indent=0, reverse=False):
    def _dump(self, indent, reverse):
        display_lower = repr(unichr(self.lower)).lstrip("bu")
        display_upper = repr(unichr(self.upper)).lstrip("bu")
        print "%sRANGE %s %s %s%s" % (INDENT * indent, POS_TEXT[self.positive],
@@ -2923,13 +2999,13 @@ class RefGroup(RegexBase):
    def remove_captures(self):
        raise error("group reference not allowed at position %d" % self.position)

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        flags = 0
        if fuzzy:
            flags |= FUZZY_OP
        return [(self._opcode[self.case_flags, reverse], flags, self.group)]

    def dump(self, indent=0, reverse=False):
    def _dump(self, indent, reverse):
        print "%sREF_GROUP %s%s" % (INDENT * indent, self.group,
            CASE_TEXT[self.case_flags])

@@ -2974,18 +3050,18 @@ class Sequence(RegexBase):
                if s.case_flags != case_flags:
                    # Different case sensitivity, so flush, unless neither the
                    # previous nor the new character are cased.
                    if case_flags or is_cased(info, s.value):
                    if s.case_flags or is_cased(info, s.value):
                        Sequence._flush_characters(info, characters,
                            case_flags, items)

                        case_flags = s.case_flags

                characters.append(s.value)
            elif type(s) is String:
            elif type(s) is String or type(s) is Literal:
                if s.case_flags != case_flags:
                    # Different case sensitivity, so flush, unless the neither
                    # the previous nor the new string are cased.
                    if not s.case_flags or any(is_cased(info, c) for c in
                    if s.case_flags or any(is_cased(info, c) for c in
                        characters):
                        Sequence._flush_characters(info, characters,
                            case_flags, items)
@@ -3031,7 +3107,7 @@ class Sequence(RegexBase):
    def has_simple_start(self):
        return self.items and self.items[0].has_simple_start()

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        seq = self.items
        if reverse:
            seq = seq[::-1]
@@ -3042,7 +3118,7 @@ class Sequence(RegexBase):

        return code

    def dump(self, indent=0, reverse=False):
    def _dump(self, indent, reverse):
        for s in self.items:
            s.dump(indent, reverse)

@@ -3112,7 +3188,7 @@ class SetBase(RegexBase):
    def has_simple_start(self):
        return True

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        flags = 0
        if self.positive:
            flags |= POSITIVE_OP
@@ -3128,7 +3204,7 @@ class SetBase(RegexBase):

        return code

    def dump(self, indent=0, reverse=False):
    def _dump(self, indent, reverse):
        print "%s%s %s%s" % (INDENT * indent, self._op_name,
            POS_TEXT[self.positive], CASE_TEXT[self.case_flags])
        for i in self.items:
@@ -3306,7 +3382,7 @@ class SetUnion(SetBase):

        return self._handle_case_folding(info, in_set)

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        flags = 0
        if self.positive:
            flags |= POSITIVE_OP
@@ -3395,7 +3471,7 @@ class String(RegexBase):
    def has_simple_start(self):
        return True

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        flags = 0
        if fuzzy:
            flags |= FUZZY_OP
@@ -3404,7 +3480,7 @@ class String(RegexBase):
        return [(self._opcode[self.case_flags, reverse], flags,
            len(self.folded_characters)) + self.folded_characters]

    def dump(self, indent=0, reverse=False):
    def _dump(self, indent, reverse):
        display = repr("".join(unichr(c) for c in self.characters)).lstrip("bu")
        print "%sSTRING %s%s" % (INDENT * indent, display,
            CASE_TEXT[self.case_flags])
@@ -3415,6 +3491,13 @@ class String(RegexBase):
    def get_required_string(self, reverse):
        return 0, self

class Literal(String):
    def _dump(self, indent, reverse):
        for c in self.characters:
            display = ascii("".join(chr(c))).lstrip("bu")
            print("{}CHARACTER MATCH {}{}".format(INDENT * indent,
                display, CASE_TEXT[self.case_flags]))

class StringSet(RegexBase):
    _opcode = {(NOCASE, False): OP.STRING_SET, (IGNORECASE, False):
        OP.STRING_SET_IGN, (FULLCASE, False): OP.STRING_SET, (FULLIGNORECASE,
@@ -3433,7 +3516,7 @@ class StringSet(RegexBase):
        if self.set_key not in info.named_lists_used:
            info.named_lists_used[self.set_key] = len(info.named_lists_used)

    def compile(self, reverse=False, fuzzy=False):
    def _compile(self, reverse, fuzzy):
        index = self.info.named_lists_used[self.set_key]
        items = self.info.kwargs[self.name]

@@ -3469,7 +3552,7 @@ class StringSet(RegexBase):
        return [(self._opcode[case_flags, reverse], index, min_len,
            max_len)]

    def dump(self, indent=0, reverse=False):
    def _dump(self, indent, reverse):
        print "%sSTRING_SET %s%s" % (INDENT * indent, self.name,
            CASE_TEXT[self.case_flags])

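
StringSet is the node behind named lists, which is also where the "missing named list" error in _compile() comes from. A usage sketch with the upstream module:

    import regex

    animals = ["cat", "dog", "fish"]
    print(regex.findall(r"\L<animals>", "a cat and a dog", animals=animals))
    # ['cat', 'dog']

    # Omitting the keyword argument raises regex.error("missing named list: 'animals'")
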
@@ -3740,6 +3823,7 @@ class Info(object):
        flags |= DEFAULT_FLAGS[(flags & _ALL_VERSIONS) or DEFAULT_VERSION]
        self.flags = flags
        self.global_flags = flags
        self.inline_locale = False

        self.kwargs = kwargs

@@ -3799,8 +3883,8 @@ class Info(object):

def _check_group_features(info, parsed):
    """Checks whether the reverse and fuzzy features of the group calls match
    the groups which they call."""

    the groups which they call.
    """
    call_refs = {}
    additional_groups = []
    for call, reverse, fuzzy in info.group_calls:
@@ -3976,12 +4060,12 @@ CHARACTER_ESCAPES = {

# Predefined character set escape sequences.
CHARSET_ESCAPES = {
    "d": lookup_property(None, "DIGIT", True),
    "D": lookup_property(None, "DIGIT", False),
    "s": lookup_property(None, "SPACE", True),
    "S": lookup_property(None, "SPACE", False),
    "w": lookup_property(None, "WORD", True),
    "W": lookup_property(None, "WORD", False),
    "d": lookup_property(None, "Digit", True),
    "D": lookup_property(None, "Digit", False),
    "s": lookup_property(None, "Space", True),
    "S": lookup_property(None, "Space", False),
    "w": lookup_property(None, "Word", True),
    "W": lookup_property(None, "Word", False),
}

# Positional escape sequences.
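
The CHARSET_ESCAPES hunk only changes the casing of the property names handed to lookup_property(); \d, \s and \w remain Unicode-property based. A usage sketch:

    import regex

    print(regex.findall(u"\\w+", u"h\xe9llo w\xf6rld"))  # [u'h\xe9llo', u'w\xf6rld']
    print(regex.findall(u"\\d", u"3\u0665"))             # [u'3', u'\u0665'] (ARABIC-INDIC DIGIT FIVE)
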
File diff suppressed because it is too large.
@@ -41,6 +41,8 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch);
#define RE_PROP_P 34
#define RE_PROP_S 35
#define RE_PROP_Z 36
#define RE_PROP_ASSIGNED 38
#define RE_PROP_CASEDLETTER 37

#define RE_PROP_CN 0
#define RE_PROP_LU 1
@@ -84,19 +86,17 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch);
#define RE_PROP_ALNUM 0x460001
#define RE_PROP_ALPHA 0x070001
#define RE_PROP_ANY 0x470001
#define RE_PROP_ASCII 0x480001
#define RE_PROP_ASSIGNED 0x490001
#define RE_PROP_BLANK 0x4A0001
#define RE_PROP_ASCII 0x010001
#define RE_PROP_BLANK 0x480001
#define RE_PROP_CNTRL 0x00000F
#define RE_PROP_DIGIT 0x000009
#define RE_PROP_GRAPH 0x4B0001
#define RE_PROP_GRAPH 0x490001
#define RE_PROP_LOWER 0x080001
#define RE_PROP_PRINT 0x4C0001
#define RE_PROP_PUNCT 0x000022
#define RE_PROP_PRINT 0x4A0001
#define RE_PROP_SPACE 0x190001
#define RE_PROP_UPPER 0x090001
#define RE_PROP_WORD 0x4D0001
#define RE_PROP_XDIGIT 0x4E0001
#define RE_PROP_WORD 0x4B0001
#define RE_PROP_XDIGIT 0x4C0001

#define RE_BREAK_OTHER 0
#define RE_BREAK_DOUBLEQUOTE 1
@@ -130,11 +130,11 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch);
#define RE_GBREAK_LVT 11
#define RE_GBREAK_PREPEND 12

extern char* re_strings[1155];
extern RE_Property re_properties[145];
extern RE_PropertyValue re_property_values[1244];
extern char* re_strings[1257];
extern RE_Property re_properties[143];
extern RE_PropertyValue re_property_values[1372];
extern RE_UINT16 re_expand_on_folding[104];
extern RE_GetPropertyFunc re_get_property[79];
extern RE_GetPropertyFunc re_get_property[77];

RE_UINT32 re_get_general_category(RE_UINT32 ch);
RE_UINT32 re_get_block(RE_UINT32 ch);
@@ -208,8 +208,6 @@ RE_UINT32 re_get_indic_matra_category(RE_UINT32 ch);
RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 ch);
RE_UINT32 re_get_alphanumeric(RE_UINT32 ch);
RE_UINT32 re_get_any(RE_UINT32 ch);
RE_UINT32 re_get_ascii(RE_UINT32 ch);
RE_UINT32 re_get_assigned(RE_UINT32 ch);
RE_UINT32 re_get_blank(RE_UINT32 ch);
RE_UINT32 re_get_graph(RE_UINT32 ch);
RE_UINT32 re_get_print(RE_UINT32 ch);
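
The regenerated _regex_unicode tables (now Unicode 7.0.0) back properties such as Any and Assigned, whose identifiers move in the hunks above. A rough check from Python, hedged on the Unicode data current at the time:

    import regex

    print(bool(regex.match(u"\\p{Any}", u"A")))            # True: Any matches every code point
    print(bool(regex.match(u"\\p{Assigned}", u"A")))       # True
    print(bool(regex.match(u"\\p{Assigned}", u"\u0378")))  # False: U+0378 is unassigned
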