mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
Update regex engine (fixes a thread safety bug)
This commit is contained in:
parent
c04db5b1ff
commit
b51b73b530
@ -225,7 +225,7 @@ __all__ = ["compile", "escape", "findall", "finditer", "fullmatch", "match",
|
|||||||
"V0", "VERSION0", "V1", "VERSION1", "X", "VERBOSE", "W", "WORD", "error",
|
"V0", "VERSION0", "V1", "VERSION1", "X", "VERBOSE", "W", "WORD", "error",
|
||||||
"Regex"]
|
"Regex"]
|
||||||
|
|
||||||
__version__ = "2.4.61"
|
__version__ = "2.4.64"
|
||||||
|
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
# Public interface.
|
# Public interface.
|
||||||
|
@ -1194,6 +1194,18 @@ Py_LOCAL_INLINE(BOOL) locale_has_property(RE_LocaleInfo* locale_info, RE_CODE
|
|||||||
case RE_PROP_LOWER >> 16:
|
case RE_PROP_LOWER >> 16:
|
||||||
v = locale_islower(locale_info, ch);
|
v = locale_islower(locale_info, ch);
|
||||||
break;
|
break;
|
||||||
|
case RE_PROP_POSIX_ALNUM >> 16:
|
||||||
|
v = re_get_posix_alnum(ch) != 0;
|
||||||
|
break;
|
||||||
|
case RE_PROP_POSIX_DIGIT >> 16:
|
||||||
|
v = re_get_posix_digit(ch) != 0;
|
||||||
|
break;
|
||||||
|
case RE_PROP_POSIX_PUNCT >> 16:
|
||||||
|
v = re_get_posix_punct(ch) != 0;
|
||||||
|
break;
|
||||||
|
case RE_PROP_POSIX_XDIGIT >> 16:
|
||||||
|
v = re_get_posix_xdigit(ch) != 0;
|
||||||
|
break;
|
||||||
case RE_PROP_PRINT >> 16:
|
case RE_PROP_PRINT >> 16:
|
||||||
v = locale_isprint(locale_info, ch);
|
v = locale_isprint(locale_info, ch);
|
||||||
break;
|
break;
|
||||||
@ -19101,8 +19113,8 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject*
|
|||||||
#if PY_VERSION_HEX >= 0x02060000
|
#if PY_VERSION_HEX >= 0x02060000
|
||||||
BOOL built_capture = FALSE;
|
BOOL built_capture = FALSE;
|
||||||
#endif
|
#endif
|
||||||
PyObject* args = NULL;
|
PyObject* args;
|
||||||
PyObject* kwargs = NULL;
|
PyObject* kwargs;
|
||||||
Py_ssize_t end_pos;
|
Py_ssize_t end_pos;
|
||||||
|
|
||||||
/* Get the string. */
|
/* Get the string. */
|
||||||
|
@ -262,7 +262,7 @@ def _shrink_cache(cache_dict, args_dict, locale_sensitive, max_length, divisor=5
|
|||||||
# Rebuild the arguments and locale-sensitivity dictionaries.
|
# Rebuild the arguments and locale-sensitivity dictionaries.
|
||||||
args_dict.clear()
|
args_dict.clear()
|
||||||
sensitivity_dict = {}
|
sensitivity_dict = {}
|
||||||
for pattern, pattern_type, flags, args, default_version, locale in cache_dict:
|
for pattern, pattern_type, flags, args, default_version, locale in tuple(cache_dict):
|
||||||
args_dict[pattern, pattern_type, flags, default_version, locale] = args
|
args_dict[pattern, pattern_type, flags, default_version, locale] = args
|
||||||
try:
|
try:
|
||||||
sensitivity_dict[pattern_type, pattern] = locale_sensitive[pattern_type, pattern]
|
sensitivity_dict[pattern_type, pattern] = locale_sensitive[pattern_type, pattern]
|
||||||
@ -292,6 +292,9 @@ def _compile_firstset(info, fs):
|
|||||||
# If we ignore the case, for simplicity we won't build a firstset.
|
# If we ignore the case, for simplicity we won't build a firstset.
|
||||||
members = set()
|
members = set()
|
||||||
for i in fs:
|
for i in fs:
|
||||||
|
if isinstance(i, Character) and not i.positive:
|
||||||
|
return []
|
||||||
|
|
||||||
if i.case_flags:
|
if i.case_flags:
|
||||||
if isinstance(i, Character):
|
if isinstance(i, Character):
|
||||||
if is_cased(info, i.value):
|
if is_cased(info, i.value):
|
||||||
@ -1476,7 +1479,7 @@ def parse_posix_class(source, info):
|
|||||||
if not source.match(":]"):
|
if not source.match(":]"):
|
||||||
raise ParseError()
|
raise ParseError()
|
||||||
|
|
||||||
return lookup_property(prop_name, name, not negate, source)
|
return lookup_property(prop_name, name, not negate, source, posix=True)
|
||||||
|
|
||||||
def float_to_rational(flt):
|
def float_to_rational(flt):
|
||||||
"Converts a float to a rational pair."
|
"Converts a float to a rational pair."
|
||||||
@ -1517,7 +1520,9 @@ def standardise_name(name):
|
|||||||
except (ValueError, ZeroDivisionError):
|
except (ValueError, ZeroDivisionError):
|
||||||
return "".join(ch for ch in name if ch not in "_- ").upper()
|
return "".join(ch for ch in name if ch not in "_- ").upper()
|
||||||
|
|
||||||
def lookup_property(property, value, positive, source=None):
|
_posix_classes = set('ALNUM DIGIT PUNCT XDIGIT'.split())
|
||||||
|
|
||||||
|
def lookup_property(property, value, positive, source=None, posix=False):
|
||||||
"Looks up a property."
|
"Looks up a property."
|
||||||
# Normalise the names (which may still be lists).
|
# Normalise the names (which may still be lists).
|
||||||
property = standardise_name(property) if property else None
|
property = standardise_name(property) if property else None
|
||||||
@ -1526,6 +1531,9 @@ def lookup_property(property, value, positive, source=None):
|
|||||||
if (property, value) == ("GENERALCATEGORY", "ASSIGNED"):
|
if (property, value) == ("GENERALCATEGORY", "ASSIGNED"):
|
||||||
property, value, positive = "GENERALCATEGORY", "UNASSIGNED", not positive
|
property, value, positive = "GENERALCATEGORY", "UNASSIGNED", not positive
|
||||||
|
|
||||||
|
if posix and not property and value.upper() in _posix_classes:
|
||||||
|
value = 'POSIX' + value
|
||||||
|
|
||||||
if property:
|
if property:
|
||||||
# Both the property and the value are provided.
|
# Both the property and the value are provided.
|
||||||
prop = PROPERTIES.get(property)
|
prop = PROPERTIES.get(property)
|
||||||
@ -2650,11 +2658,10 @@ class Grapheme(RegexBase):
|
|||||||
def _compile(self, reverse, fuzzy):
|
def _compile(self, reverse, fuzzy):
|
||||||
# Match at least 1 character until a grapheme boundary is reached. Note
|
# Match at least 1 character until a grapheme boundary is reached. Note
|
||||||
# that this is the same whether matching forwards or backwards.
|
# that this is the same whether matching forwards or backwards.
|
||||||
character_matcher = LazyRepeat(AnyAll(), 1, None).compile(reverse,
|
grapheme_matcher = Atomic(Sequence([LazyRepeat(AnyAll(), 1, None),
|
||||||
fuzzy)
|
GraphemeBoundary()]))
|
||||||
boundary_matcher = [(OP.GRAPHEME_BOUNDARY, 1)]
|
|
||||||
|
|
||||||
return character_matcher + boundary_matcher
|
return grapheme_matcher.compile(reverse, fuzzy)
|
||||||
|
|
||||||
def _dump(self, indent, reverse):
|
def _dump(self, indent, reverse):
|
||||||
print "%sGRAPHEME" % (INDENT * indent)
|
print "%sGRAPHEME" % (INDENT * indent)
|
||||||
@ -2662,6 +2669,10 @@ class Grapheme(RegexBase):
|
|||||||
def max_width(self):
|
def max_width(self):
|
||||||
return UNLIMITED
|
return UNLIMITED
|
||||||
|
|
||||||
|
class GraphemeBoundary:
|
||||||
|
def compile(self, reverse, fuzzy):
|
||||||
|
return [(OP.GRAPHEME_BOUNDARY, 1)]
|
||||||
|
|
||||||
class GreedyRepeat(RegexBase):
|
class GreedyRepeat(RegexBase):
|
||||||
_opcode = OP.GREEDY_REPEAT
|
_opcode = OP.GREEDY_REPEAT
|
||||||
_op_name = "GREEDY_REPEAT"
|
_op_name = "GREEDY_REPEAT"
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -97,6 +97,10 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch);
|
|||||||
#define RE_PROP_UPPER 0x090001
|
#define RE_PROP_UPPER 0x090001
|
||||||
#define RE_PROP_WORD 0x4B0001
|
#define RE_PROP_WORD 0x4B0001
|
||||||
#define RE_PROP_XDIGIT 0x4C0001
|
#define RE_PROP_XDIGIT 0x4C0001
|
||||||
|
#define RE_PROP_POSIX_ALNUM 0x4E0001
|
||||||
|
#define RE_PROP_POSIX_DIGIT 0x4D0001
|
||||||
|
#define RE_PROP_POSIX_PUNCT 0x4F0001
|
||||||
|
#define RE_PROP_POSIX_XDIGIT 0x500001
|
||||||
|
|
||||||
#define RE_BREAK_OTHER 0
|
#define RE_BREAK_OTHER 0
|
||||||
#define RE_BREAK_DOUBLEQUOTE 1
|
#define RE_BREAK_DOUBLEQUOTE 1
|
||||||
@ -130,11 +134,11 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch);
|
|||||||
#define RE_GBREAK_LVT 11
|
#define RE_GBREAK_LVT 11
|
||||||
#define RE_GBREAK_PREPEND 12
|
#define RE_GBREAK_PREPEND 12
|
||||||
|
|
||||||
extern char* re_strings[1257];
|
extern char* re_strings[1261];
|
||||||
extern RE_Property re_properties[143];
|
extern RE_Property re_properties[147];
|
||||||
extern RE_PropertyValue re_property_values[1372];
|
extern RE_PropertyValue re_property_values[1372];
|
||||||
extern RE_UINT16 re_expand_on_folding[104];
|
extern RE_UINT16 re_expand_on_folding[104];
|
||||||
extern RE_GetPropertyFunc re_get_property[77];
|
extern RE_GetPropertyFunc re_get_property[81];
|
||||||
|
|
||||||
RE_UINT32 re_get_general_category(RE_UINT32 ch);
|
RE_UINT32 re_get_general_category(RE_UINT32 ch);
|
||||||
RE_UINT32 re_get_block(RE_UINT32 ch);
|
RE_UINT32 re_get_block(RE_UINT32 ch);
|
||||||
@ -213,6 +217,10 @@ RE_UINT32 re_get_graph(RE_UINT32 ch);
|
|||||||
RE_UINT32 re_get_print(RE_UINT32 ch);
|
RE_UINT32 re_get_print(RE_UINT32 ch);
|
||||||
RE_UINT32 re_get_word(RE_UINT32 ch);
|
RE_UINT32 re_get_word(RE_UINT32 ch);
|
||||||
RE_UINT32 re_get_xdigit(RE_UINT32 ch);
|
RE_UINT32 re_get_xdigit(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_posix_digit(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_posix_alnum(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_posix_punct(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_posix_xdigit(RE_UINT32 ch);
|
||||||
int re_get_all_cases(RE_UINT32 ch, RE_UINT32* codepoints);
|
int re_get_all_cases(RE_UINT32 ch, RE_UINT32* codepoints);
|
||||||
RE_UINT32 re_get_simple_case_folding(RE_UINT32 ch);
|
RE_UINT32 re_get_simple_case_folding(RE_UINT32 ch);
|
||||||
int re_get_full_case_folding(RE_UINT32 ch, RE_UINT32* codepoints);
|
int re_get_full_case_folding(RE_UINT32 ch, RE_UINT32* codepoints);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user