mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-03 19:17:02 -05:00 
			
		
		
		
	Update regex engine (fixes a thread safety bug)
This commit is contained in:
		
							parent
							
								
									c04db5b1ff
								
							
						
					
					
						commit
						b51b73b530
					
				@ -225,7 +225,7 @@ __all__ = ["compile", "escape", "findall", "finditer", "fullmatch", "match",
 | 
			
		||||
  "V0", "VERSION0", "V1", "VERSION1", "X", "VERBOSE", "W", "WORD", "error",
 | 
			
		||||
  "Regex"]
 | 
			
		||||
 | 
			
		||||
__version__ = "2.4.61"
 | 
			
		||||
__version__ = "2.4.64"
 | 
			
		||||
 | 
			
		||||
# --------------------------------------------------------------------
 | 
			
		||||
# Public interface.
 | 
			
		||||
 | 
			
		||||
@ -1194,6 +1194,18 @@ Py_LOCAL_INLINE(BOOL) locale_has_property(RE_LocaleInfo* locale_info, RE_CODE
 | 
			
		||||
    case RE_PROP_LOWER >> 16:
 | 
			
		||||
        v = locale_islower(locale_info, ch);
 | 
			
		||||
        break;
 | 
			
		||||
    case RE_PROP_POSIX_ALNUM >> 16:
 | 
			
		||||
        v = re_get_posix_alnum(ch) != 0;
 | 
			
		||||
        break;
 | 
			
		||||
    case RE_PROP_POSIX_DIGIT >> 16:
 | 
			
		||||
        v = re_get_posix_digit(ch) != 0;
 | 
			
		||||
        break;
 | 
			
		||||
    case RE_PROP_POSIX_PUNCT >> 16:
 | 
			
		||||
        v = re_get_posix_punct(ch) != 0;
 | 
			
		||||
        break;
 | 
			
		||||
    case RE_PROP_POSIX_XDIGIT >> 16:
 | 
			
		||||
        v = re_get_posix_xdigit(ch) != 0;
 | 
			
		||||
        break;
 | 
			
		||||
    case RE_PROP_PRINT >> 16:
 | 
			
		||||
        v = locale_isprint(locale_info, ch);
 | 
			
		||||
        break;
 | 
			
		||||
@ -19101,8 +19113,8 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject*
 | 
			
		||||
#if PY_VERSION_HEX >= 0x02060000
 | 
			
		||||
    BOOL built_capture = FALSE;
 | 
			
		||||
#endif
 | 
			
		||||
    PyObject* args = NULL;
 | 
			
		||||
    PyObject* kwargs = NULL;
 | 
			
		||||
    PyObject* args;
 | 
			
		||||
    PyObject* kwargs;
 | 
			
		||||
    Py_ssize_t end_pos;
 | 
			
		||||
 | 
			
		||||
    /* Get the string. */
 | 
			
		||||
 | 
			
		||||
@ -262,7 +262,7 @@ def _shrink_cache(cache_dict, args_dict, locale_sensitive, max_length, divisor=5
 | 
			
		||||
    # Rebuild the arguments and locale-sensitivity dictionaries.
 | 
			
		||||
    args_dict.clear()
 | 
			
		||||
    sensitivity_dict = {}
 | 
			
		||||
    for pattern, pattern_type, flags, args, default_version, locale in cache_dict:
 | 
			
		||||
    for pattern, pattern_type, flags, args, default_version, locale in tuple(cache_dict):
 | 
			
		||||
        args_dict[pattern, pattern_type, flags, default_version, locale] = args
 | 
			
		||||
        try:
 | 
			
		||||
            sensitivity_dict[pattern_type, pattern] = locale_sensitive[pattern_type, pattern]
 | 
			
		||||
@ -292,6 +292,9 @@ def _compile_firstset(info, fs):
 | 
			
		||||
    # If we ignore the case, for simplicity we won't build a firstset.
 | 
			
		||||
    members = set()
 | 
			
		||||
    for i in fs:
 | 
			
		||||
        if isinstance(i, Character) and not i.positive:
 | 
			
		||||
            return []
 | 
			
		||||
 | 
			
		||||
        if i.case_flags:
 | 
			
		||||
            if isinstance(i, Character):
 | 
			
		||||
                if is_cased(info, i.value):
 | 
			
		||||
@ -1476,7 +1479,7 @@ def parse_posix_class(source, info):
 | 
			
		||||
    if not source.match(":]"):
 | 
			
		||||
        raise ParseError()
 | 
			
		||||
 | 
			
		||||
    return lookup_property(prop_name, name, not negate, source)
 | 
			
		||||
    return lookup_property(prop_name, name, not negate, source, posix=True)
 | 
			
		||||
 | 
			
		||||
def float_to_rational(flt):
 | 
			
		||||
    "Converts a float to a rational pair."
 | 
			
		||||
@ -1517,7 +1520,9 @@ def standardise_name(name):
 | 
			
		||||
    except (ValueError, ZeroDivisionError):
 | 
			
		||||
        return "".join(ch for ch in name if ch not in "_- ").upper()
 | 
			
		||||
 | 
			
		||||
def lookup_property(property, value, positive, source=None):
 | 
			
		||||
_posix_classes = set('ALNUM DIGIT PUNCT XDIGIT'.split())
 | 
			
		||||
 | 
			
		||||
def lookup_property(property, value, positive, source=None, posix=False):
 | 
			
		||||
    "Looks up a property."
 | 
			
		||||
    # Normalise the names (which may still be lists).
 | 
			
		||||
    property = standardise_name(property) if property else None
 | 
			
		||||
@ -1526,6 +1531,9 @@ def lookup_property(property, value, positive, source=None):
 | 
			
		||||
    if (property, value) == ("GENERALCATEGORY", "ASSIGNED"):
 | 
			
		||||
        property, value, positive = "GENERALCATEGORY", "UNASSIGNED", not positive
 | 
			
		||||
 | 
			
		||||
    if posix and not property and value.upper() in _posix_classes:
 | 
			
		||||
        value = 'POSIX' + value
 | 
			
		||||
 | 
			
		||||
    if property:
 | 
			
		||||
        # Both the property and the value are provided.
 | 
			
		||||
        prop = PROPERTIES.get(property)
 | 
			
		||||
@ -2650,11 +2658,10 @@ class Grapheme(RegexBase):
 | 
			
		||||
    def _compile(self, reverse, fuzzy):
 | 
			
		||||
        # Match at least 1 character until a grapheme boundary is reached. Note
 | 
			
		||||
        # that this is the same whether matching forwards or backwards.
 | 
			
		||||
        character_matcher = LazyRepeat(AnyAll(), 1, None).compile(reverse,
 | 
			
		||||
          fuzzy)
 | 
			
		||||
        boundary_matcher = [(OP.GRAPHEME_BOUNDARY, 1)]
 | 
			
		||||
        grapheme_matcher = Atomic(Sequence([LazyRepeat(AnyAll(), 1, None),
 | 
			
		||||
          GraphemeBoundary()]))
 | 
			
		||||
 | 
			
		||||
        return character_matcher + boundary_matcher
 | 
			
		||||
        return grapheme_matcher.compile(reverse, fuzzy)
 | 
			
		||||
 | 
			
		||||
    def _dump(self, indent, reverse):
 | 
			
		||||
        print "%sGRAPHEME" % (INDENT * indent)
 | 
			
		||||
@ -2662,6 +2669,10 @@ class Grapheme(RegexBase):
 | 
			
		||||
    def max_width(self):
 | 
			
		||||
        return UNLIMITED
 | 
			
		||||
 | 
			
		||||
class GraphemeBoundary:
 | 
			
		||||
    def compile(self, reverse, fuzzy):
 | 
			
		||||
        return [(OP.GRAPHEME_BOUNDARY, 1)]
 | 
			
		||||
 | 
			
		||||
class GreedyRepeat(RegexBase):
 | 
			
		||||
    _opcode = OP.GREEDY_REPEAT
 | 
			
		||||
    _op_name = "GREEDY_REPEAT"
 | 
			
		||||
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@ -97,6 +97,10 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch);
 | 
			
		||||
#define RE_PROP_UPPER 0x090001
 | 
			
		||||
#define RE_PROP_WORD 0x4B0001
 | 
			
		||||
#define RE_PROP_XDIGIT 0x4C0001
 | 
			
		||||
#define RE_PROP_POSIX_ALNUM 0x4E0001
 | 
			
		||||
#define RE_PROP_POSIX_DIGIT 0x4D0001
 | 
			
		||||
#define RE_PROP_POSIX_PUNCT 0x4F0001
 | 
			
		||||
#define RE_PROP_POSIX_XDIGIT 0x500001
 | 
			
		||||
 | 
			
		||||
#define RE_BREAK_OTHER 0
 | 
			
		||||
#define RE_BREAK_DOUBLEQUOTE 1
 | 
			
		||||
@ -130,11 +134,11 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch);
 | 
			
		||||
#define RE_GBREAK_LVT 11
 | 
			
		||||
#define RE_GBREAK_PREPEND 12
 | 
			
		||||
 | 
			
		||||
extern char* re_strings[1257];
 | 
			
		||||
extern RE_Property re_properties[143];
 | 
			
		||||
extern char* re_strings[1261];
 | 
			
		||||
extern RE_Property re_properties[147];
 | 
			
		||||
extern RE_PropertyValue re_property_values[1372];
 | 
			
		||||
extern RE_UINT16 re_expand_on_folding[104];
 | 
			
		||||
extern RE_GetPropertyFunc re_get_property[77];
 | 
			
		||||
extern RE_GetPropertyFunc re_get_property[81];
 | 
			
		||||
 | 
			
		||||
RE_UINT32 re_get_general_category(RE_UINT32 ch);
 | 
			
		||||
RE_UINT32 re_get_block(RE_UINT32 ch);
 | 
			
		||||
@ -213,6 +217,10 @@ RE_UINT32 re_get_graph(RE_UINT32 ch);
 | 
			
		||||
RE_UINT32 re_get_print(RE_UINT32 ch);
 | 
			
		||||
RE_UINT32 re_get_word(RE_UINT32 ch);
 | 
			
		||||
RE_UINT32 re_get_xdigit(RE_UINT32 ch);
 | 
			
		||||
RE_UINT32 re_get_posix_digit(RE_UINT32 ch);
 | 
			
		||||
RE_UINT32 re_get_posix_alnum(RE_UINT32 ch);
 | 
			
		||||
RE_UINT32 re_get_posix_punct(RE_UINT32 ch);
 | 
			
		||||
RE_UINT32 re_get_posix_xdigit(RE_UINT32 ch);
 | 
			
		||||
int re_get_all_cases(RE_UINT32 ch, RE_UINT32* codepoints);
 | 
			
		||||
RE_UINT32 re_get_simple_case_folding(RE_UINT32 ch);
 | 
			
		||||
int re_get_full_case_folding(RE_UINT32 ch, RE_UINT32* codepoints);
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user