mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update regex module
This commit is contained in:
parent
eb4a6f89fe
commit
f41faa558a
@ -225,7 +225,7 @@ __all__ = ["compile", "escape", "findall", "finditer", "fullmatch", "match",
|
||||
"V0", "VERSION0", "V1", "VERSION1", "X", "VERBOSE", "W", "WORD", "error",
|
||||
"Regex"]
|
||||
|
||||
__version__ = "2.4.37"
|
||||
__version__ = "2.4.39"
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# Public interface.
|
||||
|
@ -222,9 +222,6 @@ typedef struct RE_State* RE_StatePtr;
|
||||
/* Handlers for ASCII, locale and Unicode. */
|
||||
typedef struct RE_EncodingTable {
|
||||
BOOL (*has_property)(RE_CODE property, Py_UCS4 ch);
|
||||
Py_UCS4 (*lower)(Py_UCS4 ch);
|
||||
Py_UCS4 (*upper)(Py_UCS4 ch);
|
||||
Py_UCS4 (*title)(Py_UCS4 ch);
|
||||
BOOL (*at_boundary)(RE_StatePtr state, Py_ssize_t text_pos);
|
||||
BOOL (*at_word_start)(RE_StatePtr state, Py_ssize_t text_pos);
|
||||
BOOL (*at_word_end)(RE_StatePtr state, Py_ssize_t text_pos);
|
||||
@ -774,22 +771,6 @@ static BOOL ascii_has_property(RE_CODE property, Py_UCS4 ch) {
|
||||
return unicode_has_property(property, ch);
|
||||
}
|
||||
|
||||
/* Converts a character to lowercase. */
|
||||
static Py_UCS4 ascii_lower(Py_UCS4 ch) {
|
||||
if (ch > RE_ASCII_MAX || !re_get_uppercase(ch))
|
||||
return ch;
|
||||
|
||||
return ch ^ 0x20;
|
||||
}
|
||||
|
||||
/* Converts a character to uppercase. */
|
||||
static Py_UCS4 ascii_upper(Py_UCS4 ch) {
|
||||
if (ch > RE_ASCII_MAX || !re_get_lowercase(ch))
|
||||
return ch;
|
||||
|
||||
return ch ^ 0x20;
|
||||
}
|
||||
|
||||
/* Checks whether the current text position is on a word boundary. */
|
||||
static BOOL ascii_at_boundary(RE_State* state, Py_ssize_t text_pos) {
|
||||
BOOL before;
|
||||
@ -932,9 +913,6 @@ static int ascii_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) {
|
||||
/* The handlers for ASCII characters. */
|
||||
static RE_EncodingTable ascii_encoding = {
|
||||
ascii_has_property,
|
||||
ascii_lower,
|
||||
ascii_upper,
|
||||
ascii_upper, /* For ASCII, titlecase is the same as uppercase. */
|
||||
ascii_at_boundary,
|
||||
ascii_at_word_start,
|
||||
ascii_at_word_end,
|
||||
@ -1035,22 +1013,6 @@ static BOOL locale_has_property(RE_CODE property, Py_UCS4 ch) {
|
||||
return v == value;
|
||||
}
|
||||
|
||||
/* Converts a character to lowercase. */
|
||||
static Py_UCS4 locale_lower(Py_UCS4 ch) {
|
||||
if (ch > RE_LOCALE_MAX)
|
||||
return ch;
|
||||
|
||||
return tolower(ch);
|
||||
}
|
||||
|
||||
/* Converts a character to uppercase. */
|
||||
static Py_UCS4 locale_upper(Py_UCS4 ch) {
|
||||
if (ch > RE_LOCALE_MAX)
|
||||
return ch;
|
||||
|
||||
return toupper(ch);
|
||||
}
|
||||
|
||||
/* Checks whether the current text position is on a word boundary. */
|
||||
static BOOL locale_at_boundary(RE_State* state, Py_ssize_t text_pos) {
|
||||
BOOL before;
|
||||
@ -1166,9 +1128,6 @@ static int locale_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) {
|
||||
/* The handlers for locale characters. */
|
||||
static RE_EncodingTable locale_encoding = {
|
||||
locale_has_property,
|
||||
locale_lower,
|
||||
locale_upper,
|
||||
locale_upper, /* For locale, titlecase is the same as uppercase. */
|
||||
locale_at_boundary,
|
||||
locale_at_word_start,
|
||||
locale_at_word_end,
|
||||
@ -1226,21 +1185,6 @@ static BOOL unicode_has_property(RE_CODE property, Py_UCS4 ch) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Converts a Unicode character to lowercase. */
|
||||
static Py_UCS4 unicode_lower(Py_UCS4 ch) {
|
||||
return Py_UNICODE_TOLOWER((Py_UCS4)ch);
|
||||
}
|
||||
|
||||
/* Converts a Unicode character to uppercase. */
|
||||
static Py_UCS4 unicode_upper(Py_UCS4 ch) {
|
||||
return Py_UNICODE_TOUPPER((Py_UCS4)ch);
|
||||
}
|
||||
|
||||
/* Converts a Unicode character to titlecase. */
|
||||
static Py_UCS4 unicode_title(Py_UCS4 ch) {
|
||||
return Py_UNICODE_TOTITLE((Py_UCS4)ch);
|
||||
}
|
||||
|
||||
/* Checks whether the current text position is on a word boundary. */
|
||||
static BOOL unicode_at_boundary(RE_State* state, Py_ssize_t text_pos) {
|
||||
BOOL before;
|
||||
@ -1285,7 +1229,7 @@ static BOOL unicode_at_word_end(RE_State* state, Py_ssize_t text_pos) {
|
||||
* Only a limited number are treated as vowels.
|
||||
*/
|
||||
Py_LOCAL_INLINE(BOOL) is_unicode_vowel(Py_UCS4 ch) {
|
||||
switch (unicode_lower(ch)) {
|
||||
switch (Py_UNICODE_TOLOWER(ch)) {
|
||||
case 'a': case 0xE0: case 0xE1: case 0xE2:
|
||||
case 'e': case 0xE8: case 0xE9: case 0xEA:
|
||||
case 'i': case 0xEC: case 0xED: case 0xEE:
|
||||
@ -1773,9 +1717,6 @@ static int unicode_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) {
|
||||
/* The handlers for Unicode characters. */
|
||||
static RE_EncodingTable unicode_encoding = {
|
||||
unicode_has_property,
|
||||
unicode_lower,
|
||||
unicode_upper,
|
||||
unicode_title,
|
||||
unicode_at_boundary,
|
||||
unicode_at_word_start,
|
||||
unicode_at_word_end,
|
||||
@ -8521,6 +8462,7 @@ Py_LOCAL_INLINE(BOOL) retry_fuzzy_match_string_fld(RE_SafeState* safe_state,
|
||||
new_node = bt_data->fuzzy_string.position.node;
|
||||
data.new_string_pos = bt_data->fuzzy_string.string_pos;
|
||||
data.new_folded_pos = bt_data->fuzzy_string.folded_pos;
|
||||
data.folded_len = bt_data->fuzzy_string.folded_len;
|
||||
data.fuzzy_type = bt_data->fuzzy_string.fuzzy_type;
|
||||
data.step = bt_data->fuzzy_string.step;
|
||||
|
||||
@ -8701,6 +8643,7 @@ Py_LOCAL_INLINE(BOOL) retry_fuzzy_match_string_fld2(RE_SafeState* safe_state,
|
||||
new_node = bt_data->fuzzy_string.position.node;
|
||||
new_group_pos = bt_data->fuzzy_string.string_pos;
|
||||
data.new_folded_pos = bt_data->fuzzy_string.folded_pos;
|
||||
data.folded_len = bt_data->fuzzy_string.folded_len;
|
||||
data.new_gfolded_pos = bt_data->fuzzy_string.gfolded_pos;
|
||||
data.fuzzy_type = bt_data->fuzzy_string.fuzzy_type;
|
||||
data.step = bt_data->fuzzy_string.step;
|
||||
@ -10953,9 +10896,9 @@ advance:
|
||||
goto backtrack;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
string_pos = -1;
|
||||
}
|
||||
|
||||
/* Successful match. */
|
||||
node = node->next_1.node;
|
||||
@ -11057,12 +11000,10 @@ advance:
|
||||
}
|
||||
}
|
||||
|
||||
if (folded_pos < folded_len) {
|
||||
string_pos = -1;
|
||||
goto backtrack;
|
||||
}
|
||||
|
||||
string_pos = -1;
|
||||
if (folded_pos < folded_len)
|
||||
goto backtrack;
|
||||
}
|
||||
|
||||
/* Successful match. */
|
||||
@ -11165,12 +11106,10 @@ advance:
|
||||
}
|
||||
}
|
||||
|
||||
if (folded_pos > 0) {
|
||||
string_pos = -1;
|
||||
goto backtrack;
|
||||
}
|
||||
|
||||
string_pos = -1;
|
||||
if (folded_pos > 0)
|
||||
goto backtrack;
|
||||
}
|
||||
|
||||
/* Successful match. */
|
||||
@ -11220,9 +11159,9 @@ advance:
|
||||
goto backtrack;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
string_pos = -1;
|
||||
}
|
||||
|
||||
/* Successful match. */
|
||||
node = node->next_1.node;
|
||||
@ -11271,9 +11210,9 @@ advance:
|
||||
goto backtrack;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
string_pos = -1;
|
||||
}
|
||||
|
||||
/* Successful match. */
|
||||
node = node->next_1.node;
|
||||
@ -11322,9 +11261,9 @@ advance:
|
||||
goto backtrack;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
string_pos = -1;
|
||||
}
|
||||
|
||||
/* Successful match. */
|
||||
node = node->next_1.node;
|
||||
@ -14494,7 +14433,7 @@ Py_LOCAL_INLINE(PyObject*) join_list_info(JoinInfo* join_info) {
|
||||
* Returns its length if it is a literal, otherwise -1.
|
||||
*/
|
||||
Py_LOCAL_INLINE(Py_ssize_t) check_replacement_string(PyObject* str_replacement,
|
||||
char special_char) {
|
||||
unsigned char special_char) {
|
||||
RE_StringInfo str_info;
|
||||
Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
|
||||
Py_ssize_t pos;
|
||||
@ -16951,6 +16890,94 @@ Py_LOCAL_INLINE(BOOL) append_string(PyObject* list, char* string) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Appends a (decimal) integer to a list. */
|
||||
Py_LOCAL_INLINE(BOOL) append_integer(PyObject* list, Py_ssize_t value) {
|
||||
PyObject* int_obj;
|
||||
PyObject* repr_obj;
|
||||
int status;
|
||||
|
||||
int_obj = Py_BuildValue("n", value);
|
||||
if (!int_obj)
|
||||
return FALSE;
|
||||
|
||||
repr_obj = PyObject_Repr(int_obj);
|
||||
Py_DECREF(int_obj);
|
||||
if (!repr_obj)
|
||||
return FALSE;
|
||||
|
||||
status = PyList_Append(list, repr_obj);
|
||||
Py_DECREF(repr_obj);
|
||||
if (status < 0)
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* MatchObject's '__repr__' method. */
|
||||
static PyObject* match_repr(PyObject* self_) {
|
||||
MatchObject* self;
|
||||
PyObject* list;
|
||||
PyObject* matched_substring;
|
||||
PyObject* matched_repr;
|
||||
int status;
|
||||
PyObject* separator;
|
||||
PyObject* result;
|
||||
|
||||
self = (MatchObject*)self_;
|
||||
|
||||
list = PyList_New(0);
|
||||
if (!list)
|
||||
return NULL;
|
||||
|
||||
if (!append_string(list, "<regex.Match object; span=("))
|
||||
goto error;
|
||||
|
||||
if (!append_integer(list, self->match_start))
|
||||
goto error;
|
||||
|
||||
if (! append_string(list, ", "))
|
||||
goto error;
|
||||
|
||||
if (!append_integer(list, self->match_end))
|
||||
goto error;
|
||||
|
||||
if (!append_string(list, "), match="))
|
||||
goto error;
|
||||
|
||||
matched_substring = get_slice(self->substring, self->match_start -
|
||||
self->substring_offset, self->match_end - self->substring_offset);
|
||||
if (!matched_substring)
|
||||
goto error;
|
||||
|
||||
matched_repr = PyObject_Repr(matched_substring);
|
||||
Py_DECREF(matched_substring);
|
||||
if (!matched_repr)
|
||||
goto error;
|
||||
|
||||
status = PyList_Append(list, matched_repr);
|
||||
Py_DECREF(matched_repr);
|
||||
if (status < 0)
|
||||
goto error;
|
||||
|
||||
if (! append_string(list, ">"))
|
||||
goto error;
|
||||
|
||||
separator = Py_BuildValue("s", "");
|
||||
if (!separator)
|
||||
goto error;
|
||||
|
||||
result = PyUnicode_Join(separator, list);
|
||||
Py_DECREF(separator);
|
||||
|
||||
Py_DECREF(list);
|
||||
|
||||
return result;
|
||||
|
||||
error:
|
||||
Py_DECREF(list);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* PatternObject's '__repr__' method. */
|
||||
static PyObject* pattern_repr(PyObject* self_) {
|
||||
PatternObject* self;
|
||||
@ -19873,6 +19900,7 @@ PyMODINIT_FUNC init_regex(void) {
|
||||
|
||||
/* Initialise Match_Type. */
|
||||
Match_Type.tp_dealloc = match_dealloc;
|
||||
Match_Type.tp_repr = match_repr;
|
||||
Match_Type.tp_as_mapping = &match_as_mapping;
|
||||
Match_Type.tp_flags = Py_TPFLAGS_DEFAULT;
|
||||
Match_Type.tp_doc = match_doc;
|
||||
|
Loading…
x
Reference in New Issue
Block a user