mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update regex module
This commit is contained in:
parent
eb4a6f89fe
commit
f41faa558a
@ -225,7 +225,7 @@ __all__ = ["compile", "escape", "findall", "finditer", "fullmatch", "match",
|
|||||||
"V0", "VERSION0", "V1", "VERSION1", "X", "VERBOSE", "W", "WORD", "error",
|
"V0", "VERSION0", "V1", "VERSION1", "X", "VERBOSE", "W", "WORD", "error",
|
||||||
"Regex"]
|
"Regex"]
|
||||||
|
|
||||||
__version__ = "2.4.37"
|
__version__ = "2.4.39"
|
||||||
|
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
# Public interface.
|
# Public interface.
|
||||||
|
@ -222,9 +222,6 @@ typedef struct RE_State* RE_StatePtr;
|
|||||||
/* Handlers for ASCII, locale and Unicode. */
|
/* Handlers for ASCII, locale and Unicode. */
|
||||||
typedef struct RE_EncodingTable {
|
typedef struct RE_EncodingTable {
|
||||||
BOOL (*has_property)(RE_CODE property, Py_UCS4 ch);
|
BOOL (*has_property)(RE_CODE property, Py_UCS4 ch);
|
||||||
Py_UCS4 (*lower)(Py_UCS4 ch);
|
|
||||||
Py_UCS4 (*upper)(Py_UCS4 ch);
|
|
||||||
Py_UCS4 (*title)(Py_UCS4 ch);
|
|
||||||
BOOL (*at_boundary)(RE_StatePtr state, Py_ssize_t text_pos);
|
BOOL (*at_boundary)(RE_StatePtr state, Py_ssize_t text_pos);
|
||||||
BOOL (*at_word_start)(RE_StatePtr state, Py_ssize_t text_pos);
|
BOOL (*at_word_start)(RE_StatePtr state, Py_ssize_t text_pos);
|
||||||
BOOL (*at_word_end)(RE_StatePtr state, Py_ssize_t text_pos);
|
BOOL (*at_word_end)(RE_StatePtr state, Py_ssize_t text_pos);
|
||||||
@ -774,22 +771,6 @@ static BOOL ascii_has_property(RE_CODE property, Py_UCS4 ch) {
|
|||||||
return unicode_has_property(property, ch);
|
return unicode_has_property(property, ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Converts a character to lowercase. */
|
|
||||||
static Py_UCS4 ascii_lower(Py_UCS4 ch) {
|
|
||||||
if (ch > RE_ASCII_MAX || !re_get_uppercase(ch))
|
|
||||||
return ch;
|
|
||||||
|
|
||||||
return ch ^ 0x20;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Converts a character to uppercase. */
|
|
||||||
static Py_UCS4 ascii_upper(Py_UCS4 ch) {
|
|
||||||
if (ch > RE_ASCII_MAX || !re_get_lowercase(ch))
|
|
||||||
return ch;
|
|
||||||
|
|
||||||
return ch ^ 0x20;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Checks whether the current text position is on a word boundary. */
|
/* Checks whether the current text position is on a word boundary. */
|
||||||
static BOOL ascii_at_boundary(RE_State* state, Py_ssize_t text_pos) {
|
static BOOL ascii_at_boundary(RE_State* state, Py_ssize_t text_pos) {
|
||||||
BOOL before;
|
BOOL before;
|
||||||
@ -932,9 +913,6 @@ static int ascii_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) {
|
|||||||
/* The handlers for ASCII characters. */
|
/* The handlers for ASCII characters. */
|
||||||
static RE_EncodingTable ascii_encoding = {
|
static RE_EncodingTable ascii_encoding = {
|
||||||
ascii_has_property,
|
ascii_has_property,
|
||||||
ascii_lower,
|
|
||||||
ascii_upper,
|
|
||||||
ascii_upper, /* For ASCII, titlecase is the same as uppercase. */
|
|
||||||
ascii_at_boundary,
|
ascii_at_boundary,
|
||||||
ascii_at_word_start,
|
ascii_at_word_start,
|
||||||
ascii_at_word_end,
|
ascii_at_word_end,
|
||||||
@ -1035,22 +1013,6 @@ static BOOL locale_has_property(RE_CODE property, Py_UCS4 ch) {
|
|||||||
return v == value;
|
return v == value;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Converts a character to lowercase. */
|
|
||||||
static Py_UCS4 locale_lower(Py_UCS4 ch) {
|
|
||||||
if (ch > RE_LOCALE_MAX)
|
|
||||||
return ch;
|
|
||||||
|
|
||||||
return tolower(ch);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Converts a character to uppercase. */
|
|
||||||
static Py_UCS4 locale_upper(Py_UCS4 ch) {
|
|
||||||
if (ch > RE_LOCALE_MAX)
|
|
||||||
return ch;
|
|
||||||
|
|
||||||
return toupper(ch);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Checks whether the current text position is on a word boundary. */
|
/* Checks whether the current text position is on a word boundary. */
|
||||||
static BOOL locale_at_boundary(RE_State* state, Py_ssize_t text_pos) {
|
static BOOL locale_at_boundary(RE_State* state, Py_ssize_t text_pos) {
|
||||||
BOOL before;
|
BOOL before;
|
||||||
@ -1166,9 +1128,6 @@ static int locale_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) {
|
|||||||
/* The handlers for locale characters. */
|
/* The handlers for locale characters. */
|
||||||
static RE_EncodingTable locale_encoding = {
|
static RE_EncodingTable locale_encoding = {
|
||||||
locale_has_property,
|
locale_has_property,
|
||||||
locale_lower,
|
|
||||||
locale_upper,
|
|
||||||
locale_upper, /* For locale, titlecase is the same as uppercase. */
|
|
||||||
locale_at_boundary,
|
locale_at_boundary,
|
||||||
locale_at_word_start,
|
locale_at_word_start,
|
||||||
locale_at_word_end,
|
locale_at_word_end,
|
||||||
@ -1226,21 +1185,6 @@ static BOOL unicode_has_property(RE_CODE property, Py_UCS4 ch) {
|
|||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Converts a Unicode character to lowercase. */
|
|
||||||
static Py_UCS4 unicode_lower(Py_UCS4 ch) {
|
|
||||||
return Py_UNICODE_TOLOWER((Py_UCS4)ch);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Converts a Unicode character to uppercase. */
|
|
||||||
static Py_UCS4 unicode_upper(Py_UCS4 ch) {
|
|
||||||
return Py_UNICODE_TOUPPER((Py_UCS4)ch);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Converts a Unicode character to titlecase. */
|
|
||||||
static Py_UCS4 unicode_title(Py_UCS4 ch) {
|
|
||||||
return Py_UNICODE_TOTITLE((Py_UCS4)ch);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Checks whether the current text position is on a word boundary. */
|
/* Checks whether the current text position is on a word boundary. */
|
||||||
static BOOL unicode_at_boundary(RE_State* state, Py_ssize_t text_pos) {
|
static BOOL unicode_at_boundary(RE_State* state, Py_ssize_t text_pos) {
|
||||||
BOOL before;
|
BOOL before;
|
||||||
@ -1285,7 +1229,7 @@ static BOOL unicode_at_word_end(RE_State* state, Py_ssize_t text_pos) {
|
|||||||
* Only a limited number are treated as vowels.
|
* Only a limited number are treated as vowels.
|
||||||
*/
|
*/
|
||||||
Py_LOCAL_INLINE(BOOL) is_unicode_vowel(Py_UCS4 ch) {
|
Py_LOCAL_INLINE(BOOL) is_unicode_vowel(Py_UCS4 ch) {
|
||||||
switch (unicode_lower(ch)) {
|
switch (Py_UNICODE_TOLOWER(ch)) {
|
||||||
case 'a': case 0xE0: case 0xE1: case 0xE2:
|
case 'a': case 0xE0: case 0xE1: case 0xE2:
|
||||||
case 'e': case 0xE8: case 0xE9: case 0xEA:
|
case 'e': case 0xE8: case 0xE9: case 0xEA:
|
||||||
case 'i': case 0xEC: case 0xED: case 0xEE:
|
case 'i': case 0xEC: case 0xED: case 0xEE:
|
||||||
@ -1773,9 +1717,6 @@ static int unicode_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) {
|
|||||||
/* The handlers for Unicode characters. */
|
/* The handlers for Unicode characters. */
|
||||||
static RE_EncodingTable unicode_encoding = {
|
static RE_EncodingTable unicode_encoding = {
|
||||||
unicode_has_property,
|
unicode_has_property,
|
||||||
unicode_lower,
|
|
||||||
unicode_upper,
|
|
||||||
unicode_title,
|
|
||||||
unicode_at_boundary,
|
unicode_at_boundary,
|
||||||
unicode_at_word_start,
|
unicode_at_word_start,
|
||||||
unicode_at_word_end,
|
unicode_at_word_end,
|
||||||
@ -8521,6 +8462,7 @@ Py_LOCAL_INLINE(BOOL) retry_fuzzy_match_string_fld(RE_SafeState* safe_state,
|
|||||||
new_node = bt_data->fuzzy_string.position.node;
|
new_node = bt_data->fuzzy_string.position.node;
|
||||||
data.new_string_pos = bt_data->fuzzy_string.string_pos;
|
data.new_string_pos = bt_data->fuzzy_string.string_pos;
|
||||||
data.new_folded_pos = bt_data->fuzzy_string.folded_pos;
|
data.new_folded_pos = bt_data->fuzzy_string.folded_pos;
|
||||||
|
data.folded_len = bt_data->fuzzy_string.folded_len;
|
||||||
data.fuzzy_type = bt_data->fuzzy_string.fuzzy_type;
|
data.fuzzy_type = bt_data->fuzzy_string.fuzzy_type;
|
||||||
data.step = bt_data->fuzzy_string.step;
|
data.step = bt_data->fuzzy_string.step;
|
||||||
|
|
||||||
@ -8701,6 +8643,7 @@ Py_LOCAL_INLINE(BOOL) retry_fuzzy_match_string_fld2(RE_SafeState* safe_state,
|
|||||||
new_node = bt_data->fuzzy_string.position.node;
|
new_node = bt_data->fuzzy_string.position.node;
|
||||||
new_group_pos = bt_data->fuzzy_string.string_pos;
|
new_group_pos = bt_data->fuzzy_string.string_pos;
|
||||||
data.new_folded_pos = bt_data->fuzzy_string.folded_pos;
|
data.new_folded_pos = bt_data->fuzzy_string.folded_pos;
|
||||||
|
data.folded_len = bt_data->fuzzy_string.folded_len;
|
||||||
data.new_gfolded_pos = bt_data->fuzzy_string.gfolded_pos;
|
data.new_gfolded_pos = bt_data->fuzzy_string.gfolded_pos;
|
||||||
data.fuzzy_type = bt_data->fuzzy_string.fuzzy_type;
|
data.fuzzy_type = bt_data->fuzzy_string.fuzzy_type;
|
||||||
data.step = bt_data->fuzzy_string.step;
|
data.step = bt_data->fuzzy_string.step;
|
||||||
@ -10953,9 +10896,9 @@ advance:
|
|||||||
goto backtrack;
|
goto backtrack;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
string_pos = -1;
|
string_pos = -1;
|
||||||
}
|
|
||||||
|
|
||||||
/* Successful match. */
|
/* Successful match. */
|
||||||
node = node->next_1.node;
|
node = node->next_1.node;
|
||||||
@ -11057,12 +11000,10 @@ advance:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (folded_pos < folded_len) {
|
|
||||||
string_pos = -1;
|
string_pos = -1;
|
||||||
goto backtrack;
|
|
||||||
}
|
|
||||||
|
|
||||||
string_pos = -1;
|
if (folded_pos < folded_len)
|
||||||
|
goto backtrack;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Successful match. */
|
/* Successful match. */
|
||||||
@ -11165,12 +11106,10 @@ advance:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (folded_pos > 0) {
|
|
||||||
string_pos = -1;
|
string_pos = -1;
|
||||||
goto backtrack;
|
|
||||||
}
|
|
||||||
|
|
||||||
string_pos = -1;
|
if (folded_pos > 0)
|
||||||
|
goto backtrack;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Successful match. */
|
/* Successful match. */
|
||||||
@ -11220,9 +11159,9 @@ advance:
|
|||||||
goto backtrack;
|
goto backtrack;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
string_pos = -1;
|
string_pos = -1;
|
||||||
}
|
|
||||||
|
|
||||||
/* Successful match. */
|
/* Successful match. */
|
||||||
node = node->next_1.node;
|
node = node->next_1.node;
|
||||||
@ -11271,9 +11210,9 @@ advance:
|
|||||||
goto backtrack;
|
goto backtrack;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
string_pos = -1;
|
string_pos = -1;
|
||||||
}
|
|
||||||
|
|
||||||
/* Successful match. */
|
/* Successful match. */
|
||||||
node = node->next_1.node;
|
node = node->next_1.node;
|
||||||
@ -11322,9 +11261,9 @@ advance:
|
|||||||
goto backtrack;
|
goto backtrack;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
string_pos = -1;
|
string_pos = -1;
|
||||||
}
|
|
||||||
|
|
||||||
/* Successful match. */
|
/* Successful match. */
|
||||||
node = node->next_1.node;
|
node = node->next_1.node;
|
||||||
@ -14494,7 +14433,7 @@ Py_LOCAL_INLINE(PyObject*) join_list_info(JoinInfo* join_info) {
|
|||||||
* Returns its length if it is a literal, otherwise -1.
|
* Returns its length if it is a literal, otherwise -1.
|
||||||
*/
|
*/
|
||||||
Py_LOCAL_INLINE(Py_ssize_t) check_replacement_string(PyObject* str_replacement,
|
Py_LOCAL_INLINE(Py_ssize_t) check_replacement_string(PyObject* str_replacement,
|
||||||
char special_char) {
|
unsigned char special_char) {
|
||||||
RE_StringInfo str_info;
|
RE_StringInfo str_info;
|
||||||
Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
|
Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
|
||||||
Py_ssize_t pos;
|
Py_ssize_t pos;
|
||||||
@ -16951,6 +16890,94 @@ Py_LOCAL_INLINE(BOOL) append_string(PyObject* list, char* string) {
|
|||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Appends a (decimal) integer to a list. */
|
||||||
|
Py_LOCAL_INLINE(BOOL) append_integer(PyObject* list, Py_ssize_t value) {
|
||||||
|
PyObject* int_obj;
|
||||||
|
PyObject* repr_obj;
|
||||||
|
int status;
|
||||||
|
|
||||||
|
int_obj = Py_BuildValue("n", value);
|
||||||
|
if (!int_obj)
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
repr_obj = PyObject_Repr(int_obj);
|
||||||
|
Py_DECREF(int_obj);
|
||||||
|
if (!repr_obj)
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
status = PyList_Append(list, repr_obj);
|
||||||
|
Py_DECREF(repr_obj);
|
||||||
|
if (status < 0)
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* MatchObject's '__repr__' method. */
|
||||||
|
static PyObject* match_repr(PyObject* self_) {
|
||||||
|
MatchObject* self;
|
||||||
|
PyObject* list;
|
||||||
|
PyObject* matched_substring;
|
||||||
|
PyObject* matched_repr;
|
||||||
|
int status;
|
||||||
|
PyObject* separator;
|
||||||
|
PyObject* result;
|
||||||
|
|
||||||
|
self = (MatchObject*)self_;
|
||||||
|
|
||||||
|
list = PyList_New(0);
|
||||||
|
if (!list)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (!append_string(list, "<regex.Match object; span=("))
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
if (!append_integer(list, self->match_start))
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
if (! append_string(list, ", "))
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
if (!append_integer(list, self->match_end))
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
if (!append_string(list, "), match="))
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
matched_substring = get_slice(self->substring, self->match_start -
|
||||||
|
self->substring_offset, self->match_end - self->substring_offset);
|
||||||
|
if (!matched_substring)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
matched_repr = PyObject_Repr(matched_substring);
|
||||||
|
Py_DECREF(matched_substring);
|
||||||
|
if (!matched_repr)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
status = PyList_Append(list, matched_repr);
|
||||||
|
Py_DECREF(matched_repr);
|
||||||
|
if (status < 0)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
if (! append_string(list, ">"))
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
separator = Py_BuildValue("s", "");
|
||||||
|
if (!separator)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
result = PyUnicode_Join(separator, list);
|
||||||
|
Py_DECREF(separator);
|
||||||
|
|
||||||
|
Py_DECREF(list);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
|
||||||
|
error:
|
||||||
|
Py_DECREF(list);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
/* PatternObject's '__repr__' method. */
|
/* PatternObject's '__repr__' method. */
|
||||||
static PyObject* pattern_repr(PyObject* self_) {
|
static PyObject* pattern_repr(PyObject* self_) {
|
||||||
PatternObject* self;
|
PatternObject* self;
|
||||||
@ -19873,6 +19900,7 @@ PyMODINIT_FUNC init_regex(void) {
|
|||||||
|
|
||||||
/* Initialise Match_Type. */
|
/* Initialise Match_Type. */
|
||||||
Match_Type.tp_dealloc = match_dealloc;
|
Match_Type.tp_dealloc = match_dealloc;
|
||||||
|
Match_Type.tp_repr = match_repr;
|
||||||
Match_Type.tp_as_mapping = &match_as_mapping;
|
Match_Type.tp_as_mapping = &match_as_mapping;
|
||||||
Match_Type.tp_flags = Py_TPFLAGS_DEFAULT;
|
Match_Type.tp_flags = Py_TPFLAGS_DEFAULT;
|
||||||
Match_Type.tp_doc = match_doc;
|
Match_Type.tp_doc = match_doc;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user