mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement find_all for ICU collators
This commit is contained in:
parent
50f4b86f9e
commit
8df81bb212
@ -218,7 +218,7 @@ icu_Collator_find(icu_Collator *self, PyObject *args) {
|
|||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
UStringSearch *search = NULL;
|
UStringSearch *search = NULL;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "OO", &a_, &b_)) return NULL;
|
if (!PyArg_ParseTuple(args, "UU", &a_, &b_)) return NULL;
|
||||||
|
|
||||||
a = python_to_icu(a_, &asz);
|
a = python_to_icu(a_, &asz);
|
||||||
if (a == NULL) goto end;
|
if (a == NULL) goto end;
|
||||||
@ -245,6 +245,44 @@ end:
|
|||||||
return (PyErr_Occurred()) ? NULL : Py_BuildValue("ll", (long)pos, (long)length);
|
return (PyErr_Occurred()) ? NULL : Py_BuildValue("ll", (long)pos, (long)length);
|
||||||
} // }}}
|
} // }}}
|
||||||
|
|
||||||
|
// Collator.find_all {{{
|
||||||
|
static PyObject *
|
||||||
|
icu_Collator_find_all(icu_Collator *self, PyObject *args) {
|
||||||
|
PyObject *a_ = NULL, *b_ = NULL, *callback;
|
||||||
|
UChar *a = NULL, *b = NULL;
|
||||||
|
int32_t asz = 0, bsz = 0, pos = -1, length = -1;
|
||||||
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
|
UStringSearch *search = NULL;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTuple(args, "UUO", &a_, &b_, &callback)) return NULL;
|
||||||
|
|
||||||
|
a = python_to_icu(a_, &asz);
|
||||||
|
b = python_to_icu(b_, &bsz);
|
||||||
|
if (a && b) {
|
||||||
|
search = usearch_openFromCollator(a, asz, b, bsz, self->collator, NULL, &status);
|
||||||
|
if (search && U_SUCCESS(status)) {
|
||||||
|
pos = usearch_first(search, &status);
|
||||||
|
int32_t codepoint_count = 0, pos_for_codepoint_count = 0;
|
||||||
|
while (pos != USEARCH_DONE) {
|
||||||
|
codepoint_count += u_countChar32(b + pos_for_codepoint_count, pos - pos_for_codepoint_count);
|
||||||
|
pos_for_codepoint_count = pos;
|
||||||
|
length = usearch_getMatchedLength(search);
|
||||||
|
length = u_countChar32(b + pos, length);
|
||||||
|
PyObject *ret = PyObject_CallFunction(callback, "ii", pos, length);
|
||||||
|
if (ret && ret == Py_None) pos = usearch_next(search, &status);
|
||||||
|
else pos = USEARCH_DONE;
|
||||||
|
Py_CLEAR(ret);
|
||||||
|
}
|
||||||
|
} else PyErr_SetString(PyExc_ValueError, u_errorName(status));
|
||||||
|
}
|
||||||
|
if (search != NULL) usearch_close(search);
|
||||||
|
if (a != NULL) free(a);
|
||||||
|
if (b != NULL) free(b);
|
||||||
|
|
||||||
|
if (PyErr_Occurred()) return NULL;
|
||||||
|
Py_RETURN_NONE;
|
||||||
|
} // }}}
|
||||||
|
|
||||||
// Collator.contains {{{
|
// Collator.contains {{{
|
||||||
static PyObject *
|
static PyObject *
|
||||||
icu_Collator_contains(icu_Collator *self, PyObject *args) {
|
icu_Collator_contains(icu_Collator *self, PyObject *args) {
|
||||||
@ -444,6 +482,10 @@ static PyMethodDef icu_Collator_methods[] = {
|
|||||||
"strcmp(unicode object, unicode object) -> strcmp(a, b) <=> cmp(sorty_key(a), sort_key(b)), but faster."
|
"strcmp(unicode object, unicode object) -> strcmp(a, b) <=> cmp(sorty_key(a), sort_key(b)), but faster."
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{"find_all", (PyCFunction)icu_Collator_find_all, METH_VARARGS,
|
||||||
|
"find(pattern, source, callback) -> reports the position and length of all occurrences of pattern in source to callback. Aborts if callback returns anything other than None."
|
||||||
|
},
|
||||||
|
|
||||||
{"find", (PyCFunction)icu_Collator_find, METH_VARARGS,
|
{"find", (PyCFunction)icu_Collator_find, METH_VARARGS,
|
||||||
"find(pattern, source) -> returns the position and length of the first occurrence of pattern in source. Returns (-1, -1) if not found."
|
"find(pattern, source) -> returns the position and length of the first occurrence of pattern in source. Returns (-1, -1) if not found."
|
||||||
},
|
},
|
||||||
|
@ -118,6 +118,11 @@ class TestICU(unittest.TestCase):
|
|||||||
self.ae((0, 13), icu.primary_no_punc_find("typographers", 'typographer’s'))
|
self.ae((0, 13), icu.primary_no_punc_find("typographers", 'typographer’s'))
|
||||||
self.ae((0, 7), icu.primary_no_punc_find('abcd', 'a\u00adb\u200cc\u200dd'))
|
self.ae((0, 7), icu.primary_no_punc_find('abcd', 'a\u00adb\u200cc\u200dd'))
|
||||||
self.ae((0, 5), icu.primary_no_punc_find('abcd', 'ab cd'))
|
self.ae((0, 5), icu.primary_no_punc_find('abcd', 'ab cd'))
|
||||||
|
# test find all
|
||||||
|
m = []
|
||||||
|
a = lambda p,l : m.append((p, l))
|
||||||
|
icu.primary_collator_without_punctuation().find_all('a', 'a a🐱a', a)
|
||||||
|
self.ae(m, [(0, 1), (2, 1), (5, 1)])
|
||||||
|
|
||||||
def test_collation_order(self):
|
def test_collation_order(self):
|
||||||
'Testing collation ordering'
|
'Testing collation ordering'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user