mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Fix ICU find returning incorrect position and length values when non-BMP characters are present on wide Python builds
This commit is contained in:
parent
27327e811b
commit
4eaee89487
@ -191,6 +191,9 @@ end:
|
|||||||
// Collator.find {{{
|
// Collator.find {{{
|
||||||
static PyObject *
|
static PyObject *
|
||||||
icu_Collator_find(icu_Collator *self, PyObject *args, PyObject *kwargs) {
|
icu_Collator_find(icu_Collator *self, PyObject *args, PyObject *kwargs) {
|
||||||
|
#if PY_VERSION_HEX >= 0x03030000
|
||||||
|
#error Not implemented for python >= 3.3
|
||||||
|
#endif
|
||||||
PyObject *a_ = NULL, *b_ = NULL;
|
PyObject *a_ = NULL, *b_ = NULL;
|
||||||
UChar *a = NULL, *b = NULL;
|
UChar *a = NULL, *b = NULL;
|
||||||
int32_t asz = 0, bsz = 0, pos = -1, length = -1;
|
int32_t asz = 0, bsz = 0, pos = -1, length = -1;
|
||||||
@ -207,10 +210,16 @@ icu_Collator_find(icu_Collator *self, PyObject *args, PyObject *kwargs) {
|
|||||||
search = usearch_openFromCollator(a, asz, b, bsz, self->collator, NULL, &status);
|
search = usearch_openFromCollator(a, asz, b, bsz, self->collator, NULL, &status);
|
||||||
if (U_SUCCESS(status)) {
|
if (U_SUCCESS(status)) {
|
||||||
pos = usearch_first(search, &status);
|
pos = usearch_first(search, &status);
|
||||||
if (pos != USEARCH_DONE)
|
if (pos != USEARCH_DONE) {
|
||||||
length = usearch_getMatchedLength(search);
|
length = usearch_getMatchedLength(search);
|
||||||
else
|
#ifdef Py_UNICODE_WIDE
|
||||||
pos = -1;
|
// We have to return number of unicode characters since the string
|
||||||
|
// could contain surrogate pairs which are represented as a single
|
||||||
|
// character in python wide builds
|
||||||
|
length = u_countChar32(b + pos, length);
|
||||||
|
pos = u_countChar32(b, pos);
|
||||||
|
#endif
|
||||||
|
} else pos = -1;
|
||||||
}
|
}
|
||||||
end:
|
end:
|
||||||
if (search != NULL) usearch_close(search);
|
if (search != NULL) usearch_close(search);
|
||||||
|
@ -92,7 +92,8 @@ class TestICU(unittest.TestCase):
|
|||||||
def test_find(self):
|
def test_find(self):
|
||||||
' Test searching for substrings '
|
' Test searching for substrings '
|
||||||
self.ae((1, 1), icu.find(b'a', b'1ab'))
|
self.ae((1, 1), icu.find(b'a', b'1ab'))
|
||||||
self.ae((1, 2), icu.find('\U0001f431', 'x\U0001f431x'))
|
self.ae((1, 1 if sys.maxunicode >= 0x10ffff else 2), icu.find('\U0001f431', 'x\U0001f431x'))
|
||||||
|
self.ae((1 if sys.maxunicode >= 0x10ffff else 2, 1), icu.find('y', '\U0001f431y'))
|
||||||
self.ae((0, 4), icu.primary_find('pena', 'peña'))
|
self.ae((0, 4), icu.primary_find('pena', 'peña'))
|
||||||
for k, v in {u'pèché': u'peche', u'flüße':u'Flusse', u'Štepánek':u'ŠtepaneK'}.iteritems():
|
for k, v in {u'pèché': u'peche', u'flüße':u'Flusse', u'Štepánek':u'ŠtepaneK'}.iteritems():
|
||||||
self.ae((1, len(k)), icu.primary_find(v, ' ' + k), 'Failed to find %s in %s' % (v, k))
|
self.ae((1, len(k)), icu.primary_find(v, ' ' + k), 'Failed to find %s in %s' % (v, k))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user