mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Fix ICU find returning an incorrect position and length when non-BMP characters are present on wide Python builds
This commit is contained in:
parent
27327e811b
commit
4eaee89487
@ -191,6 +191,9 @@ end:
|
||||
// Collator.find {{{
|
||||
static PyObject *
|
||||
icu_Collator_find(icu_Collator *self, PyObject *args, PyObject *kwargs) {
|
||||
#if PY_VERSION_HEX >= 0x03030000
|
||||
#error Not implemented for python >= 3.3
|
||||
#endif
|
||||
PyObject *a_ = NULL, *b_ = NULL;
|
||||
UChar *a = NULL, *b = NULL;
|
||||
int32_t asz = 0, bsz = 0, pos = -1, length = -1;
|
||||
@ -207,10 +210,16 @@ icu_Collator_find(icu_Collator *self, PyObject *args, PyObject *kwargs) {
|
||||
search = usearch_openFromCollator(a, asz, b, bsz, self->collator, NULL, &status);
|
||||
if (U_SUCCESS(status)) {
|
||||
pos = usearch_first(search, &status);
|
||||
if (pos != USEARCH_DONE)
|
||||
if (pos != USEARCH_DONE) {
|
||||
length = usearch_getMatchedLength(search);
|
||||
else
|
||||
pos = -1;
|
||||
#ifdef Py_UNICODE_WIDE
|
||||
// We have to return the number of Unicode characters since the string
|
||||
// could contain surrogate pairs which are represented as a single
|
||||
// character in python wide builds
|
||||
length = u_countChar32(b + pos, length);
|
||||
pos = u_countChar32(b, pos);
|
||||
#endif
|
||||
} else pos = -1;
|
||||
}
|
||||
end:
|
||||
if (search != NULL) usearch_close(search);
|
||||
|
@ -92,7 +92,8 @@ class TestICU(unittest.TestCase):
|
||||
def test_find(self):
|
||||
' Test searching for substrings '
|
||||
self.ae((1, 1), icu.find(b'a', b'1ab'))
|
||||
self.ae((1, 2), icu.find('\U0001f431', 'x\U0001f431x'))
|
||||
self.ae((1, 1 if sys.maxunicode >= 0x10ffff else 2), icu.find('\U0001f431', 'x\U0001f431x'))
|
||||
self.ae((1 if sys.maxunicode >= 0x10ffff else 2, 1), icu.find('y', '\U0001f431y'))
|
||||
self.ae((0, 4), icu.primary_find('pena', 'peña'))
|
||||
for k, v in {u'pèché': u'peche', u'flüße':u'Flusse', u'Štepánek':u'ŠtepaneK'}.iteritems():
|
||||
self.ae((1, len(k)), icu.primary_find(v, ' ' + k), 'Failed to find %s in %s' % (v, k))
|
||||
|
Loading…
x
Reference in New Issue
Block a user