Fix memory corruption bug in implementation of ord_string()

This commit is contained in:
Kovid Goyal 2015-01-22 23:34:22 +05:30
parent 974592eb23
commit 37604406c5
2 changed files with 7 additions and 5 deletions

View File

@ -26,6 +26,8 @@
#error Not implemented for python >= 3.3
#endif
// Evaluates to the smaller of x and y. The whole expansion is parenthesized so
// that it composes correctly inside larger expressions (e.g. 2 * MIN(a, b)).
// NOTE: the selected argument is evaluated twice; do not pass expressions with
// side effects (e.g. MIN(i++, j)).
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
// Roundtripping will need to be implemented differently for python 3.3+ where strings are stored with variable widths
#ifndef NO_PYTHON_TO_ICU
@ -73,17 +75,17 @@ static UChar32* python_to_icu32(PyObject *obj, int32_t *osz, uint8_t do_check) {
goto end;
}
sz = PyUnicode_GET_DATA_SIZE(obj);
ans = (UChar32*) calloc(sz+1, 1); // Ensure null termination
sz = PyUnicode_GET_SIZE(obj); // number of UCS2 code-points in narrow build and UCS4 code-points in wide build
ans = (UChar32*) calloc(sz+1, sizeof(UChar32)); // Ensure null termination
if (ans == NULL) { PyErr_NoMemory(); goto end; }
#ifdef Py_UNICODE_WIDE
// wide build (UCS 4)
memcpy(ans, PyUnicode_AS_UNICODE(obj), sz);
memcpy(ans, PyUnicode_AS_DATA(obj), MIN((sizeof(UChar32)*(sz+1)),PyUnicode_GET_DATA_SIZE(obj)));
if (osz != NULL) *osz = (int32_t)PyUnicode_GET_SIZE(obj);
#else
// narrow build (UTF-16)
u_strToUTF32(ans, sz + 1, osz, (UChar*)PyUnicode_AS_UNICODE(obj), PyUnicode_GET_SIZE(obj), &status);
u_strToUTF32(ans, (int32_t)sz + 1, osz, (UChar*)PyUnicode_AS_UNICODE(obj), (int32_t)sz, &status);
if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); free(ans); ans = NULL; goto end; }
#endif
end:

View File

@ -137,7 +137,7 @@ class TestICU(unittest.TestCase):
for x, l in [('', 0), ('a', 1), ('\U0001f431', 2)]:
self.ae(icu._icu.utf16_length(x), l)
self.ae(icu._icu.chr(0x1f431), '\U0001f431')
self.ae(icu._icu.ord_string('abc'), tuple(map(ord, 'abc')))
self.ae(icu._icu.ord_string('abc'*100), tuple(map(ord, 'abc'*100)))
self.ae(icu._icu.ord_string('\U0001f431'), (0x1f431,))
def test_character_name(self):