Fix memory corruption bug in implementation of ord_string()

2025-08-30 23:00:21 -04:00 · 2015-01-22 23:34:22 +05:30 · 2015-01-22 23:34:22 +05:30 · 37604406c5
commit 37604406c5
parent 974592eb23
2 changed files with 7 additions and 5 deletions
--- a/src/calibre/utils/icu_calibre_utils.h
+++ b/src/calibre/utils/icu_calibre_utils.h
@ -26,6 +26,8 @@
 #error Not implemented for python >= 3.3
 #endif

+#define MIN(x, y) (((x)<(y)) ? (x) : (y))  // smaller of x and y; whole expansion parenthesized so it stays correct inside larger expressions (each argument may be evaluated twice)
+
 // Roundtripping will need to be implemented differently for python 3.3+ where strings are stored with variable widths

 #ifndef NO_PYTHON_TO_ICU
@ -73,17 +75,17 @@ static UChar32* python_to_icu32(PyObject *obj, int32_t *osz, uint8_t do_check) {
        goto end;
    }

-    sz = PyUnicode_GET_DATA_SIZE(obj);
-    ans = (UChar32*) calloc(sz+1, 1);  // Ensure null termination
+    sz = PyUnicode_GET_SIZE(obj);  // number of UCS2 code-points in narrow build and UCS4 code-points in wide build
+    ans = (UChar32*) calloc(sz+1, sizeof(UChar32));  // Ensure null termination
    if (ans == NULL) { PyErr_NoMemory(); goto end; }

 #ifdef Py_UNICODE_WIDE
 // wide build (UCS 4)
-    memcpy(ans, PyUnicode_AS_UNICODE(obj), sz);
+    memcpy(ans, PyUnicode_AS_DATA(obj), MIN((sizeof(UChar32)*(sz+1)),PyUnicode_GET_DATA_SIZE(obj)));
    if (osz != NULL) *osz = (int32_t)PyUnicode_GET_SIZE(obj);
 #else
 // narrow build (UTF-16)
-    u_strToUTF32(ans, sz + 1, osz, (UChar*)PyUnicode_AS_UNICODE(obj), PyUnicode_GET_SIZE(obj), &status);
+    u_strToUTF32(ans, (int32_t)sz + 1, osz, (UChar*)PyUnicode_AS_UNICODE(obj), (int32_t)sz, &status);
    if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); free(ans); ans = NULL; goto end; }
 #endif
 end:
--- a/src/calibre/utils/icu_test.py
+++ b/src/calibre/utils/icu_test.py
@ -137,7 +137,7 @@ class TestICU(unittest.TestCase):
        for x, l in [('', 0), ('a', 1), ('\U0001f431', 2)]:
            self.ae(icu._icu.utf16_length(x), l)
        self.ae(icu._icu.chr(0x1f431), '\U0001f431')
-        self.ae(icu._icu.ord_string('abc'), tuple(map(ord, 'abc')))
+        self.ae(icu._icu.ord_string('abc'*100), tuple(map(ord, 'abc'*100)))
        self.ae(icu._icu.ord_string('\U0001f431'), (0x1f431,))

    def test_character_name(self):