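Some minor tweaks to ensure correctness: guarantee null termination of the buffer allocated by python_to_icu and test roundtripping of more strings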

Also fix compilation of the subsequence matcher on OS X
2025-07-09 03:04:10 -04:00 · 2014-03-07 09:56:07 +05:30 · 2014-03-07 09:56:07 +05:30 · 0dc884efc5
commit 0dc884efc5
parent 90868e0262
3 changed files with 10 additions and 7 deletions
--- a/src/calibre/gui2/tweak_book/matcher.c
+++ b/src/calibre/gui2/tweak_book/matcher.c
@ -7,6 +7,7 @@

 #define NO_ICU_TO_PYTHON
 #include "icu_calibre_utils.h"
+#include <float.h>

 #ifdef _MSC_VER
 // inline does not work with the visual studio C compiler
--- a/src/calibre/utils/icu.py
+++ b/src/calibre/utils/icu.py
@ -512,10 +512,10 @@ pêché'''
 # }}}

 def test_roundtrip():
-    r = u'xxx\0\u2219\U0001f431xxx'
-    rp = _icu.roundtrip(r)
-    if rp != r:
-        raise ValueError(u'Roundtripping failed: %r != %r' % (r, rp))
+    for r in (u'xxx\0\u2219\U0001f431xxx', u'\0', u'', u'simple'):
+        rp = _icu.roundtrip(r)
+        if rp != r:
+            raise ValueError(u'Roundtripping failed: %r != %r' % (r, rp))

 def test_normalize_performance():
    import os
--- a/src/calibre/utils/icu_calibre_utils.h
+++ b/src/calibre/utils/icu_calibre_utils.h
@ -28,7 +28,9 @@
 static UChar* python_to_icu(PyObject *obj, int32_t *osz, uint8_t do_check) {
    UChar *ans = NULL;
    Py_ssize_t sz = 0;
+#ifdef Py_UNICODE_WIDE
    UErrorCode status = U_ZERO_ERROR;
+#endif

    if (do_check && !PyUnicode_CheckExact(obj)) {
        PyErr_SetString(PyExc_TypeError, "Not a unicode string");
@ -38,14 +40,14 @@ static UChar* python_to_icu(PyObject *obj, int32_t *osz, uint8_t do_check) {
 #ifdef Py_UNICODE_WIDE
 // wide build (UCS 4)
    sz = PyUnicode_GET_SIZE(obj);
-    ans = (UChar*) calloc(2*sz+1, sizeof(UChar)); // There can be no more than 2 UChars per character
+    ans = (UChar*) calloc(2*(sz+1), sizeof(UChar)); // There can be no more than 2 UChars per character + ensure null termination
    if (ans == NULL) { PyErr_NoMemory(); goto end; }
-    u_strFromUTF32(ans, (int32_t)2*sz+1, osz, (UChar32*)PyUnicode_AS_UNICODE(obj), (int32_t)sz, &status);
+    u_strFromUTF32(ans, (int32_t)(2*(sz+1)), osz, (UChar32*)PyUnicode_AS_UNICODE(obj), (int32_t)sz, &status);
    if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); free(ans); ans = NULL; goto end; }
 #else
 // narrow build (UTF-16)
    sz = PyUnicode_GET_DATA_SIZE(obj);
-    ans = (UChar*) calloc(sz, 1);
+    ans = (UChar*) calloc(sz+2, 1);  // Ensure null termination
    if (ans == NULL) { PyErr_NoMemory(); goto end; }
    memcpy(ans, PyUnicode_AS_UNICODE(obj), sz);
    if (osz != NULL) *osz = (int32_t)PyUnicode_GET_SIZE(obj);