Some minor tweaks to ensure correctness

Also fix compilation of the subsequence matcher on OS X
This commit is contained in:
Kovid Goyal 2014-03-07 09:56:07 +05:30
parent 90868e0262
commit 0dc884efc5
3 changed files with 10 additions and 7 deletions

View File

@ -7,6 +7,7 @@
#define NO_ICU_TO_PYTHON
#include "icu_calibre_utils.h"
#include <float.h>
#ifdef _MSC_VER
// inline does not work with the Visual Studio C compiler

View File

@ -512,10 +512,10 @@ pêché'''
# }}}
def test_roundtrip():
r = u'xxx\0\u2219\U0001f431xxx'
rp = _icu.roundtrip(r)
if rp != r:
raise ValueError(u'Roundtripping failed: %r != %r' % (r, rp))
for r in (u'xxx\0\u2219\U0001f431xxx', u'\0', u'', u'simple'):
rp = _icu.roundtrip(r)
if rp != r:
raise ValueError(u'Roundtripping failed: %r != %r' % (r, rp))
def test_normalize_performance():
import os

View File

@ -28,7 +28,9 @@
static UChar* python_to_icu(PyObject *obj, int32_t *osz, uint8_t do_check) {
UChar *ans = NULL;
Py_ssize_t sz = 0;
#ifdef Py_UNICODE_WIDE
UErrorCode status = U_ZERO_ERROR;
#endif
if (do_check && !PyUnicode_CheckExact(obj)) {
PyErr_SetString(PyExc_TypeError, "Not a unicode string");
@ -38,14 +40,14 @@ static UChar* python_to_icu(PyObject *obj, int32_t *osz, uint8_t do_check) {
#ifdef Py_UNICODE_WIDE
// wide build (UCS 4)
sz = PyUnicode_GET_SIZE(obj);
ans = (UChar*) calloc(2*sz+1, sizeof(UChar)); // There can be no more than 2 UChars per character
ans = (UChar*) calloc(2*(sz+1), sizeof(UChar)); // There can be no more than 2 UChars per character + ensure null termination
if (ans == NULL) { PyErr_NoMemory(); goto end; }
u_strFromUTF32(ans, (int32_t)2*sz+1, osz, (UChar32*)PyUnicode_AS_UNICODE(obj), (int32_t)sz, &status);
u_strFromUTF32(ans, (int32_t)(2*(sz+1)), osz, (UChar32*)PyUnicode_AS_UNICODE(obj), (int32_t)sz, &status);
if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); free(ans); ans = NULL; goto end; }
#else
// narrow build (UTF-16)
sz = PyUnicode_GET_DATA_SIZE(obj);
ans = (UChar*) calloc(sz, 1);
ans = (UChar*) calloc(sz+2, 1); // Ensure null termination
if (ans == NULL) { PyErr_NoMemory(); goto end; }
memcpy(ans, PyUnicode_AS_UNICODE(obj), sz);
if (osz != NULL) *osz = (int32_t)PyUnicode_GET_SIZE(obj);