diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c index 77ebb53c23..0cf53abd0a 100644 --- a/src/calibre/utils/icu.c +++ b/src/calibre/utils/icu.c @@ -978,10 +978,6 @@ icu_break_iterator_locales(PyObject *self, PyObject *args) { // string_length {{{ static PyObject * icu_string_length(PyObject *self, PyObject *args) { -#if PY_VERSION_HEX >= 0x03030000 -#error Not implemented for python >= 3.3 -#endif - int32_t sz = 0; UChar *icu = NULL; PyObject *src = NULL; @@ -994,6 +990,20 @@ icu_string_length(PyObject *self, PyObject *args) { return Py_BuildValue("i", sz); } // }}} +// utf16_length {{{ +static PyObject * +icu_utf16_length(PyObject *self, PyObject *args) { + int32_t sz = 0; + UChar *icu = NULL; + PyObject *src = NULL; + + if (!PyArg_ParseTuple(args, "O", &src)) return NULL; + icu = python_to_icu(src, &sz, 1); + if (icu == NULL) return NULL; + free(icu); + return Py_BuildValue("i", sz); +} // }}} + // Module initialization {{{ static PyMethodDef icu_methods[] = { {"change_case", icu_change_case, METH_VARARGS, @@ -1037,7 +1047,11 @@ static PyMethodDef icu_methods[] = { }, {"string_length", icu_string_length, METH_VARARGS, - "string_length(string) -> Return the length of a string (number of unicode code points in the string). Useful on anrrow python builds where len() returns an incorrect answer if the string contains surrogate pairs." + "string_length(string) -> Return the length of a string (number of unicode code points in the string). Useful on narrow python builds where len() returns an incorrect answer if the string contains surrogate pairs." + }, + + {"utf16_length", icu_utf16_length, METH_VARARGS, + "utf16_length(string) -> Return the length of a string (number of UTF-16 code points in the string). Useful on wide python builds where len() returns an incorrect answer if the string contains surrogate pairs." }, {NULL} /* Sentinel */ diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index c930860431..41f9abd0c2 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -281,6 +281,12 @@ def partition_by_first_letter(items, reverse=False, key=lambda x:x): # Return the number of unicode codepoints in a string string_length = _icu.string_length if is_narrow_build else len +# Return the number of UTF-16 codepoints in a string +try: + utf16_length = len if is_narrow_build else _icu.utf16_length +except AttributeError: + utf16_length = len # People running from source + ################################################################################ if __name__ == '__main__': diff --git a/src/calibre/utils/icu_test.py b/src/calibre/utils/icu_test.py index 9069d0acce..83cce844df 100644 --- a/src/calibre/utils/icu_test.py +++ b/src/calibre/utils/icu_test.py @@ -133,6 +133,8 @@ class TestICU(unittest.TestCase): self.ae(r, icu._icu.roundtrip(r)) for x, l in [('', 0), ('a', 1), ('\U0001f431', 1)]: self.ae(icu._icu.string_length(x), l) + for x, l in [('', 0), ('a', 1), ('\U0001f431', 2)]: + self.ae(icu._icu.utf16_length(x), l) def test_character_name(self): ' Test character naming '