mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Function to return the number of UTF-16 code units in a Python string on wide Python builds
This commit is contained in:
parent
fd17470464
commit
7ee75a8775
@ -978,10 +978,6 @@ icu_break_iterator_locales(PyObject *self, PyObject *args) {
|
|||||||
// string_length {{{
|
// string_length {{{
|
||||||
static PyObject *
|
static PyObject *
|
||||||
icu_string_length(PyObject *self, PyObject *args) {
|
icu_string_length(PyObject *self, PyObject *args) {
|
||||||
#if PY_VERSION_HEX >= 0x03030000
|
|
||||||
#error Not implemented for python >= 3.3
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int32_t sz = 0;
|
int32_t sz = 0;
|
||||||
UChar *icu = NULL;
|
UChar *icu = NULL;
|
||||||
PyObject *src = NULL;
|
PyObject *src = NULL;
|
||||||
@ -994,6 +990,20 @@ icu_string_length(PyObject *self, PyObject *args) {
|
|||||||
return Py_BuildValue("i", sz);
|
return Py_BuildValue("i", sz);
|
||||||
} // }}}
|
} // }}}
|
||||||
|
|
||||||
|
// utf16_length {{{
|
||||||
|
static PyObject *
|
||||||
|
icu_utf16_length(PyObject *self, PyObject *args) {
|
||||||
|
int32_t sz = 0;
|
||||||
|
UChar *icu = NULL;
|
||||||
|
PyObject *src = NULL;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTuple(args, "O", &src)) return NULL;
|
||||||
|
icu = python_to_icu(src, &sz, 1);
|
||||||
|
if (icu == NULL) return NULL;
|
||||||
|
free(icu);
|
||||||
|
return Py_BuildValue("i", sz);
|
||||||
|
} // }}}
|
||||||
|
|
||||||
// Module initialization {{{
|
// Module initialization {{{
|
||||||
static PyMethodDef icu_methods[] = {
|
static PyMethodDef icu_methods[] = {
|
||||||
{"change_case", icu_change_case, METH_VARARGS,
|
{"change_case", icu_change_case, METH_VARARGS,
|
||||||
@ -1037,7 +1047,11 @@ static PyMethodDef icu_methods[] = {
|
|||||||
},
|
},
|
||||||
|
|
||||||
{"string_length", icu_string_length, METH_VARARGS,
|
{"string_length", icu_string_length, METH_VARARGS,
|
||||||
"string_length(string) -> Return the length of a string (number of unicode code points in the string). Useful on anrrow python builds where len() returns an incorrect answer if the string contains surrogate pairs."
|
"string_length(string) -> Return the length of a string (number of unicode code points in the string). Useful on narrow python builds where len() returns an incorrect answer if the string contains surrogate pairs."
|
||||||
|
},
|
||||||
|
|
||||||
|
{"utf16_length", icu_utf16_length, METH_VARARGS,
|
||||||
|
"utf16_length(string) -> Return the length of a string (number of UTF-16 code points in the string). Useful on wide python builds where len() returns an incorrect answer if the string contains surrogate pairs."
|
||||||
},
|
},
|
||||||
|
|
||||||
{NULL} /* Sentinel */
|
{NULL} /* Sentinel */
|
||||||
|
@ -281,6 +281,12 @@ def partition_by_first_letter(items, reverse=False, key=lambda x:x):
|
|||||||
# Return the number of unicode codepoints in a string
|
# Return the number of unicode codepoints in a string
|
||||||
string_length = _icu.string_length if is_narrow_build else len
|
string_length = _icu.string_length if is_narrow_build else len
|
||||||
|
|
||||||
|
# Return the number of UTF-16 code units in a string
|
||||||
|
try:
|
||||||
|
utf16_length = len if is_narrow_build else _icu.utf16_length
|
||||||
|
except AttributeError:
|
||||||
|
utf16_length = len # People running from source
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -133,6 +133,8 @@ class TestICU(unittest.TestCase):
|
|||||||
self.ae(r, icu._icu.roundtrip(r))
|
self.ae(r, icu._icu.roundtrip(r))
|
||||||
for x, l in [('', 0), ('a', 1), ('\U0001f431', 1)]:
|
for x, l in [('', 0), ('a', 1), ('\U0001f431', 1)]:
|
||||||
self.ae(icu._icu.string_length(x), l)
|
self.ae(icu._icu.string_length(x), l)
|
||||||
|
for x, l in [('', 0), ('a', 1), ('\U0001f431', 2)]:
|
||||||
|
self.ae(icu._icu.utf16_length(x), l)
|
||||||
|
|
||||||
def test_character_name(self):
|
def test_character_name(self):
|
||||||
' Test character naming '
|
' Test character naming '
|
||||||
|
Loading…
x
Reference in New Issue
Block a user