mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add a function to count the number of unicode code points in a python string
This commit is contained in:
parent
17ac015c64
commit
e591101202
@ -975,6 +975,25 @@ icu_break_iterator_locales(PyObject *self, PyObject *args) {
|
||||
return ret;
|
||||
} // }}}
|
||||
|
||||
// string_length {{{
|
||||
static PyObject *
icu_string_length(PyObject *self, PyObject *args) {
#if PY_VERSION_HEX >= 0x03030000
#error Not implemented for python >= 3.3
#endif
    // Return the number of unicode code points in the python string passed
    // as the single positional argument. The string is converted to an ICU
    // UChar buffer and the code points are counted with u_countChar32().
    // Returns a python integer, or NULL if argument parsing or the
    // conversion fails.
    PyObject *input = NULL;
    UChar *buf = NULL;
    int32_t num_units = 0, num_code_points = 0;

    if (!PyArg_ParseTuple(args, "O", &input)) { return NULL; }

    // python_to_icu() stores the length of the converted buffer in num_units.
    // NOTE(review): the meaning of the trailing 1 is defined by
    // python_to_icu(), which is not visible here -- confirm against it.
    buf = python_to_icu(input, &num_units, 1);
    if (!buf) { return NULL; }

    num_code_points = u_countChar32(buf, num_units);

    // The converted buffer is owned by this function, release it before returning
    free(buf);

    return Py_BuildValue("i", num_code_points);
} // }}}
|
||||
|
||||
// Module initialization {{{
|
||||
static PyMethodDef icu_methods[] = {
|
||||
{"change_case", icu_change_case, METH_VARARGS,
|
||||
@ -1017,6 +1036,10 @@ static PyMethodDef icu_methods[] = {
|
||||
"available_locales_for_break_iterator() -> Return tuple of all available locales for the BreakIterator"
|
||||
},
|
||||
|
||||
{"string_length", icu_string_length, METH_VARARGS,
|
||||
"string_length(string) -> Return the length of a string (number of unicode code points in the string). Useful on narrow python builds where len() returns an incorrect answer if the string contains surrogate pairs."
|
||||
},
|
||||
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
|
@ -278,6 +278,12 @@ def partition_by_first_letter(items, reverse=False, key=lambda x:x):
|
||||
ans[last_c] = [item]
|
||||
return ans
|
||||
|
||||
# string_length(s) returns the number of unicode codepoints in a string.
# On narrow python builds the ICU implementation is used in place of len().
if is_narrow_build:
    # Somebody running from source with a binary that has not been updated
    # will not have _icu.string_length, in which case fall back to len
    string_length = getattr(_icu, 'string_length', len)
else:
    string_length = len
|
||||
|
||||
################################################################################
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -132,6 +132,8 @@ class TestICU(unittest.TestCase):
|
||||
' Test roundtripping '
|
||||
for r in (u'xxx\0\u2219\U0001f431xxx', u'\0', u'', u'simple'):
|
||||
self.ae(r, icu._icu.roundtrip(r))
|
||||
for x, l in [('', 0), ('a', 1), ('\U0001f431', 1)]:
|
||||
self.ae(icu._icu.string_length(x), l)
|
||||
|
||||
def test_character_name(self):
|
||||
' Test character naming '
|
||||
|
Loading…
x
Reference in New Issue
Block a user