From 4412d3f615e80a43009bde1ea08bcad66a9f7bb0 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 23 Apr 2022 11:46:19 +0530
Subject: [PATCH] wrap api for attribute control on ICU collators

---
Review notes (this section is ignored when the patch is applied):
 * Collator.get_attribute(key) / Collator.set_attribute(key, val) wrap
   ucol_getAttribute() / ucol_setAttribute(); an ICU failure status is
   raised as ValueError carrying u_errorName(status).
 * The assertion added to icu_test.py originally ended with a stray
   trailing comma, which turned the statement into a discarded 1-tuple
   expression. The comma is removed below; the post-image blob id on that
   file's index line therefore predates this edit.

 src/calibre/utils/icu.c       | 44 +++++++++++++++++++++++++++++++++++
 src/calibre/utils/icu.py      |  5 ++--
 src/calibre/utils/icu_test.py |  2 ++
 3 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c
index 8ea231e60b..4ba87f3651 100644
--- a/src/calibre/utils/icu.c
+++ b/src/calibre/utils/icu.c
@@ -377,6 +377,34 @@ icu_Collator_set_upper_first(icu_Collator *self, PyObject *val, void *closure) {
 }
 // }}}
 
+
+// Collator.get/set_attribute {{{
+static PyObject *
+icu_Collator_get_attribute(icu_Collator *self, PyObject *args) {
+    int k;
+    if (!PyArg_ParseTuple(args, "i", &k)) return NULL;
+    UErrorCode status = U_ZERO_ERROR;
+    long v = ucol_getAttribute(self->collator, k, &status);
+    if (U_FAILURE(status)) {
+        PyErr_SetString(PyExc_ValueError, u_errorName(status));
+        return NULL;
+    }
+    return PyLong_FromLong(v);
+}
+
+static PyObject *
+icu_Collator_set_attribute(icu_Collator *self, PyObject *args) {
+    int k, v;
+    if (!PyArg_ParseTuple(args, "ii", &k, &v)) return NULL;
+    UErrorCode status = U_ZERO_ERROR;
+    ucol_setAttribute(self->collator, k, v, &status);
+    if (U_FAILURE(status)) {
+        PyErr_SetString(PyExc_ValueError, u_errorName(status));
+        return NULL;
+    }
+    Py_RETURN_NONE;
+} // }}}
+
 static PyObject*
 icu_Collator_clone(icu_Collator *self, PyObject *args);
 
@@ -385,6 +413,14 @@ static PyMethodDef icu_Collator_methods[] = {
      "sort_key(unicode object) -> Return a sort key for the given object as a bytestring. The idea is that these bytestring will sort using the builtin cmp function, just like the original unicode strings would sort in the current locale with ICU."
     },
 
+    {"get_attribute", (PyCFunction)icu_Collator_get_attribute, METH_VARARGS,
+     "get_attribute(key) -> get the specified attribute on this collator."
+    },
+
+    {"set_attribute", (PyCFunction)icu_Collator_set_attribute, METH_VARARGS,
+     "set_attribute(key, val) -> set the specified attribute on this collator."
+    },
+
     {"strcmp", (PyCFunction)icu_Collator_strcmp, METH_VARARGS,
      "strcmp(unicode object, unicode object) -> strcmp(a, b) <=> cmp(sorty_key(a), sort_key(b)), but faster."
     },
@@ -1467,6 +1503,14 @@ exec_module(PyObject *mod) {
     ADDUCONST(UCOL_NON_IGNORABLE);
     ADDUCONST(UCOL_LOWER_FIRST);
     ADDUCONST(UCOL_UPPER_FIRST);
+    ADDUCONST(UCOL_FRENCH_COLLATION);
+    ADDUCONST(UCOL_ALTERNATE_HANDLING);
+    ADDUCONST(UCOL_CASE_FIRST);
+    ADDUCONST(UCOL_CASE_LEVEL);
+    ADDUCONST(UCOL_NORMALIZATION_MODE);
+    ADDUCONST(UCOL_DECOMPOSITION_MODE);
+    ADDUCONST(UCOL_STRENGTH);
+    ADDUCONST(UCOL_NUMERIC_COLLATION);
 
     ADDUCONST(NFD);
     ADDUCONST(NFKD);
diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py
index a198bfd2bb..9b859880a0 100644
--- a/src/calibre/utils/icu.py
+++ b/src/calibre/utils/icu.py
@@ -19,12 +19,11 @@ _none = ''
 _none2 = b''
 _cmap = {}
 
-icu_unicode_version = getattr(_icu, 'unicode_version', None)
+icu_unicode_version = _icu.unicode_version
 _nmodes = {m:getattr(_icu, m) for m in ('NFC', 'NFD', 'NFKC', 'NFKD')}
 
+
 # Ensure that the python internal filesystem and default encodings are not ASCII
-
-
 def is_ascii(name):
     try:
         return codecs.lookup(name).name == b'ascii'
diff --git a/src/calibre/utils/icu_test.py b/src/calibre/utils/icu_test.py
index 2e13a45b4f..35830fd8ff 100644
--- a/src/calibre/utils/icu_test.py
+++ b/src/calibre/utils/icu_test.py
@@ -112,6 +112,8 @@ class TestICU(unittest.TestCase):
         self.assertTrue(icu.contains('', ''))
         self.assertFalse(icu.contains('xxx', 'xx'))
         self.assertTrue(icu.primary_contains('pena', 'peña'))
+        x = icu.primary_collator()
+        self.ae(x.get_attribute(icu._icu.UCOL_STRENGTH), icu._icu.UCOL_PRIMARY)
 
     def test_collation_order(self):
         'Testing collation ordering'