From f8dfd7fdda19694ec8a64b196d45866333fbdfc9 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 4 Jul 2012 18:53:58 +0530
Subject: [PATCH] Allow setting the strength of the ICU collator

---
Review notes (commentary only: text between the three-dash separator above
and the first "diff --git" line below is not part of the change).

* Layout: this copy is written one diff record per line. Indentation inside
  the hunks (4 spaces assumed) and the position of blank context lines are
  reconstructed from the hunk line counts -- TODO confirm against commit
  f8dfd7fdda before applying.

* icu.c, Collator.strength: adds a "strength" entry to
  icu_Collator_getsetters. The getter returns ucol_getStrength() through
  Py_BuildValue("i", ...). The setter sets TypeError and returns -1 unless
  the value passes PyInt_Check, otherwise it forwards (int)PyInt_AS_LONG(val)
  to ucol_setStrength() and returns 0.
  NOTE(review): the setter calls PyInt_Check(val) without testing val for
  NULL first. Per the Python C API documentation a PyGetSetDef setter
  receives NULL when the attribute is being deleted ("del c.strength") --
  confirm and consider a guard.
  NOTE(review): the integer is not range-checked before it reaches
  ucol_setStrength() -- verify how ICU treats values outside the
  UCollationStrength range.

* icu.c, module init: introduces ADDUCONST(x), which expands to
  PyModule_AddIntConstant(m, #x, x); switches the three USET_SPAN_*
  constants over to it and exports thirteen UCOL_* constants.

* icu.py, icu_find / icu_strcmp: both gain an optional strength argument
  (default None). When it is not None, the collator's current strength is
  saved, overridden for the call and put back in a finally block.
  icu_strcmp now lower-cases a and b only when the collator's strength is
  >= _icu.UCOL_TERTIARY.
  NOTE(review): the override mutates the collator object that was passed in
  for the duration of the call; whether that object is shared between
  threads is not visible in this patch -- verify.

* icu.py, py_strcmp: accepts a strength argument but its body does not use
  it.

* icu.py, primary_strcmp / primary_find: new helpers. They call icu_strcmp /
  icu_find on _collator with _icu.UCOL_PRIMARY, or, when _icu_not_ok is
  true, fall back to py_strcmp / py_find on the results of ascii_text().
  NOTE(review): in the fallback, primary_find returns whatever py_find
  computes on the ascii_text() output; if ascii_text() can change the length
  of a string those offsets may not line up with the original src -- verify.

 src/calibre/utils/icu.c  | 44 ++++++++++++++++++++++++++++++++++++---
 src/calibre/utils/icu.py | 45 +++++++++++++++++++++++++++++++++-------
 2 files changed, 79 insertions(+), 10 deletions(-)

diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c
index b5805bd4e1..675d72fb87 100644
--- a/src/calibre/utils/icu.c
+++ b/src/calibre/utils/icu.c
@@ -74,6 +74,23 @@ icu_Collator_display_name(icu_Collator *self, void *closure) {
 
 // }}}
 
+// Collator.strength {{{
+static PyObject *
+icu_Collator_get_strength(icu_Collator *self, void *closure) {
+    return Py_BuildValue("i", ucol_getStrength(self->collator));
+}
+
+static int
+icu_Collator_set_strength(icu_Collator *self, PyObject *val, void *closure) {
+    if (!PyInt_Check(val)) {
+        PyErr_SetString(PyExc_TypeError, "Strength must be an integer.");
+        return -1;
+    }
+    ucol_setStrength(self->collator, (int)PyInt_AS_LONG(val));
+    return 0;
+}
+// }}}
+
 // Collator.actual_locale {{{
 static PyObject *
 icu_Collator_actual_locale(icu_Collator *self, void *closure) {
@@ -320,6 +337,12 @@ static PyGetSetDef icu_Collator_getsetters[] = {
      (char *)"Display name of this collator in English. The name reflects the actual data source used.",
      NULL},
 
+    {(char *)"strength",
+     (getter)icu_Collator_get_strength, (setter)icu_Collator_set_strength,
+     (char *)"The strength of this collator.",
+     NULL},
+
+
     {NULL} /* Sentinel */
 };
 
@@ -542,6 +565,7 @@ static PyMethodDef icu_methods[] = {
     {NULL} /* Sentinel */
 };
 
+#define ADDUCONST(x) PyModule_AddIntConstant(m, #x, x)
 
 PyMODINIT_FUNC
 initicu(void)
@@ -562,9 +586,23 @@ initicu(void)
     PyModule_AddObject(m, "Collator", (PyObject *)&icu_CollatorType);
     // uint8_t must be the same size as char
     PyModule_AddIntConstant(m, "ok", (U_SUCCESS(status) && sizeof(uint8_t) == sizeof(char)) ? 1 : 0);
-    PyModule_AddIntConstant(m, "USET_SPAN_NOT_CONTAINED", USET_SPAN_NOT_CONTAINED);
-    PyModule_AddIntConstant(m, "USET_SPAN_CONTAINED", USET_SPAN_CONTAINED);
-    PyModule_AddIntConstant(m, "USET_SPAN_SIMPLE", USET_SPAN_SIMPLE);
+
+    ADDUCONST(USET_SPAN_NOT_CONTAINED);
+    ADDUCONST(USET_SPAN_CONTAINED);
+    ADDUCONST(USET_SPAN_SIMPLE);
+    ADDUCONST(UCOL_DEFAULT);
+    ADDUCONST(UCOL_PRIMARY);
+    ADDUCONST(UCOL_SECONDARY);
+    ADDUCONST(UCOL_TERTIARY);
+    ADDUCONST(UCOL_DEFAULT_STRENGTH);
+    ADDUCONST(UCOL_QUATERNARY);
+    ADDUCONST(UCOL_IDENTICAL);
+    ADDUCONST(UCOL_OFF);
+    ADDUCONST(UCOL_ON);
+    ADDUCONST(UCOL_SHIFTED);
+    ADDUCONST(UCOL_NON_IGNORABLE);
+    ADDUCONST(UCOL_LOWER_FIRST);
+    ADDUCONST(UCOL_UPPER_FIRST);
 
 }
 // }}}
diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py
index ead820f066..b5e17042ae 100644
--- a/src/calibre/utils/icu.py
+++ b/src/calibre/utils/icu.py
@@ -65,11 +65,18 @@ def py_find(pattern, source):
         return pos, len(pattern)
     return -1, -1
 
-def icu_find(collator, pattern, source):
+def icu_find(collator, pattern, source, strength=None):
+    if strength is not None:
+        ostrength = collator.strength
+        collator.strength = strength
     try:
-        return collator.find(pattern, source)
-    except TypeError:
-        return collator.find(unicode(pattern), unicode(source))
+        try:
+            return collator.find(pattern, source)
+        except TypeError:
+            return collator.find(unicode(pattern), unicode(source))
+    finally:
+        if strength is not None:
+            collator.strength = ostrength
 
 def py_case_sensitive_sort_key(obj):
     if not obj:
@@ -81,10 +88,20 @@ def icu_case_sensitive_sort_key(collator, obj):
         return _none2
     return collator.sort_key(obj)
 
-def icu_strcmp(collator, a, b):
-    return collator.strcmp(lower(a), lower(b))
+def icu_strcmp(collator, a, b, strength=None):
+    if strength is not None:
+        ostrength = collator.strength
+        collator.strength = strength
+    try:
+        s = collator.strength
+        if s >= _icu.UCOL_TERTIARY:
+            a, b = lower(a), lower(b)
+        return collator.strcmp(a, b)
+    finally:
+        if strength is not None:
+            collator.strength = ostrength
 
-def py_strcmp(a, b):
+def py_strcmp(a, b, strength=None):
     return cmp(a.lower(), b.lower())
 
 def icu_case_sensitive_strcmp(collator, a, b):
@@ -161,6 +178,20 @@ contractions = ((lambda : {}) if _icu_not_ok else (partial(icu_contractions,
 span_contractions = (py_span_contractions if _icu_not_ok else
         icu_span_contractions)
 
+def primary_strcmp(a, b):
+    'strcmp that ignores case and accents on letters'
+    if _icu_not_ok:
+        from calibre.utils.filenames import ascii_text
+        return py_strcmp(ascii_text(a), ascii_text(b))
+    return icu_strcmp(_collator, a, b, _icu.UCOL_PRIMARY)
+
+def primary_find(pat, src):
+    'find that ignores case and accents on letters'
+    if _icu_not_ok:
+        from calibre.utils.filenames import ascii_text
+        return py_find(ascii_text(pat), ascii_text(src))
+    return icu_find(_collator, pat, src, _icu.UCOL_PRIMARY)
+
 ################################################################################
 
 def test(): # {{{