Allow setting the strength of the ICU collator

This commit is contained in:
Kovid Goyal 2012-07-04 18:53:58 +05:30
parent 018617f810
commit f8dfd7fdda
2 changed files with 79 additions and 10 deletions

View File

@ -74,6 +74,23 @@ icu_Collator_display_name(icu_Collator *self, void *closure) {
// }}}
// Collator.strength {{{
static PyObject *
icu_Collator_get_strength(icu_Collator *self, void *closure) {
    // Getter for Collator.strength: query the wrapped ICU collator for its
    // current strength and hand it back to Python as a plain integer.
    const int current_strength = (int)ucol_getStrength(self->collator);

    return Py_BuildValue("i", current_strength);
}
static int
icu_Collator_set_strength(icu_Collator *self, PyObject *val, void *closure) {
    // Setter for Collator.strength.
    //
    // val is the new strength as a Python int (one of the UCOL_* strength
    // constants exported by this module). Returns 0 on success, or -1 with a
    // TypeError set when the value is missing or not an integer.

    // The interpreter invokes attribute setters with a NULL value for
    // `del obj.strength`; reject that before touching val, since the type
    // check below would otherwise dereference a NULL pointer.
    if (val == NULL) {
        PyErr_SetString(PyExc_TypeError, "Cannot delete the strength attribute.");
        return -1;
    }

    if (!PyInt_Check(val)) {
        PyErr_SetString(PyExc_TypeError, "Strength must be an integer.");
        return -1;
    }

    ucol_setStrength(self->collator, (int)PyInt_AS_LONG(val));
    return 0;
}
// }}}
// Collator.actual_locale {{{
static PyObject *
icu_Collator_actual_locale(icu_Collator *self, void *closure) {
@ -320,6 +337,12 @@ static PyGetSetDef icu_Collator_getsetters[] = {
(char *)"Display name of this collator in English. The name reflects the actual data source used.",
NULL},
{(char *)"strength",
(getter)icu_Collator_get_strength, (setter)icu_Collator_set_strength,
(char *)"The strength of this collator.",
NULL},
{NULL} /* Sentinel */
};
@ -542,6 +565,7 @@ static PyMethodDef icu_methods[] = {
{NULL} /* Sentinel */
};
// Add the integer constant x to module m, using the constant's own identifier
// as the attribute name (#x stringizes the macro argument). Relies on a
// variable named m being in scope at the point of expansion.
#define ADDUCONST(x) PyModule_AddIntConstant(m, #x, x)
PyMODINIT_FUNC
initicu(void)
@ -562,9 +586,23 @@ initicu(void)
PyModule_AddObject(m, "Collator", (PyObject *)&icu_CollatorType);
// uint8_t must be the same size as char
PyModule_AddIntConstant(m, "ok", (U_SUCCESS(status) && sizeof(uint8_t) == sizeof(char)) ? 1 : 0);
PyModule_AddIntConstant(m, "USET_SPAN_NOT_CONTAINED", USET_SPAN_NOT_CONTAINED);
PyModule_AddIntConstant(m, "USET_SPAN_CONTAINED", USET_SPAN_CONTAINED);
PyModule_AddIntConstant(m, "USET_SPAN_SIMPLE", USET_SPAN_SIMPLE);
ADDUCONST(USET_SPAN_NOT_CONTAINED);
ADDUCONST(USET_SPAN_CONTAINED);
ADDUCONST(USET_SPAN_SIMPLE);
ADDUCONST(UCOL_DEFAULT);
ADDUCONST(UCOL_PRIMARY);
ADDUCONST(UCOL_SECONDARY);
ADDUCONST(UCOL_TERTIARY);
ADDUCONST(UCOL_DEFAULT_STRENGTH);
ADDUCONST(UCOL_QUATERNARY);
ADDUCONST(UCOL_IDENTICAL);
ADDUCONST(UCOL_OFF);
ADDUCONST(UCOL_ON);
ADDUCONST(UCOL_SHIFTED);
ADDUCONST(UCOL_NON_IGNORABLE);
ADDUCONST(UCOL_LOWER_FIRST);
ADDUCONST(UCOL_UPPER_FIRST);
}
// }}}

View File

@ -65,11 +65,18 @@ def py_find(pattern, source):
return pos, len(pattern)
return -1, -1
def icu_find(collator, pattern, source):
def icu_find(collator, pattern, source, strength=None):
if strength is not None:
ostrength = collator.strength
collator.strength = strength
try:
return collator.find(pattern, source)
except TypeError:
return collator.find(unicode(pattern), unicode(source))
try:
return collator.find(pattern, source)
except TypeError:
return collator.find(unicode(pattern), unicode(source))
finally:
if strength is not None:
collator.strength = ostrength
def py_case_sensitive_sort_key(obj):
if not obj:
@ -81,10 +88,20 @@ def icu_case_sensitive_sort_key(collator, obj):
return _none2
return collator.sort_key(obj)
def icu_strcmp(collator, a, b):
return collator.strcmp(lower(a), lower(b))
def icu_strcmp(collator, a, b, strength=None):
if strength is not None:
ostrength = collator.strength
collator.strength = strength
try:
s = collator.strength
if s >= _icu.UCOL_TERTIARY:
a, b = lower(a), lower(b)
return collator.strcmp(a, b)
finally:
if strength is not None:
collator.strength = ostrength
def py_strcmp(a, b):
def py_strcmp(a, b, strength=None):
return cmp(a.lower(), b.lower())
def icu_case_sensitive_strcmp(collator, a, b):
@ -161,6 +178,20 @@ contractions = ((lambda : {}) if _icu_not_ok else (partial(icu_contractions,
span_contractions = (py_span_contractions if _icu_not_ok else
icu_span_contractions)
def primary_strcmp(a, b):
    '''Compare two strings, ignoring case and accents on letters.

    Uses the module collator at primary strength when ICU is usable;
    otherwise both strings are passed through ascii_text and compared
    with the pure-python strcmp.
    '''
    if not _icu_not_ok:
        return icu_strcmp(_collator, a, b, _icu.UCOL_PRIMARY)
    # ICU unavailable: imported lazily, only this fallback path needs it
    from calibre.utils.filenames import ascii_text
    plain_a = ascii_text(a)
    plain_b = ascii_text(b)
    return py_strcmp(plain_a, plain_b)
def primary_find(pat, src):
    '''Search for pat in src, ignoring case and accents on letters.

    Uses the module collator at primary strength when ICU is usable;
    otherwise both strings are passed through ascii_text and searched
    with the pure-python find.
    '''
    if not _icu_not_ok:
        return icu_find(_collator, pat, src, _icu.UCOL_PRIMARY)
    # ICU unavailable: imported lazily, only this fallback path needs it
    from calibre.utils.filenames import ascii_text
    plain_pat = ascii_text(pat)
    plain_src = ascii_text(src)
    # NOTE(review): the fallback result refers to the ascii_text() output;
    # confirm callers do not need offsets into the untransformed src.
    return py_find(plain_pat, plain_src)
################################################################################
def test(): # {{{