Add collation_order() ICU function

This commit is contained in:
Kovid Goyal 2012-07-28 21:00:48 +05:30
parent 8b62ddf460
commit 7efa77639a
2 changed files with 55 additions and 0 deletions

View File

@ -4,6 +4,7 @@
#include <unicode/utypes.h>
#include <unicode/uclean.h>
#include <unicode/ucol.h>
#include <unicode/ucoleitr.h>
#include <unicode/ustring.h>
#include <unicode/usearch.h>
@ -310,6 +311,41 @@ icu_Collator_startswith(icu_Collator *self, PyObject *args, PyObject *kwargs) {
Py_RETURN_FALSE;
} // }}}
// Collator.collation_order {{{
/*
 * collation_order(string) -> (order, length)
 *
 * Converts the unicode argument to a UChar buffer (via a wchar_t copy),
 * opens a collation element iterator over it with self->collator and reads
 * the first collation element.
 *
 * Returns a 2-tuple of ints:
 *   order  - the value ucol_next() produced for the first element
 *   length - the iterator offset reported by ucol_getOffset() afterwards
 * If the wide-char conversion fails or the iterator cannot be opened, the
 * initial values (0, -1) are returned.
 *
 * Returns NULL with a Python exception set when the argument is not a
 * unicode object or when the scratch buffers cannot be allocated.
 */
static PyObject *
icu_Collator_collation_order(icu_Collator *self, PyObject *args, PyObject *kwargs) {
    PyObject *a_;
    size_t asz;
    int32_t actual_a;
    UChar *a;
    wchar_t *aw;
    UErrorCode status = U_ZERO_ERROR;
    UCollationElements *iter = NULL;
    int order = 0, len = -1;

    if (!PyArg_ParseTuple(args, "U", &a_)) return NULL;
    asz = PyUnicode_GetSize(a_);

    // Scratch buffers are sized generously (4 units per input character,
    // plus room for a terminator) and zero-filled by calloc.
    a = (UChar*)calloc(asz*4 + 2, sizeof(UChar));
    aw = (wchar_t*)calloc(asz*4 + 2, sizeof(wchar_t));
    if (a == NULL || aw == NULL) {
        // Only one of the two allocations may have failed: release the
        // other one before reporting the error. free(NULL) is a no-op.
        free(a); free(aw);
        return PyErr_NoMemory();
    }

    actual_a = (int32_t)PyUnicode_AsWideChar((PyUnicodeObject*)a_, aw, asz*4+1);
    if (actual_a > -1) {
        // u_strFromWCS overwrites actual_a with the length in UChars.
        u_strFromWCS(a, asz*4 + 1, &actual_a, aw, -1, &status);
        iter = ucol_openElements(self->collator, a, actual_a, &status);
        if (iter != NULL && U_SUCCESS(status)) {
            order = ucol_next(iter, &status);
            len = ucol_getOffset(iter);
            ucol_closeElements(iter); iter = NULL;
        }
    }

    free(a); free(aw);
    return Py_BuildValue("ii", order, len);
} // }}}
// Forward declaration of icu_Collator_clone. It is static, so its definition
// must appear elsewhere in this translation unit (not visible in this excerpt).
static PyObject*
icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs);
@ -338,6 +374,10 @@ static PyMethodDef icu_Collator_methods[] = {
"startswith(a, b) -> returns True iff a startswith b, following the current collation rules."
},
{"collation_order", (PyCFunction)icu_Collator_collation_order, METH_VARARGS,
"collation_order(string) -> returns (order, length) where order is an integer that gives the position of string in a list. length gives the number of characters used for order."
},
{NULL} /* Sentinel */
};

View File

@ -75,6 +75,7 @@ def icu_sort_key(collator, obj):
except AttributeError:
return secondary_collator().sort_key(obj)
def py_find(pattern, source):
pos = source.find(pattern)
if pos > -1:
@ -126,6 +127,12 @@ def icu_contractions(collator):
_cmap[collator] = ans
return ans
def icu_collation_order(collator, a):
    '''
    Return whatever ``collator.collation_order`` returns for ``a``.

    If the first call raises TypeError, ``a`` is converted with ``unicode()``
    and the call is attempted once more. Any other exception propagates.
    '''
    try:
        result = collator.collation_order(a)
    except TypeError:
        # The collator rejected the argument type: retry with a unicode copy.
        result = collator.collation_order(unicode(a))
    return result
# Run the two loaders, then record whether ICU is unusable: the flag is True
# when either _icu or _collator is None.
# NOTE(review): presumably load_icu()/load_collator() are what populate _icu
# and _collator -- confirm against their definitions.
load_icu()
load_collator()
_icu_not_ok = _icu is None or _collator is None
@ -205,6 +212,14 @@ def primary_startswith(a, b):
except AttributeError:
return icu_startswith(primary_collator(), a, b)
def collation_order(a):
    '''
    Return a 2-tuple for ``a`` as produced by the secondary collator's
    ``collation_order``.

    When ``_icu_not_ok`` is set, a fallback is used instead:
    ``(ord(a[0]), 1)`` for a non-empty ``a`` and ``(0, 0)`` for an empty one.
    '''
    if _icu_not_ok:
        if not a:
            return (0, 0)
        return (ord(a[0]), 1)
    try:
        return icu_collation_order(_secondary_collator, a)
    except AttributeError:
        # Fall back to calling secondary_collator() to obtain the collator.
        return icu_collation_order(secondary_collator(), a)
################################################################################
def test(): # {{{