Add span_contractions to icu API

This commit is contained in:
Kovid Goyal 2012-07-03 23:09:47 +05:30
parent 16c1e6a102
commit b620da0b26
2 changed files with 55 additions and 0 deletions

View File

@ -255,6 +255,36 @@ icu_Collator_contractions(icu_Collator *self, PyObject *args, PyObject *kwargs)
return Py_BuildValue("O", ans);
} // }}}
// Collator.span_contractions {{{
/*
 * span_contractions(src, span_condition) -> int
 *
 * Parses a unicode object and an integer span condition from args, converts
 * the text to ICU's UChar representation and returns, as a Python int, the
 * result of uset_span() run against this collator's set of contractions.
 *
 * The set of contractions is built on first use and stored on self so that
 * later calls reuse it.
 *
 * Returns NULL with a Python exception set when argument parsing fails, when
 * memory cannot be allocated, or when the text cannot be converted.
 */
static PyObject *
icu_Collator_span_contractions(icu_Collator *self, PyObject *args, PyObject *kwargs) {
    int span_type;
    int32_t ulen = 0, span = 0;
    UErrorCode status = U_ZERO_ERROR;
    PyObject *str, *ans = NULL;
    Py_ssize_t slen = 0;
    size_t cap = 0;
    wchar_t *buf = NULL;
    UChar *s = NULL;

    if (!PyArg_ParseTuple(args, "Ui", &str, &span_type)) return NULL;

    if (self->contractions == NULL) {
        self->contractions = uset_open(1, 0);
        if (self->contractions == NULL) return PyErr_NoMemory();
        ucol_getContractionsAndExpansions(self->collator, self->contractions, NULL, 0, &status);
    }
    status = U_ZERO_ERROR;

    slen = PyUnicode_GetSize(str);
    if (slen < 0) return NULL;

    // Same generous sizing for both buffers: room for expansion during the
    // conversion plus a terminator.
    cap = (size_t)slen*4 + 2;
    buf = calloc(cap, sizeof(wchar_t));
    s = calloc(cap, sizeof(UChar));
    if (buf == NULL || s == NULL) { PyErr_NoMemory(); goto end; }

    // A negative return signals an error; keep it in a signed type so it
    // cannot be mistaken for a huge length.
    slen = PyUnicode_AsWideChar((PyUnicodeObject*)str, buf, slen);
    if (slen < 0) goto end;

    // Capture the real UTF-16 length: it can differ from the wchar_t count.
    u_strFromWCS(s, (int32_t)(cap - 1), &ulen, buf, (int32_t)slen, &status);
    if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); goto end; }

    // The span must be computed while s is still alive; the buffers are
    // released only after the result has been obtained.
    span = uset_span(self->contractions, s, ulen, span_type);
    ans = Py_BuildValue("i", span);

end:
    // free(NULL) is a no-op, so this is safe on every path.
    free(buf); free(s);
    return ans;
} // }}}
static PyMethodDef icu_Collator_methods[] = {
{"sort_key", (PyCFunction)icu_Collator_sort_key, METH_VARARGS,
"sort_key(unicode object) -> Return a sort key for the given object as a bytestring. The idea is that these bytestring will sort using the builtin cmp function, just like the original unicode strings would sort in the current locale with ICU."
@ -271,6 +301,11 @@ static PyMethodDef icu_Collator_methods[] = {
{"contractions", (PyCFunction)icu_Collator_contractions, METH_VARARGS,
"contractions() -> returns the contractions defined for this collator."
},
{"span_contractions", (PyCFunction)icu_Collator_span_contractions, METH_VARARGS,
"span_contractions(src, span_condition) -> returns the length of the initial substring according to span_condition in the set of contractions for this collator. Returns 0 if src does not fit the span_condition. The span_condition can be one of USET_SPAN_NOT_CONTAINED, USET_SPAN_CONTAINED, USET_SPAN_SIMPLE."
},
{NULL} /* Sentinel */
};
@ -527,6 +562,9 @@ initicu(void)
PyModule_AddObject(m, "Collator", (PyObject *)&icu_CollatorType);
// uint8_t must be the same size as char
PyModule_AddIntConstant(m, "ok", (U_SUCCESS(status) && sizeof(uint8_t) == sizeof(char)) ? 1 : 0);
PyModule_AddIntConstant(m, "USET_SPAN_NOT_CONTAINED", USET_SPAN_NOT_CONTAINED);
PyModule_AddIntConstant(m, "USET_SPAN_CONTAINED", USET_SPAN_CONTAINED);
PyModule_AddIntConstant(m, "USET_SPAN_SIMPLE", USET_SPAN_SIMPLE);
}
// }}}

View File

@ -104,6 +104,20 @@ def icu_contractions(collator):
_cmap[collator] = ans
return ans
def py_span_contractions(*args, **kwargs):
    '''
    Fallback implementation of span_contractions.

    Accepts any positional or keyword arguments, ignores them and always
    reports a span length of 0.
    '''
    return 0
def icu_span_contractions(src, span_type=None, collator=None):
    '''
    Return ``collator.span_contractions(src, span_type)``.

    :param src: The text to examine. If the collator rejects it with a
        TypeError, the call is retried once with ``unicode(src)``.
    :param span_type: The span condition to use. When None, the value of
        ``_icu.USET_SPAN_SIMPLE`` is used.
    :param collator: The collator to query. When None, the module level
        ``_collator`` is used.
    '''
    active = _collator if collator is None else collator
    condition = _icu.USET_SPAN_SIMPLE if span_type is None else span_type
    try:
        result = active.span_contractions(src, condition)
    except TypeError:
        # Second attempt with the input coerced to a unicode object
        result = active.span_contractions(unicode(src), condition)
    return result
# Run the module's loaders at import time. NOTE(review): they are defined
# outside this view; presumably they populate _icu and _collator - confirm.
load_icu()
load_collator()
# True when either the _icu module or the default collator is unavailable;
# used below to choose between the ICU backed functions and their fallbacks.
_icu_not_ok = _icu is None or _collator is None
@ -144,6 +158,9 @@ find = (py_find if _icu_not_ok else partial(icu_find, _collator))
# Public contractions(): a callable returning an empty dict when ICU is not
# usable, otherwise icu_contractions bound to the default collator.
contractions = ((lambda : {}) if _icu_not_ok else (partial(icu_contractions,
_collator)))
# Public span_contractions(): the fallback that always returns 0 when ICU is
# not usable, otherwise the ICU backed implementation.
span_contractions = (py_span_contractions if _icu_not_ok else
icu_span_contractions)
################################################################################
def test(): # {{{