mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add span_contractions to icu API
This commit is contained in:
parent
16c1e6a102
commit
b620da0b26
@ -255,6 +255,36 @@ icu_Collator_contractions(icu_Collator *self, PyObject *args, PyObject *kwargs)
|
|||||||
return Py_BuildValue("O", ans);
|
return Py_BuildValue("O", ans);
|
||||||
} // }}}
|
} // }}}
|
||||||
|
|
||||||
|
// Collator.span_contractions {{{
|
||||||
|
static PyObject *
|
||||||
|
icu_Collator_span_contractions(icu_Collator *self, PyObject *args, PyObject *kwargs) {
|
||||||
|
int span_type;
|
||||||
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
|
PyObject *str;
|
||||||
|
size_t slen = 0;
|
||||||
|
wchar_t *buf;
|
||||||
|
UChar *s;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTuple(args, "Ui", &str, &span_type)) return NULL;
|
||||||
|
|
||||||
|
if (self->contractions == NULL) {
|
||||||
|
self->contractions = uset_open(1, 0);
|
||||||
|
if (self->contractions == NULL) return PyErr_NoMemory();
|
||||||
|
ucol_getContractionsAndExpansions(self->collator, self->contractions, NULL, 0, &status);
|
||||||
|
}
|
||||||
|
status = U_ZERO_ERROR;
|
||||||
|
|
||||||
|
slen = PyUnicode_GetSize(str);
|
||||||
|
buf = (wchar_t*)calloc(slen*4 + 2, sizeof(wchar_t));
|
||||||
|
s = (UChar*)calloc(slen*4 + 2, sizeof(UChar));
|
||||||
|
if (buf == NULL || s == NULL) return PyErr_NoMemory();
|
||||||
|
slen = PyUnicode_AsWideChar((PyUnicodeObject*)str, buf, slen);
|
||||||
|
u_strFromWCS(s, slen*4+1, NULL, buf, slen, &status);
|
||||||
|
|
||||||
|
free(buf); free(s);
|
||||||
|
return Py_BuildValue("i", uset_span(self->contractions, s, slen, span_type));
|
||||||
|
} // }}}
|
||||||
|
|
||||||
static PyMethodDef icu_Collator_methods[] = {
|
static PyMethodDef icu_Collator_methods[] = {
|
||||||
{"sort_key", (PyCFunction)icu_Collator_sort_key, METH_VARARGS,
|
{"sort_key", (PyCFunction)icu_Collator_sort_key, METH_VARARGS,
|
||||||
"sort_key(unicode object) -> Return a sort key for the given object as a bytestring. The idea is that these bytestring will sort using the builtin cmp function, just like the original unicode strings would sort in the current locale with ICU."
|
"sort_key(unicode object) -> Return a sort key for the given object as a bytestring. The idea is that these bytestring will sort using the builtin cmp function, just like the original unicode strings would sort in the current locale with ICU."
|
||||||
@ -271,6 +301,11 @@ static PyMethodDef icu_Collator_methods[] = {
|
|||||||
{"contractions", (PyCFunction)icu_Collator_contractions, METH_VARARGS,
|
{"contractions", (PyCFunction)icu_Collator_contractions, METH_VARARGS,
|
||||||
"contractions() -> returns the contractions defined for this collator."
|
"contractions() -> returns the contractions defined for this collator."
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{"span_contractions", (PyCFunction)icu_Collator_span_contractions, METH_VARARGS,
|
||||||
|
"span_contractions(src, span_condition) -> returns the length of the initial substring according to span_condition in the set of contractions for this collator. Returns 0 if src does not fit the span_condition. The span_condition can be one of USET_SPAN_NOT_CONTAINED, USET_SPAN_CONTAINED, USET_SPAN_SIMPLE."
|
||||||
|
},
|
||||||
|
|
||||||
{NULL} /* Sentinel */
|
{NULL} /* Sentinel */
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -527,6 +562,9 @@ initicu(void)
|
|||||||
PyModule_AddObject(m, "Collator", (PyObject *)&icu_CollatorType);
|
PyModule_AddObject(m, "Collator", (PyObject *)&icu_CollatorType);
|
||||||
// uint8_t must be the same size as char
|
// uint8_t must be the same size as char
|
||||||
PyModule_AddIntConstant(m, "ok", (U_SUCCESS(status) && sizeof(uint8_t) == sizeof(char)) ? 1 : 0);
|
PyModule_AddIntConstant(m, "ok", (U_SUCCESS(status) && sizeof(uint8_t) == sizeof(char)) ? 1 : 0);
|
||||||
|
PyModule_AddIntConstant(m, "USET_SPAN_NOT_CONTAINED", USET_SPAN_NOT_CONTAINED);
|
||||||
|
PyModule_AddIntConstant(m, "USET_SPAN_CONTAINED", USET_SPAN_CONTAINED);
|
||||||
|
PyModule_AddIntConstant(m, "USET_SPAN_SIMPLE", USET_SPAN_SIMPLE);
|
||||||
|
|
||||||
}
|
}
|
||||||
// }}}
|
// }}}
|
||||||
|
@ -104,6 +104,20 @@ def icu_contractions(collator):
|
|||||||
_cmap[collator] = ans
|
_cmap[collator] = ans
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
def py_span_contractions(*args, **kwargs):
    '''
    Stand-in for span_contractions that ignores every argument and always
    reports a span of 0.
    '''
    return 0
|
||||||
|
|
||||||
|
def icu_span_contractions(src, span_type=None, collator=None):
    '''
    Return collator.span_contractions(src, span_type).

    :param src: The string to examine. If the collator rejects it with a
                TypeError, the call is retried once with unicode(src).
    :param span_type: Span condition passed to the collator. Defaults to
                      _icu.USET_SPAN_SIMPLE when None.
    :param collator: Collator to use. Defaults to the module level _collator
                     when None.
    '''
    active = _collator if collator is None else collator
    condition = _icu.USET_SPAN_SIMPLE if span_type is None else span_type
    try:
        return active.span_contractions(src, condition)
    except TypeError:
        # Retry with src coerced to a unicode object
        return active.span_contractions(unicode(src), condition)
|
||||||
|
|
||||||
load_icu()
|
load_icu()
|
||||||
load_collator()
|
load_collator()
|
||||||
_icu_not_ok = _icu is None or _collator is None
|
_icu_not_ok = _icu is None or _collator is None
|
||||||
@ -144,6 +158,9 @@ find = (py_find if _icu_not_ok else partial(icu_find, _collator))
|
|||||||
contractions = ((lambda : {}) if _icu_not_ok else (partial(icu_contractions,
|
contractions = ((lambda : {}) if _icu_not_ok else (partial(icu_contractions,
|
||||||
_collator)))
|
_collator)))
|
||||||
|
|
||||||
|
span_contractions = (py_span_contractions if _icu_not_ok else
|
||||||
|
icu_span_contractions)
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
|
|
||||||
def test(): # {{{
|
def test(): # {{{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user