mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add an ICU implementation of strcmp
This commit is contained in:
parent
c1c62e5fd3
commit
a6ad9f2c96
@ -133,11 +133,47 @@ icu_Collator_sort_key(icu_Collator *self, PyObject *args, PyObject *kwargs) {
|
|||||||
return ans;
|
return ans;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Collator.strcmp {{{
|
||||||
|
static PyObject *
|
||||||
|
icu_Collator_strcmp(icu_Collator *self, PyObject *args, PyObject *kwargs) {
|
||||||
|
char *a_, *b_;
|
||||||
|
size_t asz, bsz;
|
||||||
|
UChar *a, *b;
|
||||||
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
|
UCollationResult res = UCOL_EQUAL;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTuple(args, "eses", "UTF-8", &a_, "UTF-8", &b_)) return NULL;
|
||||||
|
|
||||||
|
asz = strlen(a_); bsz = strlen(b_);
|
||||||
|
|
||||||
|
a = (UChar*)calloc(asz*4 + 1, sizeof(UChar));
|
||||||
|
b = (UChar*)calloc(bsz*4 + 1, sizeof(UChar));
|
||||||
|
|
||||||
|
|
||||||
|
if (a == NULL || b == NULL) return PyErr_NoMemory();
|
||||||
|
|
||||||
|
u_strFromUTF8(a, asz*4 + 1, NULL, a_, asz, &status);
|
||||||
|
u_strFromUTF8(b, bsz*4 + 1, NULL, b_, bsz, &status);
|
||||||
|
PyMem_Free(a_); PyMem_Free(b_);
|
||||||
|
|
||||||
|
if (U_SUCCESS(status))
|
||||||
|
res = ucol_strcoll(self->collator, a, -1, b, -1);
|
||||||
|
|
||||||
|
free(a); free(b);
|
||||||
|
|
||||||
|
return Py_BuildValue("i", res);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static PyMethodDef icu_Collator_methods[] = {
|
static PyMethodDef icu_Collator_methods[] = {
|
||||||
{"sort_key", (PyCFunction)icu_Collator_sort_key, METH_VARARGS,
|
{"sort_key", (PyCFunction)icu_Collator_sort_key, METH_VARARGS,
|
||||||
"sort_key(unicode object) -> Return a sort key for the given object as a bytestring. The idea is that these bytestring will sort using the builtin cmp function, just like the original unicode strings would sort in the current locale with ICU."
|
"sort_key(unicode object) -> Return a sort key for the given object as a bytestring. The idea is that these bytestring will sort using the builtin cmp function, just like the original unicode strings would sort in the current locale with ICU."
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{"strcmp", (PyCFunction)icu_Collator_strcmp, METH_VARARGS,
|
||||||
|
"strcmp(unicode object, unicode object) -> strcmp(a, b) <=> cmp(sorty_key(a), sort_key(b)), but faster."
|
||||||
|
},
|
||||||
|
|
||||||
{NULL} /* Sentinel */
|
{NULL} /* Sentinel */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -46,10 +46,35 @@ def icu_sort_key(collator, obj):
|
|||||||
return _none2
|
return _none2
|
||||||
return collator.sort_key(obj.lower())
|
return collator.sort_key(obj.lower())
|
||||||
|
|
||||||
|
def py_case_sensitive_sort_key(obj):
    '''
    Pure python case sensitive sort key: the object itself is the key.
    Falsy objects (None, empty strings) map to the module level ``_none``.
    '''
    return obj if obj else _none
|
||||||
|
|
||||||
|
def icu_case_sensitive_sort_key(collator, obj):
    '''
    Case sensitive sort key computed by ``collator.sort_key`` on the
    unmodified object. Falsy objects map to the module level ``_none2``
    without consulting the collator.
    '''
    return collator.sort_key(obj) if obj else _none2
|
||||||
|
|
||||||
|
def icu_strcmp(collator, a, b):
    '''
    Case insensitive comparison: both strings are lowercased and then
    handed to ``collator.strcmp``, whose result is returned unchanged.
    '''
    compare = collator.strcmp
    return compare(a.lower(), b.lower())
|
||||||
|
|
||||||
|
def py_strcmp(a, b):
    '''
    Pure python case insensitive comparison: the builtin ``cmp`` applied
    to the lowercased forms of both strings.
    '''
    lowered_a = a.lower()
    lowered_b = b.lower()
    return cmp(lowered_a, lowered_b)
|
||||||
|
|
||||||
|
def icu_case_sensitive_strcmp(collator, a, b):
    '''
    Case sensitive comparison: the strings are passed unmodified to
    ``collator.strcmp`` and its result is returned.
    '''
    compare = collator.strcmp
    return compare(a, b)
|
||||||
|
|
||||||
|
|
||||||
load_icu()
load_collator()
# True when either the ICU module or the collator could not be loaded, in
# which case the pure python fallbacks are used.
_icu_not_ok = _icu is None or _collator is None

# Every icu_* implementation takes the collator as its first argument, so
# each one has to be bound to _collator to give callers the same signature
# as the corresponding python fallback.
sort_key = py_sort_key if _icu_not_ok else partial(icu_sort_key, _collator)
strcmp = py_strcmp if _icu_not_ok else partial(icu_strcmp, _collator)
case_sensitive_sort_key = py_case_sensitive_sort_key if _icu_not_ok else \
        partial(icu_case_sensitive_sort_key, _collator)
case_sensitive_strcmp = cmp if _icu_not_ok else \
        partial(icu_case_sensitive_strcmp, _collator)
|
||||||
|
|
||||||
|
|
||||||
def test(): # {{{
|
def test(): # {{{
|
||||||
@ -137,6 +162,12 @@ pêché'''
|
|||||||
l = l.decode('utf-8').splitlines()
|
l = l.decode('utf-8').splitlines()
|
||||||
return [x.strip() for x in l if x.strip()]
|
return [x.strip() for x in l if x.strip()]
|
||||||
|
|
||||||
|
def test_strcmp(entries):
|
||||||
|
for x in entries:
|
||||||
|
for y in entries:
|
||||||
|
if strcmp(x, y) != cmp(sort_key(x), sort_key(y)):
|
||||||
|
print 'strcmp failed for %r, %r'%(x, y)
|
||||||
|
|
||||||
german = create(german)
|
german = create(german)
|
||||||
c = _icu.Collator('de')
|
c = _icu.Collator('de')
|
||||||
print 'Sorted german:: (%s)'%c.actual_locale
|
print 'Sorted german:: (%s)'%c.actual_locale
|
||||||
@ -156,5 +187,6 @@ pêché'''
|
|||||||
if fs != create(french_good):
|
if fs != create(french_good):
|
||||||
print 'French failed (note that French fails with icu < 4.6 i.e. on windows and OS X)'
|
print 'French failed (note that French fails with icu < 4.6 i.e. on windows and OS X)'
|
||||||
return
|
return
|
||||||
|
test_strcmp(german + french)
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user