diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c
index 675d72fb87..f4d820bff4 100644
--- a/src/calibre/utils/icu.c
+++ b/src/calibre/utils/icu.c
@@ -32,18 +32,21 @@ icu_Collator_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     icu_Collator *self;
     const char *loc;
     UErrorCode status = U_ZERO_ERROR;
+    UCollator *collator;
 
     if (!PyArg_ParseTuple(args, "s", &loc)) return NULL;
+    collator = ucol_open(loc, &status);
+    if (collator == NULL || U_FAILURE(status)) {
+        PyErr_SetString(PyExc_Exception, "Failed to create collator.");
+        return NULL;
+    }
 
     self = (icu_Collator *)type->tp_alloc(type, 0);
     if (self != NULL) {
-        self->collator = ucol_open(loc, &status);
-        if (self->collator == NULL || U_FAILURE(status)) {
-            PyErr_SetString(PyExc_Exception, "Failed to create collator.");
-            self->collator = NULL;
-            Py_DECREF(self);
-            return NULL;
-        }
+        self->collator = collator;
         self->contractions = NULL;
-    }
+    } else {
+        /* Allocation failed: close the collator so the ICU handle is not leaked. */
+        ucol_close(collator);
+    }
 
@@ -302,6 +305,10 @@ icu_Collator_span_contractions(icu_Collator *self, PyObject *args, PyObject *kwa
 
     return Py_BuildValue("i", uset_span(self->contractions, s, slen, span_type));
 } // }}}
+
+static PyObject*
+icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs);
+
 static PyMethodDef icu_Collator_methods[] = {
     {"sort_key", (PyCFunction)icu_Collator_sort_key, METH_VARARGS,
      "sort_key(unicode object) -> Return a sort key for the given object as a bytestring. The idea is that these bytestring will sort using the builtin cmp function, just like the original unicode strings would sort in the current locale with ICU."
@@ -323,6 +330,10 @@ static PyMethodDef icu_Collator_methods[] = {
      "span_contractions(src, span_condition) -> returns the length of the initial substring according to span_condition in the set of contractions for this collator. Returns 0 if src does not fit the span_condition. The span_condition can be one of USET_SPAN_NOT_CONTAINED, USET_SPAN_CONTAINED, USET_SPAN_SIMPLE."
     },
 
+    {"clone", (PyCFunction)icu_Collator_clone, METH_VARARGS,
+     "clone() -> returns a clone of this collator."
+    },
+
     {NULL} /* Sentinel */
 };
 
@@ -390,6 +401,36 @@ static PyTypeObject icu_CollatorType = { // {{{
 
 // }}
 
+// Collator.clone {{{
+/* Return a new Collator wrapping a clone of this collator's UCollator; the contractions set is not copied and starts out NULL. */
+static PyObject*
+icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs)
+{
+    UCollator *collator;
+    UErrorCode status = U_ZERO_ERROR;
+    int32_t bufsize = -1;
+    icu_Collator *clone;
+
+    collator = ucol_safeClone(self->collator, NULL, &bufsize, &status);
+
+    if (collator == NULL || U_FAILURE(status)) {
+        PyErr_SetString(PyExc_Exception, "Failed to create collator.");
+        return NULL;
+    }
+
+    clone = PyObject_New(icu_Collator, &icu_CollatorType);
+    if (clone == NULL) {
+        /* Do not leak the freshly cloned ICU collator if allocation fails. */
+        ucol_close(collator);
+        return PyErr_NoMemory();
+    }
+
+    clone->collator = collator;
+    clone->contractions = NULL;
+
+    return (PyObject*) clone;
+
+} // }}}
 
 // }}}
 
diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py
index b5e17042ae..d9ae3c602c 100644
--- a/src/calibre/utils/icu.py
+++ b/src/calibre/utils/icu.py
@@ -12,7 +12,7 @@ from functools import partial
 from calibre.constants import plugins
 from calibre.utils.config_base import tweaks
 
-_icu = _collator = None
+_icu = _collator = _primary_collator = None
 _locale = None
 
 _none = u''
@@ -48,6 +48,13 @@ def load_collator():
             _collator = icu.Collator(get_locale())
     return _collator
 
+def primary_collator():
+    'Return a lazily created clone of the default collator with its strength set to primary'
+    global _primary_collator
+    if _primary_collator is None:
+        _primary_collator = _collator.clone()
+        _primary_collator.strength = _icu.UCOL_PRIMARY
+    return _primary_collator
 
 def py_sort_key(obj):
     if not obj:
@@ -65,18 +72,11 @@ def py_find(pattern, source):
         return pos, len(pattern)
     return -1, -1
 
-def icu_find(collator, pattern, source, strength=None):
-    if strength is not None:
-        ostrength = collator.strength
-        collator.strength = strength
+def icu_find(collator, pattern, source):
     try:
-        try:
-            return collator.find(pattern, source)
-        except TypeError:
-            return collator.find(unicode(pattern), unicode(source))
-    finally:
-        if strength is not None:
-            collator.strength = ostrength
+        return collator.find(pattern, source)
+    except TypeError:
+        return collator.find(unicode(pattern), unicode(source))
 
 def py_case_sensitive_sort_key(obj):
     if not obj:
@@ -88,18 +88,8 @@ def icu_case_sensitive_sort_key(collator, obj):
         return _none2
     return collator.sort_key(obj)
 
-def icu_strcmp(collator, a, b, strength=None):
-    if strength is not None:
-        ostrength = collator.strength
-        collator.strength = strength
-    try:
-        s = collator.strength
-        if s >= _icu.UCOL_TERTIARY:
-            a, b = lower(a), lower(b)
-        return collator.strcmp(a, b)
-    finally:
-        if strength is not None:
-            collator.strength = ostrength
+def icu_strcmp(collator, a, b):
+    return collator.strcmp(lower(a), lower(b))
 
 def py_strcmp(a, b, strength=None):
     return cmp(a.lower(), b.lower())
@@ -183,14 +173,14 @@ def primary_strcmp(a, b):
     if _icu_not_ok:
         from calibre.utils.filenames import ascii_text
         return py_strcmp(ascii_text(a), ascii_text(b))
-    return icu_strcmp(_collator, a, b, _icu.UCOL_PRIMARY)
+    return primary_collator().strcmp(a, b)
 
 def primary_find(pat, src):
     'find that ignores case and accents on letters'
     if _icu_not_ok:
         from calibre.utils.filenames import ascii_text
         return py_find(ascii_text(pat), ascii_text(src))
-    return icu_find(_collator, pat, src, _icu.UCOL_PRIMARY)
+    return icu_find(primary_collator(), pat, src)
 
 ################################################################################
 
@@ -315,6 +305,22 @@ pêché'''
         print 'Capitalize:', x, '->', 'py:', x.capitalize().encode('utf-8'), 'icu:', capitalize(x).encode('utf-8')
         print
 
+    print '\nTesting primary collation'
+    for k, v in {u'pèché': u'peche', u'flüße':u'flusse'}.iteritems():
+        if primary_strcmp(k, v) != 0:
+            print 'primary_strcmp() failed with %s != %s'%(k, v)
+        if primary_find(v, u' '+k)[0] != 1:
+            print 'primary_find() failed with %s not in %s'%(v, k)
+
+    global _primary_collator
+    _primary_collator = _icu.Collator('es')
+    # A new Collator is not at primary strength; without this the check below passes in any locale
+    _primary_collator.strength = _icu.UCOL_PRIMARY
+    if primary_strcmp(u'peña', u'pena') == 0:
+        print 'Primary collation in Spanish locale failed'
+    # Drop the Spanish collator so primary_collator() rebuilds the default one on next use
+    _primary_collator = None
+
 # }}}
 
 if __name__ == '__main__':