mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Use a cloned collator for primary collation
This commit is contained in:
parent
f8dfd7fdda
commit
bb606bc3ab
@ -32,18 +32,18 @@ icu_Collator_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||
icu_Collator *self;
|
||||
const char *loc;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UCollator *collator;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "s", &loc)) return NULL;
|
||||
collator = ucol_open(loc, &status);
|
||||
if (collator == NULL || U_FAILURE(status)) {
|
||||
PyErr_SetString(PyExc_Exception, "Failed to create collator.");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
self = (icu_Collator *)type->tp_alloc(type, 0);
|
||||
if (self != NULL) {
|
||||
self->collator = ucol_open(loc, &status);
|
||||
if (self->collator == NULL || U_FAILURE(status)) {
|
||||
PyErr_SetString(PyExc_Exception, "Failed to create collator.");
|
||||
self->collator = NULL;
|
||||
Py_DECREF(self);
|
||||
return NULL;
|
||||
}
|
||||
self->collator = collator;
|
||||
self->contractions = NULL;
|
||||
}
|
||||
|
||||
@ -302,6 +302,10 @@ icu_Collator_span_contractions(icu_Collator *self, PyObject *args, PyObject *kwa
|
||||
return Py_BuildValue("i", uset_span(self->contractions, s, slen, span_type));
|
||||
} // }}}
|
||||
|
||||
|
||||
static PyObject*
|
||||
icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs);
|
||||
|
||||
static PyMethodDef icu_Collator_methods[] = {
|
||||
{"sort_key", (PyCFunction)icu_Collator_sort_key, METH_VARARGS,
|
||||
"sort_key(unicode object) -> Return a sort key for the given object as a bytestring. The idea is that these bytestring will sort using the builtin cmp function, just like the original unicode strings would sort in the current locale with ICU."
|
||||
@ -323,6 +327,10 @@ static PyMethodDef icu_Collator_methods[] = {
|
||||
"span_contractions(src, span_condition) -> returns the length of the initial substring according to span_condition in the set of contractions for this collator. Returns 0 if src does not fit the span_condition. The span_condition can be one of USET_SPAN_NOT_CONTAINED, USET_SPAN_CONTAINED, USET_SPAN_SIMPLE."
|
||||
},
|
||||
|
||||
{"clone", (PyCFunction)icu_Collator_clone, METH_VARARGS,
|
||||
"clone() -> returns a clone of this collator."
|
||||
},
|
||||
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
@ -390,6 +398,31 @@ static PyTypeObject icu_CollatorType = { // {{{
|
||||
|
||||
// }}
|
||||
|
||||
// Collator.clone {{{
|
||||
static PyObject*
|
||||
icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
UCollator *collator;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int32_t bufsize = -1;
|
||||
icu_Collator *clone;
|
||||
|
||||
collator = ucol_safeClone(self->collator, NULL, &bufsize, &status);
|
||||
|
||||
if (collator == NULL || U_FAILURE(status)) {
|
||||
PyErr_SetString(PyExc_Exception, "Failed to create collator.");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
clone = PyObject_New(icu_Collator, &icu_CollatorType);
|
||||
if (clone == NULL) return PyErr_NoMemory();
|
||||
|
||||
clone->collator = collator;
|
||||
clone->contractions = NULL;
|
||||
|
||||
return (PyObject*) clone;
|
||||
|
||||
} // }}}
|
||||
|
||||
// }}}
|
||||
|
||||
|
@ -12,7 +12,7 @@ from functools import partial
|
||||
from calibre.constants import plugins
|
||||
from calibre.utils.config_base import tweaks
|
||||
|
||||
_icu = _collator = None
|
||||
_icu = _collator = _primary_collator = None
|
||||
_locale = None
|
||||
|
||||
_none = u''
|
||||
@ -48,6 +48,12 @@ def load_collator():
|
||||
_collator = icu.Collator(get_locale())
|
||||
return _collator
|
||||
|
||||
def primary_collator():
    '''
    Return the module-wide collator used for primary strength comparisons.

    On first use the collator is created by cloning ``_collator`` and setting
    the clone's strength to ``_icu.UCOL_PRIMARY``; the result is cached in
    ``_primary_collator`` and returned unchanged by later calls.
    '''
    global _primary_collator
    if _primary_collator is None:
        # Finish configuring the clone before publishing it in the module
        # cache. If setting the strength raises, nothing is cached and the
        # next call retries, instead of returning a clone that still has the
        # strength it inherited from _collator.
        clone = _collator.clone()
        clone.strength = _icu.UCOL_PRIMARY
        _primary_collator = clone
    return _primary_collator
|
||||
|
||||
def py_sort_key(obj):
|
||||
if not obj:
|
||||
@ -65,18 +71,11 @@ def py_find(pattern, source):
|
||||
return pos, len(pattern)
|
||||
return -1, -1
|
||||
|
||||
def icu_find(collator, pattern, source, strength=None):
|
||||
if strength is not None:
|
||||
ostrength = collator.strength
|
||||
collator.strength = strength
|
||||
def icu_find(collator, pattern, source):
|
||||
try:
|
||||
try:
|
||||
return collator.find(pattern, source)
|
||||
except TypeError:
|
||||
return collator.find(unicode(pattern), unicode(source))
|
||||
finally:
|
||||
if strength is not None:
|
||||
collator.strength = ostrength
|
||||
return collator.find(pattern, source)
|
||||
except TypeError:
|
||||
return collator.find(unicode(pattern), unicode(source))
|
||||
|
||||
def py_case_sensitive_sort_key(obj):
|
||||
if not obj:
|
||||
@ -88,18 +87,8 @@ def icu_case_sensitive_sort_key(collator, obj):
|
||||
return _none2
|
||||
return collator.sort_key(obj)
|
||||
|
||||
def icu_strcmp(collator, a, b, strength=None):
|
||||
if strength is not None:
|
||||
ostrength = collator.strength
|
||||
collator.strength = strength
|
||||
try:
|
||||
s = collator.strength
|
||||
if s >= _icu.UCOL_TERTIARY:
|
||||
a, b = lower(a), lower(b)
|
||||
return collator.strcmp(a, b)
|
||||
finally:
|
||||
if strength is not None:
|
||||
collator.strength = ostrength
|
||||
def icu_strcmp(collator, a, b):
|
||||
return collator.strcmp(lower(a), lower(b))
|
||||
|
||||
def py_strcmp(a, b, strength=None):
    '''
    Three-way, case-insensitive comparison of two strings.

    Both arguments are lower-cased with their own ``lower()`` method and then
    compared. Returns -1 if ``a`` sorts before ``b``, 1 if it sorts after and
    0 if they are equal ignoring case.

    ``strength`` is accepted but not used by this implementation
    (presumably kept so callers can pass it positionally; confirm against
    the call sites).
    '''
    x, y = a.lower(), b.lower()
    # Same -1/0/1 result as the cmp() builtin, without depending on it:
    # cmp() does not exist on Python 3.
    return (x > y) - (x < y)
|
||||
@ -183,14 +172,14 @@ def primary_strcmp(a, b):
|
||||
if _icu_not_ok:
|
||||
from calibre.utils.filenames import ascii_text
|
||||
return py_strcmp(ascii_text(a), ascii_text(b))
|
||||
return icu_strcmp(_collator, a, b, _icu.UCOL_PRIMARY)
|
||||
return primary_collator().strcmp(a, b)
|
||||
|
||||
def primary_find(pat, src):
|
||||
'find that ignores case and accents on letters'
|
||||
if _icu_not_ok:
|
||||
from calibre.utils.filenames import ascii_text
|
||||
return py_find(ascii_text(pat), ascii_text(src))
|
||||
return icu_find(_collator, pat, src, _icu.UCOL_PRIMARY)
|
||||
return icu_find(primary_collator(), pat, src)
|
||||
|
||||
################################################################################
|
||||
|
||||
@ -315,6 +304,18 @@ pêché'''
|
||||
print 'Capitalize:', x, '->', 'py:', x.capitalize().encode('utf-8'), 'icu:', capitalize(x).encode('utf-8')
|
||||
print
|
||||
|
||||
print '\nTesting primary collation'
|
||||
for k, v in {u'pèché': u'peche', u'flüße':u'flusse'}.iteritems():
|
||||
if primary_strcmp(k, v) != 0:
|
||||
print 'primary_strcmp() failed with %s != %s'%(k, v)
|
||||
if primary_find(v, u' '+k)[0] != 1:
|
||||
print 'primary_find() failed with %s not in %s'%(v, k)
|
||||
|
||||
global _primary_collator
|
||||
_primary_collator = _icu.Collator('es')
|
||||
if primary_strcmp(u'peña', u'pena') == 0:
|
||||
print 'Primary collation in Spanish locale failed'
|
||||
|
||||
# }}}
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
Loading…
x
Reference in New Issue
Block a user