mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Use a cloned collator for primary collation
This commit is contained in:
parent
f8dfd7fdda
commit
bb606bc3ab
@ -32,18 +32,18 @@ icu_Collator_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|||||||
icu_Collator *self;
|
icu_Collator *self;
|
||||||
const char *loc;
|
const char *loc;
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
|
UCollator *collator;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "s", &loc)) return NULL;
|
if (!PyArg_ParseTuple(args, "s", &loc)) return NULL;
|
||||||
|
collator = ucol_open(loc, &status);
|
||||||
|
if (collator == NULL || U_FAILURE(status)) {
|
||||||
|
PyErr_SetString(PyExc_Exception, "Failed to create collator.");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
self = (icu_Collator *)type->tp_alloc(type, 0);
|
self = (icu_Collator *)type->tp_alloc(type, 0);
|
||||||
if (self != NULL) {
|
if (self != NULL) {
|
||||||
self->collator = ucol_open(loc, &status);
|
self->collator = collator;
|
||||||
if (self->collator == NULL || U_FAILURE(status)) {
|
|
||||||
PyErr_SetString(PyExc_Exception, "Failed to create collator.");
|
|
||||||
self->collator = NULL;
|
|
||||||
Py_DECREF(self);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
self->contractions = NULL;
|
self->contractions = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -302,6 +302,10 @@ icu_Collator_span_contractions(icu_Collator *self, PyObject *args, PyObject *kwa
|
|||||||
return Py_BuildValue("i", uset_span(self->contractions, s, slen, span_type));
|
return Py_BuildValue("i", uset_span(self->contractions, s, slen, span_type));
|
||||||
} // }}}
|
} // }}}
|
||||||
|
|
||||||
|
|
||||||
|
static PyObject*
|
||||||
|
icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs);
|
||||||
|
|
||||||
static PyMethodDef icu_Collator_methods[] = {
|
static PyMethodDef icu_Collator_methods[] = {
|
||||||
{"sort_key", (PyCFunction)icu_Collator_sort_key, METH_VARARGS,
|
{"sort_key", (PyCFunction)icu_Collator_sort_key, METH_VARARGS,
|
||||||
"sort_key(unicode object) -> Return a sort key for the given object as a bytestring. The idea is that these bytestring will sort using the builtin cmp function, just like the original unicode strings would sort in the current locale with ICU."
|
"sort_key(unicode object) -> Return a sort key for the given object as a bytestring. The idea is that these bytestring will sort using the builtin cmp function, just like the original unicode strings would sort in the current locale with ICU."
|
||||||
@ -323,6 +327,10 @@ static PyMethodDef icu_Collator_methods[] = {
|
|||||||
"span_contractions(src, span_condition) -> returns the length of the initial substring according to span_condition in the set of contractions for this collator. Returns 0 if src does not fit the span_condition. The span_condition can be one of USET_SPAN_NOT_CONTAINED, USET_SPAN_CONTAINED, USET_SPAN_SIMPLE."
|
"span_contractions(src, span_condition) -> returns the length of the initial substring according to span_condition in the set of contractions for this collator. Returns 0 if src does not fit the span_condition. The span_condition can be one of USET_SPAN_NOT_CONTAINED, USET_SPAN_CONTAINED, USET_SPAN_SIMPLE."
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{"clone", (PyCFunction)icu_Collator_clone, METH_VARARGS,
|
||||||
|
"clone() -> returns a clone of this collator."
|
||||||
|
},
|
||||||
|
|
||||||
{NULL} /* Sentinel */
|
{NULL} /* Sentinel */
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -390,6 +398,31 @@ static PyTypeObject icu_CollatorType = { // {{{
|
|||||||
|
|
||||||
// }}
|
// }}
|
||||||
|
|
||||||
|
// Collator.clone {{{
|
||||||
|
static PyObject*
|
||||||
|
icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs)
|
||||||
|
{
|
||||||
|
UCollator *collator;
|
||||||
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
|
int32_t bufsize = -1;
|
||||||
|
icu_Collator *clone;
|
||||||
|
|
||||||
|
collator = ucol_safeClone(self->collator, NULL, &bufsize, &status);
|
||||||
|
|
||||||
|
if (collator == NULL || U_FAILURE(status)) {
|
||||||
|
PyErr_SetString(PyExc_Exception, "Failed to create collator.");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
clone = PyObject_New(icu_Collator, &icu_CollatorType);
|
||||||
|
if (clone == NULL) return PyErr_NoMemory();
|
||||||
|
|
||||||
|
clone->collator = collator;
|
||||||
|
clone->contractions = NULL;
|
||||||
|
|
||||||
|
return (PyObject*) clone;
|
||||||
|
|
||||||
|
} // }}}
|
||||||
|
|
||||||
// }}}
|
// }}}
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@ from functools import partial
|
|||||||
from calibre.constants import plugins
|
from calibre.constants import plugins
|
||||||
from calibre.utils.config_base import tweaks
|
from calibre.utils.config_base import tweaks
|
||||||
|
|
||||||
_icu = _collator = None
|
_icu = _collator = _primary_collator = None
|
||||||
_locale = None
|
_locale = None
|
||||||
|
|
||||||
_none = u''
|
_none = u''
|
||||||
@ -48,6 +48,12 @@ def load_collator():
|
|||||||
_collator = icu.Collator(get_locale())
|
_collator = icu.Collator(get_locale())
|
||||||
return _collator
|
return _collator
|
||||||
|
|
||||||
|
def primary_collator():
    '''Return the cached primary-strength collator, building it on first use.

    On the first call the module-level ``_collator`` is cloned, the clone is
    stored in ``_primary_collator`` and its strength is set to
    ``_icu.UCOL_PRIMARY``. Later calls return the same cached object.
    '''
    global _primary_collator
    if _primary_collator is not None:
        return _primary_collator
    # Cache the clone first, then adjust its strength (not _collator's).
    _primary_collator = fresh = _collator.clone()
    fresh.strength = _icu.UCOL_PRIMARY
    return fresh
|
||||||
|
|
||||||
def py_sort_key(obj):
|
def py_sort_key(obj):
|
||||||
if not obj:
|
if not obj:
|
||||||
@ -65,18 +71,11 @@ def py_find(pattern, source):
|
|||||||
return pos, len(pattern)
|
return pos, len(pattern)
|
||||||
return -1, -1
|
return -1, -1
|
||||||
|
|
||||||
def icu_find(collator, pattern, source, strength=None):
|
def icu_find(collator, pattern, source):
|
||||||
if strength is not None:
|
|
||||||
ostrength = collator.strength
|
|
||||||
collator.strength = strength
|
|
||||||
try:
|
try:
|
||||||
try:
|
return collator.find(pattern, source)
|
||||||
return collator.find(pattern, source)
|
except TypeError:
|
||||||
except TypeError:
|
return collator.find(unicode(pattern), unicode(source))
|
||||||
return collator.find(unicode(pattern), unicode(source))
|
|
||||||
finally:
|
|
||||||
if strength is not None:
|
|
||||||
collator.strength = ostrength
|
|
||||||
|
|
||||||
def py_case_sensitive_sort_key(obj):
|
def py_case_sensitive_sort_key(obj):
|
||||||
if not obj:
|
if not obj:
|
||||||
@ -88,18 +87,8 @@ def icu_case_sensitive_sort_key(collator, obj):
|
|||||||
return _none2
|
return _none2
|
||||||
return collator.sort_key(obj)
|
return collator.sort_key(obj)
|
||||||
|
|
||||||
def icu_strcmp(collator, a, b, strength=None):
|
def icu_strcmp(collator, a, b):
|
||||||
if strength is not None:
|
return collator.strcmp(lower(a), lower(b))
|
||||||
ostrength = collator.strength
|
|
||||||
collator.strength = strength
|
|
||||||
try:
|
|
||||||
s = collator.strength
|
|
||||||
if s >= _icu.UCOL_TERTIARY:
|
|
||||||
a, b = lower(a), lower(b)
|
|
||||||
return collator.strcmp(a, b)
|
|
||||||
finally:
|
|
||||||
if strength is not None:
|
|
||||||
collator.strength = ostrength
|
|
||||||
|
|
||||||
def py_strcmp(a, b, strength=None):
|
def py_strcmp(a, b, strength=None):
|
||||||
return cmp(a.lower(), b.lower())
|
return cmp(a.lower(), b.lower())
|
||||||
@ -183,14 +172,14 @@ def primary_strcmp(a, b):
|
|||||||
if _icu_not_ok:
|
if _icu_not_ok:
|
||||||
from calibre.utils.filenames import ascii_text
|
from calibre.utils.filenames import ascii_text
|
||||||
return py_strcmp(ascii_text(a), ascii_text(b))
|
return py_strcmp(ascii_text(a), ascii_text(b))
|
||||||
return icu_strcmp(_collator, a, b, _icu.UCOL_PRIMARY)
|
return primary_collator().strcmp(a, b)
|
||||||
|
|
||||||
def primary_find(pat, src):
|
def primary_find(pat, src):
|
||||||
'find that ignores case and accents on letters'
|
'find that ignores case and accents on letters'
|
||||||
if _icu_not_ok:
|
if _icu_not_ok:
|
||||||
from calibre.utils.filenames import ascii_text
|
from calibre.utils.filenames import ascii_text
|
||||||
return py_find(ascii_text(pat), ascii_text(src))
|
return py_find(ascii_text(pat), ascii_text(src))
|
||||||
return icu_find(_collator, pat, src, _icu.UCOL_PRIMARY)
|
return icu_find(primary_collator(), pat, src)
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
|
|
||||||
@ -315,6 +304,18 @@ pêché'''
|
|||||||
print 'Capitalize:', x, '->', 'py:', x.capitalize().encode('utf-8'), 'icu:', capitalize(x).encode('utf-8')
|
print 'Capitalize:', x, '->', 'py:', x.capitalize().encode('utf-8'), 'icu:', capitalize(x).encode('utf-8')
|
||||||
print
|
print
|
||||||
|
|
||||||
|
print '\nTesting primary collation'
|
||||||
|
for k, v in {u'pèché': u'peche', u'flüße':u'flusse'}.iteritems():
|
||||||
|
if primary_strcmp(k, v) != 0:
|
||||||
|
print 'primary_strcmp() failed with %s != %s'%(k, v)
|
||||||
|
if primary_find(v, u' '+k)[0] != 1:
|
||||||
|
print 'primary_find() failed with %s not in %s'%(v, k)
|
||||||
|
|
||||||
|
global _primary_collator
|
||||||
|
_primary_collator = _icu.Collator('es')
|
||||||
|
if primary_strcmp(u'peña', u'pena') == 0:
|
||||||
|
print 'Primary collation in Spanish locale failed'
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Loading…
x
Reference in New Issue
Block a user