Use a secondary collator for sort_key and implement primary_startswith

This commit is contained in:
Kovid Goyal 2012-07-08 19:39:56 +05:30
parent 5395cda9c7
commit 4bed21a52f
2 changed files with 80 additions and 4 deletions

View File

@ -272,6 +272,44 @@ icu_Collator_contractions(icu_Collator *self, PyObject *args, PyObject *kwargs)
return Py_BuildValue("O", ans); return Py_BuildValue("O", ans);
} // }}} } // }}}
// Collator.startswith {{{
/*
 * Collator.startswith(a, b) -> bool
 *
 * Return True iff the unicode string a starts with the unicode string b
 * according to this collator. Both strings are converted to ICU UChar
 * buffers and the leading actual_b code units of a are compared against
 * all of b with ucol_equal().
 *
 * Returns NULL with an exception set when the arguments are not two
 * unicode objects or when memory cannot be allocated.
 */
static PyObject *
icu_Collator_startswith(icu_Collator *self, PyObject *args, PyObject *kwargs) {
    PyObject *a_, *b_;
    size_t asz, bsz;
    int32_t actual_a, actual_b;
    UChar *a = NULL, *b = NULL;
    wchar_t *aw = NULL, *bw = NULL;
    UErrorCode status = U_ZERO_ERROR;
    int ans = 0;

    if (!PyArg_ParseTuple(args, "UU", &a_, &b_)) return NULL;

    asz = PyUnicode_GetSize(a_); bsz = PyUnicode_GetSize(b_);
    // A string cannot start with something longer than itself, and every
    // string starts with the empty string.
    if (asz < bsz) Py_RETURN_FALSE;
    if (bsz == 0) Py_RETURN_TRUE;

    // Zero-filled buffers with generous headroom (4 units per input unit
    // plus room for a terminator).
    a = (UChar*)calloc(asz*4 + 2, sizeof(UChar));
    b = (UChar*)calloc(bsz*4 + 2, sizeof(UChar));
    aw = (wchar_t*)calloc(asz*4 + 2, sizeof(wchar_t));
    bw = (wchar_t*)calloc(bsz*4 + 2, sizeof(wchar_t));
    if (a == NULL || b == NULL || aw == NULL || bw == NULL) {
        // Release whichever allocations succeeded; free(NULL) is a no-op.
        free(a); free(b); free(aw); free(bw);
        return PyErr_NoMemory();
    }

    actual_a = (int32_t)PyUnicode_AsWideChar((PyUnicodeObject*)a_, aw, asz*4+1);
    actual_b = (int32_t)PyUnicode_AsWideChar((PyUnicodeObject*)b_, bw, bsz*4+1);
    if (actual_a > -1 && actual_b > -1) {
        // On return actual_a/actual_b hold the converted lengths in UChars.
        u_strFromWCS(a, asz*4 + 1, &actual_a, aw, -1, &status);
        u_strFromWCS(b, bsz*4 + 1, &actual_b, bw, -1, &status);
        // Only compare when a really has actual_b code units; otherwise the
        // comparison would run into the zero padding after the end of a.
        if (U_SUCCESS(status) && actual_a >= actual_b &&
                ucol_equal(self->collator, a, actual_b, b, actual_b))
            ans = 1;
    }

    free(a); free(b); free(aw); free(bw);
    if (ans) Py_RETURN_TRUE;
    Py_RETURN_FALSE;
} // }}}
static PyObject* static PyObject*
icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs); icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs);
@ -296,6 +334,10 @@ static PyMethodDef icu_Collator_methods[] = {
"clone() -> returns a clone of this collator." "clone() -> returns a clone of this collator."
}, },
{"startswith", (PyCFunction)icu_Collator_startswith, METH_VARARGS,
"startswith(a, b) -> returns True iff a startswith b, following the current collation rules."
},
{NULL} /* Sentinel */ {NULL} /* Sentinel */
}; };

View File

@ -12,7 +12,7 @@ from functools import partial
from calibre.constants import plugins from calibre.constants import plugins
from calibre.utils.config_base import tweaks from calibre.utils.config_base import tweaks
_icu = _collator = _primary_collator = None _icu = _collator = _primary_collator = _secondary_collator = None
_locale = None _locale = None
_none = u'' _none = u''
@ -55,6 +55,13 @@ def primary_collator():
_primary_collator.strength = _icu.UCOL_PRIMARY _primary_collator.strength = _icu.UCOL_PRIMARY
return _primary_collator return _primary_collator
def secondary_collator():
    '''
    Return the module-wide secondary strength collator.

    On first use it is created by cloning the module level ``_collator``
    and setting the clone's strength to ``_icu.UCOL_SECONDARY``; later
    calls return the same cached object.
    '''
    global _secondary_collator
    if _secondary_collator is not None:
        # Already built by an earlier call
        return _secondary_collator
    _secondary_collator = _collator.clone()
    _secondary_collator.strength = _icu.UCOL_SECONDARY
    return _secondary_collator
def py_sort_key(obj): def py_sort_key(obj):
if not obj: if not obj:
return _none return _none
@ -63,7 +70,10 @@ def py_sort_key(obj):
def icu_sort_key(collator, obj): def icu_sort_key(collator, obj):
if not obj: if not obj:
return _none2 return _none2
return collator.sort_key(lower(obj)) try:
return _secondary_collator.sort_key(obj)
except AttributeError:
return secondary_collator().sort_key(obj)
def py_find(pattern, source): def py_find(pattern, source):
pos = source.find(pattern) pos = source.find(pattern)
@ -77,6 +87,12 @@ def icu_find(collator, pattern, source):
except TypeError: except TypeError:
return collator.find(unicode(pattern), unicode(source)) return collator.find(unicode(pattern), unicode(source))
def icu_startswith(collator, a, b):
    '''
    Return the result of ``collator.startswith(a, b)``.

    If the collator rejects the arguments with a TypeError, the call is
    retried once with both arguments converted via ``unicode()``.
    '''
    try:
        result = collator.startswith(a, b)
    except TypeError:
        # Retry with both arguments coerced to unicode
        result = collator.startswith(unicode(a), unicode(b))
    return result
def py_case_sensitive_sort_key(obj): def py_case_sensitive_sort_key(obj):
if not obj: if not obj:
return _none return _none
@ -180,6 +196,15 @@ def primary_sort_key(val):
except AttributeError: except AttributeError:
return primary_collator().sort_key(val) return primary_collator().sort_key(val)
def primary_startswith(a, b):
    '''
    Return True iff ``a`` starts with ``b`` according to the primary
    strength collator.

    When ICU is flagged as unusable (``_icu_not_ok``), both strings are
    instead passed through ``ascii_text()`` and lower-cased, and a plain
    ``str.startswith`` comparison is made.
    '''
    if not _icu_not_ok:
        try:
            return icu_startswith(_primary_collator, a, b)
        except AttributeError:
            # The cached collator was not usable (e.g. not yet created), so
            # obtain it via primary_collator() and try again.
            return icu_startswith(primary_collator(), a, b)

    from calibre.utils.filenames import ascii_text
    haystack = ascii_text(a).lower()
    needle = ascii_text(b).lower()
    return haystack.startswith(needle)
################################################################################ ################################################################################
def test(): # {{{ def test(): # {{{
@ -299,8 +324,8 @@ pêché'''
print print
print '\nTesting primary collation' print '\nTesting primary collation'
for k, v in {u'pèché': u'peche', u'flüße':u'flusse', for k, v in {u'pèché': u'peche', u'flüße':u'Flusse',
u'Štepánek':u'Štepanek'}.iteritems(): u'Štepánek':u'ŠtepaneK'}.iteritems():
if primary_strcmp(k, v) != 0: if primary_strcmp(k, v) != 0:
prints('primary_strcmp() failed with %s != %s'%(k, v)) prints('primary_strcmp() failed with %s != %s'%(k, v))
return return
@ -309,10 +334,12 @@ pêché'''
return return
global _primary_collator global _primary_collator
orig = _primary_collator
_primary_collator = _icu.Collator('es') _primary_collator = _icu.Collator('es')
if primary_strcmp(u'peña', u'pena') == 0: if primary_strcmp(u'peña', u'pena') == 0:
print 'Primary collation in Spanish locale failed' print 'Primary collation in Spanish locale failed'
return return
_primary_collator = orig
print '\nTesting contractions' print '\nTesting contractions'
c = _icu.Collator('cs') c = _icu.Collator('cs')
@ -322,6 +349,13 @@ pêché'''
print 'Contractions for the Czech language failed' print 'Contractions for the Czech language failed'
return return
print '\nTesting startswith'
p = primary_startswith
if (not p('asd', 'asd') or not p('asd', 'A') or
not p('x', '')):
print 'startswith() failed'
return
# }}} # }}}
if __name__ == '__main__': if __name__ == '__main__':