mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
Use a secondary collator for sort_key and implement primary_startswith
This commit is contained in:
parent
5395cda9c7
commit
4bed21a52f
@ -272,6 +272,44 @@ icu_Collator_contractions(icu_Collator *self, PyObject *args, PyObject *kwargs)
|
||||
return Py_BuildValue("O", ans);
|
||||
} // }}}
|
||||
|
||||
// Collator.startswith {{{
|
||||
static PyObject *
|
||||
icu_Collator_startswith(icu_Collator *self, PyObject *args, PyObject *kwargs) {
|
||||
PyObject *a_, *b_;
|
||||
size_t asz, bsz;
|
||||
int32_t actual_a, actual_b;
|
||||
UChar *a, *b;
|
||||
wchar_t *aw, *bw;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int ans = 0;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "UU", &a_, &b_)) return NULL;
|
||||
asz = PyUnicode_GetSize(a_); bsz = PyUnicode_GetSize(b_);
|
||||
if (asz < bsz) Py_RETURN_FALSE;
|
||||
if (bsz == 0) Py_RETURN_TRUE;
|
||||
|
||||
a = (UChar*)calloc(asz*4 + 2, sizeof(UChar));
|
||||
b = (UChar*)calloc(bsz*4 + 2, sizeof(UChar));
|
||||
aw = (wchar_t*)calloc(asz*4 + 2, sizeof(wchar_t));
|
||||
bw = (wchar_t*)calloc(bsz*4 + 2, sizeof(wchar_t));
|
||||
|
||||
if (a == NULL || b == NULL || aw == NULL || bw == NULL) return PyErr_NoMemory();
|
||||
|
||||
actual_a = (int32_t)PyUnicode_AsWideChar((PyUnicodeObject*)a_, aw, asz*4+1);
|
||||
actual_b = (int32_t)PyUnicode_AsWideChar((PyUnicodeObject*)b_, bw, bsz*4+1);
|
||||
if (actual_a > -1 && actual_b > -1) {
|
||||
u_strFromWCS(a, asz*4 + 1, &actual_a, aw, -1, &status);
|
||||
u_strFromWCS(b, bsz*4 + 1, &actual_b, bw, -1, &status);
|
||||
|
||||
if (U_SUCCESS(status) && ucol_equal(self->collator, a, actual_b, b, actual_b))
|
||||
ans = 1;
|
||||
}
|
||||
|
||||
free(a); free(b); free(aw); free(bw);
|
||||
if (ans) Py_RETURN_TRUE;
|
||||
Py_RETURN_FALSE;
|
||||
} // }}}
|
||||
|
||||
static PyObject*
|
||||
icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs);
|
||||
|
||||
@ -296,6 +334,10 @@ static PyMethodDef icu_Collator_methods[] = {
|
||||
"clone() -> returns a clone of this collator."
|
||||
},
|
||||
|
||||
{"startswith", (PyCFunction)icu_Collator_startswith, METH_VARARGS,
|
||||
"startswith(a, b) -> returns True iff a startswith b, following the current collation rules."
|
||||
},
|
||||
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
|
@ -12,7 +12,7 @@ from functools import partial
|
||||
from calibre.constants import plugins
|
||||
from calibre.utils.config_base import tweaks
|
||||
|
||||
# Module-level caches, all starting out as None. The collator accessor
# functions in this module fill in the primary/secondary entries on first use.
_icu = _collator = _primary_collator = _secondary_collator = None
_locale = None

_none = u''
|
||||
@ -55,6 +55,13 @@ def primary_collator():
|
||||
_primary_collator.strength = _icu.UCOL_PRIMARY
|
||||
return _primary_collator
|
||||
|
||||
def secondary_collator():
    """Return the shared secondary-strength collator, creating it on demand.

    The first call clones the module-level ``_collator`` and sets the clone's
    strength to ``_icu.UCOL_SECONDARY``; later calls return the cached object.
    """
    global _secondary_collator
    if _secondary_collator is not None:
        return _secondary_collator
    _secondary_collator = _collator.clone()
    _secondary_collator.strength = _icu.UCOL_SECONDARY
    return _secondary_collator
|
||||
|
||||
def py_sort_key(obj):
|
||||
if not obj:
|
||||
return _none
|
||||
@ -63,7 +70,10 @@ def py_sort_key(obj):
|
||||
def icu_sort_key(collator, obj):
    """Return the sort key for ``obj`` from the secondary-strength collator.

    Falsy input yields the module-level ``_none2`` placeholder. The
    ``collator`` parameter is kept so existing callers keep working; the key
    itself always comes from the shared secondary collator.
    """
    if not obj:
        return _none2
    try:
        # Fast path: the cached collator already exists.
        return _secondary_collator.sort_key(obj)
    except AttributeError:
        # The cache is still None, so build the collator and retry.
        return secondary_collator().sort_key(obj)
|
||||
|
||||
def py_find(pattern, source):
|
||||
pos = source.find(pattern)
|
||||
@ -77,6 +87,12 @@ def icu_find(collator, pattern, source):
|
||||
except TypeError:
|
||||
return collator.find(unicode(pattern), unicode(source))
|
||||
|
||||
def icu_startswith(collator, a, b):
    """Return ``collator.startswith(a, b)``, coercing to unicode if needed.

    The arguments are passed through unchanged first; only when that call
    raises TypeError are both converted with ``unicode()`` and retried.
    """
    try:
        result = collator.startswith(a, b)
    except TypeError:
        result = collator.startswith(unicode(a), unicode(b))
    return result
|
||||
|
||||
def py_case_sensitive_sort_key(obj):
|
||||
if not obj:
|
||||
return _none
|
||||
@ -180,6 +196,15 @@ def primary_sort_key(val):
|
||||
except AttributeError:
|
||||
return primary_collator().sort_key(val)
|
||||
|
||||
def primary_startswith(a, b):
    """Return True if ``a`` starts with ``b`` under primary collation.

    When the module-level ``_icu_not_ok`` flag is set, the check falls back
    to comparing the lower-cased ``ascii_text`` forms of both strings.
    Otherwise the cached primary collator is used, and it is created through
    ``primary_collator()`` if the first attempt raises AttributeError.
    """
    if not _icu_not_ok:
        try:
            return icu_startswith(_primary_collator, a, b)
        except AttributeError:
            # The cached collator is not available yet; build it and retry.
            return icu_startswith(primary_collator(), a, b)

    from calibre.utils.filenames import ascii_text
    haystack = ascii_text(a).lower()
    needle = ascii_text(b).lower()
    return haystack.startswith(needle)
|
||||
|
||||
################################################################################
|
||||
|
||||
def test(): # {{{
|
||||
@ -299,8 +324,8 @@ pêché'''
|
||||
print
|
||||
|
||||
print '\nTesting primary collation'
|
||||
for k, v in {u'pèché': u'peche', u'flüße':u'flusse',
|
||||
u'Štepánek':u'Štepanek'}.iteritems():
|
||||
for k, v in {u'pèché': u'peche', u'flüße':u'Flusse',
|
||||
u'Štepánek':u'ŠtepaneK'}.iteritems():
|
||||
if primary_strcmp(k, v) != 0:
|
||||
prints('primary_strcmp() failed with %s != %s'%(k, v))
|
||||
return
|
||||
@ -309,10 +334,12 @@ pêché'''
|
||||
return
|
||||
|
||||
global _primary_collator
|
||||
orig = _primary_collator
|
||||
_primary_collator = _icu.Collator('es')
|
||||
if primary_strcmp(u'peña', u'pena') == 0:
|
||||
print 'Primary collation in Spanish locale failed'
|
||||
return
|
||||
_primary_collator = orig
|
||||
|
||||
print '\nTesting contractions'
|
||||
c = _icu.Collator('cs')
|
||||
@ -322,6 +349,13 @@ pêché'''
|
||||
print 'Contractions for the Czech language failed'
|
||||
return
|
||||
|
||||
print '\nTesting startswith'
|
||||
p = primary_startswith
|
||||
if (not p('asd', 'asd') or not p('asd', 'A') or
|
||||
not p('x', '')):
|
||||
print 'startswith() failed'
|
||||
return
|
||||
|
||||
# }}}
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
Loading…
x
Reference in New Issue
Block a user