From 4bed21a52f4c35c99e3667cabe37834f0873385d Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 8 Jul 2012 19:39:56 +0530
Subject: [PATCH] Use a secondary collator for sort_key and implement primary_startswith

---
Review notes (ignored by git am, not part of the commit message):
 * icu_Collator_startswith() now frees whichever conversion buffers were
   allocated before raising MemoryError, instead of leaking them.
 * Documentation was added for the new functions. Hunk offsets and the
   diffstat were recomputed; the post-image blob ids on the index lines
   were not.

 src/calibre/utils/icu.c  | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/calibre/utils/icu.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 101 insertions(+), 4 deletions(-)

diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c
index 8e8a8e9ec8..c451e9cdac 100644
--- a/src/calibre/utils/icu.c
+++ b/src/calibre/utils/icu.c
@@ -272,6 +272,57 @@ icu_Collator_contractions(icu_Collator *self, PyObject *args, PyObject *kwargs)
     return Py_BuildValue("O", ans);
 } // }}}
 
+// Collator.startswith {{{
+/*
+ * startswith(a, b) -> True iff a starts with b under this collator.
+ *
+ * Both arguments must be unicode objects. Only the first N UTF-16 units of a
+ * are compared with b using ucol_equal(), where N is the converted length of b.
+ * Returns False if a is shorter than b or a string cannot be converted for
+ * ICU, True if b is empty, and raises MemoryError if allocation fails.
+ */
+static PyObject *
+icu_Collator_startswith(icu_Collator *self, PyObject *args, PyObject *kwargs) {
+    PyObject *a_, *b_;
+    size_t asz, bsz;
+    int32_t actual_a, actual_b;
+    UChar *a, *b;
+    wchar_t *aw, *bw;
+    UErrorCode status = U_ZERO_ERROR;
+    int ans = 0;
+
+    if (!PyArg_ParseTuple(args, "UU", &a_, &b_)) return NULL;
+    asz = PyUnicode_GetSize(a_); bsz = PyUnicode_GetSize(b_);
+    if (asz < bsz) Py_RETURN_FALSE;
+    if (bsz == 0) Py_RETURN_TRUE;
+
+    a = (UChar*)calloc(asz*4 + 2, sizeof(UChar));
+    b = (UChar*)calloc(bsz*4 + 2, sizeof(UChar));
+    aw = (wchar_t*)calloc(asz*4 + 2, sizeof(wchar_t));
+    bw = (wchar_t*)calloc(bsz*4 + 2, sizeof(wchar_t));
+
+    if (a == NULL || b == NULL || aw == NULL || bw == NULL) {
+        // free(NULL) is a no-op, so this releases whichever buffers were allocated
+        free(a); free(b); free(aw); free(bw);
+        return PyErr_NoMemory();
+    }
+
+    actual_a = (int32_t)PyUnicode_AsWideChar((PyUnicodeObject*)a_, aw, asz*4+1);
+    actual_b = (int32_t)PyUnicode_AsWideChar((PyUnicodeObject*)b_, bw, bsz*4+1);
+    if (actual_a > -1 && actual_b > -1) {
+        u_strFromWCS(a, asz*4 + 1, &actual_a, aw, -1, &status);
+        u_strFromWCS(b, bsz*4 + 1, &actual_b, bw, -1, &status);
+
+        // Compare only the first actual_b UTF-16 units of a against all of b
+        if (U_SUCCESS(status) && ucol_equal(self->collator, a, actual_b, b, actual_b))
+            ans = 1;
+    }
+
+    free(a); free(b); free(aw); free(bw);
+    if (ans) Py_RETURN_TRUE;
+    Py_RETURN_FALSE;
+} // }}}
+
 static PyObject*
 icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs);
 
@@ -296,6 +347,10 @@ static PyMethodDef icu_Collator_methods[] = {
      "clone() -> returns a clone of this collator."
     },
 
+    {"startswith", (PyCFunction)icu_Collator_startswith, METH_VARARGS,
+     "startswith(a, b) -> returns True iff a startswith b, following the current collation rules."
+    },
+
     {NULL}  /* Sentinel */
 };
 
diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py
index 50e7274b30..0dab76cd30 100644
--- a/src/calibre/utils/icu.py
+++ b/src/calibre/utils/icu.py
@@ -12,7 +12,7 @@ from functools import partial
 from calibre.constants import plugins
 from calibre.utils.config_base import tweaks
 
-_icu = _collator = _primary_collator = None
+_icu = _collator = _primary_collator = _secondary_collator = None
 _locale = None
 
 _none = u''
@@ -55,6 +55,14 @@ def primary_collator():
         _primary_collator.strength = _icu.UCOL_PRIMARY
     return _primary_collator
 
+def secondary_collator():
+    '''Return the cached UCOL_SECONDARY strength clone of _collator, creating it on first use.'''
+    global _secondary_collator
+    if _secondary_collator is None:
+        _secondary_collator = _collator.clone()
+        _secondary_collator.strength = _icu.UCOL_SECONDARY
+    return _secondary_collator
+
 def py_sort_key(obj):
     if not obj:
         return _none
@@ -63,7 +71,12 @@ def py_sort_key(obj):
 def icu_sort_key(collator, obj):
     if not obj:
         return _none2
-    return collator.sort_key(lower(obj))
+    # The collator argument is ignored; the shared secondary collator is used.
+    # It is None until first use, hence the AttributeError fallback below.
+    try:
+        return _secondary_collator.sort_key(obj)
+    except AttributeError:
+        return secondary_collator().sort_key(obj)
 
 def py_find(pattern, source):
     pos = source.find(pattern)
@@ -77,6 +90,13 @@ def icu_find(collator, pattern, source):
     except TypeError:
         return collator.find(unicode(pattern), unicode(source))
 
+def icu_startswith(collator, a, b):
+    '''Call collator.startswith(a, b), retrying with unicode() coerced arguments on TypeError.'''
+    try:
+        return collator.startswith(a, b)
+    except TypeError:
+        return collator.startswith(unicode(a), unicode(b))
+
 def py_case_sensitive_sort_key(obj):
     if not obj:
         return _none
@@ -180,6 +200,19 @@ def primary_sort_key(val):
     except AttributeError:
         return primary_collator().sort_key(val)
 
+def primary_startswith(a, b):
+    '''Return True iff a starts with b according to the primary collator.
+
+    When _icu_not_ok is set, the lowercased ascii_text() forms of a and b are
+    compared with startswith() instead.'''
+    if _icu_not_ok:
+        from calibre.utils.filenames import ascii_text
+        return ascii_text(a).lower().startswith(ascii_text(b).lower())
+    try:
+        return icu_startswith(_primary_collator, a, b)
+    except AttributeError:
+        return icu_startswith(primary_collator(), a, b)
+
 ################################################################################
 
 def test(): # {{{
@@ -299,8 +332,8 @@ pêché'''
     print
 
     print '\nTesting primary collation'
-    for k, v in {u'pèché': u'peche', u'flüße':u'flusse',
-            u'Štepánek':u'Štepanek'}.iteritems():
+    for k, v in {u'pèché': u'peche', u'flüße':u'Flusse',
+            u'Štepánek':u'ŠtepaneK'}.iteritems():
         if primary_strcmp(k, v) != 0:
             prints('primary_strcmp() failed with %s != %s'%(k, v))
             return
@@ -309,10 +342,12 @@ pêché'''
             return
 
     global _primary_collator
+    orig = _primary_collator
     _primary_collator = _icu.Collator('es')
     if primary_strcmp(u'peña', u'pena') == 0:
         print 'Primary collation in Spanish locale failed'
         return
+    _primary_collator = orig
 
     print '\nTesting contractions'
     c = _icu.Collator('cs')
@@ -322,6 +357,13 @@ pêché'''
         print 'Contractions for the Czech language failed'
         return
 
+    print '\nTesting startswith'
+    p = primary_startswith
+    if (not p('asd', 'asd') or not p('asd', 'A') or
+            not p('x', '')):
+        print 'startswith() failed'
+        return
+
 # }}}
 
 if __name__ == '__main__':