diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c
index 79a888f272..51d9ac25ba 100644
--- a/src/calibre/utils/icu.c
+++ b/src/calibre/utils/icu.c
@@ -131,7 +131,7 @@ icu_Collator_sort_key(icu_Collator *self, PyObject *args, PyObject *kwargs) {
     if (ans == NULL) return PyErr_NoMemory();
 
     return ans;
-}
+} // }}}
 
 // Collator.strcmp {{{
 static PyObject *
@@ -162,7 +162,8 @@ icu_Collator_strcmp(icu_Collator *self, PyObject *args, PyObject *kwargs) {
     free(a); free(b);
 
     return Py_BuildValue("i", res);
-}
+} // }}}
+
 
 
 static PyMethodDef icu_Collator_methods[] = {
@@ -242,7 +243,110 @@ static PyTypeObject icu_CollatorType = { // {{{
 
 
 // Module initialization {{{
+// Case conversion helper {{{
+typedef enum {
+    ICU_CASE_UPPER,
+    ICU_CASE_LOWER,
+    ICU_CASE_TITLE
+} icu_case_op;
+
+/*
+ * Shared implementation of upper(), lower() and title().
+ *
+ * args is the (locale, text) tuple received from Python. The text is case
+ * mapped by ICU using the rules of the named locale and returned as a new
+ * unicode object. If ICU reports an error, or the result is empty, the text
+ * is returned unchanged.
+ *
+ * Returns a new reference, or NULL with an exception set. All buffers are
+ * released on the single exit path, so error returns do not leak memory.
+ */
+static PyObject *
+icu_change_case(PyObject *args, icu_case_op op) {
+    char *input = NULL, *utf8 = NULL;
+    const char *loc = NULL, *ans = NULL;
+    size_t sz;
+    UChar *src = NULL, *cased = NULL;
+    PyObject *ret = NULL;
+    UErrorCode status = U_ZERO_ERROR;
+
+    // The "es" format allocates input; it must be released with PyMem_Free()
+    if (!PyArg_ParseTuple(args, "ses", &loc, "UTF-8", &input)) return NULL;
+
+    sz = strlen(input);
+    ans = input; // returned as is if ICU cannot convert the text
+
+    src = (UChar*)calloc(sz*4 + 1, sizeof(UChar));
+    cased = (UChar*)calloc(sz*8 + 1, sizeof(UChar));
+    if (src == NULL || cased == NULL) { PyErr_NoMemory(); goto end; }
+
+    // ICU calls are no-ops once status holds a failure code, so one check
+    // after the whole sequence is enough.
+    u_strFromUTF8(src, sz*4, NULL, input, sz, &status);
+    switch (op) {
+        case ICU_CASE_UPPER:
+            u_strToUpper(cased, sz*8, src, -1, loc, &status);
+            break;
+        case ICU_CASE_LOWER:
+            u_strToLower(cased, sz*8, src, -1, loc, &status);
+            break;
+        case ICU_CASE_TITLE:
+            u_strToTitle(cased, sz*8, src, -1, NULL, loc, &status);
+            break;
+    }
+
+    sz = u_strlen(cased);
+    if (U_SUCCESS(status) && sz > 0) {
+        utf8 = (char*)calloc(sz*5 + 1, sizeof(char));
+        if (utf8 == NULL) { PyErr_NoMemory(); goto end; }
+        u_strToUTF8(utf8, sz*5, NULL, cased, -1, &status);
+        if (U_SUCCESS(status)) ans = utf8;
+    }
+
+    // On failure the decoder has already set the exception to propagate
+    ret = PyUnicode_DecodeUTF8(ans, strlen(ans), "replace");
+
+end:
+    free(src);
+    free(cased);
+    free(utf8);
+    PyMem_Free(input);
+    return ret;
+} // }}}
+
+// upper {{{
+static PyObject *
+icu_upper(PyObject *self, PyObject *args) {
+    return icu_change_case(args, ICU_CASE_UPPER);
+} // }}}
+
+// lower {{{
+static PyObject *
+icu_lower(PyObject *self, PyObject *args) {
+    return icu_change_case(args, ICU_CASE_LOWER);
+} // }}}
+
+// title {{{
+static PyObject *
+icu_title(PyObject *self, PyObject *args) {
+    return icu_change_case(args, ICU_CASE_TITLE);
+} // }}}
+
+
+
 static PyMethodDef icu_methods[] = {
+    {"upper", icu_upper, METH_VARARGS,
+        "upper(locale, unicode object) -> upper cased unicode object using locale rules."
+    },
+
+    {"lower", icu_lower, METH_VARARGS,
+        "lower(locale, unicode object) -> lower cased unicode object using locale rules."
+    },
+
+    {"title", icu_title, METH_VARARGS,
+        "title(locale, unicode object) -> Title cased unicode object using locale rules."
+    },
+
     {NULL}  /* Sentinel */
 };
 
diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py
index 5251380973..398d2fe75c 100644
--- a/src/calibre/utils/icu.py
+++ b/src/calibre/utils/icu.py
@@ -10,10 +10,18 @@ from functools import partial
 
 from calibre.constants import plugins
 _icu = _collator = None
+_locale = None
 
 _none = u''
 _none2 = b''
 
+def get_locale():
+    global _locale
+    if _locale is None:
+        from calibre.utils.localization import get_lang
+        _locale = get_lang()
+    return _locale
+
 def load_icu():
     global _icu
     if _icu is None:
@@ -28,11 +36,10 @@ def load_icu():
 
 def load_collator():
     global _collator
-    from calibre.utils.localization import get_lang
     if _collator is None:
         icu = load_icu()
         if icu is not None:
-            _collator = icu.Collator(get_lang())
+            _collator = icu.Collator(get_locale())
     return _collator
 
 
@@ -76,6 +83,13 @@ case_sensitive_sort_key = py_case_sensitive_sort_key if _icu_not_ok else \
         icu_case_sensitive_sort_key
 
 case_sensitive_strcmp = cmp if _icu_not_ok else icu_case_sensitive_strcmp
+upper = (lambda s: s.upper()) if _icu_not_ok else \
+    partial(_icu.upper, get_locale())
+lower = (lambda s: s.lower()) if _icu_not_ok else \
+    partial(_icu.lower, get_locale())
+title_case = (lambda s: s.title()) if _icu_not_ok else \
+    partial(_icu.title, get_locale())
+
 
 def test(): # {{{
     # Data {{{
@@ -188,5 +202,13 @@ pêché'''
         print 'French failed (note that French fails with icu < 4.6 i.e. on windows and OS X)'
         return
     test_strcmp(german + french)
+
+    print '\nTesting case transforms in current locale'
+    for x in ('a', 'Alice\'s code'):
+        print 'Upper:', x, '->', 'py:', x.upper().encode('utf-8'), 'icu:', upper(x).encode('utf-8')
+        print 'Lower:', x, '->', 'py:', x.lower().encode('utf-8'), 'icu:', lower(x).encode('utf-8')
+        print 'Title:', x, '->', 'py:', x.title().encode('utf-8'), 'icu:', title_case(x).encode('utf-8')
+        print
+
 # }}}
 