mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Replace use of deprecated ICU unorm.h API
This commit is contained in:
parent
ff952ad851
commit
88e9494e6b
@ -50,7 +50,7 @@ icu_Collator_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "s", &loc)) return NULL;
|
if (!PyArg_ParseTuple(args, "s", &loc)) return NULL;
|
||||||
collator = ucol_open(loc, &status);
|
collator = ucol_open(loc, &status);
|
||||||
if (collator == NULL || U_FAILURE(status)) {
|
if (collator == NULL || U_FAILURE(status)) {
|
||||||
PyErr_SetString(PyExc_Exception, "Failed to create collator.");
|
PyErr_SetString(PyExc_Exception, "Failed to create collator.");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -144,7 +144,7 @@ icu_Collator_sort_key(icu_Collator *self, PyObject *input) {
|
|||||||
UChar *buf = NULL;
|
UChar *buf = NULL;
|
||||||
uint8_t *buf2 = NULL;
|
uint8_t *buf2 = NULL;
|
||||||
PyObject *ans = NULL;
|
PyObject *ans = NULL;
|
||||||
|
|
||||||
buf = python_to_icu(input, &sz, 1);
|
buf = python_to_icu(input, &sz, 1);
|
||||||
if (buf == NULL) return NULL;
|
if (buf == NULL) return NULL;
|
||||||
|
|
||||||
@ -173,7 +173,7 @@ icu_Collator_strcmp(icu_Collator *self, PyObject *args) {
|
|||||||
int32_t asz = 0, bsz = 0;
|
int32_t asz = 0, bsz = 0;
|
||||||
UChar *a = NULL, *b = NULL;
|
UChar *a = NULL, *b = NULL;
|
||||||
UCollationResult res = UCOL_EQUAL;
|
UCollationResult res = UCOL_EQUAL;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "OO", &a_, &b_)) return NULL;
|
if (!PyArg_ParseTuple(args, "OO", &a_, &b_)) return NULL;
|
||||||
|
|
||||||
a = python_to_icu(a_, &asz, 1);
|
a = python_to_icu(a_, &asz, 1);
|
||||||
@ -182,7 +182,7 @@ icu_Collator_strcmp(icu_Collator *self, PyObject *args) {
|
|||||||
if (b == NULL) goto end;
|
if (b == NULL) goto end;
|
||||||
res = ucol_strcoll(self->collator, a, asz, b, bsz);
|
res = ucol_strcoll(self->collator, a, asz, b, bsz);
|
||||||
end:
|
end:
|
||||||
if (a != NULL) free(a);
|
if (a != NULL) free(a);
|
||||||
if (b != NULL) free(b);
|
if (b != NULL) free(b);
|
||||||
|
|
||||||
return (PyErr_Occurred()) ? NULL : Py_BuildValue("i", res);
|
return (PyErr_Occurred()) ? NULL : Py_BuildValue("i", res);
|
||||||
@ -191,7 +191,7 @@ end:
|
|||||||
// Collator.find {{{
|
// Collator.find {{{
|
||||||
static PyObject *
|
static PyObject *
|
||||||
icu_Collator_find(icu_Collator *self, PyObject *args) {
|
icu_Collator_find(icu_Collator *self, PyObject *args) {
|
||||||
#if PY_VERSION_HEX >= 0x03030000
|
#if PY_VERSION_HEX >= 0x03030000
|
||||||
#error Not implemented for python >= 3.3
|
#error Not implemented for python >= 3.3
|
||||||
#endif
|
#endif
|
||||||
PyObject *a_ = NULL, *b_ = NULL;
|
PyObject *a_ = NULL, *b_ = NULL;
|
||||||
@ -199,7 +199,7 @@ icu_Collator_find(icu_Collator *self, PyObject *args) {
|
|||||||
int32_t asz = 0, bsz = 0, pos = -1, length = -1;
|
int32_t asz = 0, bsz = 0, pos = -1, length = -1;
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
UStringSearch *search = NULL;
|
UStringSearch *search = NULL;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "OO", &a_, &b_)) return NULL;
|
if (!PyArg_ParseTuple(args, "OO", &a_, &b_)) return NULL;
|
||||||
|
|
||||||
a = python_to_icu(a_, &asz, 1);
|
a = python_to_icu(a_, &asz, 1);
|
||||||
@ -238,7 +238,7 @@ icu_Collator_contains(icu_Collator *self, PyObject *args) {
|
|||||||
uint8_t found = 0;
|
uint8_t found = 0;
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
UStringSearch *search = NULL;
|
UStringSearch *search = NULL;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "OO", &a_, &b_)) return NULL;
|
if (!PyArg_ParseTuple(args, "OO", &a_, &b_)) return NULL;
|
||||||
|
|
||||||
a = python_to_icu(a_, &asz, 1);
|
a = python_to_icu(a_, &asz, 1);
|
||||||
@ -276,7 +276,7 @@ icu_Collator_contractions(icu_Collator *self, PyObject *args) {
|
|||||||
if (self->contractions == NULL) return PyErr_NoMemory();
|
if (self->contractions == NULL) return PyErr_NoMemory();
|
||||||
self->contractions = ucol_getTailoredSet(self->collator, &status);
|
self->contractions = ucol_getTailoredSet(self->collator, &status);
|
||||||
}
|
}
|
||||||
status = U_ZERO_ERROR;
|
status = U_ZERO_ERROR;
|
||||||
count = uset_getItemCount(self->contractions);
|
count = uset_getItemCount(self->contractions);
|
||||||
|
|
||||||
str = (UChar*)calloc(100, sizeof(UChar));
|
str = (UChar*)calloc(100, sizeof(UChar));
|
||||||
@ -299,7 +299,7 @@ icu_Collator_contractions(icu_Collator *self, PyObject *args) {
|
|||||||
}
|
}
|
||||||
end:
|
end:
|
||||||
if (str != NULL) free(str);
|
if (str != NULL) free(str);
|
||||||
|
|
||||||
return ans;
|
return ans;
|
||||||
} // }}}
|
} // }}}
|
||||||
|
|
||||||
@ -310,7 +310,7 @@ icu_Collator_startswith(icu_Collator *self, PyObject *args) {
|
|||||||
int32_t asz = 0, bsz = 0;
|
int32_t asz = 0, bsz = 0;
|
||||||
UChar *a = NULL, *b = NULL;
|
UChar *a = NULL, *b = NULL;
|
||||||
uint8_t ans = 0;
|
uint8_t ans = 0;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "OO", &a_, &b_)) return NULL;
|
if (!PyArg_ParseTuple(args, "OO", &a_, &b_)) return NULL;
|
||||||
|
|
||||||
a = python_to_icu(a_, &asz, 1);
|
a = python_to_icu(a_, &asz, 1);
|
||||||
@ -320,7 +320,7 @@ icu_Collator_startswith(icu_Collator *self, PyObject *args) {
|
|||||||
|
|
||||||
if (asz < bsz) goto end;
|
if (asz < bsz) goto end;
|
||||||
if (bsz == 0) { ans = 1; goto end; }
|
if (bsz == 0) { ans = 1; goto end; }
|
||||||
|
|
||||||
ans = ucol_equal(self->collator, a, bsz, b, bsz);
|
ans = ucol_equal(self->collator, a, bsz, b, bsz);
|
||||||
|
|
||||||
end:
|
end:
|
||||||
@ -340,7 +340,7 @@ icu_Collator_collation_order(icu_Collator *self, PyObject *a_) {
|
|||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
UCollationElements *iter = NULL;
|
UCollationElements *iter = NULL;
|
||||||
int order = 0, len = -1;
|
int order = 0, len = -1;
|
||||||
|
|
||||||
a = python_to_icu(a_, &asz, 1);
|
a = python_to_icu(a_, &asz, 1);
|
||||||
if (a == NULL) goto end;
|
if (a == NULL) goto end;
|
||||||
|
|
||||||
@ -420,17 +420,17 @@ static PyMethodDef icu_Collator_methods[] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static PyGetSetDef icu_Collator_getsetters[] = {
|
static PyGetSetDef icu_Collator_getsetters[] = {
|
||||||
{(char *)"actual_locale",
|
{(char *)"actual_locale",
|
||||||
(getter)icu_Collator_actual_locale, NULL,
|
(getter)icu_Collator_actual_locale, NULL,
|
||||||
(char *)"Actual locale used by this collator.",
|
(char *)"Actual locale used by this collator.",
|
||||||
NULL},
|
NULL},
|
||||||
|
|
||||||
{(char *)"capsule",
|
{(char *)"capsule",
|
||||||
(getter)icu_Collator_capsule, NULL,
|
(getter)icu_Collator_capsule, NULL,
|
||||||
(char *)"A capsule enclosing the pointer to the ICU collator struct",
|
(char *)"A capsule enclosing the pointer to the ICU collator struct",
|
||||||
NULL},
|
NULL},
|
||||||
|
|
||||||
{(char *)"display_name",
|
{(char *)"display_name",
|
||||||
(getter)icu_Collator_display_name, NULL,
|
(getter)icu_Collator_display_name, NULL,
|
||||||
(char *)"Display name of this collator in English. The name reflects the actual data source used.",
|
(char *)"Display name of this collator in English. The name reflects the actual data source used.",
|
||||||
NULL},
|
NULL},
|
||||||
@ -557,7 +557,7 @@ icu_BreakIterator_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "is", &break_iterator_type, &locale)) return NULL;
|
if (!PyArg_ParseTuple(args, "is", &break_iterator_type, &locale)) return NULL;
|
||||||
break_iterator = ubrk_open(break_iterator_type, locale, NULL, 0, &status);
|
break_iterator = ubrk_open(break_iterator_type, locale, NULL, 0, &status);
|
||||||
if (break_iterator == NULL || U_FAILURE(status)) {
|
if (break_iterator == NULL || U_FAILURE(status)) {
|
||||||
PyErr_SetString(PyExc_ValueError, u_errorName(status));
|
PyErr_SetString(PyExc_ValueError, u_errorName(status));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -577,7 +577,7 @@ icu_BreakIterator_set_text(icu_BreakIterator *self, PyObject *input) {
|
|||||||
int32_t sz = 0;
|
int32_t sz = 0;
|
||||||
UChar *buf = NULL;
|
UChar *buf = NULL;
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
|
|
||||||
buf = python_to_icu(input, &sz, 1);
|
buf = python_to_icu(input, &sz, 1);
|
||||||
if (buf == NULL) return NULL;
|
if (buf == NULL) return NULL;
|
||||||
ubrk_setText(self->break_iterator, buf, sz, &status);
|
ubrk_setText(self->break_iterator, buf, sz, &status);
|
||||||
@ -595,13 +595,13 @@ icu_BreakIterator_set_text(icu_BreakIterator *self, PyObject *input) {
|
|||||||
// BreakIterator.index {{{
|
// BreakIterator.index {{{
|
||||||
static PyObject *
|
static PyObject *
|
||||||
icu_BreakIterator_index(icu_BreakIterator *self, PyObject *token) {
|
icu_BreakIterator_index(icu_BreakIterator *self, PyObject *token) {
|
||||||
#if PY_VERSION_HEX >= 0x03030000
|
#if PY_VERSION_HEX >= 0x03030000
|
||||||
#error Not implemented for python >= 3.3
|
#error Not implemented for python >= 3.3
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
UChar *buf = NULL, *needle = NULL;
|
UChar *buf = NULL, *needle = NULL;
|
||||||
int32_t word_start = 0, p = 0, sz = 0, ans = -1, leading_hyphen = 0, trailing_hyphen = 0;
|
int32_t word_start = 0, p = 0, sz = 0, ans = -1, leading_hyphen = 0, trailing_hyphen = 0;
|
||||||
|
|
||||||
buf = python_to_icu(token, &sz, 1);
|
buf = python_to_icu(token, &sz, 1);
|
||||||
if (buf == NULL) return NULL;
|
if (buf == NULL) return NULL;
|
||||||
if (sz < 1) goto end;
|
if (sz < 1) goto end;
|
||||||
@ -613,7 +613,7 @@ icu_BreakIterator_index(icu_BreakIterator *self, PyObject *token) {
|
|||||||
p = ubrk_first(self->break_iterator);
|
p = ubrk_first(self->break_iterator);
|
||||||
while (p != UBRK_DONE) {
|
while (p != UBRK_DONE) {
|
||||||
word_start = p; p = ubrk_next(self->break_iterator);
|
word_start = p; p = ubrk_next(self->break_iterator);
|
||||||
if (self->type == UBRK_WORD && ubrk_getRuleStatus(self->break_iterator) == UBRK_WORD_NONE)
|
if (self->type == UBRK_WORD && ubrk_getRuleStatus(self->break_iterator) == UBRK_WORD_NONE)
|
||||||
continue; // We are not at the start of a word
|
continue; // We are not at the start of a word
|
||||||
|
|
||||||
if (self->text_len >= word_start + sz && memcmp(self->text + word_start, needle, sz * sizeof(UChar)) == 0) {
|
if (self->text_len >= word_start + sz && memcmp(self->text + word_start, needle, sz * sizeof(UChar)) == 0) {
|
||||||
@ -655,7 +655,7 @@ end:
|
|||||||
// BreakIterator.split2 {{{
|
// BreakIterator.split2 {{{
|
||||||
static PyObject *
|
static PyObject *
|
||||||
icu_BreakIterator_split2(icu_BreakIterator *self, PyObject *args) {
|
icu_BreakIterator_split2(icu_BreakIterator *self, PyObject *args) {
|
||||||
#if PY_VERSION_HEX >= 0x03030000
|
#if PY_VERSION_HEX >= 0x03030000
|
||||||
#error Not implemented for python >= 3.3
|
#error Not implemented for python >= 3.3
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -663,14 +663,14 @@ icu_BreakIterator_split2(icu_BreakIterator *self, PyObject *args) {
|
|||||||
int is_hyphen_sep = 0, leading_hyphen = 0, trailing_hyphen = 0;
|
int is_hyphen_sep = 0, leading_hyphen = 0, trailing_hyphen = 0;
|
||||||
UChar sep = 0;
|
UChar sep = 0;
|
||||||
PyObject *ans = NULL, *temp = NULL, *t = NULL;
|
PyObject *ans = NULL, *temp = NULL, *t = NULL;
|
||||||
|
|
||||||
ans = PyList_New(0);
|
ans = PyList_New(0);
|
||||||
if (ans == NULL) return PyErr_NoMemory();
|
if (ans == NULL) return PyErr_NoMemory();
|
||||||
|
|
||||||
p = ubrk_first(self->break_iterator);
|
p = ubrk_first(self->break_iterator);
|
||||||
while (p != UBRK_DONE) {
|
while (p != UBRK_DONE) {
|
||||||
word_start = p; p = ubrk_next(self->break_iterator);
|
word_start = p; p = ubrk_next(self->break_iterator);
|
||||||
if (self->type == UBRK_WORD && ubrk_getRuleStatus(self->break_iterator) == UBRK_WORD_NONE)
|
if (self->type == UBRK_WORD && ubrk_getRuleStatus(self->break_iterator) == UBRK_WORD_NONE)
|
||||||
continue; // We are not at the start of a word
|
continue; // We are not at the start of a word
|
||||||
sz = (p == UBRK_DONE) ? self->text_len - word_start : p - word_start;
|
sz = (p == UBRK_DONE) ? self->text_len - word_start : p - word_start;
|
||||||
if (sz > 0) {
|
if (sz > 0) {
|
||||||
@ -703,12 +703,12 @@ icu_BreakIterator_split2(icu_BreakIterator *self, PyObject *args) {
|
|||||||
} else {
|
} else {
|
||||||
sz += leading_hyphen + trailing_hyphen;
|
sz += leading_hyphen + trailing_hyphen;
|
||||||
last_sz = sz;
|
last_sz = sz;
|
||||||
temp = Py_BuildValue("ll", (long)(word_start - leading_hyphen), (long)sz);
|
temp = Py_BuildValue("ll", (long)(word_start - leading_hyphen), (long)sz);
|
||||||
if (temp == NULL) {
|
if (temp == NULL) {
|
||||||
Py_DECREF(ans); ans = NULL; break;
|
Py_DECREF(ans); ans = NULL; break;
|
||||||
}
|
}
|
||||||
if (PyList_Append(ans, temp) != 0) {
|
if (PyList_Append(ans, temp) != 0) {
|
||||||
Py_DECREF(temp); Py_DECREF(ans); ans = NULL; break;
|
Py_DECREF(temp); Py_DECREF(ans); ans = NULL; break;
|
||||||
}
|
}
|
||||||
Py_DECREF(temp);
|
Py_DECREF(temp);
|
||||||
}
|
}
|
||||||
@ -912,18 +912,18 @@ icu_get_available_transliterators(PyObject *self, PyObject *args) {
|
|||||||
// character_name {{{
|
// character_name {{{
|
||||||
static PyObject *
|
static PyObject *
|
||||||
icu_character_name(PyObject *self, PyObject *args) {
|
icu_character_name(PyObject *self, PyObject *args) {
|
||||||
char name[512] = {0};
|
char name[512] = {0};
|
||||||
int32_t sz = 0, alias = 0;
|
int32_t sz = 0, alias = 0;
|
||||||
UChar *buf;
|
UChar *buf;
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
PyObject *palias = NULL, *result = NULL, *input = NULL;
|
PyObject *palias = NULL, *result = NULL, *input = NULL;
|
||||||
UChar32 code = 0;
|
UChar32 code = 0;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "O|O", &input, &palias)) return NULL;
|
if (!PyArg_ParseTuple(args, "O|O", &input, &palias)) return NULL;
|
||||||
|
|
||||||
if (palias != NULL && PyObject_IsTrue(palias)) alias = 1;
|
if (palias != NULL && PyObject_IsTrue(palias)) alias = 1;
|
||||||
buf = python_to_icu(input, &sz, 1);
|
buf = python_to_icu(input, &sz, 1);
|
||||||
if (buf == NULL) goto end;
|
if (buf == NULL) goto end;
|
||||||
U16_GET(buf, 0, 0, sz, code);
|
U16_GET(buf, 0, 0, sz, code);
|
||||||
if (alias) {
|
if (alias) {
|
||||||
sz = u_charName(code, U_CHAR_NAME_ALIAS, name, 511, &status);
|
sz = u_charName(code, U_CHAR_NAME_ALIAS, name, 511, &status);
|
||||||
@ -941,16 +941,16 @@ end:
|
|||||||
// character_name_from_code {{{
|
// character_name_from_code {{{
|
||||||
static PyObject *
|
static PyObject *
|
||||||
icu_character_name_from_code(PyObject *self, PyObject *args) {
|
icu_character_name_from_code(PyObject *self, PyObject *args) {
|
||||||
char name[512] = {0};
|
char name[512] = {0};
|
||||||
int32_t sz, alias = 0;
|
int32_t sz, alias = 0;
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
PyObject *palias = NULL, *result = NULL;
|
PyObject *palias = NULL, *result = NULL;
|
||||||
UChar32 code = 0;
|
UChar32 code = 0;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "I|O", &code, &palias)) return NULL;
|
if (!PyArg_ParseTuple(args, "I|O", &code, &palias)) return NULL;
|
||||||
|
|
||||||
if (palias != NULL && PyObject_IsTrue(palias)) alias = 1;
|
if (palias != NULL && PyObject_IsTrue(palias)) alias = 1;
|
||||||
|
|
||||||
if (alias) {
|
if (alias) {
|
||||||
sz = u_charName(code, U_CHAR_NAME_ALIAS, name, 511, &status);
|
sz = u_charName(code, U_CHAR_NAME_ALIAS, name, 511, &status);
|
||||||
} else {
|
} else {
|
||||||
@ -969,7 +969,7 @@ icu_chr(PyObject *self, PyObject *args) {
|
|||||||
UChar32 code = 0;
|
UChar32 code = 0;
|
||||||
UChar buf[5] = {0};
|
UChar buf[5] = {0};
|
||||||
int32_t sz = 0;
|
int32_t sz = 0;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "I", &code)) return NULL;
|
if (!PyArg_ParseTuple(args, "I", &code)) return NULL;
|
||||||
|
|
||||||
u_strFromUTF32(buf, 4, &sz, &code, 1, &status);
|
u_strFromUTF32(buf, 4, &sz, &code, 1, &status);
|
||||||
@ -996,26 +996,50 @@ icu_ord_string(PyObject *self, PyObject *input) {
|
|||||||
end:
|
end:
|
||||||
if (input_buf != NULL) free(input_buf);
|
if (input_buf != NULL) free(input_buf);
|
||||||
return ans;
|
return ans;
|
||||||
|
|
||||||
} // }}}
|
} // }}}
|
||||||
|
|
||||||
// normalize {{{
|
// normalize {{{
|
||||||
|
typedef enum { NFC, NFKC, NFD, NFKD } NORM_MODES;
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
icu_normalize(PyObject *self, PyObject *args) {
|
icu_normalize(PyObject *self, PyObject *args) {
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
int32_t sz = 0, mode = UNORM_DEFAULT, cap = 0, rsz = 0;
|
int32_t sz = 0, cap = 0, rsz = 0;
|
||||||
|
NORM_MODES mode;
|
||||||
UChar *dest = NULL, *source = NULL;
|
UChar *dest = NULL, *source = NULL;
|
||||||
PyObject *ret = NULL, *src = NULL;
|
PyObject *ret = NULL, *src = NULL;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "iO", &mode, &src)) return NULL;
|
if (!PyArg_ParseTuple(args, "iO", &mode, &src)) return NULL;
|
||||||
|
const UNormalizer2 *n = NULL;
|
||||||
|
switch (mode) {
|
||||||
|
case NFC:
|
||||||
|
n = unorm2_getNFCInstance(&status);
|
||||||
|
break;
|
||||||
|
case NFKC:
|
||||||
|
n = unorm2_getNFKCInstance(&status);
|
||||||
|
break;
|
||||||
|
case NFD:
|
||||||
|
n = unorm2_getNFDInstance(&status);
|
||||||
|
break;
|
||||||
|
case NFKD:
|
||||||
|
n = unorm2_getNFKDInstance(&status);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (U_FAILURE(status)) {
|
||||||
|
PyErr_SetString(PyExc_ValueError, u_errorName(status));
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
|
||||||
source = python_to_icu(src, &sz, 1);
|
source = python_to_icu(src, &sz, 1);
|
||||||
if (source == NULL) goto end;
|
if (source == NULL) goto end;
|
||||||
cap = 2 * sz;
|
cap = 2 * sz;
|
||||||
dest = (UChar*) calloc(cap, sizeof(UChar));
|
dest = (UChar*) calloc(cap, sizeof(UChar));
|
||||||
if (dest == NULL) { PyErr_NoMemory(); goto end; }
|
if (dest == NULL) { PyErr_NoMemory(); goto end; }
|
||||||
|
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
rsz = unorm_normalize(source, sz, (UNormalizationMode)mode, 0, dest, cap, &status);
|
rsz = unorm2_normalize(n, source, sz, dest, cap, &status);
|
||||||
if (status == U_BUFFER_OVERFLOW_ERROR) {
|
if (status == U_BUFFER_OVERFLOW_ERROR) {
|
||||||
cap *= 2;
|
cap *= 2;
|
||||||
dest = (UChar*) realloc(dest, cap*sizeof(UChar));
|
dest = (UChar*) realloc(dest, cap*sizeof(UChar));
|
||||||
@ -1029,7 +1053,7 @@ icu_normalize(PyObject *self, PyObject *args) {
|
|||||||
PyErr_SetString(PyExc_ValueError, u_errorName(status));
|
PyErr_SetString(PyExc_ValueError, u_errorName(status));
|
||||||
goto end;
|
goto end;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = icu_to_python(dest, rsz);
|
ret = icu_to_python(dest, rsz);
|
||||||
|
|
||||||
end:
|
end:
|
||||||
@ -1044,7 +1068,7 @@ icu_roundtrip(PyObject *self, PyObject *src) {
|
|||||||
int32_t sz = 0;
|
int32_t sz = 0;
|
||||||
UChar *icu = NULL;
|
UChar *icu = NULL;
|
||||||
PyObject *ret = NULL;
|
PyObject *ret = NULL;
|
||||||
|
|
||||||
icu = python_to_icu(src, &sz, 1);
|
icu = python_to_icu(src, &sz, 1);
|
||||||
if (icu != NULL) {
|
if (icu != NULL) {
|
||||||
ret = icu_to_python(icu, sz);
|
ret = icu_to_python(icu, sz);
|
||||||
@ -1071,7 +1095,7 @@ icu_break_iterator_locales(PyObject *self, PyObject *args) {
|
|||||||
PyTuple_SET_ITEM(ret, i, t);
|
PyTuple_SET_ITEM(ret, i, t);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
} // }}}
|
} // }}}
|
||||||
|
|
||||||
@ -1080,7 +1104,7 @@ static PyObject *
|
|||||||
icu_string_length(PyObject *self, PyObject *src) {
|
icu_string_length(PyObject *self, PyObject *src) {
|
||||||
int32_t sz = 0;
|
int32_t sz = 0;
|
||||||
UChar *icu = NULL;
|
UChar *icu = NULL;
|
||||||
|
|
||||||
icu = python_to_icu(src, &sz, 1);
|
icu = python_to_icu(src, &sz, 1);
|
||||||
if (icu == NULL) return NULL;
|
if (icu == NULL) return NULL;
|
||||||
sz = u_countChar32(icu, sz);
|
sz = u_countChar32(icu, sz);
|
||||||
@ -1091,7 +1115,7 @@ icu_string_length(PyObject *self, PyObject *src) {
|
|||||||
// utf16_length {{{
|
// utf16_length {{{
|
||||||
static PyObject *
|
static PyObject *
|
||||||
icu_utf16_length(PyObject *self, PyObject *src) {
|
icu_utf16_length(PyObject *self, PyObject *src) {
|
||||||
#if PY_VERSION_HEX >= 0x03030000
|
#if PY_VERSION_HEX >= 0x03030000
|
||||||
#error Not implemented for python >= 3.3
|
#error Not implemented for python >= 3.3
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1100,7 +1124,7 @@ icu_utf16_length(PyObject *self, PyObject *src) {
|
|||||||
int32_t i = 0, t = 0;
|
int32_t i = 0, t = 0;
|
||||||
Py_UNICODE *data = NULL;
|
Py_UNICODE *data = NULL;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (!PyUnicode_Check(src)) { PyErr_SetString(PyExc_TypeError, "Must be a unicode object"); return NULL; }
|
if (!PyUnicode_Check(src)) { PyErr_SetString(PyExc_TypeError, "Must be a unicode object"); return NULL; }
|
||||||
sz = (int32_t)PyUnicode_GET_SIZE(src);
|
sz = (int32_t)PyUnicode_GET_SIZE(src);
|
||||||
#ifdef Py_UNICODE_WIDE
|
#ifdef Py_UNICODE_WIDE
|
||||||
@ -1135,39 +1159,39 @@ static PyMethodDef icu_methods[] = {
|
|||||||
"get_available_transliterators() -> Return list of available transliterators. This list is rather limited on OS X."
|
"get_available_transliterators() -> Return list of available transliterators. This list is rather limited on OS X."
|
||||||
},
|
},
|
||||||
|
|
||||||
{"character_name", icu_character_name, METH_VARARGS,
|
{"character_name", icu_character_name, METH_VARARGS,
|
||||||
"character_name(char, alias=False) -> Return name for the first character in char, which must be a unicode string."
|
"character_name(char, alias=False) -> Return name for the first character in char, which must be a unicode string."
|
||||||
},
|
},
|
||||||
|
|
||||||
{"character_name_from_code", icu_character_name_from_code, METH_VARARGS,
|
{"character_name_from_code", icu_character_name_from_code, METH_VARARGS,
|
||||||
"character_name_from_code(code, alias=False) -> Return the name for the specified unicode code point"
|
"character_name_from_code(code, alias=False) -> Return the name for the specified unicode code point"
|
||||||
},
|
},
|
||||||
|
|
||||||
{"chr", icu_chr, METH_VARARGS,
|
{"chr", icu_chr, METH_VARARGS,
|
||||||
"chr(code) -> Return a python unicode string corresponding to the specified character code. The string can have length 1 or 2 (for non BMP codes on narrow python builds)."
|
"chr(code) -> Return a python unicode string corresponding to the specified character code. The string can have length 1 or 2 (for non BMP codes on narrow python builds)."
|
||||||
},
|
},
|
||||||
|
|
||||||
{"ord_string", icu_ord_string, METH_O,
|
{"ord_string", icu_ord_string, METH_O,
|
||||||
"ord_string(code) -> Convert a python unicode string to a tuple of unicode codepoints."
|
"ord_string(code) -> Convert a python unicode string to a tuple of unicode codepoints."
|
||||||
},
|
},
|
||||||
|
|
||||||
{"normalize", icu_normalize, METH_VARARGS,
|
{"normalize", icu_normalize, METH_VARARGS,
|
||||||
"normalize(mode, unicode_text) -> Return a python unicode string which is normalized in the specified mode."
|
"normalize(mode, unicode_text) -> Return a python unicode string which is normalized in the specified mode."
|
||||||
},
|
},
|
||||||
|
|
||||||
{"roundtrip", icu_roundtrip, METH_O,
|
{"roundtrip", icu_roundtrip, METH_O,
|
||||||
"roundtrip(string) -> Roundtrip a unicode object from python to ICU back to python (useful for testing)"
|
"roundtrip(string) -> Roundtrip a unicode object from python to ICU back to python (useful for testing)"
|
||||||
},
|
},
|
||||||
|
|
||||||
{"available_locales_for_break_iterator", icu_break_iterator_locales, METH_NOARGS,
|
{"available_locales_for_break_iterator", icu_break_iterator_locales, METH_NOARGS,
|
||||||
"available_locales_for_break_iterator() -> Return tuple of all available locales for the BreakIterator"
|
"available_locales_for_break_iterator() -> Return tuple of all available locales for the BreakIterator"
|
||||||
},
|
},
|
||||||
|
|
||||||
{"string_length", icu_string_length, METH_O,
|
{"string_length", icu_string_length, METH_O,
|
||||||
"string_length(string) -> Return the length of a string (number of unicode code points in the string). Useful on narrow python builds where len() returns an incorrect answer if the string contains surrogate pairs."
|
"string_length(string) -> Return the length of a string (number of unicode code points in the string). Useful on narrow python builds where len() returns an incorrect answer if the string contains surrogate pairs."
|
||||||
},
|
},
|
||||||
|
|
||||||
{"utf16_length", icu_utf16_length, METH_O,
|
{"utf16_length", icu_utf16_length, METH_O,
|
||||||
"utf16_length(string) -> Return the length of a string (number of UTF-16 code points in the string). Useful on wide python builds where len() returns an incorrect answer if the string contains surrogate pairs."
|
"utf16_length(string) -> Return the length of a string (number of UTF-16 code points in the string). Useful on wide python builds where len() returns an incorrect answer if the string contains surrogate pairs."
|
||||||
},
|
},
|
||||||
|
|
||||||
@ -1177,7 +1201,7 @@ static PyMethodDef icu_methods[] = {
|
|||||||
#define ADDUCONST(x) PyModule_AddIntConstant(m, #x, x)
|
#define ADDUCONST(x) PyModule_AddIntConstant(m, #x, x)
|
||||||
|
|
||||||
CALIBRE_MODINIT_FUNC
|
CALIBRE_MODINIT_FUNC
|
||||||
initicu(void)
|
initicu(void)
|
||||||
{
|
{
|
||||||
PyObject* m;
|
PyObject* m;
|
||||||
UVersionInfo ver, uver;
|
UVersionInfo ver, uver;
|
||||||
@ -1232,13 +1256,10 @@ initicu(void)
|
|||||||
ADDUCONST(UCOL_LOWER_FIRST);
|
ADDUCONST(UCOL_LOWER_FIRST);
|
||||||
ADDUCONST(UCOL_UPPER_FIRST);
|
ADDUCONST(UCOL_UPPER_FIRST);
|
||||||
|
|
||||||
ADDUCONST(UNORM_NONE);
|
ADDUCONST(NFD);
|
||||||
ADDUCONST(UNORM_NFD);
|
ADDUCONST(NFKD);
|
||||||
ADDUCONST(UNORM_NFKD);
|
ADDUCONST(NFC);
|
||||||
ADDUCONST(UNORM_NFC);
|
ADDUCONST(NFKC);
|
||||||
ADDUCONST(UNORM_DEFAULT);
|
|
||||||
ADDUCONST(UNORM_NFKC);
|
|
||||||
ADDUCONST(UNORM_FCD);
|
|
||||||
|
|
||||||
ADDUCONST(UPPER_CASE);
|
ADDUCONST(UPPER_CASE);
|
||||||
ADDUCONST(LOWER_CASE);
|
ADDUCONST(LOWER_CASE);
|
||||||
|
@ -28,7 +28,7 @@ if _icu is None:
|
|||||||
raise RuntimeError('Failed to load icu with error: %s' % err)
|
raise RuntimeError('Failed to load icu with error: %s' % err)
|
||||||
del err
|
del err
|
||||||
icu_unicode_version = getattr(_icu, 'unicode_version', None)
|
icu_unicode_version = getattr(_icu, 'unicode_version', None)
|
||||||
_nmodes = {m:getattr(_icu, 'UNORM_'+m, None) for m in ('NFC', 'NFD', 'NFKC', 'NFKD', 'NONE', 'DEFAULT', 'FCD')}
|
_nmodes = {m:getattr(_icu, m) for m in ('NFC', 'NFD', 'NFKC', 'NFKD')}
|
||||||
|
|
||||||
# Ensure that the python internal filesystem and default encodings are not ASCII
|
# Ensure that the python internal filesystem and default encodings are not ASCII
|
||||||
|
|
||||||
@ -38,6 +38,8 @@ def is_ascii(name):
|
|||||||
return codecs.lookup(name).name == b'ascii'
|
return codecs.lookup(name).name == b'ascii'
|
||||||
except (TypeError, LookupError):
|
except (TypeError, LookupError):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if is_ascii(sys.getdefaultencoding()):
|
if is_ascii(sys.getdefaultencoding()):
|
||||||
_icu.set_default_encoding(b'utf-8')
|
_icu.set_default_encoding(b'utf-8')
|
||||||
@ -119,6 +121,7 @@ def case_sensitive_collator():
|
|||||||
# function implementations based on different collators, to allow lazy loading
|
# function implementations based on different collators, to allow lazy loading
|
||||||
# of collators, with maximum runtime performance
|
# of collators, with maximum runtime performance
|
||||||
|
|
||||||
|
|
||||||
_sort_key_template = '''
|
_sort_key_template = '''
|
||||||
def {name}(obj):
|
def {name}(obj):
|
||||||
try:
|
try:
|
||||||
@ -222,6 +225,7 @@ def capitalize(x):
|
|||||||
except (IndexError, TypeError, AttributeError):
|
except (IndexError, TypeError, AttributeError):
|
||||||
return x
|
return x
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
swapcase = _icu.swap_case
|
swapcase = _icu.swap_case
|
||||||
except AttributeError: # For people running from source
|
except AttributeError: # For people running from source
|
||||||
@ -300,6 +304,7 @@ def partition_by_first_letter(items, reverse=False, key=lambda x:x):
|
|||||||
ans[last_c] = [item]
|
ans[last_c] = [item]
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
|
||||||
# Return the number of unicode codepoints in a string
|
# Return the number of unicode codepoints in a string
|
||||||
string_length = _icu.string_length if is_narrow_build else len
|
string_length = _icu.string_length if is_narrow_build else len
|
||||||
|
|
||||||
@ -311,4 +316,3 @@ utf16_length = len if is_narrow_build else _icu.utf16_length
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
from calibre.utils.icu_test import run
|
from calibre.utils.icu_test import run
|
||||||
run(verbosity=4)
|
run(verbosity=4)
|
||||||
|
|
||||||
|
@ -19,7 +19,7 @@
|
|||||||
#include <unicode/ustring.h>
|
#include <unicode/ustring.h>
|
||||||
#include <unicode/usearch.h>
|
#include <unicode/usearch.h>
|
||||||
#include <unicode/utrans.h>
|
#include <unicode/utrans.h>
|
||||||
#include <unicode/unorm.h>
|
#include <unicode/unorm2.h>
|
||||||
#include <unicode/ubrk.h>
|
#include <unicode/ubrk.h>
|
||||||
|
|
||||||
#if PY_VERSION_HEX >= 0x03030000
|
#if PY_VERSION_HEX >= 0x03030000
|
||||||
|
Loading…
x
Reference in New Issue
Block a user