Replace use of deprecated ICU unorm.h API

This commit is contained in:
Kovid Goyal 2018-05-01 09:48:44 +05:30
parent ff952ad851
commit 88e9494e6b
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 92 additions and 67 deletions

View File

@@ -1000,22 +1000,46 @@ end:
} // }}} } // }}}
// normalize {{{ // normalize {{{
typedef enum { NFC, NFKC, NFD, NFKD } NORM_MODES;
static PyObject * static PyObject *
icu_normalize(PyObject *self, PyObject *args) { icu_normalize(PyObject *self, PyObject *args) {
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
int32_t sz = 0, mode = UNORM_DEFAULT, cap = 0, rsz = 0; int32_t sz = 0, cap = 0, rsz = 0;
NORM_MODES mode;
UChar *dest = NULL, *source = NULL; UChar *dest = NULL, *source = NULL;
PyObject *ret = NULL, *src = NULL; PyObject *ret = NULL, *src = NULL;
if (!PyArg_ParseTuple(args, "iO", &mode, &src)) return NULL; if (!PyArg_ParseTuple(args, "iO", &mode, &src)) return NULL;
const UNormalizer2 *n = NULL;
switch (mode) {
case NFC:
n = unorm2_getNFCInstance(&status);
break;
case NFKC:
n = unorm2_getNFKCInstance(&status);
break;
case NFD:
n = unorm2_getNFDInstance(&status);
break;
case NFKD:
n = unorm2_getNFKDInstance(&status);
break;
}
if (U_FAILURE(status)) {
PyErr_SetString(PyExc_ValueError, u_errorName(status));
goto end;
}
source = python_to_icu(src, &sz, 1); source = python_to_icu(src, &sz, 1);
if (source == NULL) goto end; if (source == NULL) goto end;
cap = 2 * sz; cap = 2 * sz;
dest = (UChar*) calloc(cap, sizeof(UChar)); dest = (UChar*) calloc(cap, sizeof(UChar));
if (dest == NULL) { PyErr_NoMemory(); goto end; } if (dest == NULL) { PyErr_NoMemory(); goto end; }
while (1) { while (1) {
rsz = unorm_normalize(source, sz, (UNormalizationMode)mode, 0, dest, cap, &status); rsz = unorm2_normalize(n, source, sz, dest, cap, &status);
if (status == U_BUFFER_OVERFLOW_ERROR) { if (status == U_BUFFER_OVERFLOW_ERROR) {
cap *= 2; cap *= 2;
dest = (UChar*) realloc(dest, cap*sizeof(UChar)); dest = (UChar*) realloc(dest, cap*sizeof(UChar));
@@ -1232,13 +1256,10 @@ initicu(void)
ADDUCONST(UCOL_LOWER_FIRST); ADDUCONST(UCOL_LOWER_FIRST);
ADDUCONST(UCOL_UPPER_FIRST); ADDUCONST(UCOL_UPPER_FIRST);
ADDUCONST(UNORM_NONE); ADDUCONST(NFD);
ADDUCONST(UNORM_NFD); ADDUCONST(NFKD);
ADDUCONST(UNORM_NFKD); ADDUCONST(NFC);
ADDUCONST(UNORM_NFC); ADDUCONST(NFKC);
ADDUCONST(UNORM_DEFAULT);
ADDUCONST(UNORM_NFKC);
ADDUCONST(UNORM_FCD);
ADDUCONST(UPPER_CASE); ADDUCONST(UPPER_CASE);
ADDUCONST(LOWER_CASE); ADDUCONST(LOWER_CASE);

View File

@@ -28,7 +28,7 @@ if _icu is None:
raise RuntimeError('Failed to load icu with error: %s' % err) raise RuntimeError('Failed to load icu with error: %s' % err)
del err del err
icu_unicode_version = getattr(_icu, 'unicode_version', None) icu_unicode_version = getattr(_icu, 'unicode_version', None)
_nmodes = {m:getattr(_icu, 'UNORM_'+m, None) for m in ('NFC', 'NFD', 'NFKC', 'NFKD', 'NONE', 'DEFAULT', 'FCD')} _nmodes = {m:getattr(_icu, m) for m in ('NFC', 'NFD', 'NFKC', 'NFKD')}
# Ensure that the python internal filesystem and default encodings are not ASCII # Ensure that the python internal filesystem and default encodings are not ASCII
@@ -38,6 +38,8 @@ def is_ascii(name):
return codecs.lookup(name).name == b'ascii' return codecs.lookup(name).name == b'ascii'
except (TypeError, LookupError): except (TypeError, LookupError):
return True return True
try: try:
if is_ascii(sys.getdefaultencoding()): if is_ascii(sys.getdefaultencoding()):
_icu.set_default_encoding(b'utf-8') _icu.set_default_encoding(b'utf-8')
@@ -119,6 +121,7 @@ def case_sensitive_collator():
# function implementations based on different collators, to allow lazy loading # function implementations based on different collators, to allow lazy loading
# of collators, with maximum runtime performance # of collators, with maximum runtime performance
_sort_key_template = ''' _sort_key_template = '''
def {name}(obj): def {name}(obj):
try: try:
@@ -222,6 +225,7 @@ def capitalize(x):
except (IndexError, TypeError, AttributeError): except (IndexError, TypeError, AttributeError):
return x return x
try: try:
swapcase = _icu.swap_case swapcase = _icu.swap_case
except AttributeError: # For people running from source except AttributeError: # For people running from source
@@ -300,6 +304,7 @@ def partition_by_first_letter(items, reverse=False, key=lambda x:x):
ans[last_c] = [item] ans[last_c] = [item]
return ans return ans
# Return the number of unicode codepoints in a string # Return the number of unicode codepoints in a string
string_length = _icu.string_length if is_narrow_build else len string_length = _icu.string_length if is_narrow_build else len
@@ -311,4 +316,3 @@ utf16_length = len if is_narrow_build else _icu.utf16_length
if __name__ == '__main__': if __name__ == '__main__':
from calibre.utils.icu_test import run from calibre.utils.icu_test import run
run(verbosity=4) run(verbosity=4)

View File

@@ -19,7 +19,7 @@
#include <unicode/ustring.h> #include <unicode/ustring.h>
#include <unicode/usearch.h> #include <unicode/usearch.h>
#include <unicode/utrans.h> #include <unicode/utrans.h>
#include <unicode/unorm.h> #include <unicode/unorm2.h>
#include <unicode/ubrk.h> #include <unicode/ubrk.h>
#if PY_VERSION_HEX >= 0x03030000 #if PY_VERSION_HEX >= 0x03030000