Replace use of deprecated ICU unorm.h API

2025-07-09 03:04:10 -04:00 · 2018-05-01 09:48:44 +05:30 · 2018-05-01 09:48:44 +05:30 · 88e9494e6b
commit 88e9494e6b
parent ff952ad851
3 changed files with 92 additions and 67 deletions
--- a/src/calibre/utils/icu.c
+++ b/src/calibre/utils/icu.c
@ -1000,22 +1000,46 @@ end:
 } // }}}

 // normalize {{{
+typedef enum { NFC, NFKC, NFD, NFKD } NORM_MODES;  // normalization forms dispatched by the switch in icu_normalize(); also registered via ADDUCONST in initicu()
+
 static PyObject *
 icu_normalize(PyObject *self, PyObject *args) {
    UErrorCode status = U_ZERO_ERROR;
-    int32_t sz = 0, mode = UNORM_DEFAULT, cap = 0, rsz = 0;
+    int32_t sz = 0, cap = 0, rsz = 0;
+    NORM_MODES mode;
    UChar *dest = NULL, *source = NULL;
    PyObject *ret = NULL, *src = NULL;

    if (!PyArg_ParseTuple(args, "iO", &mode, &src)) return NULL;
+    const UNormalizer2 *n = NULL;
+    switch (mode) {
+        case NFC:
+            n = unorm2_getNFCInstance(&status);
+            break;
+        case NFKC:
+            n = unorm2_getNFKCInstance(&status);
+            break;
+        case NFD:
+            n = unorm2_getNFDInstance(&status);
+            break;
+        case NFKD:
+            n = unorm2_getNFKDInstance(&status);
+            break;
+    }
+    if (U_FAILURE(status)) {
+        PyErr_SetString(PyExc_ValueError, u_errorName(status));
+        goto end;
+    }
+
    source = python_to_icu(src, &sz, 1);
    if (source == NULL) goto end;
    cap = 2 * sz;
    dest = (UChar*) calloc(cap, sizeof(UChar));
    if (dest == NULL) { PyErr_NoMemory(); goto end; }

+
    while (1) {
-        rsz = unorm_normalize(source, sz, (UNormalizationMode)mode, 0, dest, cap, &status);
+        rsz = unorm2_normalize(n, source, sz, dest, cap, &status);
        if (status == U_BUFFER_OVERFLOW_ERROR) {
            cap *= 2;
            dest = (UChar*) realloc(dest, cap*sizeof(UChar));
@ -1232,13 +1256,10 @@ initicu(void)
    ADDUCONST(UCOL_LOWER_FIRST);
    ADDUCONST(UCOL_UPPER_FIRST);

-    ADDUCONST(UNORM_NONE);
-    ADDUCONST(UNORM_NFD);
-    ADDUCONST(UNORM_NFKD);
-    ADDUCONST(UNORM_NFC);
-    ADDUCONST(UNORM_DEFAULT);
-    ADDUCONST(UNORM_NFKC);
-    ADDUCONST(UNORM_FCD);
+    ADDUCONST(NFD);
+    ADDUCONST(NFKD);
+    ADDUCONST(NFC);
+    ADDUCONST(NFKC);

    ADDUCONST(UPPER_CASE);
    ADDUCONST(LOWER_CASE);
--- a/src/calibre/utils/icu.py
+++ b/src/calibre/utils/icu.py
@ -28,7 +28,7 @@ if _icu is None:
    raise RuntimeError('Failed to load icu with error: %s' % err)
 del err
 icu_unicode_version = getattr(_icu, 'unicode_version', None)
-_nmodes = {m:getattr(_icu, 'UNORM_'+m, None) for m in ('NFC', 'NFD', 'NFKC', 'NFKD', 'NONE', 'DEFAULT', 'FCD')}
+_nmodes = {m:getattr(_icu, m) for m in ('NFC', 'NFD', 'NFKC', 'NFKD')}  # form name -> mode constant read from _icu; no default, so a missing attribute raises AttributeError

 # Ensure that the python internal filesystem and default encodings are not ASCII

@ -38,6 +38,8 @@ def is_ascii(name):
        return codecs.lookup(name).name == b'ascii'
    except (TypeError, LookupError):
        return True
+
+
 try:
    if is_ascii(sys.getdefaultencoding()):
        _icu.set_default_encoding(b'utf-8')
@ -119,6 +121,7 @@ def case_sensitive_collator():
 # function implementations based on different collators, to allow lazy loading
 # of collators, with maximum runtime performance

+
 _sort_key_template = '''
 def {name}(obj):
    try:
@ -222,6 +225,7 @@ def capitalize(x):
    except (IndexError, TypeError, AttributeError):
        return x

+
 try:
    swapcase = _icu.swap_case
 except AttributeError:  # For people running from source
@ -300,6 +304,7 @@ def partition_by_first_letter(items, reverse=False, key=lambda x:x):
            ans[last_c] = [item]
    return ans

+
 # Return the number of unicode codepoints in a string
 string_length = _icu.string_length if is_narrow_build else len

@ -311,4 +316,3 @@ utf16_length = len if is_narrow_build else _icu.utf16_length
 if __name__ == '__main__':
    from calibre.utils.icu_test import run
    run(verbosity=4)
-
--- a/src/calibre/utils/icu_calibre_utils.h
+++ b/src/calibre/utils/icu_calibre_utils.h
@ -19,7 +19,7 @@
 #include <unicode/ustring.h>
 #include <unicode/usearch.h>
 #include <unicode/utrans.h>
-#include <unicode/unorm.h>
+#include <unicode/unorm2.h>
 #include <unicode/ubrk.h>

 #if PY_VERSION_HEX >= 0x03030000