mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Replace use of deprecated ICU unorm.h API
This commit is contained in:
parent
ff952ad851
commit
88e9494e6b
@ -1000,22 +1000,46 @@ end:
|
|||||||
} // }}}
|
} // }}}
|
||||||
|
|
||||||
// normalize {{{
|
// normalize {{{
|
||||||
|
typedef enum { NFC, NFKC, NFD, NFKD } NORM_MODES;
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
icu_normalize(PyObject *self, PyObject *args) {
|
icu_normalize(PyObject *self, PyObject *args) {
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
int32_t sz = 0, mode = UNORM_DEFAULT, cap = 0, rsz = 0;
|
int32_t sz = 0, cap = 0, rsz = 0;
|
||||||
|
NORM_MODES mode;
|
||||||
UChar *dest = NULL, *source = NULL;
|
UChar *dest = NULL, *source = NULL;
|
||||||
PyObject *ret = NULL, *src = NULL;
|
PyObject *ret = NULL, *src = NULL;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "iO", &mode, &src)) return NULL;
|
if (!PyArg_ParseTuple(args, "iO", &mode, &src)) return NULL;
|
||||||
|
const UNormalizer2 *n = NULL;
|
||||||
|
switch (mode) {
|
||||||
|
case NFC:
|
||||||
|
n = unorm2_getNFCInstance(&status);
|
||||||
|
break;
|
||||||
|
case NFKC:
|
||||||
|
n = unorm2_getNFKCInstance(&status);
|
||||||
|
break;
|
||||||
|
case NFD:
|
||||||
|
n = unorm2_getNFDInstance(&status);
|
||||||
|
break;
|
||||||
|
case NFKD:
|
||||||
|
n = unorm2_getNFKDInstance(&status);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (U_FAILURE(status)) {
|
||||||
|
PyErr_SetString(PyExc_ValueError, u_errorName(status));
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
|
||||||
source = python_to_icu(src, &sz, 1);
|
source = python_to_icu(src, &sz, 1);
|
||||||
if (source == NULL) goto end;
|
if (source == NULL) goto end;
|
||||||
cap = 2 * sz;
|
cap = 2 * sz;
|
||||||
dest = (UChar*) calloc(cap, sizeof(UChar));
|
dest = (UChar*) calloc(cap, sizeof(UChar));
|
||||||
if (dest == NULL) { PyErr_NoMemory(); goto end; }
|
if (dest == NULL) { PyErr_NoMemory(); goto end; }
|
||||||
|
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
rsz = unorm_normalize(source, sz, (UNormalizationMode)mode, 0, dest, cap, &status);
|
rsz = unorm2_normalize(n, source, sz, dest, cap, &status);
|
||||||
if (status == U_BUFFER_OVERFLOW_ERROR) {
|
if (status == U_BUFFER_OVERFLOW_ERROR) {
|
||||||
cap *= 2;
|
cap *= 2;
|
||||||
dest = (UChar*) realloc(dest, cap*sizeof(UChar));
|
dest = (UChar*) realloc(dest, cap*sizeof(UChar));
|
||||||
@ -1232,13 +1256,10 @@ initicu(void)
|
|||||||
ADDUCONST(UCOL_LOWER_FIRST);
|
ADDUCONST(UCOL_LOWER_FIRST);
|
||||||
ADDUCONST(UCOL_UPPER_FIRST);
|
ADDUCONST(UCOL_UPPER_FIRST);
|
||||||
|
|
||||||
ADDUCONST(UNORM_NONE);
|
ADDUCONST(NFD);
|
||||||
ADDUCONST(UNORM_NFD);
|
ADDUCONST(NFKD);
|
||||||
ADDUCONST(UNORM_NFKD);
|
ADDUCONST(NFC);
|
||||||
ADDUCONST(UNORM_NFC);
|
ADDUCONST(NFKC);
|
||||||
ADDUCONST(UNORM_DEFAULT);
|
|
||||||
ADDUCONST(UNORM_NFKC);
|
|
||||||
ADDUCONST(UNORM_FCD);
|
|
||||||
|
|
||||||
ADDUCONST(UPPER_CASE);
|
ADDUCONST(UPPER_CASE);
|
||||||
ADDUCONST(LOWER_CASE);
|
ADDUCONST(LOWER_CASE);
|
||||||
|
@ -28,7 +28,7 @@ if _icu is None:
|
|||||||
raise RuntimeError('Failed to load icu with error: %s' % err)
|
raise RuntimeError('Failed to load icu with error: %s' % err)
|
||||||
del err
|
del err
|
||||||
icu_unicode_version = getattr(_icu, 'unicode_version', None)
|
icu_unicode_version = getattr(_icu, 'unicode_version', None)
|
||||||
_nmodes = {m:getattr(_icu, 'UNORM_'+m, None) for m in ('NFC', 'NFD', 'NFKC', 'NFKD', 'NONE', 'DEFAULT', 'FCD')}
|
_nmodes = {m:getattr(_icu, m) for m in ('NFC', 'NFD', 'NFKC', 'NFKD')}
|
||||||
|
|
||||||
# Ensure that the python internal filesystem and default encodings are not ASCII
|
# Ensure that the python internal filesystem and default encodings are not ASCII
|
||||||
|
|
||||||
@ -38,6 +38,8 @@ def is_ascii(name):
|
|||||||
return codecs.lookup(name).name == b'ascii'
|
return codecs.lookup(name).name == b'ascii'
|
||||||
except (TypeError, LookupError):
|
except (TypeError, LookupError):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if is_ascii(sys.getdefaultencoding()):
|
if is_ascii(sys.getdefaultencoding()):
|
||||||
_icu.set_default_encoding(b'utf-8')
|
_icu.set_default_encoding(b'utf-8')
|
||||||
@ -119,6 +121,7 @@ def case_sensitive_collator():
|
|||||||
# function implementations based on different collators, to allow lazy loading
|
# function implementations based on different collators, to allow lazy loading
|
||||||
# of collators, with maximum runtime performance
|
# of collators, with maximum runtime performance
|
||||||
|
|
||||||
|
|
||||||
_sort_key_template = '''
|
_sort_key_template = '''
|
||||||
def {name}(obj):
|
def {name}(obj):
|
||||||
try:
|
try:
|
||||||
@ -222,6 +225,7 @@ def capitalize(x):
|
|||||||
except (IndexError, TypeError, AttributeError):
|
except (IndexError, TypeError, AttributeError):
|
||||||
return x
|
return x
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
swapcase = _icu.swap_case
|
swapcase = _icu.swap_case
|
||||||
except AttributeError: # For people running from source
|
except AttributeError: # For people running from source
|
||||||
@ -300,6 +304,7 @@ def partition_by_first_letter(items, reverse=False, key=lambda x:x):
|
|||||||
ans[last_c] = [item]
|
ans[last_c] = [item]
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
|
||||||
# Return the number of unicode codepoints in a string
|
# Return the number of unicode codepoints in a string
|
||||||
string_length = _icu.string_length if is_narrow_build else len
|
string_length = _icu.string_length if is_narrow_build else len
|
||||||
|
|
||||||
@ -311,4 +316,3 @@ utf16_length = len if is_narrow_build else _icu.utf16_length
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
from calibre.utils.icu_test import run
|
from calibre.utils.icu_test import run
|
||||||
run(verbosity=4)
|
run(verbosity=4)
|
||||||
|
|
||||||
|
@ -19,7 +19,7 @@
|
|||||||
#include <unicode/ustring.h>
|
#include <unicode/ustring.h>
|
||||||
#include <unicode/usearch.h>
|
#include <unicode/usearch.h>
|
||||||
#include <unicode/utrans.h>
|
#include <unicode/utrans.h>
|
||||||
#include <unicode/unorm.h>
|
#include <unicode/unorm2.h>
|
||||||
#include <unicode/ubrk.h>
|
#include <unicode/ubrk.h>
|
||||||
|
|
||||||
#if PY_VERSION_HEX >= 0x03030000
|
#if PY_VERSION_HEX >= 0x03030000
|
||||||
|
Loading…
x
Reference in New Issue
Block a user