From 58909f4c740bd8cd73e48f0f0871955070279528 Mon Sep 17 00:00:00 2001
From: Flaviu Tamas
Date: Sun, 9 Dec 2018 17:19:48 -0500
Subject: [PATCH] Build ICU in py3

Make the icu extension module compile under both Python 2 and Python 3:

- Use Py_TYPE(self) instead of self->ob_type in the Collator and
  BreakIterator deallocators, and PyVarObject_HEAD_INIT() in both type
  objects.
- Accept PyLong (and, on Python 2 only, PyInt) in the Collator strength
  setter and read the value with PyLong_AsLong(); build result integers
  with PyLong_FromLong() instead of PyInt_FromLong().
- Drop the "#error Not implemented for python >= 3.3" guards from
  Collator.find, BreakIterator.index, BreakIterator.split2 and
  utf16_length.
- Compile the body of set_default_encoding() only on Python 2; on
  Python 3 it just returns None.
- Add a Python >= 3.3 implementation of utf16_length() that starts from
  PyUnicode_GET_LENGTH() and adds one for every code point above 0xffff
  in 4-byte-kind strings. The result is now a Py_ssize_t returned with
  the "n" format.
- Create the module through PyModuleDef/PyInit_icu on Python 3 and
  Py_InitModule3/initicu on Python 2; the shared init body uses the
  INITERROR and INITMODULE macros.
- python_to_icu32(): return NULL directly instead of "goto end" on the
  two early error paths.

Review amendments folded into this revision (hunk line counts unchanged):

- icu_utf16_length(): the Python >= 3.3 branch now performs the same
  PyUnicode_Check(src) test as the older branch before touching the
  object with PyUnicode_READY().
- icu_Collator_set_strength(): reject val == NULL, which is what a
  getset setter receives on attribute deletion, instead of passing it
  to PyLong_Check().
- Reworded the "UCS8 or UCS16" comment in icu_utf16_length().

NOTE(review): the line breaks, blank context lines and indentation of
this patch were restored from a whitespace-collapsed copy. Verify the
context against the tree (or apply with whitespace-tolerant matching)
before use. The post-image blob id on the icu.c "index" line predates
the amendments above.

---
 src/calibre/utils/icu.c               | 268 ++++++++++++++------------
 src/calibre/utils/icu_calibre_utils.h |   4 +-
 2 files changed, 149 insertions(+), 123 deletions(-)

diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c
index 81141a6e18..d6d9919ece 100644
--- a/src/calibre/utils/icu.c
+++ b/src/calibre/utils/icu.c
@@ -37,7 +37,7 @@ icu_Collator_dealloc(icu_Collator* self)
     if (self->collator != NULL) ucol_close(self->collator);
     if (self->contractions != NULL) uset_close(self->contractions);
     self->collator = NULL;
-    self->ob_type->tp_free((PyObject*)self);
+    Py_TYPE(self)->tp_free((PyObject*)self);
 }

 static PyObject *
@@ -92,11 +92,15 @@ icu_Collator_get_strength(icu_Collator *self, void *closure) {

 static int
 icu_Collator_set_strength(icu_Collator *self, PyObject *val, void *closure) {
-    if (!PyInt_Check(val)) {
+    if (val == NULL || ( /* val is NULL when the attribute is being deleted */
+#if PY_MAJOR_VERSION < 3
+        !PyInt_Check(val) &&
+#endif
+        !PyLong_Check(val))) {
         PyErr_SetString(PyExc_TypeError, "Strength must be an integer.");
         return -1;
     }
-    ucol_setStrength(self->collator, (int)PyInt_AS_LONG(val));
+    ucol_setStrength(self->collator, (int)PyLong_AsLong(val));
     return 0;
 }
 // }}}
@@ -191,9 +195,6 @@ end:
 // Collator.find {{{
 static PyObject *
 icu_Collator_find(icu_Collator *self, PyObject *args) {
-#if PY_VERSION_HEX >= 0x03030000
-#error Not implemented for python >= 3.3
-#endif
     PyObject *a_ = NULL, *b_ = NULL;
     UChar *a = NULL, *b = NULL;
     int32_t asz = 0, bsz = 0, pos = -1, length = -1;
@@ -454,45 +455,44 @@ static PyGetSetDef icu_Collator_getsetters[] = {
 };

 static PyTypeObject icu_CollatorType = { // {{{
-    PyObject_HEAD_INIT(NULL)
-    0, /*ob_size*/
-    "icu.Collator", /*tp_name*/
-    sizeof(icu_Collator), /*tp_basicsize*/
-    0, /*tp_itemsize*/
-    (destructor)icu_Collator_dealloc, /*tp_dealloc*/
-    0, /*tp_print*/
-    0, /*tp_getattr*/
-    0, /*tp_setattr*/
-    0, /*tp_compare*/
-    0, /*tp_repr*/
-    0, /*tp_as_number*/
-    0, /*tp_as_sequence*/
-    0, /*tp_as_mapping*/
-    0, /*tp_hash */
-    0, /*tp_call*/
-    0, /*tp_str*/
-    0, /*tp_getattro*/
-    0, /*tp_setattro*/
-    0, /*tp_as_buffer*/
-    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
-    "Collator", /* tp_doc */
-    0, /* tp_traverse */
-    0, /* tp_clear */
-    0, /* tp_richcompare */
-    0, /* tp_weaklistoffset */
-    0, /* tp_iter */
-    0, /* tp_iternext */
-    icu_Collator_methods, /* tp_methods */
-    0, /* tp_members */
-    icu_Collator_getsetters, /* tp_getset */
-    0, /* tp_base */
-    0, /* tp_dict */
-    0, /* tp_descr_get */
-    0, /* tp_descr_set */
-    0, /* tp_dictoffset */
-    0, /* tp_init */
-    0, /* tp_alloc */
-    icu_Collator_new, /* tp_new */
+    PyVarObject_HEAD_INIT(NULL, 0)
+    /* tp_name */ "icu.Collator",
+    /* tp_basicsize */ sizeof(icu_Collator),
+    /* tp_itemsize */ 0,
+    /* tp_dealloc */ (destructor)icu_Collator_dealloc,
+    /* tp_print */ 0,
+    /* tp_getattr */ 0,
+    /* tp_setattr */ 0,
+    /* tp_compare */ 0,
+    /* tp_repr */ 0,
+    /* tp_as_number */ 0,
+    /* tp_as_sequence */ 0,
+    /* tp_as_mapping */ 0,
+    /* tp_hash */ 0,
+    /* tp_call */ 0,
+    /* tp_str */ 0,
+    /* tp_getattro */ 0,
+    /* tp_setattro */ 0,
+    /* tp_as_buffer */ 0,
+    /* tp_flags */ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+    /* tp_doc */ "Collator",
+    /* tp_traverse */ 0,
+    /* tp_clear */ 0,
+    /* tp_richcompare */ 0,
+    /* tp_weaklistoffset */ 0,
+    /* tp_iter */ 0,
+    /* tp_iternext */ 0,
+    /* tp_methods */ icu_Collator_methods,
+    /* tp_members */ 0,
+    /* tp_getset */ icu_Collator_getsetters,
+    /* tp_base */ 0,
+    /* tp_dict */ 0,
+    /* tp_descr_get */ 0,
+    /* tp_descr_set */ 0,
+    /* tp_dictoffset */ 0,
+    /* tp_init */ 0,
+    /* tp_alloc */ 0,
+    /* tp_new */ icu_Collator_new,
 }; // }}}

 // }}
@@ -542,7 +542,7 @@ icu_BreakIterator_dealloc(icu_BreakIterator* self)
     if (self->break_iterator != NULL) ubrk_close(self->break_iterator);
     if (self->text != NULL) free(self->text);
     self->break_iterator = NULL; self->text = NULL;
-    self->ob_type->tp_free((PyObject*)self);
+    Py_TYPE(self)->tp_free((PyObject*)self);
 }


@@ -595,9 +595,6 @@ icu_BreakIterator_set_text(icu_BreakIterator *self, PyObject *input) {
 // BreakIterator.index {{{
 static PyObject *
 icu_BreakIterator_index(icu_BreakIterator *self, PyObject *token) {
-#if PY_VERSION_HEX >= 0x03030000
-#error Not implemented for python >= 3.3
-#endif
     UChar *buf = NULL, *needle = NULL;
     int32_t word_start = 0, p = 0, sz = 0, ans = -1, leading_hyphen = 0, trailing_hyphen = 0;

@@ -655,9 +652,6 @@ end:
 // BreakIterator.split2 {{{
 static PyObject *
 icu_BreakIterator_split2(icu_BreakIterator *self, PyObject *args) {
-#if PY_VERSION_HEX >= 0x03030000
-#error Not implemented for python >= 3.3
-#endif
     int32_t word_start = 0, p = 0, sz = 0, last_pos = 0, last_sz = 0;
     int is_hyphen_sep = 0, leading_hyphen = 0, trailing_hyphen = 0;

@@ -695,7 +689,7 @@ icu_BreakIterator_split2(icu_BreakIterator *self, PyObject *args) {
             if (is_hyphen_sep && PyList_GET_SIZE(ans) > 0) {
                 sz = last_sz + sz + trailing_hyphen;
                 last_sz = sz;
-                t = PyInt_FromLong((long)sz);
+                t = PyLong_FromLong((long)sz);
                 if (t == NULL) { Py_DECREF(ans); ans = NULL; break; }
                 temp = PyList_GET_ITEM(ans, PyList_GET_SIZE(ans) - 1);
                 Py_DECREF(PyTuple_GET_ITEM(temp, 1));
@@ -737,45 +731,44 @@ static PyMethodDef icu_BreakIterator_methods[] = {


 static PyTypeObject icu_BreakIteratorType = { // {{{
-    PyObject_HEAD_INIT(NULL)
-    0, /*ob_size*/
-    "icu.BreakIterator", /*tp_name*/
-    sizeof(icu_BreakIterator), /*tp_basicsize*/
-    0, /*tp_itemsize*/
-    (destructor)icu_BreakIterator_dealloc, /*tp_dealloc*/
-    0, /*tp_print*/
-    0, /*tp_getattr*/
-    0, /*tp_setattr*/
-    0, /*tp_compare*/
-    0, /*tp_repr*/
-    0, /*tp_as_number*/
-    0, /*tp_as_sequence*/
-    0, /*tp_as_mapping*/
-    0, /*tp_hash */
-    0, /*tp_call*/
-    0, /*tp_str*/
-    0, /*tp_getattro*/
-    0, /*tp_setattro*/
-    0, /*tp_as_buffer*/
-    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
-    "Break Iterator", /* tp_doc */
-    0, /* tp_traverse */
-    0, /* tp_clear */
-    0, /* tp_richcompare */
-    0, /* tp_weaklistoffset */
-    0, /* tp_iter */
-    0, /* tp_iternext */
-    icu_BreakIterator_methods, /* tp_methods */
-    0, /* tp_members */
-    0, /* tp_getset */
-    0, /* tp_base */
-    0, /* tp_dict */
-    0, /* tp_descr_get */
-    0, /* tp_descr_set */
-    0, /* tp_dictoffset */
-    0, /* tp_init */
-    0, /* tp_alloc */
-    icu_BreakIterator_new, /* tp_new */
+    PyVarObject_HEAD_INIT(NULL, 0)
+    /* tp_name */ "icu.BreakIterator",
+    /* tp_basicsize */ sizeof(icu_BreakIterator),
+    /* tp_itemsize */ 0,
+    /* tp_dealloc */ (destructor)icu_BreakIterator_dealloc,
+    /* tp_print */ 0,
+    /* tp_getattr */ 0,
+    /* tp_setattr */ 0,
+    /* tp_compare */ 0,
+    /* tp_repr */ 0,
+    /* tp_as_number */ 0,
+    /* tp_as_sequence */ 0,
+    /* tp_as_mapping */ 0,
+    /* tp_hash */ 0,
+    /* tp_call */ 0,
+    /* tp_str */ 0,
+    /* tp_getattro */ 0,
+    /* tp_setattro */ 0,
+    /* tp_as_buffer */ 0,
+    /* tp_flags */ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+    /* tp_doc */ "Break Iterator",
+    /* tp_traverse */ 0,
+    /* tp_clear */ 0,
+    /* tp_richcompare */ 0,
+    /* tp_weaklistoffset */ 0,
+    /* tp_iter */ 0,
+    /* tp_iternext */ 0,
+    /* tp_methods */ icu_BreakIterator_methods,
+    /* tp_members */ 0,
+    /* tp_getset */ 0,
+    /* tp_base */ 0,
+    /* tp_dict */ 0,
+    /* tp_descr_get */ 0,
+    /* tp_descr_set */ 0,
+    /* tp_dictoffset */ 0,
+    /* tp_init */ 0,
+    /* tp_alloc */ 0,
+    /* tp_new */ icu_BreakIterator_new,
 }; // }}}

 // }}}
@@ -856,11 +849,13 @@ end:
 // set_default_encoding {{{
 static PyObject *
 icu_set_default_encoding(PyObject *self, PyObject *args) {
+#if PY_MAJOR_VERSION < 3
     char *encoding;
     if (!PyArg_ParseTuple(args, "s:setdefaultencoding", &encoding))
         return NULL;
     if (PyUnicode_SetDefaultEncoding(encoding))
         return NULL;
+#endif
     Py_INCREF(Py_None);
     return Py_None;

@@ -989,7 +984,7 @@ icu_ord_string(PyObject *self, PyObject *input) {
     ans = PyTuple_New(sz);
     if (ans == NULL) goto end;
     for (i = 0; i < sz; i++) {
-        temp = PyInt_FromLong((long)input_buf[i]);
+        temp = PyLong_FromLong((long)input_buf[i]);
         if (temp == NULL) { Py_DECREF(ans); ans = NULL; PyErr_NoMemory(); goto end; }
         PyTuple_SET_ITEM(ans, i, temp);
     }
@@ -1115,18 +1110,15 @@ icu_string_length(PyObject *self, PyObject *src) {
 // utf16_length {{{
 static PyObject *
 icu_utf16_length(PyObject *self, PyObject *src) {
-#if PY_VERSION_HEX >= 0x03030000
-#error Not implemented for python >= 3.3
-#endif
-
-    int32_t sz = 0;
+    Py_ssize_t sz = 0;
+#if PY_VERSION_HEX < 0x03030000
 #ifdef Py_UNICODE_WIDE
     int32_t i = 0, t = 0;
     Py_UNICODE *data = NULL;
 #endif

     if (!PyUnicode_Check(src)) { PyErr_SetString(PyExc_TypeError, "Must be a unicode object"); return NULL; }
-    sz = (int32_t)PyUnicode_GET_SIZE(src);
+    sz = PyUnicode_GET_SIZE(src);
 #ifdef Py_UNICODE_WIDE
     data = PyUnicode_AS_UNICODE(src);
     for (i = 0; i < sz; i++) {
@@ -1134,7 +1126,25 @@ icu_utf16_length(PyObject *self, PyObject *src) {
     }
     sz = t;
 #endif
-    return Py_BuildValue("l", (long)sz);
+#else
+    Py_ssize_t unit_length, i;
+    Py_UCS4 *data = NULL;
+
+    if (!PyUnicode_Check(src)) { PyErr_SetString(PyExc_TypeError, "Must be a unicode object"); return NULL; }
+    if (PyUnicode_READY(src) != 0) { return NULL; }
+    unit_length = sz = PyUnicode_GET_LENGTH(src);
+    // Only the 4-byte kind can hold code points above 0xffff (two UTF-16 units each); for 1- and 2-byte kinds the length is the answer.
+    if(PyUnicode_KIND(src) == PyUnicode_4BYTE_KIND) {
+        data = PyUnicode_4BYTE_DATA(src);
+        for(i = 0; i < unit_length; i++) {
+            if(data[i] > 0xffff) {
+                sz++;
+            }
+        }
+    }
+#endif
+
+    return Py_BuildValue("n", sz);
 } // }}}

 // Module initialization {{{
@@ -1148,7 +1158,7 @@ static PyMethodDef icu_methods[] = {
     },

     {"set_default_encoding", icu_set_default_encoding, METH_VARARGS,
-        "set_default_encoding(encoding) -> Set the default encoding for the python unicode implementation."
+        "set_default_encoding(encoding) -> Set the default encoding for the python unicode implementation. In Py3, this operation is a no-op"
     },

     {"set_filesystem_encoding", icu_set_filesystem_encoding, METH_VARARGS,
@@ -1198,25 +1208,36 @@ static PyMethodDef icu_methods[] = {
     {NULL} /* Sentinel */
 };

-#define ADDUCONST(x) PyModule_AddIntConstant(m, #x, x)
+#if PY_MAJOR_VERSION >= 3
+#define INITERROR return NULL
+#define INITMODULE PyModule_Create(&icu_module)
+static struct PyModuleDef icu_module = {
+    /* m_base */ PyModuleDef_HEAD_INIT,
+    /* m_name */ "icu",
+    /* m_doc */ "Wrapper for the ICU internationalization library",
+    /* m_size */ -1,
+    /* m_methods */ icu_methods,
+    /* m_slots */ 0,
+    /* m_traverse */ 0,
+    /* m_clear */ 0,
+    /* m_free */ 0,
+};
+
+CALIBRE_MODINIT_FUNC PyInit_icu(void) {
+#else
+#define INITERROR return
+#define INITMODULE Py_InitModule3("icu", icu_methods, "Wrapper for the ICU internationalization library")
+CALIBRE_MODINIT_FUNC initicu(void) {
+#endif

-CALIBRE_MODINIT_FUNC
-initicu(void)
-{
-    PyObject* m;
     UVersionInfo ver, uver;
     UErrorCode status = U_ZERO_ERROR;
     char version[U_MAX_VERSION_STRING_LENGTH+1] = {0}, uversion[U_MAX_VERSION_STRING_LENGTH+5] = {0};

-    if (sizeof(Py_UNICODE) != 2 && sizeof(Py_UNICODE) != 4) {
-        PyErr_SetString(PyExc_RuntimeError, "This module only works on python versions <= 3.2");
-        return;
-    }
-
     u_init(&status);
     if (U_FAILURE(status)) {
         PyErr_SetString(PyExc_RuntimeError, u_errorName(status));
-        return;
+        INITERROR;
     }
     u_getVersion(ver);
     u_versionToString(ver, version);
@@ -1224,21 +1245,23 @@ initicu(void)
     u_versionToString(uver, uversion);

     if (PyType_Ready(&icu_CollatorType) < 0)
-        return;
+        INITERROR;
     if (PyType_Ready(&icu_BreakIteratorType) < 0)
-        return;
+        INITERROR;

-    m = Py_InitModule3("icu", icu_methods,
-        "Wrapper for the ICU internationalization library");
+    PyObject *mod = INITMODULE;
+
+    if (mod == NULL) INITERROR;

     Py_INCREF(&icu_CollatorType);
     Py_INCREF(&icu_BreakIteratorType);
-    PyModule_AddObject(m, "Collator", (PyObject *)&icu_CollatorType);
-    PyModule_AddObject(m, "BreakIterator", (PyObject *)&icu_BreakIteratorType);
+    PyModule_AddObject(mod, "Collator", (PyObject *)&icu_CollatorType);
+    PyModule_AddObject(mod, "BreakIterator", (PyObject *)&icu_BreakIteratorType);
     // uint8_t must be the same size as char
-    PyModule_AddIntConstant(m, "ok", (U_SUCCESS(status) && sizeof(uint8_t) == sizeof(char)) ? 1 : 0);
-    PyModule_AddStringConstant(m, "icu_version", version);
-    PyModule_AddStringConstant(m, "unicode_version", uversion);
+    PyModule_AddIntConstant(mod, "ok", (U_SUCCESS(status) && sizeof(uint8_t) == sizeof(char)) ? 1 : 0);
+    PyModule_AddStringConstant(mod, "icu_version", version);
+    PyModule_AddStringConstant(mod, "unicode_version", uversion);
+#define ADDUCONST(x) PyModule_AddIntConstant(mod, #x, x)
     ADDUCONST(USET_SPAN_NOT_CONTAINED);
     ADDUCONST(USET_SPAN_CONTAINED);
     ADDUCONST(USET_SPAN_SIMPLE);
@@ -1270,5 +1293,8 @@ initicu(void)
     ADDUCONST(UBRK_LINE);
     ADDUCONST(UBRK_SENTENCE);

+#if PY_MAJOR_VERSION >= 3
+    return mod;
+#endif
 }
 // }}}
diff --git a/src/calibre/utils/icu_calibre_utils.h b/src/calibre/utils/icu_calibre_utils.h
index 8783e7fd9f..270c914c7c 100644
--- a/src/calibre/utils/icu_calibre_utils.h
+++ b/src/calibre/utils/icu_calibre_utils.h
@@ -198,14 +198,14 @@ static UChar32* python_to_icu32(PyObject *obj, int32_t *osz) {

     if (!PyUnicode_CheckExact(obj)) {
         PyErr_SetString(PyExc_TypeError, "Not a unicode string");
-        goto end;
+        return NULL;
     }
     if(PyUnicode_READY(obj) == -1) {
         return NULL;
     }
     sz = PyUnicode_GET_LENGTH(obj);
     ans = (UChar32*) malloc((sz+1) * sizeof(UChar32));
-    if (ans == NULL) { PyErr_NoMemory(); goto end; }
+    if (ans == NULL) { PyErr_NoMemory(); return NULL; }
     int kind;
     if ((kind = PyUnicode_KIND(obj)) == PyUnicode_4BYTE_KIND) {
         memcpy(ans, PyUnicode_4BYTE_DATA(obj), sz * 4);