diff --git a/src/calibre/gui2/tweak_book/editor/syntax/html.c b/src/calibre/gui2/tweak_book/editor/syntax/html.c
index b2d101566d..66e86f3b28 100644
--- a/src/calibre/gui2/tweak_book/editor/syntax/html.c
+++ b/src/calibre/gui2/tweak_book/editor/syntax/html.c
@@ -11,7 +11,7 @@
 #include
 
 #define COMPARE(attr, op) (PyObject_RichCompareBool(a->attr, b->attr, op) == 1)
-static PyObject *bold_tags = NULL, *italic_tags = NULL, *zero = NULL;
+static PyObject *bold_tags = NULL, *italic_tags = NULL, *zero = NULL, *spell_property = NULL, *recognized = NULL, *split = NULL;
 
 // Tag type definition {{{
 
@@ -365,7 +365,136 @@ static PyTypeObject html_StateType = { // {{{
     html_State_new,            /* tp_new */
 }; // }}}
 // }}}
+
+/* init(spell_property, recognized, split)
+ * Remember the three Python callables that check_spelling() calls back into.
+ * Can safely be called more than once. */
+static PyObject*
+html_init(PyObject *self, PyObject *args) {
+    PyObject *new_spell_property = NULL, *new_recognized = NULL, *new_split = NULL;
+    PyObject *old_spell_property = NULL, *old_recognized = NULL, *old_split = NULL;
+
+    /* Parse into locals: releasing the globals first and parsing straight into
+     * them would leave dangling pointers behind if the arguments were invalid. */
+    if (!PyArg_ParseTuple(args, "OOO", &new_spell_property, &new_recognized, &new_split)) return NULL;
+
+    /* Take the new references and update all the globals before dropping the old
+     * references, so that re-initializing with the same objects cannot free them. */
+    Py_INCREF(new_spell_property); Py_INCREF(new_recognized); Py_INCREF(new_split);
+    old_spell_property = spell_property; old_recognized = recognized; old_split = split;
+    spell_property = new_spell_property; recognized = new_recognized; split = new_split;
+    Py_XDECREF(old_spell_property); Py_XDECREF(old_recognized); Py_XDECREF(old_split);
+    Py_RETURN_NONE;
+}
+
+/* check_spelling(text, text_len, fmt, locale, sfmt, store_locale) -> tuple of (length, format)
+ * Counterpart of the pure python check_spelling() in html.py. text is split into
+ * words; gaps between words and recognized words get fmt, unrecognized words get
+ * sfmt or, when store_locale is true, the result of spell_property(sfmt, locale). */
+static PyObject*
+html_check_spelling(PyObject *self, PyObject *args) {
+#if PY_VERSION_HEX >= 0x03030000
+#error Not implemented for python >= 3.3
+#endif
+    PyObject *ans = NULL, *temp = NULL, *items = NULL, *item = NULL, *text = NULL, *fmt = NULL, *locale = NULL, *sfmt = NULL, *_store_locale = NULL, *t = NULL, *utmp = NULL;
+    long text_len = 0, start = 0, length = 0, ppos = 0;
+    int store_locale = 0, ok = 0;
+    Py_ssize_t i = 0, j = 0, num_items = 0, buf_len = 0;
+    Py_UNICODE *buf = NULL;
+
+    if (!PyArg_ParseTuple(args, "OlOOOO", &text, &text_len, &fmt, &locale, &sfmt, &_store_locale)) return NULL;
+    if (spell_property == NULL || recognized == NULL || split == NULL) {
+        PyErr_SetString(PyExc_RuntimeError, "init() must be called before check_spelling()");
+        return NULL;
+    }
+    /* The raw Py_UNICODE buffer is read below, so text has to be a unicode object */
+    if (!PyUnicode_Check(text)) {
+        PyErr_SetString(PyExc_TypeError, "text must be a unicode object");
+        return NULL;
+    }
+    store_locale = PyObject_IsTrue(_store_locale);
+    if (store_locale < 0) return NULL;
+
+    temp = PyObject_GetAttrString(locale, "langcode");
+    if (temp == NULL) goto error;
+    items = PyObject_CallFunctionObjArgs(split, text, temp, NULL);
+    Py_DECREF(temp); temp = NULL;
+    if (items == NULL) goto error;
+    /* The PyList_GET_* macros used below do no type checking of their own */
+    if (!PyList_Check(items)) {
+        PyErr_SetString(PyExc_TypeError, "split() must return a list of (start, length) tuples");
+        goto error;
+    }
+    num_items = PyList_GET_SIZE(items);
+    /* At most two entries per word (gap before it + the word) plus one for trailing text */
+    ans = PyTuple_New((2 * num_items) + 1);
+    if (ans == NULL) goto error;
+    buf = PyUnicode_AS_UNICODE(text);
+    buf_len = PyUnicode_GET_SIZE(text);
+
+    /* Py_BuildValue("O") takes its own reference to y; PyTuple_SET_ITEM steals t */
+#define APPEND(x, y) { t = Py_BuildValue("lO", (x), (y)); if (t == NULL) goto error; PyTuple_SET_ITEM(ans, j, t); j += 1; }
+
+    for (i = 0; i < num_items; i++) {
+        item = PyList_GET_ITEM(items, i);  /* borrowed reference */
+        if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) < 2) {
+            PyErr_SetString(PyExc_TypeError, "split() must return a list of (start, length) tuples");
+            goto error;
+        }
+        start = PyInt_AsLong(PyTuple_GET_ITEM(item, 0));
+        if (start == -1 && PyErr_Occurred()) goto error;
+        length = PyInt_AsLong(PyTuple_GET_ITEM(item, 1));
+        if (length == -1 && PyErr_Occurred()) goto error;
+        /* Never read outside the text buffer, whatever split() returned */
+        if (start < 0 || length < 0 || start > buf_len || length > buf_len - start) {
+            PyErr_SetString(PyExc_ValueError, "word position is outside of the text");
+            goto error;
+        }
+
+        if (start > ppos) APPEND(start - ppos, fmt)
+        ppos = start + length;
+
+        utmp = PyUnicode_FromUnicode(buf + start, length);
+        if (utmp == NULL) goto error;
+        temp = PyObject_CallFunctionObjArgs(recognized, utmp, locale, NULL);
+        Py_DECREF(utmp); utmp = NULL;
+        if (temp == NULL) goto error;
+        ok = PyObject_IsTrue(temp);
+        Py_DECREF(temp); temp = NULL;
+        if (ok < 0) goto error;
+
+        if (ok) {
+            APPEND(length, fmt)
+        } else if (store_locale) {
+            temp = PyObject_CallFunctionObjArgs(spell_property, sfmt, locale, NULL);
+            if (temp == NULL) goto error;
+            APPEND(length, temp)
+            Py_DECREF(temp); temp = NULL;
+        } else {
+            APPEND(length, sfmt)
+        }
+    }
+    if (ppos < text_len) APPEND(text_len - ppos, fmt)
+
+    if (j < PyTuple_GET_SIZE(ans) && _PyTuple_Resize(&ans, j) != 0) goto error;
+    goto end;
+
+error:
+    Py_XDECREF(ans); ans = NULL;
+end:
+    Py_XDECREF(items); Py_XDECREF(temp);
+    return ans;
+}
+
 static PyMethodDef html_methods[] = {
+    {"init", html_init, METH_VARARGS,
+        "init(spell_property, recognized, split)\n\n Initialize this module"
+    },
+
+    {"check_spelling", html_check_spelling, METH_VARARGS,
+        "check_spelling(text, text_len, fmt, locale, sfmt, store_locale)\n\n Speedup inner loop for spell check"
+    },
+
     {NULL, NULL, 0, NULL}
 };
 
diff --git a/src/calibre/gui2/tweak_book/editor/syntax/html.py b/src/calibre/gui2/tweak_book/editor/syntax/html.py
index 145cec22de..b3219c4043 100644
--- a/src/calibre/gui2/tweak_book/editor/syntax/html.py
+++ b/src/calibre/gui2/tweak_book/editor/syntax/html.py
@@ -64,6 +64,13 @@ if _speedup is not None:
     Tag = _speedup.Tag
     bold_tags, italic_tags = _speedup.bold_tags, _speedup.italic_tags
     State = _speedup.State
+    def spell_property(sfmt, locale):
+        s = QTextCharFormat(sfmt)
+        s.setProperty(SPELL_LOCALE_PROPERTY, locale)
+        return s
+    _speedup.init(spell_property, dictionaries.recognized, split_into_words_and_positions)
+    del spell_property
+    check_spelling = _speedup.check_spelling
 else:
     bold_tags = {'b', 'strong'} | {'h%d' % d for d in range(1, 7)}
     italic_tags = {'i', 'em'}
@@ -126,6 +133,28 @@ else:
                 '->'.join(x.name for x in self.tags), self.is_bold, self.is_italic, self.current_lang)
         __str__ = __repr__
 
+    def check_spelling(text, tlen, fmt, locale, sfmt, store_locale):
+        split_ans = []
+        ppos = 0
+        r, a = dictionaries.recognized, split_ans.append
+        for start, length in split_into_words_and_positions(text, lang=locale.langcode):
+            if start > ppos:
+                a((start - ppos, fmt))
+            ppos = start + length
+            recognized = r(text[start:ppos], locale)
+            if recognized:
+                a((length, fmt))
+            else:
+                if store_locale:
+                    s = QTextCharFormat(sfmt)
+                    s.setProperty(SPELL_LOCALE_PROPERTY, locale)
+                    a((length, s))
+                else:
+                    a((length, sfmt))
+        if ppos < tlen:
+            a((tlen - ppos, fmt))
+        return split_ans
+
 del _speedup
 
 
@@ -237,28 +266,6 @@ def cdata(state, text, i, formats, user_data):
     add_tag_data(user_data, TagStart(m.start(), '', name, True, True))
     return [(num, fmt), (2, formats['end_tag']), (len(m.group()) - 2, formats['tag_name'])]
 
-def check_spelling(text, tpos, tlen, fmt, locale, sfmt):
-    split_ans = []
-    ppos = 0
-    sl = store_locale.enabled
-    for start, length in split_into_words_and_positions(text[tpos:tpos+tlen], lang=locale.langcode):
-        if start > ppos:
-            split_ans.append((start - ppos, fmt))
-        ppos = start + length
-        recognized = dictionaries.recognized(text[tpos + start:tpos + ppos], locale)
-        if recognized:
-            split_ans.append((length, fmt))
-        else:
-            if sl:
-                s = QTextCharFormat(sfmt)
-                s.setProperty(SPELL_LOCALE_PROPERTY, locale)
-                split_ans.append((length, s))
-            else:
-                split_ans.append((length, sfmt))
-    if ppos < tlen:
-        split_ans.append((tlen - ppos, fmt))
-    return split_ans
-
 def process_text(state, text, nbsp_format, spell_format, user_data):
     ans = []
     fmt = None
@@ -289,7 +296,7 @@ def process_text(state, text, nbsp_format, spell_format, user_data):
                 if fmt is nbsp_format:
                     split_ans.append((tlen, fmt))
                 else:
-                    split_ans.extend(check_spelling(text, tpos, tlen, fmt, locale, sfmt))
+                    split_ans.extend(check_spelling(text[tpos:tpos+tlen], tlen, fmt, locale, sfmt, store_locale.enabled))
 
                 tpos += tlen
             ans = split_ans