Use a C implementation for the inline spell check inner loop. Speeds it up by about 30% again.

This commit is contained in:
Kovid Goyal 2014-06-24 13:00:10 +05:30
parent 2d6d6a1fe4
commit 70475ea7ce
2 changed files with 115 additions and 24 deletions

View File

@ -11,7 +11,7 @@
#include <structmember.h> #include <structmember.h>
#define COMPARE(attr, op) (PyObject_RichCompareBool(a->attr, b->attr, op) == 1) #define COMPARE(attr, op) (PyObject_RichCompareBool(a->attr, b->attr, op) == 1)
static PyObject *bold_tags = NULL, *italic_tags = NULL, *zero = NULL; static PyObject *bold_tags = NULL, *italic_tags = NULL, *zero = NULL, *spell_property = NULL, *recognized = NULL, *split = NULL;
// Tag type definition {{{ // Tag type definition {{{
@ -365,7 +365,91 @@ static PyTypeObject html_StateType = { // {{{
html_State_new, /* tp_new */ html_State_new, /* tp_new */
}; // }}} }; // }}}
// }}} // }}}
/*
 * init(spell_property, recognized, split)
 *
 * Store the three Python callables that html_check_spelling() calls back
 * into. May be called more than once; the previously stored objects are
 * released only after the new ones have been parsed and retained, so a
 * failed call leaves the existing state untouched.
 *
 * Returns None, or NULL with an exception set if the arguments are invalid.
 */
static PyObject*
html_init(PyObject *self, PyObject *args) {
    PyObject *new_spell_property = NULL, *new_recognized = NULL, *new_split = NULL;
    PyObject *old_spell_property = spell_property, *old_recognized = recognized, *old_split = split;

    /* Parse into locals first: PyArg_ParseTuple may fail part-way, and the
     * globals must never be left pointing at released or unowned objects. */
    if (!PyArg_ParseTuple(args, "OOO", &new_spell_property, &new_recognized, &new_split)) return NULL;

    /* The parsed references are borrowed, take ownership before publishing. */
    Py_INCREF(new_spell_property); Py_INCREF(new_recognized); Py_INCREF(new_split);
    spell_property = new_spell_property; recognized = new_recognized; split = new_split;

    /* Drop the old references last, once the globals are valid again. */
    Py_XDECREF(old_spell_property); Py_XDECREF(old_recognized); Py_XDECREF(old_split);
    Py_RETURN_NONE;
}
/*
 * check_spelling(text, text_len, fmt, locale, sfmt, store_locale)
 *
 * C implementation of the spell check inner loop. Splits `text` into words
 * by calling split(text, locale.langcode), asks recognized(word, locale)
 * about every word and returns a tuple of (length, format) pairs:
 *
 *   - gaps between words and recognized words get `fmt`
 *   - unrecognized words get `sfmt`, or spell_property(sfmt, locale) when
 *     `store_locale` is true
 *   - whatever remains between the last word and `text_len` gets `fmt`
 *
 * `text` must be a unicode object and init() must have been called.
 * Returns a new tuple, or NULL with an exception set on error.
 */
static PyObject*
html_check_spelling(PyObject *self, PyObject *args) {
#if PY_VERSION_HEX >= 0x03030000
#error Not implemented for python >= 3.3
#endif
    PyObject *ans = NULL, *temp = NULL, *items = NULL, *text = NULL, *fmt = NULL, *locale = NULL, *sfmt = NULL, *_store_locale = NULL, *t = NULL, *utmp = NULL;
    PyObject *pair = NULL;  /* borrowed reference, never released */
    long text_len = 0, start = 0, length = 0, ppos = 0;
    int store_locale = 0, ok = 0;
    Py_ssize_t i = 0, j = 0, num_items = 0, buf_len = 0;
    Py_UNICODE *buf = NULL;

    if (!PyArg_ParseTuple(args, "OlOOOO", &text, &text_len, &fmt, &locale, &sfmt, &_store_locale)) return NULL;
    /* PyUnicode_AS_UNICODE does no type checking, so validate up front. */
    if (!PyUnicode_Check(text)) { PyErr_SetString(PyExc_TypeError, "text must be a unicode object"); return NULL; }
    if (spell_property == NULL || recognized == NULL || split == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "init() must be called before check_spelling()");
        return NULL;
    }
    store_locale = PyObject_IsTrue(_store_locale);
    if (store_locale < 0) return NULL;  /* truth test raised */

    temp = PyObject_GetAttrString(locale, "langcode");
    if (temp == NULL) goto error;
    utmp = PyObject_CallFunctionObjArgs(split, text, temp, NULL);
    Py_DECREF(temp); temp = NULL;
    if (utmp == NULL) goto error;
    /* Accept any sequence from split(), not only a list. */
    items = PySequence_Fast(utmp, "split must return a sequence of (start, length) tuples");
    Py_DECREF(utmp); utmp = NULL;
    if (items == NULL) goto error;
    num_items = PySequence_Fast_GET_SIZE(items);

    /* Every word yields at most two entries (gap + word), plus one trailing entry. */
    ans = PyTuple_New((2 * num_items) + 1);
    if (ans == NULL) goto error;
    buf = PyUnicode_AS_UNICODE(text);
    buf_len = PyUnicode_GET_SIZE(text);

#define APPEND(x, y) do { \
        t = Py_BuildValue("lO", (long)(x), (y)); \
        if (t == NULL) goto error; \
        PyTuple_SET_ITEM(ans, j, t); j += 1; \
    } while (0)

    for (i = 0, j = 0; i < num_items; i++) {
        pair = PySequence_Fast_GET_ITEM(items, i);
        if (!PyTuple_Check(pair) || PyTuple_GET_SIZE(pair) < 2) {
            PyErr_SetString(PyExc_TypeError, "split must return a sequence of (start, length) tuples");
            goto error;
        }
        start = PyInt_AsLong(PyTuple_GET_ITEM(pair, 0));
        if (start == -1 && PyErr_Occurred()) goto error;
        length = PyInt_AsLong(PyTuple_GET_ITEM(pair, 1));
        if (length == -1 && PyErr_Occurred()) goto error;
        /* Never read outside the unicode buffer of text. */
        if (start < 0 || length < 0 || start > buf_len || length > buf_len - start) {
            PyErr_SetString(PyExc_ValueError, "word position returned by split is outside of text");
            goto error;
        }

        if (start > ppos) APPEND(start - ppos, fmt);
        ppos = start + length;

        utmp = PyUnicode_FromUnicode(buf + start, length);
        if (utmp == NULL) goto error;
        temp = PyObject_CallFunctionObjArgs(recognized, utmp, locale, NULL);
        Py_DECREF(utmp); utmp = NULL;
        if (temp == NULL) goto error;
        ok = PyObject_IsTrue(temp);
        Py_DECREF(temp); temp = NULL;
        if (ok < 0) goto error;  /* truth test raised */

        if (ok) {
            APPEND(length, fmt);
        } else if (store_locale) {
            temp = PyObject_CallFunctionObjArgs(spell_property, sfmt, locale, NULL);
            if (temp == NULL) goto error;
            APPEND(length, temp);  /* "O" takes its own reference */
            Py_DECREF(temp); temp = NULL;
        } else {
            APPEND(length, sfmt);
        }
    }
    if (ppos < text_len) APPEND(text_len - ppos, fmt);
#undef APPEND

    /* Trim the unused slots; on failure _PyTuple_Resize sets ans to NULL and raises. */
    if (j < PyTuple_GET_SIZE(ans) && _PyTuple_Resize(&ans, j) != 0) goto error;
    goto end;
error:
    Py_XDECREF(ans); ans = NULL;
end:
    Py_XDECREF(items); Py_XDECREF(temp); Py_XDECREF(utmp);
    return ans;
}
static PyMethodDef html_methods[] = { static PyMethodDef html_methods[] = {
// init: parses three objects (spell_property, recognized, split) and keeps references to them for check_spelling
{"init", html_init, METH_VARARGS,
"init()\n\n Initialize this module"
},
// check_spelling: parses (text, text_len, fmt, locale, sfmt, store_locale) and returns a tuple of (length, format) pairs
{"check_spelling", html_check_spelling, METH_VARARGS,
"html_check_spelling()\n\n Speedup inner loop for spell check"
},
{NULL, NULL, 0, NULL} {NULL, NULL, 0, NULL}
}; };

View File

@ -64,6 +64,13 @@ if _speedup is not None:
Tag = _speedup.Tag Tag = _speedup.Tag
bold_tags, italic_tags = _speedup.bold_tags, _speedup.italic_tags bold_tags, italic_tags = _speedup.bold_tags, _speedup.italic_tags
State = _speedup.State State = _speedup.State
def spell_property(sfmt, locale):
    """Return a copy of the format ``sfmt`` that has ``locale`` stored
    under SPELL_LOCALE_PROPERTY."""
    tagged_format = QTextCharFormat(sfmt)
    tagged_format.setProperty(SPELL_LOCALE_PROPERTY, locale)
    return tagged_format
# Hand the Python-side helpers to the C extension: its check_spelling() calls
# back into them to split text, look up words and build the misspelled format.
_speedup.init(spell_property, dictionaries.recognized, split_into_words_and_positions)
# init() keeps its own reference to the helper, so the module-level name can go.
del spell_property
# Use the C implementation of the spell check inner loop.
check_spelling = _speedup.check_spelling
else: else:
bold_tags = {'b', 'strong'} | {'h%d' % d for d in range(1, 7)} bold_tags = {'b', 'strong'} | {'h%d' % d for d in range(1, 7)}
italic_tags = {'i', 'em'} italic_tags = {'i', 'em'}
@ -126,6 +133,28 @@ else:
'->'.join(x.name for x in self.tags), self.is_bold, self.is_italic, self.current_lang) '->'.join(x.name for x in self.tags), self.is_bold, self.is_italic, self.current_lang)
__str__ = __repr__ __str__ = __repr__
def check_spelling(text, tlen, fmt, locale, sfmt, store_locale):
    """Split ``text`` into words and return a list of ``(length, format)``
    pairs covering it.

    Gaps between words and recognized words use ``fmt``. Unrecognized words
    use ``sfmt``, or, when ``store_locale`` is true, a copy of ``sfmt`` that
    carries ``locale`` under SPELL_LOCALE_PROPERTY. Anything left between the
    end of the last word and ``tlen`` uses ``fmt``.
    """
    formats = []
    prev_end = 0
    is_known, add = dictionaries.recognized, formats.append
    for word_start, word_len in split_into_words_and_positions(text, lang=locale.langcode):
        # Text between the previous word and this one keeps the normal format
        if word_start > prev_end:
            add((word_start - prev_end, fmt))
        prev_end = word_start + word_len
        if is_known(text[word_start:prev_end], locale):
            add((word_len, fmt))
        elif store_locale:
            tagged = QTextCharFormat(sfmt)
            tagged.setProperty(SPELL_LOCALE_PROPERTY, locale)
            add((word_len, tagged))
        else:
            add((word_len, sfmt))
    # Trailing text after the last word
    if prev_end < tlen:
        add((tlen - prev_end, fmt))
    return formats
del _speedup del _speedup
@ -237,28 +266,6 @@ def cdata(state, text, i, formats, user_data):
add_tag_data(user_data, TagStart(m.start(), '', name, True, True)) add_tag_data(user_data, TagStart(m.start(), '', name, True, True))
return [(num, fmt), (2, formats['end_tag']), (len(m.group()) - 2, formats['tag_name'])] return [(num, fmt), (2, formats['end_tag']), (len(m.group()) - 2, formats['tag_name'])]
def check_spelling(text, tpos, tlen, fmt, locale, sfmt):
    """Spell check the ``tlen`` characters of ``text`` starting at ``tpos``
    and return a list of ``(length, format)`` pairs covering that span.

    Gaps between words and recognized words use ``fmt``. Unrecognized words
    use ``sfmt``, or, when ``store_locale.enabled`` is true, a copy of
    ``sfmt`` carrying ``locale`` under SPELL_LOCALE_PROPERTY.
    """
    pieces = []
    consumed = 0
    keep_locale = store_locale.enabled
    span = text[tpos:tpos+tlen]
    for offset, size in split_into_words_and_positions(span, lang=locale.langcode):
        # Unchecked text between the previous word and this one
        if offset > consumed:
            pieces.append((offset - consumed, fmt))
        consumed = offset + size
        word = text[tpos + offset:tpos + consumed]
        if dictionaries.recognized(word, locale):
            pieces.append((size, fmt))
        elif keep_locale:
            tagged = QTextCharFormat(sfmt)
            tagged.setProperty(SPELL_LOCALE_PROPERTY, locale)
            pieces.append((size, tagged))
        else:
            pieces.append((size, sfmt))
    # Remainder of the span after the last word
    if consumed < tlen:
        pieces.append((tlen - consumed, fmt))
    return pieces
def process_text(state, text, nbsp_format, spell_format, user_data): def process_text(state, text, nbsp_format, spell_format, user_data):
ans = [] ans = []
fmt = None fmt = None
@ -289,7 +296,7 @@ def process_text(state, text, nbsp_format, spell_format, user_data):
if fmt is nbsp_format: if fmt is nbsp_format:
split_ans.append((tlen, fmt)) split_ans.append((tlen, fmt))
else: else:
split_ans.extend(check_spelling(text, tpos, tlen, fmt, locale, sfmt)) split_ans.extend(check_spelling(text[tpos:tpos+tlen], tlen, fmt, locale, sfmt, store_locale.enabled))
tpos += tlen tpos += tlen
ans = split_ans ans = split_ans