mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Use a C implementation for the inline spell check inner loop. Speeds it up by about 30% again.
This commit is contained in:
parent
2d6d6a1fe4
commit
70475ea7ce
@ -11,7 +11,7 @@
|
|||||||
#include <structmember.h>
|
#include <structmember.h>
|
||||||
|
|
||||||
#define COMPARE(attr, op) (PyObject_RichCompareBool(a->attr, b->attr, op) == 1)
|
#define COMPARE(attr, op) (PyObject_RichCompareBool(a->attr, b->attr, op) == 1)
|
||||||
static PyObject *bold_tags = NULL, *italic_tags = NULL, *zero = NULL;
|
static PyObject *bold_tags = NULL, *italic_tags = NULL, *zero = NULL, *spell_property = NULL, *recognized = NULL, *split = NULL;
|
||||||
|
|
||||||
// Tag type definition {{{
|
// Tag type definition {{{
|
||||||
|
|
||||||
@ -365,7 +365,91 @@ static PyTypeObject html_StateType = { // {{{
|
|||||||
html_State_new, /* tp_new */
|
html_State_new, /* tp_new */
|
||||||
}; // }}}
|
}; // }}}
|
||||||
// }}}
|
// }}}
|
||||||
|
|
||||||
|
// init(spell_property, recognized, split)
//
// Store the three Python objects that html_check_spelling() calls back into.
// Takes exactly three positional arguments and returns None. On an argument
// error NULL is returned and any previously registered objects are left
// untouched.
static PyObject*
html_init(PyObject *self, PyObject *args) {
    PyObject *new_spell_property = NULL, *new_recognized = NULL, *new_split = NULL;
    PyObject *old_spell_property = NULL, *old_recognized = NULL, *old_split = NULL;

    // Parse into locals: parsing straight into the globals after releasing
    // them would leave dangling pointers behind if the parse failed.
    if (!PyArg_ParseTuple(args, "OOO", &new_spell_property, &new_recognized, &new_split)) return NULL;

    // PyArg_ParseTuple hands out borrowed references; take our own.
    Py_INCREF(new_spell_property); Py_INCREF(new_recognized); Py_INCREF(new_split);

    old_spell_property = spell_property; old_recognized = recognized; old_split = split;
    spell_property = new_spell_property; recognized = new_recognized; split = new_split;

    // Release the previous objects only after the globals point at the new
    // ones, so the globals never reference a released object.
    Py_XDECREF(old_spell_property); Py_XDECREF(old_recognized); Py_XDECREF(old_split);

    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
|
static PyObject*
|
||||||
|
html_check_spelling(PyObject *self, PyObject *args) {
|
||||||
|
#if PY_VERSION_HEX >= 0x03030000
|
||||||
|
#error Not implemented for python >= 3.3
|
||||||
|
#endif
|
||||||
|
PyObject *ans = NULL, *temp = NULL, *items = NULL, *text = NULL, *fmt = NULL, *locale = NULL, *sfmt = NULL, *_store_locale = NULL, *t = NULL, *utmp = NULL;
|
||||||
|
long text_len = 0, start = 0, length = 0, ppos = 0;
|
||||||
|
int store_locale = 0, ok = 0;
|
||||||
|
Py_ssize_t i = 0, j = 0;
|
||||||
|
Py_UNICODE *buf = NULL;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTuple(args, "OlOOOO", &text, &text_len, &fmt, &locale, &sfmt, &_store_locale)) return NULL;
|
||||||
|
store_locale = PyObject_IsTrue(_store_locale);
|
||||||
|
temp = PyObject_GetAttrString(locale, "langcode");
|
||||||
|
if (temp == NULL) goto error;
|
||||||
|
items = PyObject_CallFunctionObjArgs(split, text, temp, NULL);
|
||||||
|
Py_DECREF(temp); temp = NULL;
|
||||||
|
if (items == NULL) goto error;
|
||||||
|
ans = PyTuple_New((2 * PyList_GET_SIZE(items)) + 1);
|
||||||
|
if (ans == NULL) { PyErr_NoMemory(); goto error; }
|
||||||
|
buf = PyUnicode_AS_UNICODE(text);
|
||||||
|
|
||||||
|
#define APPEND(x, y) t = Py_BuildValue("lO", (x), y); if (t == NULL) goto error; PyTuple_SET_ITEM(ans, j, t); j += 1;
|
||||||
|
|
||||||
|
for (i = 0, j = 0; i < PyList_GET_SIZE(items); i++) {
|
||||||
|
temp = PyList_GET_ITEM(items, i);
|
||||||
|
start = PyInt_AS_LONG(PyTuple_GET_ITEM(temp, 0)); length = PyInt_AS_LONG(PyTuple_GET_ITEM(temp, 1));
|
||||||
|
temp = NULL;
|
||||||
|
|
||||||
|
if (start > ppos) { APPEND(start - ppos, fmt) }
|
||||||
|
ppos = start + length;
|
||||||
|
|
||||||
|
utmp = PyUnicode_FromUnicode(buf + start, length);
|
||||||
|
if (utmp == NULL) { PyErr_NoMemory(); goto error; }
|
||||||
|
temp = PyObject_CallFunctionObjArgs(recognized, utmp, locale, NULL);
|
||||||
|
Py_DECREF(utmp); utmp = NULL;
|
||||||
|
if (temp == NULL) goto error;
|
||||||
|
ok = PyObject_IsTrue(temp);
|
||||||
|
Py_DECREF(temp); temp = NULL;
|
||||||
|
|
||||||
|
if (ok) {
|
||||||
|
APPEND(length, fmt)
|
||||||
|
} else {
|
||||||
|
if (store_locale) {
|
||||||
|
temp = PyObject_CallFunctionObjArgs(spell_property, sfmt, locale, NULL);
|
||||||
|
if (temp == NULL) goto error;
|
||||||
|
APPEND(length, temp);
|
||||||
|
Py_DECREF(temp); temp = NULL;
|
||||||
|
} else {
|
||||||
|
APPEND(length, sfmt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ppos < text_len) {
|
||||||
|
APPEND(text_len - ppos, fmt)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (j < PyTuple_GET_SIZE(ans)) _PyTuple_Resize(&ans, j);
|
||||||
|
goto end;
|
||||||
|
|
||||||
|
error:
|
||||||
|
Py_XDECREF(ans); ans = NULL;
|
||||||
|
end:
|
||||||
|
Py_XDECREF(items); Py_XDECREF(temp);
|
||||||
|
return ans;
|
||||||
|
}
|
||||||
|
|
||||||
static PyMethodDef html_methods[] = {
|
static PyMethodDef html_methods[] = {
|
||||||
|
{"init", html_init, METH_VARARGS,
|
||||||
|
"init()\n\n Initialize this module"
|
||||||
|
},
|
||||||
|
|
||||||
|
{"check_spelling", html_check_spelling, METH_VARARGS,
|
||||||
|
"html_check_spelling()\n\n Speedup inner loop for spell check"
|
||||||
|
},
|
||||||
|
|
||||||
{NULL, NULL, 0, NULL}
|
{NULL, NULL, 0, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -64,6 +64,13 @@ if _speedup is not None:
|
|||||||
Tag = _speedup.Tag
|
Tag = _speedup.Tag
|
||||||
bold_tags, italic_tags = _speedup.bold_tags, _speedup.italic_tags
|
bold_tags, italic_tags = _speedup.bold_tags, _speedup.italic_tags
|
||||||
State = _speedup.State
|
State = _speedup.State
|
||||||
|
def spell_property(sfmt, locale):
    # Copy sfmt and store locale on the copy under SPELL_LOCALE_PROPERTY
    tagged_format = QTextCharFormat(sfmt)
    tagged_format.setProperty(SPELL_LOCALE_PROPERTY, locale)
    return tagged_format

# Pass the Python objects the C check_spelling implementation needs to _speedup
_speedup.init(spell_property, dictionaries.recognized, split_into_words_and_positions)
# _speedup.init() was given the function, so the module-level name is dropped
del spell_property
check_spelling = _speedup.check_spelling
|
||||||
else:
|
else:
|
||||||
bold_tags = {'b', 'strong'} | {'h%d' % d for d in range(1, 7)}
|
bold_tags = {'b', 'strong'} | {'h%d' % d for d in range(1, 7)}
|
||||||
italic_tags = {'i', 'em'}
|
italic_tags = {'i', 'em'}
|
||||||
@ -126,6 +133,28 @@ else:
|
|||||||
'->'.join(x.name for x in self.tags), self.is_bold, self.is_italic, self.current_lang)
|
'->'.join(x.name for x in self.tags), self.is_bold, self.is_italic, self.current_lang)
|
||||||
__str__ = __repr__
|
__str__ = __repr__
|
||||||
|
|
||||||
|
def check_spelling(text, tlen, fmt, locale, sfmt, store_locale):
    '''
    Pure python fallback for the spell check inner loop.

    Returns a list of (length, format) pairs covering tlen characters of
    text. Gaps between words and words accepted by dictionaries.recognized
    use fmt. Other words use sfmt, or, when store_locale is true, a
    QTextCharFormat copy of sfmt with locale stored under
    SPELL_LOCALE_PROPERTY.
    '''
    runs = []
    consumed = 0
    is_known = dictionaries.recognized
    for start, length in split_into_words_and_positions(text, lang=locale.langcode):
        # Text between the previous word and this one keeps the base format
        if start > consumed:
            runs.append((start - consumed, fmt))
        consumed = start + length
        if is_known(text[start:consumed], locale):
            word_format = fmt
        elif store_locale:
            word_format = QTextCharFormat(sfmt)
            word_format.setProperty(SPELL_LOCALE_PROPERTY, locale)
        else:
            word_format = sfmt
        runs.append((length, word_format))
    # Anything left after the last word keeps the base format
    if consumed < tlen:
        runs.append((tlen - consumed, fmt))
    return runs
|
||||||
|
|
||||||
|
|
||||||
del _speedup
|
del _speedup
|
||||||
|
|
||||||
@ -237,28 +266,6 @@ def cdata(state, text, i, formats, user_data):
|
|||||||
add_tag_data(user_data, TagStart(m.start(), '', name, True, True))
|
add_tag_data(user_data, TagStart(m.start(), '', name, True, True))
|
||||||
return [(num, fmt), (2, formats['end_tag']), (len(m.group()) - 2, formats['tag_name'])]
|
return [(num, fmt), (2, formats['end_tag']), (len(m.group()) - 2, formats['tag_name'])]
|
||||||
|
|
||||||
def check_spelling(text, tpos, tlen, fmt, locale, sfmt):
|
|
||||||
split_ans = []
|
|
||||||
ppos = 0
|
|
||||||
sl = store_locale.enabled
|
|
||||||
for start, length in split_into_words_and_positions(text[tpos:tpos+tlen], lang=locale.langcode):
|
|
||||||
if start > ppos:
|
|
||||||
split_ans.append((start - ppos, fmt))
|
|
||||||
ppos = start + length
|
|
||||||
recognized = dictionaries.recognized(text[tpos + start:tpos + ppos], locale)
|
|
||||||
if recognized:
|
|
||||||
split_ans.append((length, fmt))
|
|
||||||
else:
|
|
||||||
if sl:
|
|
||||||
s = QTextCharFormat(sfmt)
|
|
||||||
s.setProperty(SPELL_LOCALE_PROPERTY, locale)
|
|
||||||
split_ans.append((length, s))
|
|
||||||
else:
|
|
||||||
split_ans.append((length, sfmt))
|
|
||||||
if ppos < tlen:
|
|
||||||
split_ans.append((tlen - ppos, fmt))
|
|
||||||
return split_ans
|
|
||||||
|
|
||||||
def process_text(state, text, nbsp_format, spell_format, user_data):
|
def process_text(state, text, nbsp_format, spell_format, user_data):
|
||||||
ans = []
|
ans = []
|
||||||
fmt = None
|
fmt = None
|
||||||
@ -289,7 +296,7 @@ def process_text(state, text, nbsp_format, spell_format, user_data):
|
|||||||
if fmt is nbsp_format:
|
if fmt is nbsp_format:
|
||||||
split_ans.append((tlen, fmt))
|
split_ans.append((tlen, fmt))
|
||||||
else:
|
else:
|
||||||
split_ans.extend(check_spelling(text, tpos, tlen, fmt, locale, sfmt))
|
split_ans.extend(check_spelling(text[tpos:tpos+tlen], tlen, fmt, locale, sfmt, store_locale.enabled))
|
||||||
|
|
||||||
tpos += tlen
|
tpos += tlen
|
||||||
ans = split_ans
|
ans = split_ans
|
||||||
|
Loading…
x
Reference in New Issue
Block a user