mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
30% speed up on html syntax highlighting by using a C implementation for the Tag and State classes
This commit is contained in:
parent
b2168dc176
commit
1b1f61bde6
@ -89,6 +89,10 @@ extensions = [
|
|||||||
['calibre/utils/speedup.c'],
|
['calibre/utils/speedup.c'],
|
||||||
),
|
),
|
||||||
|
|
||||||
|
Extension('html',
|
||||||
|
['calibre/gui2/tweak_book/editor/syntax/html.c'],
|
||||||
|
),
|
||||||
|
|
||||||
Extension('tokenizer',
|
Extension('tokenizer',
|
||||||
['tinycss/tokenizer.c'],
|
['tinycss/tokenizer.c'],
|
||||||
),
|
),
|
||||||
|
@ -135,6 +135,7 @@ class Plugins(collections.Mapping):
|
|||||||
'chm_extra',
|
'chm_extra',
|
||||||
'icu',
|
'icu',
|
||||||
'speedup',
|
'speedup',
|
||||||
|
'html',
|
||||||
'freetype',
|
'freetype',
|
||||||
'woff',
|
'woff',
|
||||||
'unrar',
|
'unrar',
|
||||||
|
401
src/calibre/gui2/tweak_book/editor/syntax/html.c
Normal file
401
src/calibre/gui2/tweak_book/editor/syntax/html.c
Normal file
@ -0,0 +1,401 @@
|
|||||||
|
/*
|
||||||
|
* html.c
|
||||||
|
* Copyright (C) 2014 Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
*
|
||||||
|
* Distributed under terms of the GPL3 license.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define UNICODE
|
||||||
|
#define PY_SSIZE_T_CLEAN
|
||||||
|
#include <Python.h>
|
||||||
|
#include <structmember.h>
|
||||||
|
|
||||||
|
#define COMPARE(attr, op) (PyObject_RichCompareBool(a->attr, b->attr, op) == 1)
|
||||||
|
static PyObject *bold_tags = NULL, *italic_tags = NULL, *zero = NULL;
|
||||||
|
|
||||||
|
// Tag type definition {{{
|
||||||
|
|
||||||
|
static PyTypeObject html_TagType;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
PyObject_HEAD
|
||||||
|
// Type-specific fields go here.
|
||||||
|
PyObject *name;
|
||||||
|
PyObject *bold;
|
||||||
|
PyObject *italic;
|
||||||
|
PyObject *lang;
|
||||||
|
|
||||||
|
} html_Tag;
|
||||||
|
|
||||||
|
static void
|
||||||
|
html_Tag_dealloc(html_Tag* self)
|
||||||
|
{
|
||||||
|
Py_XDECREF(self->name); self->name = NULL;
|
||||||
|
Py_XDECREF(self->bold); self->bold = NULL;
|
||||||
|
Py_XDECREF(self->italic); self->italic = NULL;
|
||||||
|
Py_XDECREF(self->lang); self->lang = NULL;
|
||||||
|
self->ob_type->tp_free((PyObject*)self);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
html_Tag_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||||
|
{
|
||||||
|
html_Tag *self = NULL;
|
||||||
|
self = (html_Tag *)type->tp_alloc(type, 0);
|
||||||
|
if (self == NULL) return PyErr_NoMemory();
|
||||||
|
|
||||||
|
self->bold = NULL; self->italic = NULL; self->lang = NULL;
|
||||||
|
if (!PyArg_ParseTuple(args, "O|OOO", &(self->name), &(self->bold), &(self->italic), &(self->lang))) {
|
||||||
|
self->ob_type->tp_free((PyObject*)self); return NULL;
|
||||||
|
}
|
||||||
|
if (self->bold == NULL) {
|
||||||
|
self->bold = (PySet_Contains(bold_tags, self->name)) ? Py_True : Py_False;
|
||||||
|
}
|
||||||
|
if (self->italic == NULL) {
|
||||||
|
self->italic = (PySet_Contains(italic_tags, self->name)) ? Py_True : Py_False;
|
||||||
|
}
|
||||||
|
if (self->lang == NULL) self->lang = Py_None;
|
||||||
|
Py_INCREF(self->name); Py_INCREF(self->bold); Py_INCREF(self->italic); Py_INCREF(self->lang);
|
||||||
|
|
||||||
|
return (PyObject *)self;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
html_Tag_copy(html_Tag *self, PyObject *args, PyObject *kwargs) {
|
||||||
|
return PyObject_CallFunctionObjArgs((PyObject *) &html_TagType, self->name, self->bold, self->italic, self->lang, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
html_Tag_compare(html_Tag *a, html_Tag *b, int op) {
|
||||||
|
switch (op) {
|
||||||
|
case Py_EQ:
|
||||||
|
if (COMPARE(name, Py_EQ) && COMPARE(lang, Py_EQ)) Py_RETURN_TRUE;
|
||||||
|
Py_RETURN_FALSE;
|
||||||
|
case Py_NE:
|
||||||
|
if (COMPARE(name, Py_NE) || COMPARE(lang, Py_NE)) Py_RETURN_TRUE;
|
||||||
|
Py_RETURN_FALSE;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
PyErr_SetString(PyExc_TypeError, "Only equals comparison is supported for Tag objects");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
html_Tag_repr(html_Tag *self) {
|
||||||
|
PyObject *name = NULL, *bold = NULL, *italic = NULL, *lang = NULL, *ans = NULL;
|
||||||
|
name = PyObject_Repr(self->name); bold = PyObject_Repr(self->bold); italic = PyObject_Repr(self->italic); lang = PyObject_Repr(self->lang);
|
||||||
|
if (name && bold && italic && lang)
|
||||||
|
ans = PyString_FromFormat("Tag(%s, bold=%s, italic=%s, lang=%s)", PyString_AS_STRING(name), PyString_AS_STRING(bold), PyString_AS_STRING(italic), PyString_AS_STRING(lang));
|
||||||
|
Py_XDECREF(name); Py_XDECREF(bold); Py_XDECREF(italic); Py_XDECREF(lang);
|
||||||
|
return ans;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyMemberDef html_Tag_members[] = {
|
||||||
|
{"name", T_OBJECT_EX, offsetof(html_Tag, name), 0, "Name of the tag in lowercase"},
|
||||||
|
{"bold", T_OBJECT_EX, offsetof(html_Tag, bold), 0, "True iff tag is bold"},
|
||||||
|
{"italic", T_OBJECT_EX, offsetof(html_Tag, italic), 0, "True iff tag is italic"},
|
||||||
|
{"lang", T_OBJECT_EX, offsetof(html_Tag, lang), 0, "The language of this tag"},
|
||||||
|
{NULL} /* Sentinel */
|
||||||
|
};
|
||||||
|
|
||||||
|
static PyMethodDef html_Tag_methods[] = {
|
||||||
|
{"copy", (PyCFunction)html_Tag_copy, METH_VARARGS,
|
||||||
|
"copy() -> Return a copy of this Tag"
|
||||||
|
},
|
||||||
|
|
||||||
|
{NULL} /* Sentinel */
|
||||||
|
};
|
||||||
|
|
||||||
|
static PyTypeObject html_TagType = { // {{{
|
||||||
|
PyObject_HEAD_INIT(NULL)
|
||||||
|
0, /*ob_size*/
|
||||||
|
"html.Tag", /*tp_name*/
|
||||||
|
sizeof(html_Tag), /*tp_basicsize*/
|
||||||
|
0, /*tp_itemsize*/
|
||||||
|
(destructor)html_Tag_dealloc, /*tp_dealloc*/
|
||||||
|
0, /*tp_print*/
|
||||||
|
0, /*tp_getattr*/
|
||||||
|
0, /*tp_setattr*/
|
||||||
|
0, /*tp_compare*/
|
||||||
|
(reprfunc)html_Tag_repr, /*tp_repr*/
|
||||||
|
0, /*tp_as_number*/
|
||||||
|
0, /*tp_as_sequence*/
|
||||||
|
0, /*tp_as_mapping*/
|
||||||
|
0, /*tp_hash */
|
||||||
|
0, /*tp_call*/
|
||||||
|
0, /*tp_str*/
|
||||||
|
0, /*tp_getattro*/
|
||||||
|
0, /*tp_setattro*/
|
||||||
|
0, /*tp_as_buffer*/
|
||||||
|
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
||||||
|
"Token", /* tp_doc */
|
||||||
|
0, /* tp_traverse */
|
||||||
|
0, /* tp_clear */
|
||||||
|
(richcmpfunc)html_Tag_compare, /* tp_richcompare */
|
||||||
|
0, /* tp_weaklistoffset */
|
||||||
|
0, /* tp_iter */
|
||||||
|
0, /* tp_iternext */
|
||||||
|
html_Tag_methods, /* tp_methods */
|
||||||
|
html_Tag_members, /* tp_members */
|
||||||
|
0, /* tp_getset */
|
||||||
|
0, /* tp_base */
|
||||||
|
0, /* tp_dict */
|
||||||
|
0, /* tp_descr_get */
|
||||||
|
0, /* tp_descr_set */
|
||||||
|
0, /* tp_dictoffset */
|
||||||
|
0, /* tp_init */
|
||||||
|
0, /* tp_alloc */
|
||||||
|
html_Tag_new, /* tp_new */
|
||||||
|
}; // }}}
|
||||||
|
// }}}
|
||||||
|
|
||||||
|
// State type definition {{{
|
||||||
|
|
||||||
|
static PyTypeObject html_StateType;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
PyObject_HEAD
|
||||||
|
// Type-specific fields go here.
|
||||||
|
PyObject *tag_being_defined;
|
||||||
|
PyObject *tags;
|
||||||
|
PyObject *is_bold;
|
||||||
|
PyObject *is_italic;
|
||||||
|
PyObject *current_lang;
|
||||||
|
PyObject *parse;
|
||||||
|
PyObject *css_formats;
|
||||||
|
PyObject *sub_parser_state;
|
||||||
|
PyObject *default_lang;
|
||||||
|
PyObject *attribute_name;
|
||||||
|
|
||||||
|
} html_State;
|
||||||
|
|
||||||
|
static void
|
||||||
|
html_State_dealloc(html_State* self)
|
||||||
|
{
|
||||||
|
Py_XDECREF(self->tag_being_defined); self->tag_being_defined = NULL;
|
||||||
|
Py_XDECREF(self->tags); self->tags = NULL;
|
||||||
|
Py_XDECREF(self->is_bold); self->is_bold = NULL;
|
||||||
|
Py_XDECREF(self->is_italic); self->is_italic = NULL;
|
||||||
|
Py_XDECREF(self->current_lang); self->current_lang = NULL;
|
||||||
|
Py_XDECREF(self->parse); self->parse = NULL;
|
||||||
|
Py_XDECREF(self->css_formats); self->css_formats = NULL;
|
||||||
|
Py_XDECREF(self->sub_parser_state); self->sub_parser_state = NULL;
|
||||||
|
Py_XDECREF(self->default_lang); self->default_lang = NULL;
|
||||||
|
Py_XDECREF(self->attribute_name);self->attribute_name = NULL;
|
||||||
|
|
||||||
|
self->ob_type->tp_free((PyObject*)self);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
html_State_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||||
|
{
|
||||||
|
html_State *self = NULL;
|
||||||
|
self = (html_State *)type->tp_alloc(type, 0);
|
||||||
|
if (self == NULL) return PyErr_NoMemory();
|
||||||
|
|
||||||
|
self->tag_being_defined = NULL;
|
||||||
|
self->tags = NULL;
|
||||||
|
self->is_bold = NULL;
|
||||||
|
self->is_italic = NULL;
|
||||||
|
self->current_lang = NULL;
|
||||||
|
self->parse = NULL;
|
||||||
|
self->css_formats = NULL;
|
||||||
|
self->sub_parser_state = NULL;
|
||||||
|
self->default_lang = NULL;
|
||||||
|
self->attribute_name = NULL;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTuple(args, "|OOOOOOOOOO",
|
||||||
|
&(self->tag_being_defined),
|
||||||
|
&(self->tags),
|
||||||
|
&(self->is_bold),
|
||||||
|
&(self->is_italic),
|
||||||
|
&(self->current_lang),
|
||||||
|
&(self->parse),
|
||||||
|
&(self->css_formats),
|
||||||
|
&(self->sub_parser_state),
|
||||||
|
&(self->default_lang),
|
||||||
|
&(self->attribute_name)))
|
||||||
|
{
|
||||||
|
self->ob_type->tp_free((PyObject*)self); return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (self->tag_being_defined == NULL) self->tag_being_defined = Py_None;
|
||||||
|
if (self->tags == NULL) { self->tags = PyList_New(0); if (self->tags == NULL) return PyErr_NoMemory(); }
|
||||||
|
if (self->is_bold == NULL) self->is_bold = Py_False;
|
||||||
|
if (self->is_italic == NULL) self->is_italic = Py_False;
|
||||||
|
if (self->current_lang == NULL) self->current_lang = Py_None;
|
||||||
|
if (self->parse == NULL) self->parse = zero;
|
||||||
|
if (self->css_formats == NULL) self->css_formats = Py_None;
|
||||||
|
if (self->sub_parser_state == NULL) self->sub_parser_state = Py_None;
|
||||||
|
if (self->default_lang == NULL) self->default_lang = Py_None;
|
||||||
|
if (self->attribute_name == NULL) self->attribute_name = Py_None;
|
||||||
|
|
||||||
|
Py_INCREF(self->tag_being_defined);
|
||||||
|
Py_INCREF(self->tags);
|
||||||
|
Py_INCREF(self->is_bold);
|
||||||
|
Py_INCREF(self->is_italic);
|
||||||
|
Py_INCREF(self->current_lang);
|
||||||
|
Py_INCREF(self->parse);
|
||||||
|
Py_INCREF(self->css_formats);
|
||||||
|
Py_INCREF(self->sub_parser_state);
|
||||||
|
Py_INCREF(self->default_lang);
|
||||||
|
Py_INCREF(self->attribute_name);
|
||||||
|
|
||||||
|
return (PyObject *)self;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
html_State_copy(html_State *self, PyObject *args, PyObject *kwargs) {
|
||||||
|
PyObject *ans = NULL, *tags = NULL, *tag_being_defined = NULL, *sub_parser_state = NULL;
|
||||||
|
Py_ssize_t i = 0;
|
||||||
|
|
||||||
|
if (self->sub_parser_state == Py_None) {sub_parser_state = Py_None; Py_INCREF(sub_parser_state); }
|
||||||
|
else sub_parser_state = PyObject_CallMethod(self->sub_parser_state, "copy", NULL);
|
||||||
|
if (sub_parser_state == NULL) goto end;
|
||||||
|
|
||||||
|
if (self->tag_being_defined == Py_None) { tag_being_defined = Py_None; Py_INCREF(Py_None); }
|
||||||
|
else tag_being_defined = html_Tag_copy((html_Tag*)self->tag_being_defined, NULL, NULL);
|
||||||
|
if (tag_being_defined == NULL) goto end;
|
||||||
|
|
||||||
|
tags = PyList_New(PyList_GET_SIZE(self->tags));
|
||||||
|
if (tags == NULL) { PyErr_NoMemory(); goto end; }
|
||||||
|
for (i = 0; i < PyList_GET_SIZE(self->tags); i++) {
|
||||||
|
PyList_SET_ITEM(tags, i, PyList_GET_ITEM(self->tags, i));
|
||||||
|
Py_INCREF(PyList_GET_ITEM(self->tags, i));
|
||||||
|
}
|
||||||
|
|
||||||
|
ans = PyObject_CallFunctionObjArgs((PyObject *) &html_StateType,
|
||||||
|
tag_being_defined, tags, self->is_bold, self->is_italic, self->current_lang, self->parse, self->css_formats, sub_parser_state, self->default_lang, self->attribute_name, NULL);
|
||||||
|
end:
|
||||||
|
Py_XDECREF(tags); Py_XDECREF(tag_being_defined); Py_XDECREF(sub_parser_state);
|
||||||
|
return ans;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
html_State_compare(html_State *a, html_State *b, int op) {
|
||||||
|
switch (op) {
|
||||||
|
case Py_EQ:
|
||||||
|
if (COMPARE(parse, Py_EQ) && COMPARE(sub_parser_state, Py_EQ) && COMPARE(tag_being_defined, Py_EQ) && COMPARE(attribute_name, Py_EQ) && COMPARE(tags, Py_EQ)) Py_RETURN_TRUE;
|
||||||
|
Py_RETURN_FALSE;
|
||||||
|
case Py_NE:
|
||||||
|
if (COMPARE(parse, Py_NE) || COMPARE(sub_parser_state, Py_NE) || COMPARE(tag_being_defined, Py_NE) || COMPARE(attribute_name, Py_NE) || COMPARE(tags, Py_NE)) Py_RETURN_TRUE;
|
||||||
|
Py_RETURN_FALSE;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
PyErr_SetString(PyExc_TypeError, "Only equals comparison is supported for State objects");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
html_State_repr(html_State *self) {
|
||||||
|
PyObject *bold = NULL, *italic = NULL, *lang = NULL, *ans = NULL;
|
||||||
|
bold = PyObject_Repr(self->is_bold); italic = PyObject_Repr(self->is_italic); lang = PyObject_Repr(self->current_lang);
|
||||||
|
if (bold && italic && lang)
|
||||||
|
ans = PyString_FromFormat("State(bold=%s, italic=%s, lang=%s)", PyString_AS_STRING(bold), PyString_AS_STRING(italic), PyString_AS_STRING(lang));
|
||||||
|
Py_XDECREF(bold); Py_XDECREF(italic); Py_XDECREF(lang);
|
||||||
|
return ans;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyMemberDef html_State_members[] = {
|
||||||
|
{"tag_being_defined", T_OBJECT_EX, offsetof(html_State, tag_being_defined), 0, "xxx"},
|
||||||
|
{"tags", T_OBJECT_EX, offsetof(html_State, tags), 0, "xxx"},
|
||||||
|
{"is_bold", T_OBJECT_EX, offsetof(html_State, is_bold), 0, "xxx"},
|
||||||
|
{"is_italic", T_OBJECT_EX, offsetof(html_State, is_italic), 0, "xxx"},
|
||||||
|
{"current_lang", T_OBJECT_EX, offsetof(html_State, current_lang), 0, "xxx"},
|
||||||
|
{"parse", T_OBJECT_EX, offsetof(html_State, parse), 0, "xxx"},
|
||||||
|
{"css_formats", T_OBJECT_EX, offsetof(html_State, css_formats), 0, "xxx"},
|
||||||
|
{"sub_parser_state", T_OBJECT_EX, offsetof(html_State, sub_parser_state), 0, "xxx"},
|
||||||
|
{"default_lang", T_OBJECT_EX, offsetof(html_State, default_lang), 0, "xxx"},
|
||||||
|
{"attribute_name", T_OBJECT_EX, offsetof(html_State, attribute_name), 0, "xxx"},
|
||||||
|
{NULL} /* Sentinel */
|
||||||
|
};
|
||||||
|
|
||||||
|
static PyMethodDef html_State_methods[] = {
|
||||||
|
{"copy", (PyCFunction)html_State_copy, METH_VARARGS,
|
||||||
|
"copy() -> Return a copy of this Tag"
|
||||||
|
},
|
||||||
|
|
||||||
|
{NULL} /* Sentinel */
|
||||||
|
};
|
||||||
|
|
||||||
|
static PyTypeObject html_StateType = { // {{{
|
||||||
|
PyObject_HEAD_INIT(NULL)
|
||||||
|
0, /*ob_size*/
|
||||||
|
"html.State", /*tp_name*/
|
||||||
|
sizeof(html_State), /*tp_basicsize*/
|
||||||
|
0, /*tp_itemsize*/
|
||||||
|
(destructor)html_State_dealloc, /*tp_dealloc*/
|
||||||
|
0, /*tp_print*/
|
||||||
|
0, /*tp_getattr*/
|
||||||
|
0, /*tp_setattr*/
|
||||||
|
0, /*tp_compare*/
|
||||||
|
(reprfunc)html_State_repr, /*tp_repr*/
|
||||||
|
0, /*tp_as_number*/
|
||||||
|
0, /*tp_as_sequence*/
|
||||||
|
0, /*tp_as_mapping*/
|
||||||
|
0, /*tp_hash */
|
||||||
|
0, /*tp_call*/
|
||||||
|
0, /*tp_str*/
|
||||||
|
0, /*tp_getattro*/
|
||||||
|
0, /*tp_setattro*/
|
||||||
|
0, /*tp_as_buffer*/
|
||||||
|
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
||||||
|
"Token", /* tp_doc */
|
||||||
|
0, /* tp_traverse */
|
||||||
|
0, /* tp_clear */
|
||||||
|
(richcmpfunc)html_State_compare, /* tp_richcompare */
|
||||||
|
0, /* tp_weaklistoffset */
|
||||||
|
0, /* tp_iter */
|
||||||
|
0, /* tp_iternext */
|
||||||
|
html_State_methods, /* tp_methods */
|
||||||
|
html_State_members, /* tp_members */
|
||||||
|
0, /* tp_getset */
|
||||||
|
0, /* tp_base */
|
||||||
|
0, /* tp_dict */
|
||||||
|
0, /* tp_descr_get */
|
||||||
|
0, /* tp_descr_set */
|
||||||
|
0, /* tp_dictoffset */
|
||||||
|
0, /* tp_init */
|
||||||
|
0, /* tp_alloc */
|
||||||
|
html_State_new, /* tp_new */
|
||||||
|
}; // }}}
|
||||||
|
// }}}
|
||||||
|
static PyMethodDef html_methods[] = {
|
||||||
|
{NULL, NULL, 0, NULL}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
PyMODINIT_FUNC
|
||||||
|
inithtml(void) {
|
||||||
|
PyObject *m, *temp;
|
||||||
|
if (PyType_Ready(&html_TagType) < 0)
|
||||||
|
return;
|
||||||
|
if (PyType_Ready(&html_StateType) < 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
temp = Py_BuildValue("ssssssss", "b", "strong", "h1", "h2", "h3", "h4", "h5", "h6", "h7");
|
||||||
|
if (temp == NULL) return;
|
||||||
|
bold_tags = PyFrozenSet_New(temp); Py_DECREF(temp);
|
||||||
|
temp = Py_BuildValue("ss", "i", "em");
|
||||||
|
if (temp == NULL) return;
|
||||||
|
italic_tags = PyFrozenSet_New(temp); Py_DECREF(temp); temp = NULL;
|
||||||
|
zero = PyInt_FromLong(0);
|
||||||
|
if (bold_tags == NULL || italic_tags == NULL || zero == NULL) return;
|
||||||
|
Py_INCREF(bold_tags); Py_INCREF(italic_tags);
|
||||||
|
|
||||||
|
m = Py_InitModule3("html", html_methods,
|
||||||
|
"Speedups for the html syntax highlighter."
|
||||||
|
);
|
||||||
|
if (m == NULL) return;
|
||||||
|
Py_INCREF(&html_TagType);
|
||||||
|
Py_INCREF(&html_StateType);
|
||||||
|
PyModule_AddObject(m, "Tag", (PyObject *)&html_TagType);
|
||||||
|
PyModule_AddObject(m, "State", (PyObject *)&html_StateType);
|
||||||
|
PyModule_AddObject(m, "bold_tags", bold_tags);
|
||||||
|
PyModule_AddObject(m, "italic_tags", italic_tags);
|
||||||
|
}
|
@ -24,8 +24,6 @@ from calibre.gui2.tweak_book.editor.syntax.css import (
|
|||||||
from html5lib.constants import cdataElements, rcdataElements
|
from html5lib.constants import cdataElements, rcdataElements
|
||||||
|
|
||||||
cdata_tags = cdataElements | rcdataElements
|
cdata_tags = cdataElements | rcdataElements
|
||||||
bold_tags = {'b', 'strong'} | {'h%d' % d for d in range(1, 7)}
|
|
||||||
italic_tags = {'i', 'em'}
|
|
||||||
normal_pat = re.compile(r'[^<>&]+')
|
normal_pat = re.compile(r'[^<>&]+')
|
||||||
entity_pat = re.compile(r'&#{0,1}[a-zA-Z0-9]{1,8};')
|
entity_pat = re.compile(r'&#{0,1}[a-zA-Z0-9]{1,8};')
|
||||||
tag_name_pat = re.compile(r'/{0,1}[a-zA-Z0-9:]+')
|
tag_name_pat = re.compile(r'/{0,1}[a-zA-Z0-9:]+')
|
||||||
@ -59,22 +57,32 @@ def refresh_spell_check_status():
|
|||||||
global do_spell_check
|
global do_spell_check
|
||||||
do_spell_check = tprefs['inline_spell_check'] and hasattr(dictionaries, 'active_user_dictionaries')
|
do_spell_check = tprefs['inline_spell_check'] and hasattr(dictionaries, 'active_user_dictionaries')
|
||||||
|
|
||||||
|
from calibre.constants import plugins
|
||||||
|
|
||||||
|
_speedup = plugins['html'][0]
|
||||||
|
if _speedup is not None:
|
||||||
|
Tag = _speedup.Tag
|
||||||
|
bold_tags, italic_tags = _speedup.bold_tags, _speedup.italic_tags
|
||||||
|
State = _speedup.State
|
||||||
|
else:
|
||||||
|
bold_tags = {'b', 'strong'} | {'h%d' % d for d in range(1, 7)}
|
||||||
|
italic_tags = {'i', 'em'}
|
||||||
|
|
||||||
class Tag(object):
|
class Tag(object):
|
||||||
|
|
||||||
__slots__ = ('name', 'bold', 'italic', 'lang')
|
__slots__ = ('name', 'bold', 'italic', 'lang')
|
||||||
|
|
||||||
def __init__(self, name, bold=None, italic=None):
|
def __init__(self, name, bold=None, italic=None, lang=None):
|
||||||
self.name = name
|
self.name = name
|
||||||
self.bold = name in bold_tags if bold is None else bold
|
self.bold = name in bold_tags if bold is None else bold
|
||||||
self.italic = name in italic_tags if italic is None else italic
|
self.italic = name in italic_tags if italic is None else italic
|
||||||
self.lang = None
|
self.lang = lang
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
return self.name == getattr(other, 'name', None) and self.lang == getattr(other, 'lang', False)
|
return self.name == other.name and self.lang == other.lang
|
||||||
|
|
||||||
def copy(self):
|
def copy(self):
|
||||||
ans = Tag(self.name, self.bold, self.italic)
|
ans = Tag(self.name, self.bold, self.italic, self.lang)
|
||||||
ans.lang = self.lang
|
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
class State(object):
|
class State(object):
|
||||||
@ -83,7 +91,7 @@ class State(object):
|
|||||||
'tag_being_defined', 'tags', 'is_bold', 'is_italic', 'current_lang',
|
'tag_being_defined', 'tags', 'is_bold', 'is_italic', 'current_lang',
|
||||||
'parse', 'css_formats', 'sub_parser_state', 'default_lang', 'attribute_name',)
|
'parse', 'css_formats', 'sub_parser_state', 'default_lang', 'attribute_name',)
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, tags=None):
|
||||||
self.tags = []
|
self.tags = []
|
||||||
self.is_bold = self.is_italic = False
|
self.is_bold = self.is_italic = False
|
||||||
self.tag_being_defined = self.current_lang = self.css_formats = \
|
self.tag_being_defined = self.current_lang = self.css_formats = \
|
||||||
@ -103,68 +111,68 @@ class State(object):
|
|||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
return (
|
return (
|
||||||
self.parse == getattr(other, 'parse', -1) and
|
self.parse == other.parse and
|
||||||
self.sub_parser_state == getattr(other, 'sub_parser_state', -1) and
|
self.sub_parser_state == other.sub_parser_state and
|
||||||
self.tag_being_defined == getattr(other, 'tag_being_defined', False) and
|
self.tag_being_defined == other.tag_being_defined and
|
||||||
self.attribute_name == getattr(other, 'attribute_name', False) and
|
self.attribute_name == other.attribute_name and
|
||||||
self.tags == getattr(other, 'tags', None)
|
self.tags == other.tags
|
||||||
)
|
)
|
||||||
|
|
||||||
def __ne__(self, other):
|
def __ne__(self, other):
|
||||||
return not self.__eq__(other)
|
return not self.__eq__(other)
|
||||||
|
|
||||||
def open_tag(self, name):
|
def __repr__(self):
|
||||||
self.tag_being_defined = Tag(name)
|
return '<State %s is_bold=%s is_italic=%s current_lang=%s>' % (
|
||||||
|
'->'.join(x.name for x in self.tags), self.is_bold, self.is_italic, self.current_lang)
|
||||||
|
__str__ = __repr__
|
||||||
|
|
||||||
def close_tag(self, name):
|
|
||||||
|
del _speedup
|
||||||
|
|
||||||
|
def finish_opening_tag(state, cdata_tags):
|
||||||
|
state.parse = NORMAL
|
||||||
|
if state.tag_being_defined is None:
|
||||||
|
return
|
||||||
|
t, state.tag_being_defined = state.tag_being_defined, None
|
||||||
|
state.tags.append(t)
|
||||||
|
state.is_bold = state.is_bold or t.bold
|
||||||
|
state.is_italic = state.is_italic or t.italic
|
||||||
|
state.current_lang = t.lang or state.current_lang
|
||||||
|
if t.name in cdata_tags:
|
||||||
|
state.parse = CSS if t.name == 'style' else CDATA
|
||||||
|
state.sub_parser_state = None
|
||||||
|
|
||||||
|
def close_tag(state, name):
|
||||||
removed_tags = []
|
removed_tags = []
|
||||||
for tag in reversed(self.tags):
|
for tag in reversed(state.tags):
|
||||||
removed_tags.append(tag)
|
removed_tags.append(tag)
|
||||||
if tag.name == name:
|
if tag.name == name:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
return # No matching open tag found, ignore the closing tag
|
return # No matching open tag found, ignore the closing tag
|
||||||
# Remove all tags upto the matching open tag
|
# Remove all tags upto the matching open tag
|
||||||
self.tags = self.tags[:-len(removed_tags)]
|
state.tags = state.tags[:-len(removed_tags)]
|
||||||
self.sub_parser_state = None
|
state.sub_parser_state = None
|
||||||
# Check if we should still be bold or italic
|
# Check if we should still be bold or italic
|
||||||
if self.is_bold:
|
if state.is_bold:
|
||||||
self.is_bold = False
|
state.is_bold = False
|
||||||
for tag in reversed(self.tags):
|
for tag in reversed(state.tags):
|
||||||
if tag.bold:
|
if tag.bold:
|
||||||
self.is_bold = True
|
state.is_bold = True
|
||||||
break
|
break
|
||||||
if self.is_italic:
|
if state.is_italic:
|
||||||
self.is_italic = False
|
state.is_italic = False
|
||||||
for tag in reversed(self.tags):
|
for tag in reversed(state.tags):
|
||||||
if tag.italic:
|
if tag.italic:
|
||||||
self.is_italic = True
|
state.is_italic = True
|
||||||
break
|
break
|
||||||
# Set the current language to the first lang attribute in a still open tag
|
# Set the current language to the first lang attribute in a still open tag
|
||||||
self.current_lang = None
|
state.current_lang = None
|
||||||
for tag in reversed(self.tags):
|
for tag in reversed(state.tags):
|
||||||
if tag.lang is not None:
|
if tag.lang is not None:
|
||||||
self.current_lang = tag.lang
|
state.current_lang = tag.lang
|
||||||
break
|
break
|
||||||
|
|
||||||
def finish_opening_tag(self, cdata_tags):
|
|
||||||
self.parse = NORMAL
|
|
||||||
if self.tag_being_defined is None:
|
|
||||||
return
|
|
||||||
t, self.tag_being_defined = self.tag_being_defined, None
|
|
||||||
self.tags.append(t)
|
|
||||||
self.is_bold = self.is_bold or t.bold
|
|
||||||
self.is_italic = self.is_italic or t.italic
|
|
||||||
self.current_lang = t.lang or self.current_lang
|
|
||||||
if t.name in cdata_tags:
|
|
||||||
self.parse = CSS if t.name == 'style' else CDATA
|
|
||||||
self.sub_parser_state = None
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return '<State %s is_bold=%s is_italic=%s current_lang=%s>' % (
|
|
||||||
'->'.join(x.name for x in self.tags), self.is_bold, self.is_italic, self.current_lang)
|
|
||||||
__str__ = __repr__
|
|
||||||
|
|
||||||
class HTMLUserData(QTextBlockUserData):
|
class HTMLUserData(QTextBlockUserData):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@ -324,7 +332,10 @@ def normal(state, text, i, formats, user_data):
|
|||||||
ans.append((len(name), formats['tag_name']))
|
ans.append((len(name), formats['tag_name']))
|
||||||
state.parse = IN_CLOSING_TAG if closing else IN_OPENING_TAG
|
state.parse = IN_CLOSING_TAG if closing else IN_OPENING_TAG
|
||||||
add_tag_data(user_data, TagStart(i, prefix, name, closing, True))
|
add_tag_data(user_data, TagStart(i, prefix, name, closing, True))
|
||||||
(state.close_tag if closing else state.open_tag)(name)
|
if closing:
|
||||||
|
close_tag(state, name)
|
||||||
|
else:
|
||||||
|
state.tag_being_defined = Tag(name)
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
if ch == '&':
|
if ch == '&':
|
||||||
@ -353,7 +364,7 @@ def opening_tag(cdata_tags, state, text, i, formats, user_data):
|
|||||||
add_tag_data(user_data, TagEnd(i + l - 1, True, False))
|
add_tag_data(user_data, TagEnd(i + l - 1, True, False))
|
||||||
return [(l, formats['tag'])]
|
return [(l, formats['tag'])]
|
||||||
if ch == '>':
|
if ch == '>':
|
||||||
state.finish_opening_tag(cdata_tags)
|
finish_opening_tag(state, cdata_tags)
|
||||||
add_tag_data(user_data, TagEnd(i, False, False))
|
add_tag_data(user_data, TagEnd(i, False, False))
|
||||||
return [(1, formats['tag'])]
|
return [(1, formats['tag'])]
|
||||||
m = attribute_name_pat.match(text, i)
|
m = attribute_name_pat.match(text, i)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user