mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
30% speed up on html syntax highlighting by using a C implementation for the Tag and State classes
This commit is contained in:
parent
b2168dc176
commit
1b1f61bde6
@ -89,6 +89,10 @@ extensions = [
|
||||
['calibre/utils/speedup.c'],
|
||||
),
|
||||
|
||||
Extension('html',
|
||||
['calibre/gui2/tweak_book/editor/syntax/html.c'],
|
||||
),
|
||||
|
||||
Extension('tokenizer',
|
||||
['tinycss/tokenizer.c'],
|
||||
),
|
||||
|
@ -135,6 +135,7 @@ class Plugins(collections.Mapping):
|
||||
'chm_extra',
|
||||
'icu',
|
||||
'speedup',
|
||||
'html',
|
||||
'freetype',
|
||||
'woff',
|
||||
'unrar',
|
||||
|
401
src/calibre/gui2/tweak_book/editor/syntax/html.c
Normal file
401
src/calibre/gui2/tweak_book/editor/syntax/html.c
Normal file
@ -0,0 +1,401 @@
|
||||
/*
|
||||
* html.c
|
||||
* Copyright (C) 2014 Kovid Goyal <kovid at kovidgoyal.net>
|
||||
*
|
||||
* Distributed under terms of the GPL3 license.
|
||||
*/
|
||||
|
||||
#define UNICODE
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include <Python.h>
|
||||
#include <structmember.h>
|
||||
|
||||
// True only when the rich comparison of a->attr with b->attr yields 1.
// Both a false result (0) and an error result (-1) evaluate to false here.
// Relies on variables named `a` and `b` being in scope at the point of use.
#define COMPARE(attr, op) (PyObject_RichCompareBool(a->attr, b->attr, op) == 1)
|
||||
// Module-wide objects, all created in inithtml(): the frozensets consulted by
// html_Tag_new for default bold/italic values, and the integer 0 that
// html_State_new uses as the default value of State.parse.
static PyObject *bold_tags = NULL, *italic_tags = NULL, *zero = NULL;
|
||||
|
||||
// Tag type definition {{{
|
||||
|
||||
static PyTypeObject html_TagType;
|
||||
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
// Type-specific fields go here.
|
||||
PyObject *name;
|
||||
PyObject *bold;
|
||||
PyObject *italic;
|
||||
PyObject *lang;
|
||||
|
||||
} html_Tag;
|
||||
|
||||
static void
|
||||
html_Tag_dealloc(html_Tag* self)
|
||||
{
|
||||
Py_XDECREF(self->name); self->name = NULL;
|
||||
Py_XDECREF(self->bold); self->bold = NULL;
|
||||
Py_XDECREF(self->italic); self->italic = NULL;
|
||||
Py_XDECREF(self->lang); self->lang = NULL;
|
||||
self->ob_type->tp_free((PyObject*)self);
|
||||
}
|
||||
|
||||
|
||||
static PyObject *
|
||||
html_Tag_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
html_Tag *self = NULL;
|
||||
self = (html_Tag *)type->tp_alloc(type, 0);
|
||||
if (self == NULL) return PyErr_NoMemory();
|
||||
|
||||
self->bold = NULL; self->italic = NULL; self->lang = NULL;
|
||||
if (!PyArg_ParseTuple(args, "O|OOO", &(self->name), &(self->bold), &(self->italic), &(self->lang))) {
|
||||
self->ob_type->tp_free((PyObject*)self); return NULL;
|
||||
}
|
||||
if (self->bold == NULL) {
|
||||
self->bold = (PySet_Contains(bold_tags, self->name)) ? Py_True : Py_False;
|
||||
}
|
||||
if (self->italic == NULL) {
|
||||
self->italic = (PySet_Contains(italic_tags, self->name)) ? Py_True : Py_False;
|
||||
}
|
||||
if (self->lang == NULL) self->lang = Py_None;
|
||||
Py_INCREF(self->name); Py_INCREF(self->bold); Py_INCREF(self->italic); Py_INCREF(self->lang);
|
||||
|
||||
return (PyObject *)self;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
html_Tag_copy(html_Tag *self, PyObject *args, PyObject *kwargs) {
|
||||
return PyObject_CallFunctionObjArgs((PyObject *) &html_TagType, self->name, self->bold, self->italic, self->lang, NULL);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
html_Tag_compare(html_Tag *a, html_Tag *b, int op) {
|
||||
switch (op) {
|
||||
case Py_EQ:
|
||||
if (COMPARE(name, Py_EQ) && COMPARE(lang, Py_EQ)) Py_RETURN_TRUE;
|
||||
Py_RETURN_FALSE;
|
||||
case Py_NE:
|
||||
if (COMPARE(name, Py_NE) || COMPARE(lang, Py_NE)) Py_RETURN_TRUE;
|
||||
Py_RETURN_FALSE;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
PyErr_SetString(PyExc_TypeError, "Only equals comparison is supported for Tag objects");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
html_Tag_repr(html_Tag *self) {
|
||||
PyObject *name = NULL, *bold = NULL, *italic = NULL, *lang = NULL, *ans = NULL;
|
||||
name = PyObject_Repr(self->name); bold = PyObject_Repr(self->bold); italic = PyObject_Repr(self->italic); lang = PyObject_Repr(self->lang);
|
||||
if (name && bold && italic && lang)
|
||||
ans = PyString_FromFormat("Tag(%s, bold=%s, italic=%s, lang=%s)", PyString_AS_STRING(name), PyString_AS_STRING(bold), PyString_AS_STRING(italic), PyString_AS_STRING(lang));
|
||||
Py_XDECREF(name); Py_XDECREF(bold); Py_XDECREF(italic); Py_XDECREF(lang);
|
||||
return ans;
|
||||
}
|
||||
|
||||
static PyMemberDef html_Tag_members[] = {
|
||||
{"name", T_OBJECT_EX, offsetof(html_Tag, name), 0, "Name of the tag in lowercase"},
|
||||
{"bold", T_OBJECT_EX, offsetof(html_Tag, bold), 0, "True iff tag is bold"},
|
||||
{"italic", T_OBJECT_EX, offsetof(html_Tag, italic), 0, "True iff tag is italic"},
|
||||
{"lang", T_OBJECT_EX, offsetof(html_Tag, lang), 0, "The language of this tag"},
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
static PyMethodDef html_Tag_methods[] = {
|
||||
{"copy", (PyCFunction)html_Tag_copy, METH_VARARGS,
|
||||
"copy() -> Return a copy of this Tag"
|
||||
},
|
||||
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
static PyTypeObject html_TagType = { // {{{
|
||||
PyObject_HEAD_INIT(NULL)
|
||||
0, /*ob_size*/
|
||||
"html.Tag", /*tp_name*/
|
||||
sizeof(html_Tag), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
(destructor)html_Tag_dealloc, /*tp_dealloc*/
|
||||
0, /*tp_print*/
|
||||
0, /*tp_getattr*/
|
||||
0, /*tp_setattr*/
|
||||
0, /*tp_compare*/
|
||||
(reprfunc)html_Tag_repr, /*tp_repr*/
|
||||
0, /*tp_as_number*/
|
||||
0, /*tp_as_sequence*/
|
||||
0, /*tp_as_mapping*/
|
||||
0, /*tp_hash */
|
||||
0, /*tp_call*/
|
||||
0, /*tp_str*/
|
||||
0, /*tp_getattro*/
|
||||
0, /*tp_setattro*/
|
||||
0, /*tp_as_buffer*/
|
||||
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
||||
"Token", /* tp_doc */
|
||||
0, /* tp_traverse */
|
||||
0, /* tp_clear */
|
||||
(richcmpfunc)html_Tag_compare, /* tp_richcompare */
|
||||
0, /* tp_weaklistoffset */
|
||||
0, /* tp_iter */
|
||||
0, /* tp_iternext */
|
||||
html_Tag_methods, /* tp_methods */
|
||||
html_Tag_members, /* tp_members */
|
||||
0, /* tp_getset */
|
||||
0, /* tp_base */
|
||||
0, /* tp_dict */
|
||||
0, /* tp_descr_get */
|
||||
0, /* tp_descr_set */
|
||||
0, /* tp_dictoffset */
|
||||
0, /* tp_init */
|
||||
0, /* tp_alloc */
|
||||
html_Tag_new, /* tp_new */
|
||||
}; // }}}
|
||||
// }}}
|
||||
|
||||
// State type definition {{{
|
||||
|
||||
static PyTypeObject html_StateType;
|
||||
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
// Type-specific fields go here.
|
||||
PyObject *tag_being_defined;
|
||||
PyObject *tags;
|
||||
PyObject *is_bold;
|
||||
PyObject *is_italic;
|
||||
PyObject *current_lang;
|
||||
PyObject *parse;
|
||||
PyObject *css_formats;
|
||||
PyObject *sub_parser_state;
|
||||
PyObject *default_lang;
|
||||
PyObject *attribute_name;
|
||||
|
||||
} html_State;
|
||||
|
||||
static void
|
||||
html_State_dealloc(html_State* self)
|
||||
{
|
||||
Py_XDECREF(self->tag_being_defined); self->tag_being_defined = NULL;
|
||||
Py_XDECREF(self->tags); self->tags = NULL;
|
||||
Py_XDECREF(self->is_bold); self->is_bold = NULL;
|
||||
Py_XDECREF(self->is_italic); self->is_italic = NULL;
|
||||
Py_XDECREF(self->current_lang); self->current_lang = NULL;
|
||||
Py_XDECREF(self->parse); self->parse = NULL;
|
||||
Py_XDECREF(self->css_formats); self->css_formats = NULL;
|
||||
Py_XDECREF(self->sub_parser_state); self->sub_parser_state = NULL;
|
||||
Py_XDECREF(self->default_lang); self->default_lang = NULL;
|
||||
Py_XDECREF(self->attribute_name);self->attribute_name = NULL;
|
||||
|
||||
self->ob_type->tp_free((PyObject*)self);
|
||||
}
|
||||
|
||||
|
||||
static PyObject *
|
||||
html_State_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
html_State *self = NULL;
|
||||
self = (html_State *)type->tp_alloc(type, 0);
|
||||
if (self == NULL) return PyErr_NoMemory();
|
||||
|
||||
self->tag_being_defined = NULL;
|
||||
self->tags = NULL;
|
||||
self->is_bold = NULL;
|
||||
self->is_italic = NULL;
|
||||
self->current_lang = NULL;
|
||||
self->parse = NULL;
|
||||
self->css_formats = NULL;
|
||||
self->sub_parser_state = NULL;
|
||||
self->default_lang = NULL;
|
||||
self->attribute_name = NULL;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "|OOOOOOOOOO",
|
||||
&(self->tag_being_defined),
|
||||
&(self->tags),
|
||||
&(self->is_bold),
|
||||
&(self->is_italic),
|
||||
&(self->current_lang),
|
||||
&(self->parse),
|
||||
&(self->css_formats),
|
||||
&(self->sub_parser_state),
|
||||
&(self->default_lang),
|
||||
&(self->attribute_name)))
|
||||
{
|
||||
self->ob_type->tp_free((PyObject*)self); return NULL;
|
||||
}
|
||||
|
||||
if (self->tag_being_defined == NULL) self->tag_being_defined = Py_None;
|
||||
if (self->tags == NULL) { self->tags = PyList_New(0); if (self->tags == NULL) return PyErr_NoMemory(); }
|
||||
if (self->is_bold == NULL) self->is_bold = Py_False;
|
||||
if (self->is_italic == NULL) self->is_italic = Py_False;
|
||||
if (self->current_lang == NULL) self->current_lang = Py_None;
|
||||
if (self->parse == NULL) self->parse = zero;
|
||||
if (self->css_formats == NULL) self->css_formats = Py_None;
|
||||
if (self->sub_parser_state == NULL) self->sub_parser_state = Py_None;
|
||||
if (self->default_lang == NULL) self->default_lang = Py_None;
|
||||
if (self->attribute_name == NULL) self->attribute_name = Py_None;
|
||||
|
||||
Py_INCREF(self->tag_being_defined);
|
||||
Py_INCREF(self->tags);
|
||||
Py_INCREF(self->is_bold);
|
||||
Py_INCREF(self->is_italic);
|
||||
Py_INCREF(self->current_lang);
|
||||
Py_INCREF(self->parse);
|
||||
Py_INCREF(self->css_formats);
|
||||
Py_INCREF(self->sub_parser_state);
|
||||
Py_INCREF(self->default_lang);
|
||||
Py_INCREF(self->attribute_name);
|
||||
|
||||
return (PyObject *)self;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
html_State_copy(html_State *self, PyObject *args, PyObject *kwargs) {
|
||||
PyObject *ans = NULL, *tags = NULL, *tag_being_defined = NULL, *sub_parser_state = NULL;
|
||||
Py_ssize_t i = 0;
|
||||
|
||||
if (self->sub_parser_state == Py_None) {sub_parser_state = Py_None; Py_INCREF(sub_parser_state); }
|
||||
else sub_parser_state = PyObject_CallMethod(self->sub_parser_state, "copy", NULL);
|
||||
if (sub_parser_state == NULL) goto end;
|
||||
|
||||
if (self->tag_being_defined == Py_None) { tag_being_defined = Py_None; Py_INCREF(Py_None); }
|
||||
else tag_being_defined = html_Tag_copy((html_Tag*)self->tag_being_defined, NULL, NULL);
|
||||
if (tag_being_defined == NULL) goto end;
|
||||
|
||||
tags = PyList_New(PyList_GET_SIZE(self->tags));
|
||||
if (tags == NULL) { PyErr_NoMemory(); goto end; }
|
||||
for (i = 0; i < PyList_GET_SIZE(self->tags); i++) {
|
||||
PyList_SET_ITEM(tags, i, PyList_GET_ITEM(self->tags, i));
|
||||
Py_INCREF(PyList_GET_ITEM(self->tags, i));
|
||||
}
|
||||
|
||||
ans = PyObject_CallFunctionObjArgs((PyObject *) &html_StateType,
|
||||
tag_being_defined, tags, self->is_bold, self->is_italic, self->current_lang, self->parse, self->css_formats, sub_parser_state, self->default_lang, self->attribute_name, NULL);
|
||||
end:
|
||||
Py_XDECREF(tags); Py_XDECREF(tag_being_defined); Py_XDECREF(sub_parser_state);
|
||||
return ans;
|
||||
}
|
||||
|
||||
|
||||
static PyObject *
|
||||
html_State_compare(html_State *a, html_State *b, int op) {
|
||||
switch (op) {
|
||||
case Py_EQ:
|
||||
if (COMPARE(parse, Py_EQ) && COMPARE(sub_parser_state, Py_EQ) && COMPARE(tag_being_defined, Py_EQ) && COMPARE(attribute_name, Py_EQ) && COMPARE(tags, Py_EQ)) Py_RETURN_TRUE;
|
||||
Py_RETURN_FALSE;
|
||||
case Py_NE:
|
||||
if (COMPARE(parse, Py_NE) || COMPARE(sub_parser_state, Py_NE) || COMPARE(tag_being_defined, Py_NE) || COMPARE(attribute_name, Py_NE) || COMPARE(tags, Py_NE)) Py_RETURN_TRUE;
|
||||
Py_RETURN_FALSE;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
PyErr_SetString(PyExc_TypeError, "Only equals comparison is supported for State objects");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
html_State_repr(html_State *self) {
|
||||
PyObject *bold = NULL, *italic = NULL, *lang = NULL, *ans = NULL;
|
||||
bold = PyObject_Repr(self->is_bold); italic = PyObject_Repr(self->is_italic); lang = PyObject_Repr(self->current_lang);
|
||||
if (bold && italic && lang)
|
||||
ans = PyString_FromFormat("State(bold=%s, italic=%s, lang=%s)", PyString_AS_STRING(bold), PyString_AS_STRING(italic), PyString_AS_STRING(lang));
|
||||
Py_XDECREF(bold); Py_XDECREF(italic); Py_XDECREF(lang);
|
||||
return ans;
|
||||
}
|
||||
|
||||
static PyMemberDef html_State_members[] = {
|
||||
{"tag_being_defined", T_OBJECT_EX, offsetof(html_State, tag_being_defined), 0, "xxx"},
|
||||
{"tags", T_OBJECT_EX, offsetof(html_State, tags), 0, "xxx"},
|
||||
{"is_bold", T_OBJECT_EX, offsetof(html_State, is_bold), 0, "xxx"},
|
||||
{"is_italic", T_OBJECT_EX, offsetof(html_State, is_italic), 0, "xxx"},
|
||||
{"current_lang", T_OBJECT_EX, offsetof(html_State, current_lang), 0, "xxx"},
|
||||
{"parse", T_OBJECT_EX, offsetof(html_State, parse), 0, "xxx"},
|
||||
{"css_formats", T_OBJECT_EX, offsetof(html_State, css_formats), 0, "xxx"},
|
||||
{"sub_parser_state", T_OBJECT_EX, offsetof(html_State, sub_parser_state), 0, "xxx"},
|
||||
{"default_lang", T_OBJECT_EX, offsetof(html_State, default_lang), 0, "xxx"},
|
||||
{"attribute_name", T_OBJECT_EX, offsetof(html_State, attribute_name), 0, "xxx"},
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
static PyMethodDef html_State_methods[] = {
|
||||
{"copy", (PyCFunction)html_State_copy, METH_VARARGS,
|
||||
"copy() -> Return a copy of this Tag"
|
||||
},
|
||||
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
static PyTypeObject html_StateType = { // {{{
|
||||
PyObject_HEAD_INIT(NULL)
|
||||
0, /*ob_size*/
|
||||
"html.State", /*tp_name*/
|
||||
sizeof(html_State), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
(destructor)html_State_dealloc, /*tp_dealloc*/
|
||||
0, /*tp_print*/
|
||||
0, /*tp_getattr*/
|
||||
0, /*tp_setattr*/
|
||||
0, /*tp_compare*/
|
||||
(reprfunc)html_State_repr, /*tp_repr*/
|
||||
0, /*tp_as_number*/
|
||||
0, /*tp_as_sequence*/
|
||||
0, /*tp_as_mapping*/
|
||||
0, /*tp_hash */
|
||||
0, /*tp_call*/
|
||||
0, /*tp_str*/
|
||||
0, /*tp_getattro*/
|
||||
0, /*tp_setattro*/
|
||||
0, /*tp_as_buffer*/
|
||||
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
||||
"Token", /* tp_doc */
|
||||
0, /* tp_traverse */
|
||||
0, /* tp_clear */
|
||||
(richcmpfunc)html_State_compare, /* tp_richcompare */
|
||||
0, /* tp_weaklistoffset */
|
||||
0, /* tp_iter */
|
||||
0, /* tp_iternext */
|
||||
html_State_methods, /* tp_methods */
|
||||
html_State_members, /* tp_members */
|
||||
0, /* tp_getset */
|
||||
0, /* tp_base */
|
||||
0, /* tp_dict */
|
||||
0, /* tp_descr_get */
|
||||
0, /* tp_descr_set */
|
||||
0, /* tp_dictoffset */
|
||||
0, /* tp_init */
|
||||
0, /* tp_alloc */
|
||||
html_State_new, /* tp_new */
|
||||
}; // }}}
|
||||
// }}}
|
||||
static PyMethodDef html_methods[] = {
|
||||
{NULL, NULL, 0, NULL}
|
||||
};
|
||||
|
||||
|
||||
PyMODINIT_FUNC
|
||||
inithtml(void) {
|
||||
PyObject *m, *temp;
|
||||
if (PyType_Ready(&html_TagType) < 0)
|
||||
return;
|
||||
if (PyType_Ready(&html_StateType) < 0)
|
||||
return;
|
||||
|
||||
temp = Py_BuildValue("ssssssss", "b", "strong", "h1", "h2", "h3", "h4", "h5", "h6", "h7");
|
||||
if (temp == NULL) return;
|
||||
bold_tags = PyFrozenSet_New(temp); Py_DECREF(temp);
|
||||
temp = Py_BuildValue("ss", "i", "em");
|
||||
if (temp == NULL) return;
|
||||
italic_tags = PyFrozenSet_New(temp); Py_DECREF(temp); temp = NULL;
|
||||
zero = PyInt_FromLong(0);
|
||||
if (bold_tags == NULL || italic_tags == NULL || zero == NULL) return;
|
||||
Py_INCREF(bold_tags); Py_INCREF(italic_tags);
|
||||
|
||||
m = Py_InitModule3("html", html_methods,
|
||||
"Speedups for the html syntax highlighter."
|
||||
);
|
||||
if (m == NULL) return;
|
||||
Py_INCREF(&html_TagType);
|
||||
Py_INCREF(&html_StateType);
|
||||
PyModule_AddObject(m, "Tag", (PyObject *)&html_TagType);
|
||||
PyModule_AddObject(m, "State", (PyObject *)&html_StateType);
|
||||
PyModule_AddObject(m, "bold_tags", bold_tags);
|
||||
PyModule_AddObject(m, "italic_tags", italic_tags);
|
||||
}
|
@ -24,8 +24,6 @@ from calibre.gui2.tweak_book.editor.syntax.css import (
|
||||
from html5lib.constants import cdataElements, rcdataElements
|
||||
|
||||
cdata_tags = cdataElements | rcdataElements
|
||||
bold_tags = {'b', 'strong'} | {'h%d' % d for d in range(1, 7)}
|
||||
italic_tags = {'i', 'em'}
|
||||
normal_pat = re.compile(r'[^<>&]+')
|
||||
entity_pat = re.compile(r'&#{0,1}[a-zA-Z0-9]{1,8};')
|
||||
tag_name_pat = re.compile(r'/{0,1}[a-zA-Z0-9:]+')
|
||||
@ -59,111 +57,121 @@ def refresh_spell_check_status():
|
||||
global do_spell_check
|
||||
do_spell_check = tprefs['inline_spell_check'] and hasattr(dictionaries, 'active_user_dictionaries')
|
||||
|
||||
class Tag(object):
|
||||
from calibre.constants import plugins
|
||||
|
||||
__slots__ = ('name', 'bold', 'italic', 'lang')
|
||||
_speedup = plugins['html'][0]
|
||||
if _speedup is not None:
|
||||
Tag = _speedup.Tag
|
||||
bold_tags, italic_tags = _speedup.bold_tags, _speedup.italic_tags
|
||||
State = _speedup.State
|
||||
else:
|
||||
bold_tags = {'b', 'strong'} | {'h%d' % d for d in range(1, 7)}
|
||||
italic_tags = {'i', 'em'}
|
||||
|
||||
def __init__(self, name, bold=None, italic=None):
|
||||
self.name = name
|
||||
self.bold = name in bold_tags if bold is None else bold
|
||||
self.italic = name in italic_tags if italic is None else italic
|
||||
self.lang = None
|
||||
class Tag(object):
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.name == getattr(other, 'name', None) and self.lang == getattr(other, 'lang', False)
|
||||
__slots__ = ('name', 'bold', 'italic', 'lang')
|
||||
|
||||
def copy(self):
|
||||
ans = Tag(self.name, self.bold, self.italic)
|
||||
ans.lang = self.lang
|
||||
return ans
|
||||
def __init__(self, name, bold=None, italic=None, lang=None):
|
||||
self.name = name
|
||||
self.bold = name in bold_tags if bold is None else bold
|
||||
self.italic = name in italic_tags if italic is None else italic
|
||||
self.lang = lang
|
||||
|
||||
class State(object):
|
||||
def __eq__(self, other):
|
||||
return self.name == other.name and self.lang == other.lang
|
||||
|
||||
__slots__ = (
|
||||
'tag_being_defined', 'tags', 'is_bold', 'is_italic', 'current_lang',
|
||||
'parse', 'css_formats', 'sub_parser_state', 'default_lang', 'attribute_name',)
|
||||
def copy(self):
|
||||
ans = Tag(self.name, self.bold, self.italic, self.lang)
|
||||
return ans
|
||||
|
||||
def __init__(self):
|
||||
self.tags = []
|
||||
self.is_bold = self.is_italic = False
|
||||
self.tag_being_defined = self.current_lang = self.css_formats = \
|
||||
self.sub_parser_state = self.default_lang = self.attribute_name = None
|
||||
self.parse = NORMAL
|
||||
class State(object):
|
||||
|
||||
def copy(self):
|
||||
ans = State()
|
||||
for x in self.__slots__:
|
||||
setattr(ans, x, getattr(self, x))
|
||||
self.tags = [x.copy() for x in self.tags]
|
||||
if self.tag_being_defined is not None:
|
||||
self.tag_being_defined = self.tag_being_defined.copy()
|
||||
if self.sub_parser_state is not None:
|
||||
ans.sub_parser_state = self.sub_parser_state.copy()
|
||||
return ans
|
||||
__slots__ = (
|
||||
'tag_being_defined', 'tags', 'is_bold', 'is_italic', 'current_lang',
|
||||
'parse', 'css_formats', 'sub_parser_state', 'default_lang', 'attribute_name',)
|
||||
|
||||
def __eq__(self, other):
|
||||
return (
|
||||
self.parse == getattr(other, 'parse', -1) and
|
||||
self.sub_parser_state == getattr(other, 'sub_parser_state', -1) and
|
||||
self.tag_being_defined == getattr(other, 'tag_being_defined', False) and
|
||||
self.attribute_name == getattr(other, 'attribute_name', False) and
|
||||
self.tags == getattr(other, 'tags', None)
|
||||
)
|
||||
def __init__(self, tags=None):
|
||||
self.tags = []
|
||||
self.is_bold = self.is_italic = False
|
||||
self.tag_being_defined = self.current_lang = self.css_formats = \
|
||||
self.sub_parser_state = self.default_lang = self.attribute_name = None
|
||||
self.parse = NORMAL
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self.__eq__(other)
|
||||
def copy(self):
|
||||
ans = State()
|
||||
for x in self.__slots__:
|
||||
setattr(ans, x, getattr(self, x))
|
||||
self.tags = [x.copy() for x in self.tags]
|
||||
if self.tag_being_defined is not None:
|
||||
self.tag_being_defined = self.tag_being_defined.copy()
|
||||
if self.sub_parser_state is not None:
|
||||
ans.sub_parser_state = self.sub_parser_state.copy()
|
||||
return ans
|
||||
|
||||
def open_tag(self, name):
|
||||
self.tag_being_defined = Tag(name)
|
||||
def __eq__(self, other):
|
||||
return (
|
||||
self.parse == other.parse and
|
||||
self.sub_parser_state == other.sub_parser_state and
|
||||
self.tag_being_defined == other.tag_being_defined and
|
||||
self.attribute_name == other.attribute_name and
|
||||
self.tags == other.tags
|
||||
)
|
||||
|
||||
def close_tag(self, name):
|
||||
removed_tags = []
|
||||
for tag in reversed(self.tags):
|
||||
removed_tags.append(tag)
|
||||
if tag.name == name:
|
||||
def __ne__(self, other):
|
||||
return not self.__eq__(other)
|
||||
|
||||
def __repr__(self):
|
||||
return '<State %s is_bold=%s is_italic=%s current_lang=%s>' % (
|
||||
'->'.join(x.name for x in self.tags), self.is_bold, self.is_italic, self.current_lang)
|
||||
__str__ = __repr__
|
||||
|
||||
|
||||
del _speedup
|
||||
|
||||
def finish_opening_tag(state, cdata_tags):
|
||||
state.parse = NORMAL
|
||||
if state.tag_being_defined is None:
|
||||
return
|
||||
t, state.tag_being_defined = state.tag_being_defined, None
|
||||
state.tags.append(t)
|
||||
state.is_bold = state.is_bold or t.bold
|
||||
state.is_italic = state.is_italic or t.italic
|
||||
state.current_lang = t.lang or state.current_lang
|
||||
if t.name in cdata_tags:
|
||||
state.parse = CSS if t.name == 'style' else CDATA
|
||||
state.sub_parser_state = None
|
||||
|
||||
def close_tag(state, name):
|
||||
removed_tags = []
|
||||
for tag in reversed(state.tags):
|
||||
removed_tags.append(tag)
|
||||
if tag.name == name:
|
||||
break
|
||||
else:
|
||||
return # No matching open tag found, ignore the closing tag
|
||||
# Remove all tags upto the matching open tag
|
||||
state.tags = state.tags[:-len(removed_tags)]
|
||||
state.sub_parser_state = None
|
||||
# Check if we should still be bold or italic
|
||||
if state.is_bold:
|
||||
state.is_bold = False
|
||||
for tag in reversed(state.tags):
|
||||
if tag.bold:
|
||||
state.is_bold = True
|
||||
break
|
||||
else:
|
||||
return # No matching open tag found, ignore the closing tag
|
||||
# Remove all tags upto the matching open tag
|
||||
self.tags = self.tags[:-len(removed_tags)]
|
||||
self.sub_parser_state = None
|
||||
# Check if we should still be bold or italic
|
||||
if self.is_bold:
|
||||
self.is_bold = False
|
||||
for tag in reversed(self.tags):
|
||||
if tag.bold:
|
||||
self.is_bold = True
|
||||
break
|
||||
if self.is_italic:
|
||||
self.is_italic = False
|
||||
for tag in reversed(self.tags):
|
||||
if tag.italic:
|
||||
self.is_italic = True
|
||||
break
|
||||
# Set the current language to the first lang attribute in a still open tag
|
||||
self.current_lang = None
|
||||
for tag in reversed(self.tags):
|
||||
if tag.lang is not None:
|
||||
self.current_lang = tag.lang
|
||||
if state.is_italic:
|
||||
state.is_italic = False
|
||||
for tag in reversed(state.tags):
|
||||
if tag.italic:
|
||||
state.is_italic = True
|
||||
break
|
||||
|
||||
def finish_opening_tag(self, cdata_tags):
|
||||
self.parse = NORMAL
|
||||
if self.tag_being_defined is None:
|
||||
return
|
||||
t, self.tag_being_defined = self.tag_being_defined, None
|
||||
self.tags.append(t)
|
||||
self.is_bold = self.is_bold or t.bold
|
||||
self.is_italic = self.is_italic or t.italic
|
||||
self.current_lang = t.lang or self.current_lang
|
||||
if t.name in cdata_tags:
|
||||
self.parse = CSS if t.name == 'style' else CDATA
|
||||
self.sub_parser_state = None
|
||||
|
||||
def __repr__(self):
|
||||
return '<State %s is_bold=%s is_italic=%s current_lang=%s>' % (
|
||||
'->'.join(x.name for x in self.tags), self.is_bold, self.is_italic, self.current_lang)
|
||||
__str__ = __repr__
|
||||
# Set the current language to the first lang attribute in a still open tag
|
||||
state.current_lang = None
|
||||
for tag in reversed(state.tags):
|
||||
if tag.lang is not None:
|
||||
state.current_lang = tag.lang
|
||||
break
|
||||
|
||||
class HTMLUserData(QTextBlockUserData):
|
||||
|
||||
@ -324,7 +332,10 @@ def normal(state, text, i, formats, user_data):
|
||||
ans.append((len(name), formats['tag_name']))
|
||||
state.parse = IN_CLOSING_TAG if closing else IN_OPENING_TAG
|
||||
add_tag_data(user_data, TagStart(i, prefix, name, closing, True))
|
||||
(state.close_tag if closing else state.open_tag)(name)
|
||||
if closing:
|
||||
close_tag(state, name)
|
||||
else:
|
||||
state.tag_being_defined = Tag(name)
|
||||
return ans
|
||||
|
||||
if ch == '&':
|
||||
@ -353,7 +364,7 @@ def opening_tag(cdata_tags, state, text, i, formats, user_data):
|
||||
add_tag_data(user_data, TagEnd(i + l - 1, True, False))
|
||||
return [(l, formats['tag'])]
|
||||
if ch == '>':
|
||||
state.finish_opening_tag(cdata_tags)
|
||||
finish_opening_tag(state, cdata_tags)
|
||||
add_tag_data(user_data, TagEnd(i, False, False))
|
||||
return [(1, formats['tag'])]
|
||||
m = attribute_name_pat.match(text, i)
|
||||
|
Loading…
x
Reference in New Issue
Block a user