From 1b1f61bde605bdd0c31688fd674b8aa77dbb2504 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 23 Jun 2014 21:11:32 +0530
Subject: [PATCH] 30% speed up on html syntax highlighting by using a C
 implementation for the Tag and State classes

---
 setup/extensions.py                           |   4 +
 src/calibre/constants.py                      |   1 +
 .../gui2/tweak_book/editor/syntax/html.c      | 418 ++++++++++++++++++
 .../gui2/tweak_book/editor/syntax/html.py     | 201 ++++-----
 4 files changed, 529 insertions(+), 95 deletions(-)
 create mode 100644 src/calibre/gui2/tweak_book/editor/syntax/html.c

diff --git a/setup/extensions.py b/setup/extensions.py
index 43c13d3aa1..a15e8aea20 100644
--- a/setup/extensions.py
+++ b/setup/extensions.py
@@ -89,6 +89,10 @@ extensions = [
         ['calibre/utils/speedup.c'],
         ),
 
+    Extension('html',
+        ['calibre/gui2/tweak_book/editor/syntax/html.c'],
+        ),
+
     Extension('tokenizer',
         ['tinycss/tokenizer.c'],
         ),
diff --git a/src/calibre/constants.py b/src/calibre/constants.py
index 5b4cf7a155..b66bbf9ab0 100644
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@@ -135,6 +135,7 @@ class Plugins(collections.Mapping):
                 'chm_extra',
                 'icu',
                 'speedup',
+                'html',
                 'freetype',
                 'woff',
                 'unrar',
diff --git a/src/calibre/gui2/tweak_book/editor/syntax/html.c b/src/calibre/gui2/tweak_book/editor/syntax/html.c
new file mode 100644
index 0000000000..b2d101566d
--- /dev/null
+++ b/src/calibre/gui2/tweak_book/editor/syntax/html.c
@@ -0,0 +1,418 @@
+/*
+ * html.c
+ * Copyright (C) 2014 Kovid Goyal
+ *
+ * Distributed under terms of the GPL3 license.
+ */
+
+#define UNICODE
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <structmember.h>
+
+// True only when the rich comparison of a->attr with b->attr succeeds and holds; an error (-1) counts as false
+#define COMPARE(attr, op) (PyObject_RichCompareBool(a->attr, b->attr, op) == 1)
+static PyObject *bold_tags = NULL, *italic_tags = NULL, *zero = NULL;
+
+// Tag type definition {{{
+
+static PyTypeObject html_TagType;
+
+typedef struct {
+    PyObject_HEAD
+    // Type-specific fields go here.
+    PyObject *name;
+    PyObject *bold;
+    PyObject *italic;
+    PyObject *lang;
+
+} html_Tag;
+
+// Drop the references held by a Tag and release its memory
+static void
+html_Tag_dealloc(html_Tag* self)
+{
+    Py_XDECREF(self->name); self->name = NULL;
+    Py_XDECREF(self->bold); self->bold = NULL;
+    Py_XDECREF(self->italic); self->italic = NULL;
+    Py_XDECREF(self->lang); self->lang = NULL;
+    self->ob_type->tp_free((PyObject*)self);
+}
+
+
+// Tag(name[, bold[, italic[, lang]]]), positional only. An omitted bold/italic is derived from membership
+// of name in bold_tags/italic_tags; an omitted lang becomes None.
+static PyObject *
+html_Tag_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    html_Tag *self = NULL;
+    int contains = 0;
+    self = (html_Tag *)type->tp_alloc(type, 0);
+    if (self == NULL) return PyErr_NoMemory();
+
+    self->bold = NULL; self->italic = NULL; self->lang = NULL;
+    if (!PyArg_ParseTuple(args, "O|OOO", &(self->name), &(self->bold), &(self->italic), &(self->lang))) goto error;
+    if (self->bold == NULL) {
+        if ((contains = PySet_Contains(bold_tags, self->name)) < 0) goto error;  // e.g. unhashable name
+        self->bold = contains ? Py_True : Py_False;
+    }
+    if (self->italic == NULL) {
+        if ((contains = PySet_Contains(italic_tags, self->name)) < 0) goto error;
+        self->italic = contains ? Py_True : Py_False;
+    }
+    if (self->lang == NULL) self->lang = Py_None;
+    Py_INCREF(self->name); Py_INCREF(self->bold); Py_INCREF(self->italic); Py_INCREF(self->lang);
+
+    return (PyObject *)self;
+error:
+    // Every field still holds a borrowed reference (or NULL) here, so free the object without running tp_dealloc
+    self->ob_type->tp_free((PyObject*)self);
+    return NULL;
+}
+
+// copy(): build a new Tag from this one's name, bold, italic and lang
+static PyObject *
+html_Tag_copy(html_Tag *self, PyObject *args, PyObject *kwargs) {
+    return PyObject_CallFunctionObjArgs((PyObject *) &html_TagType, self->name, self->bold, self->italic, self->lang, NULL);
+}
+
+// Rich comparison: only == and != are supported; two Tags are equal when both name and lang are equal
+static PyObject *
+html_Tag_compare(html_Tag *a, html_Tag *b, int op) {
+    // Either operand can be an arbitrary object (e.g. None); never read Tag fields from a non-Tag
+    if (!PyObject_TypeCheck(a, &html_TagType) || !PyObject_TypeCheck(b, &html_TagType)) {
+        if (op == Py_EQ) Py_RETURN_FALSE;
+        if (op == Py_NE) Py_RETURN_TRUE;
+    }
+    switch (op) {
+        case Py_EQ:
+            if (COMPARE(name, Py_EQ) && COMPARE(lang, Py_EQ)) Py_RETURN_TRUE;
+            Py_RETURN_FALSE;
+        case Py_NE:
+            if (COMPARE(name, Py_NE) || COMPARE(lang, Py_NE)) Py_RETURN_TRUE;
+            Py_RETURN_FALSE;
+        default:
+            break;
+    }
+    PyErr_SetString(PyExc_TypeError, "Only equals comparison is supported for Tag objects");
+    return NULL;
+}
+
+// repr(): "Tag(<name>, bold=<bold>, italic=<italic>, lang=<lang>)" built from the repr of each field
+static PyObject *
+html_Tag_repr(html_Tag *self) {
+    PyObject *name = NULL, *bold = NULL, *italic = NULL, *lang = NULL, *ans = NULL;
+    name = PyObject_Repr(self->name); bold = PyObject_Repr(self->bold); italic = PyObject_Repr(self->italic); lang = PyObject_Repr(self->lang);
+    if (name && bold && italic && lang)
+        ans = PyString_FromFormat("Tag(%s, bold=%s, italic=%s, lang=%s)", PyString_AS_STRING(name), PyString_AS_STRING(bold), PyString_AS_STRING(italic), PyString_AS_STRING(lang));
+    Py_XDECREF(name); Py_XDECREF(bold); Py_XDECREF(italic); Py_XDECREF(lang);
+    return ans;
+}
+
+static PyMemberDef html_Tag_members[] = {
+    {"name", T_OBJECT_EX, offsetof(html_Tag, name), 0, "Name of the tag in lowercase"},
+    {"bold", T_OBJECT_EX, offsetof(html_Tag, bold), 0, "True iff tag is bold"},
+    {"italic", T_OBJECT_EX, offsetof(html_Tag, italic), 0, "True iff tag is italic"},
+    {"lang", T_OBJECT_EX, offsetof(html_Tag, lang), 0, "The language of this tag"},
+    {NULL}  /* Sentinel */
+};
+
+static PyMethodDef html_Tag_methods[] = {
+    {"copy", (PyCFunction)html_Tag_copy, METH_VARARGS,
+     "copy() -> Return a copy of this Tag"
+    },
+
+    {NULL}  /* Sentinel */
+};
+
+static PyTypeObject html_TagType = { // {{{
+    PyObject_HEAD_INIT(NULL)
+    0,                         /*ob_size*/
+    "html.Tag",                /*tp_name*/
+    sizeof(html_Tag),          /*tp_basicsize*/
+    0,                         /*tp_itemsize*/
+    (destructor)html_Tag_dealloc, /*tp_dealloc*/
+    0,                         /*tp_print*/
+    0,                         /*tp_getattr*/
+    0,                         /*tp_setattr*/
+    0,                         /*tp_compare*/
+    (reprfunc)html_Tag_repr,   /*tp_repr*/
+    0,                         /*tp_as_number*/
+    0,                         /*tp_as_sequence*/
+    0,                         /*tp_as_mapping*/
+    0,                         /*tp_hash */
+    0,                         /*tp_call*/
+    0,                         /*tp_str*/
+    0,                         /*tp_getattro*/
+    0,                         /*tp_setattro*/
+    0,                         /*tp_as_buffer*/
+    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
+    "HTML tag: name, bold, italic, lang", /* tp_doc */
+    0,                         /* tp_traverse */
+    0,                         /* tp_clear */
+    (richcmpfunc)html_Tag_compare, /* tp_richcompare */
+    0,                         /* tp_weaklistoffset */
+    0,                         /* tp_iter */
+    0,                         /* tp_iternext */
+    html_Tag_methods,          /* tp_methods */
+    html_Tag_members,          /* tp_members */
+    0,                         /* tp_getset */
+    0,                         /* tp_base */
+    0,                         /* tp_dict */
+    0,                         /* tp_descr_get */
+    0,                         /* tp_descr_set */
+    0,                         /* tp_dictoffset */
+    0,                         /* tp_init */
+    0,                         /* tp_alloc */
+    html_Tag_new,              /* tp_new */
+}; // }}}
+// }}}
+
+// State type definition {{{
+
+static PyTypeObject html_StateType;
+
+typedef struct {
+    PyObject_HEAD
+    // Type-specific fields go here.
+    PyObject *tag_being_defined;
+    PyObject *tags;
+    PyObject *is_bold;
+    PyObject *is_italic;
+    PyObject *current_lang;
+    PyObject *parse;
+    PyObject *css_formats;
+    PyObject *sub_parser_state;
+    PyObject *default_lang;
+    PyObject *attribute_name;
+
+} html_State;
+
+static void
+html_State_dealloc(html_State* self)
+{
+    Py_XDECREF(self->tag_being_defined); self->tag_being_defined = NULL;
+    Py_XDECREF(self->tags); self->tags = NULL;
+    Py_XDECREF(self->is_bold); self->is_bold = NULL;
+    Py_XDECREF(self->is_italic); self->is_italic = NULL;
+    Py_XDECREF(self->current_lang); self->current_lang = NULL;
+    Py_XDECREF(self->parse); self->parse = NULL;
+    Py_XDECREF(self->css_formats); self->css_formats = NULL;
+    Py_XDECREF(self->sub_parser_state); self->sub_parser_state = NULL;
+    Py_XDECREF(self->default_lang); self->default_lang = NULL;
+    Py_XDECREF(self->attribute_name);self->attribute_name = NULL;
+
+    self->ob_type->tp_free((PyObject*)self);
+}
+
+// State(...): up to ten optional positional arguments, in struct field order; omitted ones get the defaults assigned below
+static PyObject *
+html_State_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    html_State *self = NULL;
+    self = (html_State *)type->tp_alloc(type, 0);
+    if (self == NULL) return PyErr_NoMemory();
+
+    self->tag_being_defined = NULL;
+    self->tags = NULL;
+    self->is_bold = NULL;
+    self->is_italic = NULL;
+    self->current_lang = NULL;
+    self->parse = NULL;
+    self->css_formats = NULL;
+    self->sub_parser_state = NULL;
+    self->default_lang = NULL;
+    self->attribute_name = NULL;
+
+    if (!PyArg_ParseTuple(args, "|OOOOOOOOOO",
+        &(self->tag_being_defined),
+        &(self->tags),
+        &(self->is_bold),
+        &(self->is_italic),
+        &(self->current_lang),
+        &(self->parse),
+        &(self->css_formats),
+        &(self->sub_parser_state),
+        &(self->default_lang),
+        &(self->attribute_name)))
+    {
+        self->ob_type->tp_free((PyObject*)self); return NULL;
+    }
+
+    if (self->tag_being_defined == NULL) self->tag_being_defined = Py_None;
+    if (self->tags != NULL) Py_INCREF(self->tags);  // borrowed from args: take our own reference
+    else if ((self->tags = PyList_New(0)) == NULL) { self->ob_type->tp_free((PyObject*)self); return NULL; }  // a new list is already owned
+    if (self->is_bold == NULL) self->is_bold = Py_False;
+    if (self->is_italic == NULL) self->is_italic = Py_False;
+    if (self->current_lang == NULL) self->current_lang = Py_None;
+    if (self->parse == NULL) self->parse = zero;
+    if (self->css_formats == NULL) self->css_formats = Py_None;
+    if (self->sub_parser_state == NULL) self->sub_parser_state = Py_None;
+    if (self->default_lang == NULL) self->default_lang = Py_None;
+    if (self->attribute_name == NULL) self->attribute_name = Py_None;
+
+    Py_INCREF(self->tag_being_defined);
+    Py_INCREF(self->is_bold);
+    Py_INCREF(self->is_italic);
+    Py_INCREF(self->current_lang);
+    Py_INCREF(self->parse);
+    Py_INCREF(self->css_formats);
+    Py_INCREF(self->sub_parser_state);
+    Py_INCREF(self->default_lang);
+    Py_INCREF(self->attribute_name);
+
+    return (PyObject *)self;
+}
+
+static PyObject *
+html_State_copy(html_State *self, PyObject *args, PyObject *kwargs) {  // copies sub_parser_state and tag_being_defined; the tags list is copied shallowly
+    PyObject *ans = NULL, *tags = NULL, *tag_being_defined = NULL, *sub_parser_state = NULL;
+    Py_ssize_t i = 0;
+
+    if (self->sub_parser_state == Py_None) {sub_parser_state = Py_None; Py_INCREF(sub_parser_state); }
+    else sub_parser_state = PyObject_CallMethod(self->sub_parser_state, "copy", NULL);
+    if (sub_parser_state == NULL) goto end;
+
+    if (self->tag_being_defined == Py_None) { tag_being_defined = Py_None; Py_INCREF(Py_None); }
+    else tag_being_defined = html_Tag_copy((html_Tag*)self->tag_being_defined, NULL, NULL);
+    if (tag_being_defined == NULL) goto end;
+
+    tags = PyList_New(PyList_GET_SIZE(self->tags));
+    if (tags == NULL) { PyErr_NoMemory(); goto end; }
+    for (i = 0; i < PyList_GET_SIZE(self->tags); i++) {
+        PyList_SET_ITEM(tags, i, PyList_GET_ITEM(self->tags, i));
+        Py_INCREF(PyList_GET_ITEM(self->tags, i));
+    }
+
+    ans = PyObject_CallFunctionObjArgs((PyObject *) &html_StateType,
+            tag_being_defined, tags, self->is_bold, self->is_italic, self->current_lang, self->parse, self->css_formats, sub_parser_state, self->default_lang, self->attribute_name, NULL);
+end:
+    Py_XDECREF(tags); Py_XDECREF(tag_being_defined); Py_XDECREF(sub_parser_state);
+    return ans;
+}
+
+// Rich comparison: only == and != are supported; is_bold, is_italic, current_lang, css_formats and default_lang are not compared
+static PyObject *
+html_State_compare(html_State *a, html_State *b, int op) {
+    int both_states = PyObject_TypeCheck(a, &html_StateType) && PyObject_TypeCheck(b, &html_StateType);  // an operand may be any object, e.g. None
+    switch (op) {
+        case Py_EQ:
+            if (both_states && COMPARE(parse, Py_EQ) && COMPARE(sub_parser_state, Py_EQ) && COMPARE(tag_being_defined, Py_EQ) && COMPARE(attribute_name, Py_EQ) && COMPARE(tags, Py_EQ)) Py_RETURN_TRUE;
+            Py_RETURN_FALSE;
+        case Py_NE:
+            if (!both_states || COMPARE(parse, Py_NE) || COMPARE(sub_parser_state, Py_NE) || COMPARE(tag_being_defined, Py_NE) || COMPARE(attribute_name, Py_NE) || COMPARE(tags, Py_NE)) Py_RETURN_TRUE;
+            Py_RETURN_FALSE;
+        default: break;
+    }
+    PyErr_SetString(PyExc_TypeError, "Only equals comparison is supported for State objects");
+    return NULL;
+}
+
+static PyObject *
+html_State_repr(html_State *self) {
+    PyObject *bold = NULL, *italic = NULL, *lang = NULL, *ans = NULL;
+    bold = PyObject_Repr(self->is_bold); italic = PyObject_Repr(self->is_italic); lang = PyObject_Repr(self->current_lang);
+    if (bold && italic && lang)
+        ans = PyString_FromFormat("State(bold=%s, italic=%s, lang=%s)", PyString_AS_STRING(bold), PyString_AS_STRING(italic), PyString_AS_STRING(lang));
+    Py_XDECREF(bold); Py_XDECREF(italic); Py_XDECREF(lang);
+    return ans;
+}
+
+static PyMemberDef html_State_members[] = {
+    {"tag_being_defined", T_OBJECT_EX, offsetof(html_State, tag_being_defined), 0, "xxx"},
+    {"tags", T_OBJECT_EX, offsetof(html_State, tags), 0, "xxx"},
+    {"is_bold", T_OBJECT_EX, offsetof(html_State, is_bold), 0, "xxx"},
+    {"is_italic", T_OBJECT_EX, offsetof(html_State, is_italic), 0, "xxx"},
+    {"current_lang", T_OBJECT_EX, offsetof(html_State, current_lang), 0, "xxx"},
+    {"parse", T_OBJECT_EX, offsetof(html_State, parse), 0, "xxx"},
+    {"css_formats", T_OBJECT_EX, offsetof(html_State, css_formats), 0, "xxx"},
+    {"sub_parser_state", T_OBJECT_EX, offsetof(html_State, sub_parser_state), 0, "xxx"},
+    {"default_lang", T_OBJECT_EX, offsetof(html_State, default_lang), 0, "xxx"},
+    {"attribute_name", T_OBJECT_EX, offsetof(html_State, attribute_name), 0, "xxx"},
+    {NULL}  /* Sentinel */
+};
+
+static PyMethodDef html_State_methods[] = {
+    {"copy", (PyCFunction)html_State_copy, METH_VARARGS,
+     "copy() -> Return a copy of this State"
+    },
+
+    {NULL}  /* Sentinel */
+};
+
+static PyTypeObject html_StateType = { // {{{
+    PyObject_HEAD_INIT(NULL)
+    0,                         /*ob_size*/
+    "html.State",              /*tp_name*/
+    sizeof(html_State),        /*tp_basicsize*/
+    0,                         /*tp_itemsize*/
+    (destructor)html_State_dealloc, /*tp_dealloc*/
+    0,                         /*tp_print*/
+    0,                         /*tp_getattr*/
+    0,                         /*tp_setattr*/
+    0,                         /*tp_compare*/
+    (reprfunc)html_State_repr, /*tp_repr*/
+    0,                         /*tp_as_number*/
+    0,                         /*tp_as_sequence*/
+    0,                         /*tp_as_mapping*/
+    0,                         /*tp_hash */
+    0,                         /*tp_call*/
+    0,                         /*tp_str*/
+    0,                         /*tp_getattro*/
+    0,                         /*tp_setattro*/
+    0,                         /*tp_as_buffer*/
+    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
+    "HTML syntax highlighter state", /* tp_doc */
+    0,                         /* tp_traverse */
+    0,                         /* tp_clear */
+    (richcmpfunc)html_State_compare, /* tp_richcompare */
+    0,                         /* tp_weaklistoffset */
+    0,                         /* tp_iter */
+    0,                         /* tp_iternext */
+    html_State_methods,        /* tp_methods */
+    html_State_members,        /* tp_members */
+    0,                         /* tp_getset */
+    0,                         /* tp_base */
+    0,                         /* tp_dict */
+    0,                         /* tp_descr_get */
+    0,                         /* tp_descr_set */
+    0,                         /* tp_dictoffset */
+    0,                         /* tp_init */
+    0,                         /* tp_alloc */
+    html_State_new,            /* tp_new */
+}; // }}}
+// }}}
+static PyMethodDef html_methods[] = {
+    {NULL, NULL, 0, NULL}
+};
+
+// Module init: readies both types, builds the bold/italic tag sets and exports Tag, State, bold_tags and italic_tags
+PyMODINIT_FUNC
+inithtml(void) {
+    PyObject *m, *temp;
+    if (PyType_Ready(&html_TagType) < 0)
+        return;
+    if (PyType_Ready(&html_StateType) < 0)
+        return;
+
+    temp = Py_BuildValue("ssssssss", "b", "strong", "h1", "h2", "h3", "h4", "h5", "h6");  // one argument per format unit
+    if (temp == NULL) return;
+    bold_tags = PyFrozenSet_New(temp); Py_DECREF(temp);
+    temp = Py_BuildValue("ss", "i", "em");
+    if (temp == NULL) return;
+    italic_tags = PyFrozenSet_New(temp); Py_DECREF(temp); temp = NULL;
+    zero = PyInt_FromLong(0);
+    if (bold_tags == NULL || italic_tags == NULL || zero == NULL) return;
+    Py_INCREF(bold_tags); Py_INCREF(italic_tags);  // PyModule_AddObject below steals one reference to each
+
+    m = Py_InitModule3("html", html_methods,
+    "Speedups for the html syntax highlighter."
+    );
+    if (m == NULL) return;
+    Py_INCREF(&html_TagType);
+    Py_INCREF(&html_StateType);
+    PyModule_AddObject(m, "Tag", (PyObject *)&html_TagType);
+    PyModule_AddObject(m, "State", (PyObject *)&html_StateType);
+    PyModule_AddObject(m, "bold_tags", bold_tags);
+    PyModule_AddObject(m, "italic_tags", italic_tags);
+}
diff --git a/src/calibre/gui2/tweak_book/editor/syntax/html.py b/src/calibre/gui2/tweak_book/editor/syntax/html.py
index 9fe89f0808..29429becd7 100644
--- a/src/calibre/gui2/tweak_book/editor/syntax/html.py
+++ b/src/calibre/gui2/tweak_book/editor/syntax/html.py
@@ -24,8 +24,6 @@ from calibre.gui2.tweak_book.editor.syntax.css import (
 from html5lib.constants import cdataElements, rcdataElements
 
 cdata_tags = cdataElements | rcdataElements
-bold_tags = {'b', 'strong'} | {'h%d' % d for d in range(1, 7)}
-italic_tags = {'i', 'em'}
 normal_pat = re.compile(r'[^<>&]+')
 entity_pat = re.compile(r'&#{0,1}[a-zA-Z0-9]{1,8};')
 tag_name_pat = re.compile(r'/{0,1}[a-zA-Z0-9:]+')
@@ -59,111 +57,121 @@ def refresh_spell_check_status():
     global do_spell_check
     do_spell_check = tprefs['inline_spell_check'] and hasattr(dictionaries, 'active_user_dictionaries')
 
-class Tag(object):
+from calibre.constants import plugins
 
-    __slots__ = ('name', 'bold', 'italic', 'lang')
+_speedup = plugins['html'][0]
+if _speedup is not None:
+    Tag = _speedup.Tag
+    bold_tags, italic_tags = _speedup.bold_tags, _speedup.italic_tags
+    State = _speedup.State
+else:  # pure python fallback, used when the C extension could not be loaded
+    bold_tags = {'b', 'strong'} | {'h%d' % d for d in range(1, 7)}
+    italic_tags = {'i', 'em'}
 
-    def __init__(self, name, bold=None, italic=None):
-        self.name = name
-        self.bold = name in bold_tags if bold is None else bold
-        self.italic = name in italic_tags if italic is None else italic
-        self.lang = None
+    class Tag(object):
 
-    def __eq__(self, other):
-        return self.name == getattr(other, 'name', None) and self.lang == getattr(other, 'lang', False)
+        __slots__ = ('name', 'bold', 'italic', 'lang')
 
-    def copy(self):
-        ans = Tag(self.name, self.bold, self.italic)
-        ans.lang = self.lang
-        return ans
+        def __init__(self, name, bold=None, italic=None, lang=None):
+            self.name = name
+            self.bold = name in bold_tags if bold is None else bold
+            self.italic = name in italic_tags if italic is None else italic
+            self.lang = lang
 
-class State(object):
+        def __eq__(self, other):  # other may be None (State.tag_being_defined), hence getattr
+            return self.name == getattr(other, 'name', None) and self.lang == getattr(other, 'lang', False)
 
-    __slots__ = (
-        'tag_being_defined', 'tags', 'is_bold', 'is_italic', 'current_lang',
-        'parse', 'css_formats', 'sub_parser_state', 'default_lang', 'attribute_name',)
+        def copy(self):
+            ans = Tag(self.name, self.bold, self.italic, self.lang)
+            return ans
 
-    def __init__(self):
-        self.tags = []
-        self.is_bold = self.is_italic = False
-        self.tag_being_defined = self.current_lang = self.css_formats = \
-            self.sub_parser_state = self.default_lang = self.attribute_name = None
-        self.parse = NORMAL
+    class State(object):
 
-    def copy(self):
-        ans = State()
-        for x in self.__slots__:
-            setattr(ans, x, getattr(self, x))
-        self.tags = [x.copy() for x in self.tags]
-        if self.tag_being_defined is not None:
-            self.tag_being_defined = self.tag_being_defined.copy()
-        if self.sub_parser_state is not None:
-            ans.sub_parser_state = self.sub_parser_state.copy()
-        return ans
+        __slots__ = (
+            'tag_being_defined', 'tags', 'is_bold', 'is_italic', 'current_lang',
+            'parse', 'css_formats', 'sub_parser_state', 'default_lang', 'attribute_name',)
 
-    def __eq__(self, other):
-        return (
-            self.parse == getattr(other, 'parse', -1) and
-            self.sub_parser_state == getattr(other, 'sub_parser_state', -1) and
-            self.tag_being_defined == getattr(other, 'tag_being_defined', False) and
-            self.attribute_name == getattr(other, 'attribute_name', False) and
-            self.tags == getattr(other, 'tags', None)
-        )
+        def __init__(self, tags=None):
+            self.tags = [] if tags is None else tags
+            self.is_bold = self.is_italic = False
+            self.tag_being_defined = self.current_lang = self.css_formats = \
+                self.sub_parser_state = self.default_lang = self.attribute_name = None
+            self.parse = NORMAL
 
-    def __ne__(self, other):
-        return not self.__eq__(other)
+        def copy(self):
+            ans = State()
+            for x in self.__slots__:
+                setattr(ans, x, getattr(self, x))
+            ans.tags = [x.copy() for x in self.tags]  # the copy gets the new objects, self is left untouched
+            if self.tag_being_defined is not None:
+                ans.tag_being_defined = self.tag_being_defined.copy()
+            if self.sub_parser_state is not None:
+                ans.sub_parser_state = self.sub_parser_state.copy()
+            return ans
 
-    def open_tag(self, name):
-        self.tag_being_defined = Tag(name)
+        def __eq__(self, other):  # getattr keeps comparison with a non-State object (e.g. None) from raising
+            return (
+                self.parse == getattr(other, 'parse', -1) and
+                self.sub_parser_state == getattr(other, 'sub_parser_state', -1) and
+                self.tag_being_defined == getattr(other, 'tag_being_defined', False) and
+                self.attribute_name == getattr(other, 'attribute_name', False) and
+                self.tags == getattr(other, 'tags', None)
+            )
 
-    def close_tag(self, name):
-        removed_tags = []
-        for tag in reversed(self.tags):
-            removed_tags.append(tag)
-            if tag.name == name:
+        def __ne__(self, other):
+            return not self.__eq__(other)
+
+        def __repr__(self):
+            return '<State %s is_bold:%s is_italic:%s current_lang:%s>' % (
+                '->'.join(x.name for x in self.tags), self.is_bold, self.is_italic, self.current_lang)
+        __str__ = __repr__
+
+
+del _speedup
+
+def finish_opening_tag(state, cdata_tags):
+    state.parse = NORMAL
+    if state.tag_being_defined is None:
+        return
+    t, state.tag_being_defined = state.tag_being_defined, None
+    state.tags.append(t)
+    state.is_bold = state.is_bold or t.bold
+    state.is_italic = state.is_italic or t.italic
+    state.current_lang = t.lang or state.current_lang
+    if t.name in cdata_tags:
+        state.parse = CSS if t.name == 'style' else CDATA
+        state.sub_parser_state = None
+
+def close_tag(state, name):
+    removed_tags = []
+    for tag in reversed(state.tags):
+        removed_tags.append(tag)
+        if tag.name == name:
+            break
+    else:
+        return  # No matching open tag found, ignore the closing tag
+    # Remove all tags upto the matching open tag
+    state.tags = state.tags[:-len(removed_tags)]
+    state.sub_parser_state = None
+    # Check if we should still be bold or italic
+    if state.is_bold:
+        state.is_bold = False
+        for tag in reversed(state.tags):
+            if tag.bold:
+                state.is_bold = True
                 break
-        else:
-            return  # No matching open tag found, ignore the closing tag
-        # Remove all tags upto the matching open tag
-        self.tags = self.tags[:-len(removed_tags)]
-        self.sub_parser_state = None
-        # Check if we should still be bold or italic
-        if self.is_bold:
-            self.is_bold = False
-            for tag in reversed(self.tags):
-                if tag.bold:
-                    self.is_bold = True
-                    break
-        if self.is_italic:
-            self.is_italic = False
-            for tag in reversed(self.tags):
-                if tag.italic:
-                    self.is_italic = True
-                    break
-        # Set the current language to the first lang attribute in a still open tag
-        self.current_lang = None
-        for tag in reversed(self.tags):
-            if tag.lang is not None:
-                self.current_lang = tag.lang
+    if state.is_italic:
+        state.is_italic = False
+        for tag in reversed(state.tags):
+            if tag.italic:
+                state.is_italic = True
                 break
-
-    def finish_opening_tag(self, cdata_tags):
-        self.parse = NORMAL
-        if self.tag_being_defined is None:
-            return
-        t, self.tag_being_defined = self.tag_being_defined, None
-        self.tags.append(t)
-        self.is_bold = self.is_bold or t.bold
-        self.is_italic = self.is_italic or t.italic
-        self.current_lang = t.lang or self.current_lang
-        if t.name in cdata_tags:
-            self.parse = CSS if t.name == 'style' else CDATA
-            self.sub_parser_state = None
-
-    def __repr__(self):
-        return '<State %s is_bold:%s is_italic:%s current_lang:%s>' % (
-            '->'.join(x.name for x in self.tags), self.is_bold, self.is_italic, self.current_lang)
-    __str__ = __repr__
+    # Set the current language to the first lang attribute in a still open tag
+    state.current_lang = None
+    for tag in reversed(state.tags):
+        if tag.lang is not None:
+            state.current_lang = tag.lang
+            break
 
 class HTMLUserData(QTextBlockUserData):
 
@@ -324,7 +332,10 @@ def normal(state, text, i, formats, user_data):
         ans.append((len(name), formats['tag_name']))
         state.parse = IN_CLOSING_TAG if closing else IN_OPENING_TAG
         add_tag_data(user_data, TagStart(i, prefix, name, closing, True))
-        (state.close_tag if closing else state.open_tag)(name)
+        if closing:
+            close_tag(state, name)
+        else:
+            state.tag_being_defined = Tag(name)
         return ans
 
     if ch == '&':
@@ -353,7 +364,7 @@ def opening_tag(cdata_tags, state, text, i, formats, user_data):
         add_tag_data(user_data, TagEnd(i + l - 1, True, False))
         return [(l, formats['tag'])]
     if ch == '>':
-        state.finish_opening_tag(cdata_tags)
+        finish_opening_tag(state, cdata_tags)
         add_tag_data(user_data, TagEnd(i, False, False))
         return [(1, formats['tag'])]
     m = attribute_name_pat.match(text, i)