30% speed up on html syntax highlighting by using a C implementation for the Tag and State classes

This commit is contained in:
Kovid Goyal 2014-06-23 21:11:32 +05:30
parent b2168dc176
commit 1b1f61bde6
4 changed files with 512 additions and 95 deletions

View File

@ -89,6 +89,10 @@ extensions = [
['calibre/utils/speedup.c'], ['calibre/utils/speedup.c'],
), ),
Extension('html',
['calibre/gui2/tweak_book/editor/syntax/html.c'],
),
Extension('tokenizer', Extension('tokenizer',
['tinycss/tokenizer.c'], ['tinycss/tokenizer.c'],
), ),

View File

@ -135,6 +135,7 @@ class Plugins(collections.Mapping):
'chm_extra', 'chm_extra',
'icu', 'icu',
'speedup', 'speedup',
'html',
'freetype', 'freetype',
'woff', 'woff',
'unrar', 'unrar',

View File

@ -0,0 +1,401 @@
/*
* html.c
* Copyright (C) 2014 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#define UNICODE
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <structmember.h>
// COMPARE(attr, op): true only when rich-comparing a->attr with b->attr using
// operator `op` yields exactly 1. It expects variables named `a` and `b` to be
// in scope at the point of use. PyObject_RichCompareBool() reports failure with
// -1, which this macro folds into "false" without clearing the exception.
#define COMPARE(attr, op) (PyObject_RichCompareBool(a->attr, b->attr, op) == 1)
// Module-wide objects, filled in by inithtml(): the frozensets of tag names that
// are bold/italic by default, and the integer 0 used as the default State.parse.
static PyObject *bold_tags = NULL, *italic_tags = NULL, *zero = NULL;
// Tag type definition {{{
// Forward declaration: html_Tag_copy() needs the type object before it is defined.
static PyTypeObject html_TagType;
// Instance layout of html.Tag. Every field holds a reference owned by the
// instance (html_Tag_new INCREFs them, html_Tag_dealloc releases them).
typedef struct {
PyObject_HEAD
// Type-specific fields go here.
// First (required) constructor argument.
PyObject *name;
// Second constructor argument; when omitted, html_Tag_new derives it from bold_tags.
PyObject *bold;
// Third constructor argument; when omitted, html_Tag_new derives it from italic_tags.
PyObject *italic;
// Fourth constructor argument; Py_None when omitted.
PyObject *lang;
} html_Tag;
/*
 * Destructor for Tag objects: releases the reference held by each field
 * (resetting the slot to NULL) and then returns the memory through the
 * type's tp_free.
 */
static void
html_Tag_dealloc(html_Tag* self)
{
#define RELEASE_FIELD(field) do { Py_XDECREF(self->field); self->field = NULL; } while (0)
    RELEASE_FIELD(name);
    RELEASE_FIELD(bold);
    RELEASE_FIELD(italic);
    RELEASE_FIELD(lang);
#undef RELEASE_FIELD

    self->ob_type->tp_free((PyObject*)self);
}
/*
 * Constructor: Tag(name[, bold[, italic[, lang]]]).
 *
 * Arguments are positional only; `kwds` is not consulted.
 *   name   - required.
 *   bold   - when omitted, True iff name is a member of bold_tags.
 *   italic - when omitted, True iff name is a member of italic_tags.
 *   lang   - when omitted, None.
 *
 * Returns a new reference, or NULL with an exception set when allocation,
 * argument parsing or the set membership test fails.
 */
static PyObject *
html_Tag_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    html_Tag *self = NULL;
    int found = 0;

    self = (html_Tag *)type->tp_alloc(type, 0);
    if (self == NULL) return PyErr_NoMemory();
    self->name = NULL; self->bold = NULL; self->italic = NULL; self->lang = NULL;

    /*
     * PyArg_ParseTuple stores borrowed references. Until the INCREFs at the
     * end of this function run, error paths must release the object with
     * tp_free directly: going through the destructor would DECREF references
     * this object does not own yet.
     */
    if (!PyArg_ParseTuple(args, "O|OOO", &(self->name), &(self->bold), &(self->italic), &(self->lang))) {
        self->ob_type->tp_free((PyObject*)self); return NULL;
    }

    if (self->bold == NULL) {
        /* PySet_Contains returns -1 on failure (e.g. an unhashable name);
         * that must not be mistaken for "found". */
        found = PySet_Contains(bold_tags, self->name);
        if (found < 0) { self->ob_type->tp_free((PyObject*)self); return NULL; }
        self->bold = found ? Py_True : Py_False;
    }
    if (self->italic == NULL) {
        found = PySet_Contains(italic_tags, self->name);
        if (found < 0) { self->ob_type->tp_free((PyObject*)self); return NULL; }
        self->italic = found ? Py_True : Py_False;
    }
    if (self->lang == NULL) self->lang = Py_None;

    /* From here on the instance owns a reference to each field. */
    Py_INCREF(self->name); Py_INCREF(self->bold); Py_INCREF(self->italic); Py_INCREF(self->lang);
    return (PyObject *)self;
}
/*
 * Tag.copy(): build a new html.Tag by calling the Tag type with this
 * instance's name, bold, italic and lang. `args` and `kwargs` are unused.
 * Returns a new reference, or NULL if the constructor call fails.
 */
static PyObject *
html_Tag_copy(html_Tag *self, PyObject *args, PyObject *kwargs) {
    PyObject *tag_type = (PyObject *) &html_TagType;
    PyObject *duplicate = NULL;

    duplicate = PyObject_CallFunctionObjArgs(tag_type, self->name, self->bold, self->italic, self->lang, NULL);
    return duplicate;
}
/*
 * Rich comparison for Tag objects. Only == and != are supported; two tags
 * are equal when both their name and their lang compare equal (bold and
 * italic do not take part).
 *
 * Any other operator raises TypeError. When the other operand is not a Tag,
 * NotImplemented is returned so the interpreter can fall back to its default
 * handling instead of this function reading Tag fields out of a foreign
 * object. A failing field comparison propagates its exception (returns NULL).
 */
static PyObject *
html_Tag_compare(html_Tag *a, html_Tag *b, int op) {
    int r = 0;

    if (op != Py_EQ && op != Py_NE) {
        PyErr_SetString(PyExc_TypeError, "Only equals comparison is supported for Tag objects");
        return NULL;
    }
    if (!PyObject_TypeCheck((PyObject *)b, &html_TagType)) {
        Py_INCREF(Py_NotImplemented);
        return Py_NotImplemented;
    }

    if (op == Py_EQ) {
        /* Equal only if every field is equal: stop at the first mismatch. */
        r = PyObject_RichCompareBool(a->name, b->name, Py_EQ);
        if (r > 0) r = PyObject_RichCompareBool(a->lang, b->lang, Py_EQ);
    } else {
        /* Different as soon as any field differs: stop at the first hit. */
        r = PyObject_RichCompareBool(a->name, b->name, Py_NE);
        if (r == 0) r = PyObject_RichCompareBool(a->lang, b->lang, Py_NE);
    }
    if (r < 0) return NULL;  /* comparison raised; exception already set */
    if (r) Py_RETURN_TRUE;
    Py_RETURN_FALSE;
}
/*
 * repr(tag): "Tag(<name>, bold=<bold>, italic=<italic>, lang=<lang>)", where
 * each placeholder is the repr() of the corresponding field. Returns NULL if
 * any of the repr() calls (or building the string) fails.
 */
static PyObject *
html_Tag_repr(html_Tag *self) {
    PyObject *result = NULL;
    PyObject *name_repr = PyObject_Repr(self->name);
    PyObject *bold_repr = PyObject_Repr(self->bold);
    PyObject *italic_repr = PyObject_Repr(self->italic);
    PyObject *lang_repr = PyObject_Repr(self->lang);

    if (name_repr == NULL || bold_repr == NULL || italic_repr == NULL || lang_repr == NULL)
        goto cleanup;

    result = PyString_FromFormat("Tag(%s, bold=%s, italic=%s, lang=%s)",
                                 PyString_AS_STRING(name_repr),
                                 PyString_AS_STRING(bold_repr),
                                 PyString_AS_STRING(italic_repr),
                                 PyString_AS_STRING(lang_repr));

cleanup:
    Py_XDECREF(name_repr);
    Py_XDECREF(bold_repr);
    Py_XDECREF(italic_repr);
    Py_XDECREF(lang_repr);
    return result;
}
// Attribute table for html.Tag: exposes each struct field to Python under the
// same name. The flags column is 0 for every entry (no READONLY flag is set).
static PyMemberDef html_Tag_members[] = {
{"name", T_OBJECT_EX, offsetof(html_Tag, name), 0, "Name of the tag in lowercase"},
{"bold", T_OBJECT_EX, offsetof(html_Tag, bold), 0, "True iff tag is bold"},
{"italic", T_OBJECT_EX, offsetof(html_Tag, italic), 0, "True iff tag is italic"},
{"lang", T_OBJECT_EX, offsetof(html_Tag, lang), 0, "The language of this tag"},
{NULL} /* Sentinel */
};
// Method table for html.Tag. copy is registered as METH_VARARGS, but
// html_Tag_copy never inspects the argument tuple it receives.
static PyMethodDef html_Tag_methods[] = {
{"copy", (PyCFunction)html_Tag_copy, METH_VARARGS,
"copy() -> Return a copy of this Tag"
},
{NULL} /* Sentinel */
};
/*
 * Type object for html.Tag. The initializer is positional, so the order of
 * the slots below must not change. The type can be subclassed
 * (Py_TPFLAGS_BASETYPE) and provides its own dealloc, repr, rich comparison,
 * methods, members and tp_new; every other slot is left at 0.
 */
static PyTypeObject html_TagType = { // {{{
    PyObject_HEAD_INIT(NULL)
    0,                         /*ob_size*/
    "html.Tag",                /*tp_name*/
    sizeof(html_Tag),          /*tp_basicsize*/
    0,                         /*tp_itemsize*/
    (destructor)html_Tag_dealloc, /*tp_dealloc*/
    0,                         /*tp_print*/
    0,                         /*tp_getattr*/
    0,                         /*tp_setattr*/
    0,                         /*tp_compare*/
    (reprfunc)html_Tag_repr,   /*tp_repr*/
    0,                         /*tp_as_number*/
    0,                         /*tp_as_sequence*/
    0,                         /*tp_as_mapping*/
    0,                         /*tp_hash */
    0,                         /*tp_call*/
    0,                         /*tp_str*/
    0,                         /*tp_getattro*/
    0,                         /*tp_setattro*/
    0,                         /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    "A tag seen by the html syntax highlighter", /* tp_doc */
    0,                         /* tp_traverse */
    0,                         /* tp_clear */
    (richcmpfunc)html_Tag_compare, /* tp_richcompare */
    0,                         /* tp_weaklistoffset */
    0,                         /* tp_iter */
    0,                         /* tp_iternext */
    html_Tag_methods,          /* tp_methods */
    html_Tag_members,          /* tp_members */
    0,                         /* tp_getset */
    0,                         /* tp_base */
    0,                         /* tp_dict */
    0,                         /* tp_descr_get */
    0,                         /* tp_descr_set */
    0,                         /* tp_dictoffset */
    0,                         /* tp_init */
    0,                         /* tp_alloc */
    html_Tag_new,              /* tp_new */
}; // }}}
// }}}
// State type definition {{{
// Forward declaration: html_State_copy() needs the type object before it is defined.
static PyTypeObject html_StateType;
// Instance layout of html.State. The fields are listed in the same order in
// which html_State_new accepts them as positional arguments; each one holds a
// reference owned by the instance.
typedef struct {
PyObject_HEAD
// Type-specific fields go here.
// Defaults to None; html_State_copy treats any non-None value as an html_Tag.
PyObject *tag_being_defined;
// Defaults to a new empty list; html_State_copy accesses it with PyList_* macros.
PyObject *tags;
// Defaults to False.
PyObject *is_bold;
// Defaults to False.
PyObject *is_italic;
// Defaults to None.
PyObject *current_lang;
// Defaults to the module-level `zero` object (integer 0).
PyObject *parse;
// Defaults to None.
PyObject *css_formats;
// Defaults to None; when not None, html_State_copy calls its copy() method.
PyObject *sub_parser_state;
// Defaults to None.
PyObject *default_lang;
// Defaults to None.
PyObject *attribute_name;
} html_State;
/*
 * Destructor for State objects: releases the reference held by each field
 * (resetting the slot to NULL) and then returns the memory through the
 * type's tp_free.
 */
static void
html_State_dealloc(html_State* self)
{
#define RELEASE_FIELD(field) do { Py_XDECREF(self->field); self->field = NULL; } while (0)
    RELEASE_FIELD(tag_being_defined);
    RELEASE_FIELD(tags);
    RELEASE_FIELD(is_bold);
    RELEASE_FIELD(is_italic);
    RELEASE_FIELD(current_lang);
    RELEASE_FIELD(parse);
    RELEASE_FIELD(css_formats);
    RELEASE_FIELD(sub_parser_state);
    RELEASE_FIELD(default_lang);
    RELEASE_FIELD(attribute_name);
#undef RELEASE_FIELD

    self->ob_type->tp_free((PyObject*)self);
}
/*
 * Constructor: State([tag_being_defined[, tags[, is_bold[, is_italic[,
 *                    current_lang[, parse[, css_formats[, sub_parser_state[,
 *                    default_lang[, attribute_name]]]]]]]]]])
 *
 * All arguments are optional and positional only; `kwds` is not consulted.
 * Defaults: tags -> a new empty list, is_bold / is_italic -> False,
 * parse -> the module-level integer 0, everything else -> None.
 *
 * Returns a new reference, or NULL with an exception set when allocation or
 * argument parsing fails.
 */
static PyObject *
html_State_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    html_State *self = NULL;
    int created_tags = 0;  /* set when self->tags is a list created here */

    self = (html_State *)type->tp_alloc(type, 0);
    if (self == NULL) return PyErr_NoMemory();
    self->tag_being_defined = NULL;
    self->tags = NULL;
    self->is_bold = NULL;
    self->is_italic = NULL;
    self->current_lang = NULL;
    self->parse = NULL;
    self->css_formats = NULL;
    self->sub_parser_state = NULL;
    self->default_lang = NULL;
    self->attribute_name = NULL;

    /*
     * PyArg_ParseTuple stores borrowed references. Until the INCREFs below
     * run, error paths must release the object with tp_free directly rather
     * than through the destructor, which would DECREF references this object
     * does not own yet.
     */
    if (!PyArg_ParseTuple(args, "|OOOOOOOOOO",
            &(self->tag_being_defined),
            &(self->tags),
            &(self->is_bold),
            &(self->is_italic),
            &(self->current_lang),
            &(self->parse),
            &(self->css_formats),
            &(self->sub_parser_state),
            &(self->default_lang),
            &(self->attribute_name)))
    {
        self->ob_type->tp_free((PyObject*)self); return NULL;
    }

    if (self->tags == NULL) {
        self->tags = PyList_New(0);
        if (self->tags == NULL) {
            /* PyList_New already set the exception; do not leak self. */
            self->ob_type->tp_free((PyObject*)self); return NULL;
        }
        created_tags = 1;
    }
    if (self->tag_being_defined == NULL) self->tag_being_defined = Py_None;
    if (self->is_bold == NULL) self->is_bold = Py_False;
    if (self->is_italic == NULL) self->is_italic = Py_False;
    if (self->current_lang == NULL) self->current_lang = Py_None;
    if (self->parse == NULL) self->parse = zero;
    if (self->css_formats == NULL) self->css_formats = Py_None;
    if (self->sub_parser_state == NULL) self->sub_parser_state = Py_None;
    if (self->default_lang == NULL) self->default_lang = Py_None;
    if (self->attribute_name == NULL) self->attribute_name = Py_None;

    Py_INCREF(self->tag_being_defined);
    /* A list created above is already owned by this object; taking a second
     * reference would leak it when the State is destroyed. */
    if (!created_tags) Py_INCREF(self->tags);
    Py_INCREF(self->is_bold);
    Py_INCREF(self->is_italic);
    Py_INCREF(self->current_lang);
    Py_INCREF(self->parse);
    Py_INCREF(self->css_formats);
    Py_INCREF(self->sub_parser_state);
    Py_INCREF(self->default_lang);
    Py_INCREF(self->attribute_name);
    return (PyObject *)self;
}
/*
 * State.copy(): build a new html.State from this one. `args` and `kwargs`
 * are unused.
 *
 *   - sub_parser_state: None is shared, anything else is duplicated by
 *     calling its copy() method.
 *   - tag_being_defined: None is shared; a Tag is duplicated with
 *     html_Tag_copy; any other object is duplicated via its copy() method.
 *   - tags: a new list holding the same items (shallow copy).
 *   - every remaining field is passed to the new State as-is.
 *
 * Returns a new reference, or NULL with an exception set on failure.
 */
static PyObject *
html_State_copy(html_State *self, PyObject *args, PyObject *kwargs) {
    PyObject *ans = NULL, *tags = NULL, *tag_being_defined = NULL, *sub_parser_state = NULL;

    if (self->sub_parser_state == Py_None) { sub_parser_state = Py_None; Py_INCREF(sub_parser_state); }
    else sub_parser_state = PyObject_CallMethod(self->sub_parser_state, "copy", NULL);
    if (sub_parser_state == NULL) goto end;

    if (self->tag_being_defined == Py_None) {
        tag_being_defined = Py_None; Py_INCREF(Py_None);
    } else if (self->tag_being_defined != NULL && PyObject_TypeCheck(self->tag_being_defined, &html_TagType)) {
        /* Fast path: only safe once we know the object really is a Tag. */
        tag_being_defined = html_Tag_copy((html_Tag*)self->tag_being_defined, NULL, NULL);
    } else {
        /* The attribute is writable from Python, so it may hold any object. */
        tag_being_defined = PyObject_CallMethod(self->tag_being_defined, "copy", NULL);
    }
    if (tag_being_defined == NULL) goto end;

    /* PySequence_List validates its argument, unlike the PyList_GET_* macros,
     * which would read arbitrary memory if `tags` were not a list. */
    tags = PySequence_List(self->tags);
    if (tags == NULL) goto end;

    ans = PyObject_CallFunctionObjArgs((PyObject *) &html_StateType,
            tag_being_defined, tags, self->is_bold, self->is_italic, self->current_lang, self->parse, self->css_formats, sub_parser_state, self->default_lang, self->attribute_name, NULL);
end:
    /* The new State took its own references; drop the temporaries. */
    Py_XDECREF(tags); Py_XDECREF(tag_being_defined); Py_XDECREF(sub_parser_state);
    return ans;
}
/*
 * Rich comparison for State objects. Only == and != are supported; the
 * fields taking part are, in order: parse, sub_parser_state,
 * tag_being_defined, attribute_name and tags.
 *
 * Any other operator raises TypeError. When the other operand is not a State,
 * NotImplemented is returned so the interpreter can fall back to its default
 * handling instead of this function reading State fields out of a foreign
 * object. A failing field comparison propagates its exception (returns NULL).
 */
static PyObject *
html_State_compare(html_State *a, html_State *b, int op) {
    PyObject *lhs[5], *rhs[5];
    int i = 0, r = 0;

    if (op != Py_EQ && op != Py_NE) {
        PyErr_SetString(PyExc_TypeError, "Only equals comparison is supported for State objects");
        return NULL;
    }
    if (!PyObject_TypeCheck((PyObject *)b, &html_StateType)) {
        Py_INCREF(Py_NotImplemented);
        return Py_NotImplemented;
    }

    lhs[0] = a->parse;             rhs[0] = b->parse;
    lhs[1] = a->sub_parser_state;  rhs[1] = b->sub_parser_state;
    lhs[2] = a->tag_being_defined; rhs[2] = b->tag_being_defined;
    lhs[3] = a->attribute_name;    rhs[3] = b->attribute_name;
    lhs[4] = a->tags;              rhs[4] = b->tags;

    /*
     * == needs every field to compare equal, so the first 0 decides it;
     * != needs any field to compare unequal, so the first 1 decides it.
     * Either way the last value of r is the overall answer.
     */
    for (i = 0; i < 5; i++) {
        r = PyObject_RichCompareBool(lhs[i], rhs[i], op);
        if (r < 0) return NULL;  /* comparison raised; exception already set */
        if ((op == Py_EQ) ? (r == 0) : (r == 1)) break;
    }
    if (r) Py_RETURN_TRUE;
    Py_RETURN_FALSE;
}
/*
 * repr(state): "State(bold=<is_bold>, italic=<is_italic>, lang=<current_lang>)",
 * where each placeholder is the repr() of the corresponding field. Returns
 * NULL if any of the repr() calls (or building the string) fails.
 */
static PyObject *
html_State_repr(html_State *self) {
    PyObject *result = NULL;
    PyObject *bold_repr = PyObject_Repr(self->is_bold);
    PyObject *italic_repr = PyObject_Repr(self->is_italic);
    PyObject *lang_repr = PyObject_Repr(self->current_lang);

    if (bold_repr == NULL || italic_repr == NULL || lang_repr == NULL)
        goto cleanup;

    result = PyString_FromFormat("State(bold=%s, italic=%s, lang=%s)",
                                 PyString_AS_STRING(bold_repr),
                                 PyString_AS_STRING(italic_repr),
                                 PyString_AS_STRING(lang_repr));

cleanup:
    Py_XDECREF(bold_repr);
    Py_XDECREF(italic_repr);
    Py_XDECREF(lang_repr);
    return result;
}
// Attribute table for html.State: exposes each struct field to Python under
// the same name. The flags column is 0 for every entry (no READONLY flag is
// set). NOTE(review): the per-attribute doc strings are still the placeholder
// "xxx".
static PyMemberDef html_State_members[] = {
{"tag_being_defined", T_OBJECT_EX, offsetof(html_State, tag_being_defined), 0, "xxx"},
{"tags", T_OBJECT_EX, offsetof(html_State, tags), 0, "xxx"},
{"is_bold", T_OBJECT_EX, offsetof(html_State, is_bold), 0, "xxx"},
{"is_italic", T_OBJECT_EX, offsetof(html_State, is_italic), 0, "xxx"},
{"current_lang", T_OBJECT_EX, offsetof(html_State, current_lang), 0, "xxx"},
{"parse", T_OBJECT_EX, offsetof(html_State, parse), 0, "xxx"},
{"css_formats", T_OBJECT_EX, offsetof(html_State, css_formats), 0, "xxx"},
{"sub_parser_state", T_OBJECT_EX, offsetof(html_State, sub_parser_state), 0, "xxx"},
{"default_lang", T_OBJECT_EX, offsetof(html_State, default_lang), 0, "xxx"},
{"attribute_name", T_OBJECT_EX, offsetof(html_State, attribute_name), 0, "xxx"},
{NULL} /* Sentinel */
};
/*
 * Method table for html.State. copy is registered as METH_VARARGS, but
 * html_State_copy never inspects the argument tuple it receives.
 */
static PyMethodDef html_State_methods[] = {
    {"copy", (PyCFunction)html_State_copy, METH_VARARGS,
     "copy() -> Return a copy of this State"
    },
    {NULL} /* Sentinel */
};
/*
 * Type object for html.State. The initializer is positional, so the order of
 * the slots below must not change. The type can be subclassed
 * (Py_TPFLAGS_BASETYPE) and provides its own dealloc, repr, rich comparison,
 * methods, members and tp_new; every other slot is left at 0.
 */
static PyTypeObject html_StateType = { // {{{
    PyObject_HEAD_INIT(NULL)
    0,                         /*ob_size*/
    "html.State",              /*tp_name*/
    sizeof(html_State),        /*tp_basicsize*/
    0,                         /*tp_itemsize*/
    (destructor)html_State_dealloc, /*tp_dealloc*/
    0,                         /*tp_print*/
    0,                         /*tp_getattr*/
    0,                         /*tp_setattr*/
    0,                         /*tp_compare*/
    (reprfunc)html_State_repr, /*tp_repr*/
    0,                         /*tp_as_number*/
    0,                         /*tp_as_sequence*/
    0,                         /*tp_as_mapping*/
    0,                         /*tp_hash */
    0,                         /*tp_call*/
    0,                         /*tp_str*/
    0,                         /*tp_getattro*/
    0,                         /*tp_setattro*/
    0,                         /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    "Parser state of the html syntax highlighter", /* tp_doc */
    0,                         /* tp_traverse */
    0,                         /* tp_clear */
    (richcmpfunc)html_State_compare, /* tp_richcompare */
    0,                         /* tp_weaklistoffset */
    0,                         /* tp_iter */
    0,                         /* tp_iternext */
    html_State_methods,        /* tp_methods */
    html_State_members,        /* tp_members */
    0,                         /* tp_getset */
    0,                         /* tp_base */
    0,                         /* tp_dict */
    0,                         /* tp_descr_get */
    0,                         /* tp_descr_set */
    0,                         /* tp_dictoffset */
    0,                         /* tp_init */
    0,                         /* tp_alloc */
    html_State_new,            /* tp_new */
}; // }}}
// }}}
// Module-level function table passed to Py_InitModule3(): it contains only the
// terminating sentinel, i.e. the module defines no functions of its own.
static PyMethodDef html_methods[] = {
{NULL, NULL, 0, NULL}
};
/*
 * Module initialisation for the `html` extension (Python 2 entry point).
 *
 * Readies the Tag and State types, creates the module-level helper objects
 * (bold_tags, italic_tags, zero) and publishes Tag, State, bold_tags and
 * italic_tags as module attributes. On any failure it returns early, leaving
 * whatever exception the failing call set.
 */
PyMODINIT_FUNC
inithtml(void) {
    PyObject *m, *temp;

    if (PyType_Ready(&html_TagType) < 0)
        return;
    if (PyType_Ready(&html_StateType) < 0)
        return;

    /* Eight "s" units -> an 8-tuple; exactly one argument per unit. */
    temp = Py_BuildValue("ssssssss", "b", "strong", "h1", "h2", "h3", "h4", "h5", "h6");
    if (temp == NULL) return;
    bold_tags = PyFrozenSet_New(temp); Py_DECREF(temp);

    temp = Py_BuildValue("ss", "i", "em");
    if (temp == NULL) return;
    italic_tags = PyFrozenSet_New(temp); Py_DECREF(temp); temp = NULL;

    zero = PyInt_FromLong(0);
    if (bold_tags == NULL || italic_tags == NULL || zero == NULL) return;

    m = Py_InitModule3("html", html_methods,
    "Speedups for the html syntax highlighter."
    );
    if (m == NULL) return;

    /*
     * PyModule_AddObject steals a reference only when it succeeds, so take
     * the module's reference just before each call and give it back if the
     * call fails. The static globals keep their own reference throughout.
     */
    Py_INCREF(&html_TagType);
    if (PyModule_AddObject(m, "Tag", (PyObject *)&html_TagType) < 0) { Py_DECREF(&html_TagType); return; }
    Py_INCREF(&html_StateType);
    if (PyModule_AddObject(m, "State", (PyObject *)&html_StateType) < 0) { Py_DECREF(&html_StateType); return; }
    Py_INCREF(bold_tags);
    if (PyModule_AddObject(m, "bold_tags", bold_tags) < 0) { Py_DECREF(bold_tags); return; }
    Py_INCREF(italic_tags);
    if (PyModule_AddObject(m, "italic_tags", italic_tags) < 0) { Py_DECREF(italic_tags); return; }
}

View File

@ -24,8 +24,6 @@ from calibre.gui2.tweak_book.editor.syntax.css import (
from html5lib.constants import cdataElements, rcdataElements from html5lib.constants import cdataElements, rcdataElements
cdata_tags = cdataElements | rcdataElements cdata_tags = cdataElements | rcdataElements
bold_tags = {'b', 'strong'} | {'h%d' % d for d in range(1, 7)}
italic_tags = {'i', 'em'}
normal_pat = re.compile(r'[^<>&]+') normal_pat = re.compile(r'[^<>&]+')
entity_pat = re.compile(r'&#{0,1}[a-zA-Z0-9]{1,8};') entity_pat = re.compile(r'&#{0,1}[a-zA-Z0-9]{1,8};')
tag_name_pat = re.compile(r'/{0,1}[a-zA-Z0-9:]+') tag_name_pat = re.compile(r'/{0,1}[a-zA-Z0-9:]+')
@ -59,22 +57,32 @@ def refresh_spell_check_status():
global do_spell_check global do_spell_check
do_spell_check = tprefs['inline_spell_check'] and hasattr(dictionaries, 'active_user_dictionaries') do_spell_check = tprefs['inline_spell_check'] and hasattr(dictionaries, 'active_user_dictionaries')
from calibre.constants import plugins
_speedup = plugins['html'][0]
if _speedup is not None:
Tag = _speedup.Tag
bold_tags, italic_tags = _speedup.bold_tags, _speedup.italic_tags
State = _speedup.State
else:
bold_tags = {'b', 'strong'} | {'h%d' % d for d in range(1, 7)}
italic_tags = {'i', 'em'}
class Tag(object): class Tag(object):
__slots__ = ('name', 'bold', 'italic', 'lang') __slots__ = ('name', 'bold', 'italic', 'lang')
def __init__(self, name, bold=None, italic=None): def __init__(self, name, bold=None, italic=None, lang=None):
self.name = name self.name = name
self.bold = name in bold_tags if bold is None else bold self.bold = name in bold_tags if bold is None else bold
self.italic = name in italic_tags if italic is None else italic self.italic = name in italic_tags if italic is None else italic
self.lang = None self.lang = lang
def __eq__(self, other): def __eq__(self, other):
return self.name == getattr(other, 'name', None) and self.lang == getattr(other, 'lang', False) return self.name == other.name and self.lang == other.lang
def copy(self): def copy(self):
ans = Tag(self.name, self.bold, self.italic) ans = Tag(self.name, self.bold, self.italic, self.lang)
ans.lang = self.lang
return ans return ans
class State(object): class State(object):
@ -83,7 +91,7 @@ class State(object):
'tag_being_defined', 'tags', 'is_bold', 'is_italic', 'current_lang', 'tag_being_defined', 'tags', 'is_bold', 'is_italic', 'current_lang',
'parse', 'css_formats', 'sub_parser_state', 'default_lang', 'attribute_name',) 'parse', 'css_formats', 'sub_parser_state', 'default_lang', 'attribute_name',)
def __init__(self): def __init__(self, tags=None):
self.tags = [] self.tags = []
self.is_bold = self.is_italic = False self.is_bold = self.is_italic = False
self.tag_being_defined = self.current_lang = self.css_formats = \ self.tag_being_defined = self.current_lang = self.css_formats = \
@ -103,68 +111,68 @@ class State(object):
def __eq__(self, other): def __eq__(self, other):
return ( return (
self.parse == getattr(other, 'parse', -1) and self.parse == other.parse and
self.sub_parser_state == getattr(other, 'sub_parser_state', -1) and self.sub_parser_state == other.sub_parser_state and
self.tag_being_defined == getattr(other, 'tag_being_defined', False) and self.tag_being_defined == other.tag_being_defined and
self.attribute_name == getattr(other, 'attribute_name', False) and self.attribute_name == other.attribute_name and
self.tags == getattr(other, 'tags', None) self.tags == other.tags
) )
def __ne__(self, other): def __ne__(self, other):
return not self.__eq__(other) return not self.__eq__(other)
def open_tag(self, name): def __repr__(self):
self.tag_being_defined = Tag(name) return '<State %s is_bold=%s is_italic=%s current_lang=%s>' % (
'->'.join(x.name for x in self.tags), self.is_bold, self.is_italic, self.current_lang)
__str__ = __repr__
def close_tag(self, name):
del _speedup
def finish_opening_tag(state, cdata_tags):
state.parse = NORMAL
if state.tag_being_defined is None:
return
t, state.tag_being_defined = state.tag_being_defined, None
state.tags.append(t)
state.is_bold = state.is_bold or t.bold
state.is_italic = state.is_italic or t.italic
state.current_lang = t.lang or state.current_lang
if t.name in cdata_tags:
state.parse = CSS if t.name == 'style' else CDATA
state.sub_parser_state = None
def close_tag(state, name):
removed_tags = [] removed_tags = []
for tag in reversed(self.tags): for tag in reversed(state.tags):
removed_tags.append(tag) removed_tags.append(tag)
if tag.name == name: if tag.name == name:
break break
else: else:
return # No matching open tag found, ignore the closing tag return # No matching open tag found, ignore the closing tag
# Remove all tags upto the matching open tag # Remove all tags upto the matching open tag
self.tags = self.tags[:-len(removed_tags)] state.tags = state.tags[:-len(removed_tags)]
self.sub_parser_state = None state.sub_parser_state = None
# Check if we should still be bold or italic # Check if we should still be bold or italic
if self.is_bold: if state.is_bold:
self.is_bold = False state.is_bold = False
for tag in reversed(self.tags): for tag in reversed(state.tags):
if tag.bold: if tag.bold:
self.is_bold = True state.is_bold = True
break break
if self.is_italic: if state.is_italic:
self.is_italic = False state.is_italic = False
for tag in reversed(self.tags): for tag in reversed(state.tags):
if tag.italic: if tag.italic:
self.is_italic = True state.is_italic = True
break break
# Set the current language to the first lang attribute in a still open tag # Set the current language to the first lang attribute in a still open tag
self.current_lang = None state.current_lang = None
for tag in reversed(self.tags): for tag in reversed(state.tags):
if tag.lang is not None: if tag.lang is not None:
self.current_lang = tag.lang state.current_lang = tag.lang
break break
def finish_opening_tag(self, cdata_tags):
self.parse = NORMAL
if self.tag_being_defined is None:
return
t, self.tag_being_defined = self.tag_being_defined, None
self.tags.append(t)
self.is_bold = self.is_bold or t.bold
self.is_italic = self.is_italic or t.italic
self.current_lang = t.lang or self.current_lang
if t.name in cdata_tags:
self.parse = CSS if t.name == 'style' else CDATA
self.sub_parser_state = None
def __repr__(self):
return '<State %s is_bold=%s is_italic=%s current_lang=%s>' % (
'->'.join(x.name for x in self.tags), self.is_bold, self.is_italic, self.current_lang)
__str__ = __repr__
class HTMLUserData(QTextBlockUserData): class HTMLUserData(QTextBlockUserData):
def __init__(self): def __init__(self):
@ -324,7 +332,10 @@ def normal(state, text, i, formats, user_data):
ans.append((len(name), formats['tag_name'])) ans.append((len(name), formats['tag_name']))
state.parse = IN_CLOSING_TAG if closing else IN_OPENING_TAG state.parse = IN_CLOSING_TAG if closing else IN_OPENING_TAG
add_tag_data(user_data, TagStart(i, prefix, name, closing, True)) add_tag_data(user_data, TagStart(i, prefix, name, closing, True))
(state.close_tag if closing else state.open_tag)(name) if closing:
close_tag(state, name)
else:
state.tag_being_defined = Tag(name)
return ans return ans
if ch == '&': if ch == '&':
@ -353,7 +364,7 @@ def opening_tag(cdata_tags, state, text, i, formats, user_data):
add_tag_data(user_data, TagEnd(i + l - 1, True, False)) add_tag_data(user_data, TagEnd(i + l - 1, True, False))
return [(l, formats['tag'])] return [(l, formats['tag'])]
if ch == '>': if ch == '>':
state.finish_opening_tag(cdata_tags) finish_opening_tag(state, cdata_tags)
add_tag_data(user_data, TagEnd(i, False, False)) add_tag_data(user_data, TagEnd(i, False, False))
return [(1, formats['tag'])] return [(1, formats['tag'])]
m = attribute_name_pat.match(text, i) m = attribute_name_pat.match(text, i)