mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Refactor HTML syntax highlighter to keep track of tag nesting
This is needed for the eventual implementation of inline spellcheck
This commit is contained in:
parent
5481f1f820
commit
a1a4585167
@ -29,13 +29,15 @@ def run_loop(state, state_map, formats, text):
|
|||||||
|
|
||||||
class SyntaxHighlighter(QSyntaxHighlighter):
|
class SyntaxHighlighter(QSyntaxHighlighter):
|
||||||
|
|
||||||
state_class = SimpleState
|
|
||||||
state_map = {0:lambda state, text, i, formats:[(len(text), None)]}
|
state_map = {0:lambda state, text, i, formats:[(len(text), None)]}
|
||||||
create_formats_func = lambda highlighter: {}
|
create_formats_func = lambda highlighter: {}
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
QSyntaxHighlighter.__init__(self, *args, **kwargs)
|
QSyntaxHighlighter.__init__(self, *args, **kwargs)
|
||||||
|
|
||||||
|
def create_state(self, num):
|
||||||
|
return SimpleState(max(0, num))
|
||||||
|
|
||||||
def rehighlight(self):
|
def rehighlight(self):
|
||||||
self.outlineexplorer_data = {}
|
self.outlineexplorer_data = {}
|
||||||
QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
|
QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
|
||||||
@ -54,9 +56,7 @@ class SyntaxHighlighter(QSyntaxHighlighter):
|
|||||||
try:
|
try:
|
||||||
state = self.previousBlockState()
|
state = self.previousBlockState()
|
||||||
self.setCurrentBlockUserData(None) # Ensure that any stale user data is discarded
|
self.setCurrentBlockUserData(None) # Ensure that any stale user data is discarded
|
||||||
if state == -1:
|
state = self.create_state(state)
|
||||||
state = 0
|
|
||||||
state = self.state_class(state)
|
|
||||||
state.get_user_data, state.set_user_data = self.currentBlockUserData, self.setCurrentBlockUserData
|
state.get_user_data, state.set_user_data = self.currentBlockUserData, self.setCurrentBlockUserData
|
||||||
for i, num, fmt in run_loop(state, self.state_map, self.formats, unicode(text)):
|
for i, num, fmt in run_loop(state, self.state_map, self.formats, unicode(text)):
|
||||||
if fmt is not None:
|
if fmt is not None:
|
||||||
|
@ -251,9 +251,11 @@ def create_formats(highlighter):
|
|||||||
class CSSHighlighter(SyntaxHighlighter):
|
class CSSHighlighter(SyntaxHighlighter):
|
||||||
|
|
||||||
state_map = state_map
|
state_map = state_map
|
||||||
state_class = State
|
|
||||||
create_formats_func = create_formats
|
create_formats_func = create_formats
|
||||||
|
|
||||||
|
def create_state(self, num):
|
||||||
|
return State(max(0, num))
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
from calibre.gui2.tweak_book.editor.widget import launch_editor
|
from calibre.gui2.tweak_book.editor.widget import launch_editor
|
||||||
launch_editor('''\
|
launch_editor('''\
|
||||||
|
@ -31,55 +31,170 @@ unquoted_val_pat = re.compile(r'''[^%s'"=<>`]+''' % space_chars)
|
|||||||
cdata_close_pats = {x:re.compile(r'</%s' % x, flags=re.I) for x in cdata_tags}
|
cdata_close_pats = {x:re.compile(r'</%s' % x, flags=re.I) for x in cdata_tags}
|
||||||
nbsp_pat = re.compile('[\xa0\u2000-\u200A\u202F\u205F\u3000\u2011-\u2015\uFE58\uFE63\uFF0D]+') # special spaces and hyphens
|
nbsp_pat = re.compile('[\xa0\u2000-\u200A\u202F\u205F\u3000\u2011-\u2015\uFE58\uFE63\uFF0D]+') # special spaces and hyphens
|
||||||
|
|
||||||
class State(object):
|
NORMAL = 0
|
||||||
|
IN_OPENING_TAG = 1
|
||||||
''' Store the parsing state, a stack of bold and italic formatting and the
|
IN_CLOSING_TAG = 2
|
||||||
last seen open tag, all in a single integer, so that it can be used with.
|
IN_COMMENT = 3
|
||||||
This assumes an int is at least 32 bits.'''
|
IN_PI = 4
|
||||||
|
IN_DOCTYPE = 5
|
||||||
NORMAL = 0
|
ATTRIBUTE_NAME = 6
|
||||||
IN_OPENING_TAG = 1
|
ATTRIBUTE_VALUE = 7
|
||||||
IN_CLOSING_TAG = 2
|
SQ_VAL = 8
|
||||||
IN_COMMENT = 3
|
DQ_VAL = 9
|
||||||
IN_PI = 4
|
CDATA = 10
|
||||||
IN_DOCTYPE = 5
|
CSS = 11
|
||||||
ATTRIBUTE_NAME = 6
|
|
||||||
ATTRIBUTE_VALUE = 7
|
|
||||||
SQ_VAL = 8
|
|
||||||
DQ_VAL = 9
|
|
||||||
CDATA = 10
|
|
||||||
CSS = 11
|
|
||||||
|
|
||||||
TAGS = {x:i+1 for i, x in enumerate(cdata_tags | bold_tags | italic_tags)}
|
|
||||||
TAGS_RMAP = {v:k for k, v in TAGS.iteritems()}
|
|
||||||
UNKNOWN_TAG = '___'
|
|
||||||
|
|
||||||
def __init__(self, num):
|
|
||||||
self.parse = num & 0b1111
|
|
||||||
self.bold = (num >> 4) & 0b11111111
|
|
||||||
self.italic = (num >> 12) & 0b11111111
|
|
||||||
self.tag = self.TAGS_RMAP.get(num >> 20, self.UNKNOWN_TAG)
|
|
||||||
self.css = 0
|
|
||||||
if self.parse == State.CSS:
|
|
||||||
self.css = num >> 4
|
|
||||||
|
|
||||||
@property
|
|
||||||
def value(self):
|
|
||||||
if self.parse == State.CSS:
|
|
||||||
return ((self.parse & 0b1111) | (self.css << 4))
|
|
||||||
tag = self.TAGS.get(self.tag.lower(), 0)
|
|
||||||
return ((self.parse & 0b1111) |
|
|
||||||
((max(0, self.bold) & 0b11111111) << 4) |
|
|
||||||
((max(0, self.italic) & 0b11111111) << 12) |
|
|
||||||
(tag << 20))
|
|
||||||
|
|
||||||
def clear(self):
|
|
||||||
self.parse = self.bold = self.italic = self.css = 0
|
|
||||||
self.tag = self.UNKNOWN_TAG
|
|
||||||
|
|
||||||
TagStart = namedtuple('TagStart', 'offset prefix name closing is_start')
|
TagStart = namedtuple('TagStart', 'offset prefix name closing is_start')
|
||||||
TagEnd = namedtuple('TagEnd', 'offset self_closing is_start')
|
TagEnd = namedtuple('TagEnd', 'offset self_closing is_start')
|
||||||
|
|
||||||
|
class Tag(object):

    ''' A single tag as tracked by the highlighter state: its name, whether
    it makes text bold and/or italic and its language (initially None).

    The hash is cached in the ``hash`` attribute and is only refreshed by
    update_hash(), so call that after changing ``name`` or ``lang`` and
    before using the tag as (part of) a dictionary key. '''

    __slots__ = ('name', 'bold', 'italic', 'lang', 'hash')

    def __init__(self, name, bold=None, italic=None):
        # When not given explicitly, boldness/italicness is decided by
        # membership of the tag name in bold_tags/italic_tags
        if bold is None:
            bold = name in bold_tags
        if italic is None:
            italic = name in italic_tags
        self.name, self.bold, self.italic = name, bold, italic
        self.lang, self.hash = None, 0

    def __hash__(self):
        # The cached value, see update_hash()
        return self.hash

    def __eq__(self, other):
        # Tags are equal when both name and lang match. The differing
        # getattr() defaults ensure objects lacking these attributes never
        # compare equal to a Tag.
        same_name = self.name == getattr(other, 'name', None)
        return same_name and self.lang == getattr(other, 'lang', False)

    def copy(self):
        ' Return a new Tag carrying the same name, flags, lang and cached hash '
        clone = Tag(self.name, self.bold, self.italic)
        clone.lang = self.lang
        clone.hash = self.hash
        return clone

    def update_hash(self):
        ' Recompute the cached hash from the current (name, lang) pair '
        self.hash = hash((self.name, self.lang))
|
||||||
|
|
||||||
|
class State(object):

    ''' The state of the HTML highlighter at a point in the document: the
    current parse mode, the list of currently open tags, the tag whose
    opening is being parsed (if any) and the bold/italic/language flags
    derived from the open tags.

    Equality and hashing only consider ``parse``, ``sub_parser_state``,
    ``tag_being_defined`` and ``tags``. The integer handed out by ``value``
    comes from the object stored in ``stack``. '''

    __slots__ = ('tag_being_defined', 'tags', 'is_bold', 'is_italic',
                 'current_lang', 'parse', 'get_user_data', 'set_user_data',
                 'css_formats', 'stack', 'sub_parser_state', 'default_lang')

    def __init__(self):
        self.tags = []
        self.is_bold = self.is_italic = False
        self.tag_being_defined = self.current_lang = self.get_user_data = self.set_user_data = \
            self.css_formats = self.stack = self.sub_parser_state = self.default_lang = None
        self.parse = NORMAL

    def copy(self):
        ''' Return an independent copy of this state.

        The tag list and the tag being defined are deep copied onto the
        returned object; every other attribute (callbacks, css_formats,
        stack, ...) is shared by reference. This object is left untouched. '''
        ans = State()
        for x in self.__slots__:
            setattr(ans, x, getattr(self, x))
        # The copies must go onto ans, not self: otherwise the new state
        # would keep sharing Tag objects with the source and the source
        # would be mutated by every copy
        ans.tags = [x.copy() for x in self.tags]
        if self.tag_being_defined is not None:
            ans.tag_being_defined = self.tag_being_defined.copy()
        return ans

    @property
    def value(self):
        ''' The integer index that ``stack`` associates with this state '''
        if self.tag_being_defined is not None:
            # Its cached hash is part of our own hash, make sure it is current
            self.tag_being_defined.update_hash()
        return self.stack.index_for(self)

    def __hash__(self):
        return hash((self.parse, self.sub_parser_state, self.tag_being_defined, tuple(self.tags)))

    def __eq__(self, other):
        # The getattr() defaults are values the corresponding attributes of
        # a State are not expected to hold, so unrelated objects compare
        # unequal
        return (
            self.parse == getattr(other, 'parse', -1) and
            self.sub_parser_state == getattr(other, 'sub_parser_state', -1) and
            self.tag_being_defined == getattr(other, 'tag_being_defined', False) and
            self.tags == getattr(other, 'tags', None)
        )

    def open_tag(self, name):
        ''' Start defining a new tag named ``name``. It is only added to the
        list of open tags by finish_opening_tag(). '''
        self.tag_being_defined = Tag(name)

    def close_tag(self, name):
        ''' Close the innermost open tag named ``name`` together with every
        tag opened after it, then recompute the bold, italic and language
        flags. Does nothing if no open tag has that name. '''
        for idx in range(len(self.tags) - 1, -1, -1):
            if self.tags[idx].name == name:
                break
        else:
            return  # No matching open tag found, ignore the closing tag
        # Remove all tags up to and including the matching open tag
        self.tags = self.tags[:idx]
        self.sub_parser_state = 0
        # Check if we should still be bold or italic
        if self.is_bold:
            self.is_bold = any(tag.bold for tag in self.tags)
        if self.is_italic:
            self.is_italic = any(tag.italic for tag in self.tags)
        # Set the current language to the lang of the innermost still open
        # tag that has one
        self.current_lang = next(
            (tag.lang for tag in reversed(self.tags) if tag.lang is not None), None)

    def finish_opening_tag(self, cdata_tags):
        ''' Called when the opening tag being defined is complete: push it
        onto the list of open tags and update the derived flags. If its name
        is in ``cdata_tags`` the parse mode switches to CSS (for style) or
        CDATA, otherwise it returns to NORMAL. '''
        self.parse = NORMAL
        if self.tag_being_defined is None:
            return
        t, self.tag_being_defined = self.tag_being_defined, None
        t.update_hash()
        self.tags.append(t)
        self.is_bold = self.is_bold or t.bold
        self.is_italic = self.is_italic or t.italic
        self.current_lang = t.lang or self.current_lang
        if t.name in cdata_tags:
            self.parse = CSS if t.name == 'style' else CDATA
            self.sub_parser_state = 0

    def __repr__(self):
        return '<State %s is_bold=%s is_italic=%s current_lang=%s>' % (
            '->'.join(x.name for x in self.tags), self.is_bold, self.is_italic, self.current_lang)
    __str__ = __repr__
|
||||||
|
|
||||||
|
class Stack(object):

    ''' Maintain an efficient bi-directional mapping between states and index
    numbers. Equal states always get the same index number and different
    states get different ones. This is needed so that the state number
    passed to Qt does not change unless the underlying state has actually
    changed. '''

    def __init__(self):
        self.index_map = []  # index number -> state
        self.state_map = {}  # state -> index number

    def index_for(self, state):
        ' Return the index number for state, registering it if it is new '
        try:
            return self.state_map[state]
        except KeyError:
            pass
        idx = len(self.index_map)
        self.state_map[state] = idx
        self.index_map.append(state)
        return idx

    def state_for(self, index):
        ' Return the state registered under index or None if there is none '
        try:
            found = self.index_map[index]
        except IndexError:
            found = None
        return found
|
||||||
|
|
||||||
|
class HTMLUserData(QTextBlockUserData):

    ''' Per block user data for HTML documents. ``tags`` starts out as an
    empty list (presumably filled with the tag start/end records of the
    block by add_tag_data(), verify against that function). '''

    def __init__(self):
        super(HTMLUserData, self).__init__()
        self.tags = []
|
||||||
|
|
||||||
def add_tag_data(state, tag):
|
def add_tag_data(state, tag):
|
||||||
ud = q = state.get_user_data()
|
ud = q = state.get_user_data()
|
||||||
if ud is None:
|
if ud is None:
|
||||||
@ -97,37 +212,38 @@ def css(state, text, i, formats):
|
|||||||
else:
|
else:
|
||||||
css_text = text[i:m.start()]
|
css_text = text[i:m.start()]
|
||||||
ans = []
|
ans = []
|
||||||
css_state = CSSState(state.css)
|
css_state = CSSState(state.sub_parser_state)
|
||||||
for j, num, fmt in run_loop(css_state, css_state_map, state.css_formats, css_text):
|
for j, num, fmt in run_loop(css_state, css_state_map, state.css_formats, css_text):
|
||||||
ans.append((num, fmt))
|
ans.append((num, fmt))
|
||||||
state.css = css_state.value
|
state.sub_parser_state = css_state.value
|
||||||
if m is not None:
|
if m is not None:
|
||||||
state.clear()
|
state.sub_parser_state = 0
|
||||||
state.parse = State.IN_CLOSING_TAG
|
state.parse = IN_CLOSING_TAG
|
||||||
add_tag_data(state, TagStart(m.start(), 'style', '', True, True))
|
add_tag_data(state, TagStart(m.start(), 'style', '', True, True))
|
||||||
ans.extend([(2, formats['end_tag']), (len(m.group()) - 2, formats['tag_name'])])
|
ans.extend([(2, formats['end_tag']), (len(m.group()) - 2, formats['tag_name'])])
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def cdata(state, text, i, formats):
|
def cdata(state, text, i, formats):
|
||||||
'CDATA inside tags like <title> or <style>'
|
'CDATA inside tags like <title> or <style>'
|
||||||
pat = cdata_close_pats[state.tag]
|
name = state.tags[-1].name
|
||||||
|
pat = cdata_close_pats[name]
|
||||||
m = pat.search(text, i)
|
m = pat.search(text, i)
|
||||||
fmt = formats['title' if state.tag == 'title' else 'special']
|
fmt = formats['title' if name == 'title' else 'special']
|
||||||
if m is None:
|
if m is None:
|
||||||
return [(len(text) - i, fmt)]
|
return [(len(text) - i, fmt)]
|
||||||
state.parse = State.IN_CLOSING_TAG
|
state.parse = IN_CLOSING_TAG
|
||||||
num = m.start() - i
|
num = m.start() - i
|
||||||
add_tag_data(state, TagStart(m.start(), state.tag, '', True, True))
|
add_tag_data(state, TagStart(m.start(), name, '', True, True))
|
||||||
return [(num, fmt), (2, formats['end_tag']), (len(m.group()) - 2, formats['tag_name'])]
|
return [(num, fmt), (2, formats['end_tag']), (len(m.group()) - 2, formats['tag_name'])]
|
||||||
|
|
||||||
def mark_nbsp(state, text, nbsp_format):
|
def mark_nbsp(state, text, nbsp_format):
|
||||||
ans = []
|
ans = []
|
||||||
fmt = None
|
fmt = None
|
||||||
if state.bold or state.italic:
|
if state.is_bold or state.is_italic:
|
||||||
fmt = SyntaxTextCharFormat()
|
fmt = SyntaxTextCharFormat()
|
||||||
if state.bold:
|
if state.is_bold:
|
||||||
fmt.setFontWeight(QFont.Bold)
|
fmt.setFontWeight(QFont.Bold)
|
||||||
if state.italic:
|
if state.is_italic:
|
||||||
fmt.setFontItalic(True)
|
fmt.setFontItalic(True)
|
||||||
last = 0
|
last = 0
|
||||||
for m in nbsp_pat.finditer(text):
|
for m in nbsp_pat.finditer(text):
|
||||||
@ -137,26 +253,20 @@ def mark_nbsp(state, text, nbsp_format):
|
|||||||
ans = [(len(text), fmt)]
|
ans = [(len(text), fmt)]
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
class HTMLUserData(QTextBlockUserData):
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
QTextBlockUserData.__init__(self)
|
|
||||||
self.tags = []
|
|
||||||
|
|
||||||
def normal(state, text, i, formats):
|
def normal(state, text, i, formats):
|
||||||
' The normal state in between tags '
|
' The normal state in between tags '
|
||||||
ch = text[i]
|
ch = text[i]
|
||||||
if ch == '<':
|
if ch == '<':
|
||||||
if text[i:i+4] == '<!--':
|
if text[i:i+4] == '<!--':
|
||||||
state.parse, fmt = state.IN_COMMENT, formats['comment']
|
state.parse, fmt = IN_COMMENT, formats['comment']
|
||||||
return [(4, fmt)]
|
return [(4, fmt)]
|
||||||
|
|
||||||
if text[i:i+2] == '<?':
|
if text[i:i+2] == '<?':
|
||||||
state.parse, fmt = state.IN_PI, formats['preproc']
|
state.parse, fmt = IN_PI, formats['preproc']
|
||||||
return [(2, fmt)]
|
return [(2, fmt)]
|
||||||
|
|
||||||
if text[i:i+2] == '<!' and text[i+2:].lstrip().lower().startswith('doctype'):
|
if text[i:i+2] == '<!' and text[i+2:].lstrip().lower().startswith('doctype'):
|
||||||
state.parse, fmt = state.IN_DOCTYPE, formats['preproc']
|
state.parse, fmt = IN_DOCTYPE, formats['preproc']
|
||||||
return [(2, fmt)]
|
return [(2, fmt)]
|
||||||
|
|
||||||
m = tag_name_pat.match(text, i + 1)
|
m = tag_name_pat.match(text, i + 1)
|
||||||
@ -165,16 +275,16 @@ def normal(state, text, i, formats):
|
|||||||
|
|
||||||
name = m.group()
|
name = m.group()
|
||||||
closing = name.startswith('/')
|
closing = name.startswith('/')
|
||||||
state.parse = state.IN_CLOSING_TAG if closing else state.IN_OPENING_TAG
|
state.parse = IN_CLOSING_TAG if closing else IN_OPENING_TAG
|
||||||
ans = [(2 if closing else 1, formats['end_tag' if closing else 'tag'])]
|
ans = [(2 if closing else 1, formats['end_tag' if closing else 'tag'])]
|
||||||
if closing:
|
if closing:
|
||||||
name = name[1:]
|
name = name[1:]
|
||||||
prefix, name = name.partition(':')[0::2]
|
prefix, name = name.partition(':')[0::2]
|
||||||
state.tag = name or prefix
|
|
||||||
if prefix and name:
|
if prefix and name:
|
||||||
ans.append((len(prefix)+1, formats['nsprefix']))
|
ans.append((len(prefix)+1, formats['nsprefix']))
|
||||||
ans.append((len(name or prefix), formats['tag_name']))
|
ans.append((len(name or prefix), formats['tag_name']))
|
||||||
add_tag_data(state, TagStart(i, prefix, name, closing, True))
|
add_tag_data(state, TagStart(i, prefix, name, closing, True))
|
||||||
|
(state.close_tag if closing else state.open_tag)(name or prefix)
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
if ch == '&':
|
if ch == '&':
|
||||||
@ -198,27 +308,18 @@ def opening_tag(cdata_tags, state, text, i, formats):
|
|||||||
m = self_closing_pat.match(text, i)
|
m = self_closing_pat.match(text, i)
|
||||||
if m is None:
|
if m is None:
|
||||||
return [(1, formats['/'])]
|
return [(1, formats['/'])]
|
||||||
state.parse = state.NORMAL
|
state.parse = NORMAL
|
||||||
state.tag = State.UNKNOWN_TAG
|
|
||||||
l = len(m.group())
|
l = len(m.group())
|
||||||
add_tag_data(state, TagEnd(i + l - 1, True, False))
|
add_tag_data(state, TagEnd(i + l - 1, True, False))
|
||||||
return [(l, formats['tag'])]
|
return [(l, formats['tag'])]
|
||||||
if ch == '>':
|
if ch == '>':
|
||||||
state.parse = state.NORMAL
|
state.finish_opening_tag(cdata_tags)
|
||||||
tag = state.tag.lower()
|
|
||||||
if tag in cdata_tags:
|
|
||||||
state.parse = state.CDATA
|
|
||||||
if tag == 'style':
|
|
||||||
state.clear()
|
|
||||||
state.parse = state.CSS
|
|
||||||
state.bold += int(tag in bold_tags)
|
|
||||||
state.italic += int(tag in italic_tags)
|
|
||||||
add_tag_data(state, TagEnd(i, False, False))
|
add_tag_data(state, TagEnd(i, False, False))
|
||||||
return [(1, formats['tag'])]
|
return [(1, formats['tag'])]
|
||||||
m = attribute_name_pat.match(text, i)
|
m = attribute_name_pat.match(text, i)
|
||||||
if m is None:
|
if m is None:
|
||||||
return [(1, formats['?'])]
|
return [(1, formats['?'])]
|
||||||
state.parse = state.ATTRIBUTE_NAME
|
state.parse = ATTRIBUTE_NAME
|
||||||
prefix, name = m.group().partition(':')[0::2]
|
prefix, name = m.group().partition(':')[0::2]
|
||||||
if prefix and name:
|
if prefix and name:
|
||||||
return [(len(prefix) + 1, formats['nsprefix']), (len(name), formats['attr'])]
|
return [(len(prefix) + 1, formats['nsprefix']), (len(name), formats['attr'])]
|
||||||
@ -230,9 +331,9 @@ def attribute_name(state, text, i, formats):
|
|||||||
if ch in space_chars:
|
if ch in space_chars:
|
||||||
return [(1, None)]
|
return [(1, None)]
|
||||||
if ch == '=':
|
if ch == '=':
|
||||||
state.parse = State.ATTRIBUTE_VALUE
|
state.parse = ATTRIBUTE_VALUE
|
||||||
return [(1, formats['attr'])]
|
return [(1, formats['attr'])]
|
||||||
state.parse = State.IN_OPENING_TAG
|
state.parse = IN_OPENING_TAG
|
||||||
if ch in {'>', '/'}:
|
if ch in {'>', '/'}:
|
||||||
# Standalone attribute with no value
|
# Standalone attribute with no value
|
||||||
return [(0, None)]
|
return [(0, None)]
|
||||||
@ -244,9 +345,9 @@ def attribute_value(state, text, i, formats):
|
|||||||
if ch in space_chars:
|
if ch in space_chars:
|
||||||
return [(1, None)]
|
return [(1, None)]
|
||||||
if ch in {'"', "'"}:
|
if ch in {'"', "'"}:
|
||||||
state.parse = State.SQ_VAL if ch == "'" else State.DQ_VAL
|
state.parse = SQ_VAL if ch == "'" else DQ_VAL
|
||||||
return [(1, formats['string'])]
|
return [(1, formats['string'])]
|
||||||
state.parse = State.IN_OPENING_TAG
|
state.parse = IN_OPENING_TAG
|
||||||
m = unquoted_val_pat.match(text, i)
|
m = unquoted_val_pat.match(text, i)
|
||||||
if m is None:
|
if m is None:
|
||||||
return [(1, formats['no-attr-value'])]
|
return [(1, formats['no-attr-value'])]
|
||||||
@ -254,13 +355,13 @@ def attribute_value(state, text, i, formats):
|
|||||||
|
|
||||||
def quoted_val(state, text, i, formats):
|
def quoted_val(state, text, i, formats):
|
||||||
' A quoted attribute value '
|
' A quoted attribute value '
|
||||||
quote = '"' if state.parse == State.DQ_VAL else "'"
|
quote = '"' if state.parse is DQ_VAL else "'"
|
||||||
pos = text.find(quote, i)
|
pos = text.find(quote, i)
|
||||||
if pos == -1:
|
if pos == -1:
|
||||||
num = len(text) - i
|
num = len(text) - i
|
||||||
else:
|
else:
|
||||||
num = pos - i + 1
|
num = pos - i + 1
|
||||||
state.parse = State.IN_OPENING_TAG
|
state.parse = IN_OPENING_TAG
|
||||||
return [(num, formats['string'])]
|
return [(num, formats['string'])]
|
||||||
|
|
||||||
def closing_tag(state, text, i, formats):
|
def closing_tag(state, text, i, formats):
|
||||||
@ -271,48 +372,44 @@ def closing_tag(state, text, i, formats):
|
|||||||
pos = text.find('>', i)
|
pos = text.find('>', i)
|
||||||
if pos == -1:
|
if pos == -1:
|
||||||
return [(len(text) - i, formats['bad-closing'])]
|
return [(len(text) - i, formats['bad-closing'])]
|
||||||
state.parse = state.NORMAL
|
state.parse = NORMAL
|
||||||
tag = state.tag.lower()
|
|
||||||
state.bold -= int(tag in bold_tags)
|
|
||||||
state.italic -= int(tag in italic_tags)
|
|
||||||
num = pos - i + 1
|
num = pos - i + 1
|
||||||
ans = [(1, formats['end_tag'])]
|
ans = [(1, formats['end_tag'])]
|
||||||
if num > 1:
|
if num > 1:
|
||||||
ans.insert(0, (num - 1, formats['bad-closing']))
|
ans.insert(0, (num - 1, formats['bad-closing']))
|
||||||
state.tag = State.UNKNOWN_TAG
|
|
||||||
add_tag_data(state, TagEnd(pos, False, False))
|
add_tag_data(state, TagEnd(pos, False, False))
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def in_comment(state, text, i, formats):
|
def in_comment(state, text, i, formats):
|
||||||
' Comment, processing instruction or doctype '
|
' Comment, processing instruction or doctype '
|
||||||
end = {state.IN_COMMENT:'-->', state.IN_PI:'?>'}.get(state.parse, '>')
|
end = {IN_COMMENT:'-->', IN_PI:'?>'}.get(state.parse, '>')
|
||||||
pos = text.find(end, i)
|
pos = text.find(end, i)
|
||||||
fmt = formats['comment' if state.parse == state.IN_COMMENT else 'preproc']
|
fmt = formats['comment' if state.parse is IN_COMMENT else 'preproc']
|
||||||
if pos == -1:
|
if pos == -1:
|
||||||
num = len(text) - i
|
num = len(text) - i
|
||||||
else:
|
else:
|
||||||
num = pos - i + len(end)
|
num = pos - i + len(end)
|
||||||
state.parse = state.NORMAL
|
state.parse = NORMAL
|
||||||
return [(num, fmt)]
|
return [(num, fmt)]
|
||||||
|
|
||||||
state_map = {
|
state_map = {
|
||||||
State.NORMAL:normal,
|
NORMAL:normal,
|
||||||
State.IN_OPENING_TAG: partial(opening_tag, cdata_tags),
|
IN_OPENING_TAG: partial(opening_tag, cdata_tags),
|
||||||
State.IN_CLOSING_TAG: closing_tag,
|
IN_CLOSING_TAG: closing_tag,
|
||||||
State.ATTRIBUTE_NAME: attribute_name,
|
ATTRIBUTE_NAME: attribute_name,
|
||||||
State.ATTRIBUTE_VALUE: attribute_value,
|
ATTRIBUTE_VALUE: attribute_value,
|
||||||
State.CDATA: cdata,
|
CDATA: cdata,
|
||||||
State.CSS: css,
|
CSS: css,
|
||||||
}
|
}
|
||||||
|
|
||||||
for x in (State.IN_COMMENT, State.IN_PI, State.IN_DOCTYPE):
|
for x in (IN_COMMENT, IN_PI, IN_DOCTYPE):
|
||||||
state_map[x] = in_comment
|
state_map[x] = in_comment
|
||||||
|
|
||||||
for x in (State.SQ_VAL, State.DQ_VAL):
|
for x in (SQ_VAL, DQ_VAL):
|
||||||
state_map[x] = quoted_val
|
state_map[x] = quoted_val
|
||||||
|
|
||||||
xml_state_map = state_map.copy()
|
xml_state_map = state_map.copy()
|
||||||
xml_state_map[State.IN_OPENING_TAG] = partial(opening_tag, set())
|
xml_state_map[IN_OPENING_TAG] = partial(opening_tag, set())
|
||||||
|
|
||||||
def create_formats(highlighter):
|
def create_formats(highlighter):
|
||||||
t = highlighter.theme
|
t = highlighter.theme
|
||||||
@ -349,18 +446,19 @@ def create_formats(highlighter):
|
|||||||
class HTMLHighlighter(SyntaxHighlighter):
|
class HTMLHighlighter(SyntaxHighlighter):
|
||||||
|
|
||||||
state_map = state_map
|
state_map = state_map
|
||||||
state_class = State
|
|
||||||
create_formats_func = create_formats
|
create_formats_func = create_formats
|
||||||
|
|
||||||
def create_formats(self):
|
def create_formats(self):
|
||||||
super(HTMLHighlighter, self).create_formats()
|
super(HTMLHighlighter, self).create_formats()
|
||||||
self.css_formats = create_css_formats(self)
|
self.default_state = State()
|
||||||
self.state_class = self.create_state
|
self.default_state.css_formats = create_css_formats(self)
|
||||||
|
self.default_state.stack = Stack()
|
||||||
|
|
||||||
def create_state(self, val):
|
def create_state(self, val):
|
||||||
ans = State(val)
|
if val < 0:
|
||||||
ans.css_formats = self.css_formats
|
return self.default_state.copy()
|
||||||
return ans
|
ans = self.default_state.stack.state_for(val) or self.default_state
|
||||||
|
return ans.copy()
|
||||||
|
|
||||||
class XMLHighlighter(HTMLHighlighter):
|
class XMLHighlighter(HTMLHighlighter):
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user