Make the Pygments<->Qt glue faster and also fix handling of EOL

Kovid Goyal 2014-11-21 12:02:24 +05:30
parent 3e8bf6d586
commit 022c1ed445


@@ -9,7 +9,7 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
 
 from functools import partial
 from PyQt5.Qt import QTextBlockUserData
-from pygments.lexer import _TokenType, Text, Error
+from pygments.lexer import _TokenType, Error
 
 from calibre.gui2.tweak_book.editor.syntax.base import SyntaxHighlighter
 from calibre.gui2.tweak_book.editor.syntax.utils import format_for_pygments_token, NULL_FMT
@@ -18,20 +18,18 @@ NORMAL = 0
 
 def create_lexer(base_class):
     '''
-    Subclass the pygments RegexLexer to store state on the lexer itself,
-    allowing for efficient integration into Qt
+    Subclass the pygments RegexLexer to lex line by line instead of lexing full
+    text. The statestack at the end of each line is stored in the Qt block state.
     '''
 
-    def get_tokens_unprocessed(self, text, stack=('root',)):
-        # Method is overriden to store state on the lexer itself
+    def get_tokens_unprocessed(self, text, statestack):
         pos = 0
         tokendefs = self._tokens
-        statestack = self.saved_state_stack = list(stack if self.saved_state_stack is None else self.saved_state_stack)
         statetokens = tokendefs[statestack[-1]]
-        while 1:
+        while True:
             for rexmatch, action, new_state in statetokens:
                 m = rexmatch(text, pos)
-                if m:
+                if m is not None:
                     if action is not None:
                         if type(action) is _TokenType:
                             yield pos, action, m.group()
@@ -62,11 +60,8 @@ def create_lexer(base_class):
             try:
                 if text[pos] == '\n':
                     # at EOL, reset state to "root"
-                    statestack = ['root']
-                    statetokens = tokendefs['root']
-                    yield pos, Text, u'\n'
-                    pos += 1
-                    continue
+                    statestack[:] = ['root']
+                    break
                 yield pos, Error, text[pos]
                 pos += 1
             except IndexError:
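
For reference, a minimal usage sketch of the reworked method (not part of the commit): it assumes create_lexer() from this diff is in scope and uses pygments' HtmlLexer purely as an example. The caller passes a mutable state stack in, one call lexes exactly one line, and the final stack is read back out of the same list.

from pygments.lexers import HtmlLexer

QtHtmlLexer = create_lexer(HtmlLexer)  # create_lexer() is defined in this diff
lexer = QtHtmlLexer()

stack = ['root']  # state stack carried from one line to the next
for line in ('<p class="one', 'two">x</p>'):
    # The trailing '\n' matters: many pygments rules only terminate on a
    # newline (see the comment in lex_a_line below).
    tokens = list(lexer.get_tokens_unprocessed(line + '\n', stack))
    # `stack` was mutated in place; it now holds the state this line ended
    # in, ready to be stored in the Qt block state for the next line.
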
@@ -74,25 +69,22 @@
     def lex_a_line(self, state, text, i, formats_map, user_data):
         ' Get formats for a single block (line) '
-        self.saved_state_stack = state.pygments_stack
+        statestack = list(state.pygments_stack) if state.pygments_stack is not None else ['root']
 
         # Lex the text using Pygments
         formats = []
         if i > 0:
-            text = text[i:]
-        for token, txt in self.get_tokens(text):
-            if txt:
+            # This should never happen
+            return [(len(text) - i, formats_map(Error))]
+        # Pygments lexers expect newlines at the end of the line
+        for pos, token, txt in self.get_tokens_unprocessed(text + '\n', statestack):
+            if txt not in ('\n', ''):
                 formats.append((len(txt), formats_map(token)))
 
-        ss = self.saved_state_stack
-        if ss is not None:
-            state.pygments_stack = ss
-        # Clean up the lexer so that it can be re-used
-        self.saved_state_stack = None
+        state.pygments_stack = statestack
 
         return formats
 
     return type(str('Qt'+base_class.__name__), (base_class,), {
-        'saved_state_stack': None,
         'get_tokens_unprocessed': get_tokens_unprocessed,
         'lex_a_line':lex_a_line,
     })
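
To round out the picture, here is a self-contained sketch of the consumer side, under stated assumptions: the PygmentsData and SketchHighlighter names are invented for illustration, and calibre's actual SyntaxHighlighter base class (imported above) is considerably more elaborate. It demonstrates the commit's core idea: the Pygments state stack a block ends with is stored in that block's user data, so each line can be lexed on its own, resuming from the previous block's stack.

from PyQt5.QtGui import QSyntaxHighlighter, QTextBlockUserData, QTextCharFormat


class PygmentsData(QTextBlockUserData):

    def __init__(self, stack):
        QTextBlockUserData.__init__(self)
        self.pygments_stack = stack  # state stack at the end of this block


class SketchHighlighter(QSyntaxHighlighter):

    def __init__(self, document, lexer, theme):
        QSyntaxHighlighter.__init__(self, document)
        self.lexer = lexer  # an instance of a class produced by create_lexer()
        self.theme = theme  # dict mapping pygments token -> QTextCharFormat

    def highlightBlock(self, text):
        prev = self.currentBlock().previous().userData()
        stack = list(prev.pygments_stack) if isinstance(prev, PygmentsData) else ['root']
        # The overridden get_tokens_unprocessed() mutates `stack` in place and
        # stops at the appended newline, so one call lexes exactly this line.
        for pos, token, txt in self.lexer.get_tokens_unprocessed(text + '\n', stack):
            if txt and txt != '\n':
                # A plain dict lookup; a real formats_map also resolves parent tokens.
                self.setFormat(pos, len(txt), self.theme.get(token, QTextCharFormat()))
        # Remember where this line ended so the next block can resume from it.
        self.setCurrentBlockUserData(PygmentsData(stack))

One caveat the sketch glosses over: Qt only re-highlights the following block automatically when the integer set with setCurrentBlockState() changes, so a real implementation also has to arrange for subsequent blocks to be re-lexed when a block's final stack changes.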