A syntax highlighter for Python using Pygments

This commit is contained in:
Kovid Goyal 2014-11-20 21:53:43 +05:30
parent d588010b39
commit 64b9a921ab
3 changed files with 200 additions and 42 deletions

View File

@ -8,13 +8,12 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
import os
from PyQt5.Qt import QTextDocument, QTextCursor, QTextCharFormat, QPlainTextDocumentLayout
from PyQt5.Qt import QTextDocument, QTextCursor, QPlainTextDocumentLayout
from calibre.gui2.tweak_book import tprefs
from calibre.gui2.tweak_book.editor.text import get_highlighter as calibre_highlighter, SyntaxHighlighter
from calibre.gui2.tweak_book.editor.themes import get_theme, highlight_to_char_format
NULL_FMT = QTextCharFormat()
from calibre.gui2.tweak_book.editor.syntax.python import format_for_token, NULL_FMT
class QtHighlighter(QTextDocument):
@ -72,44 +71,6 @@ def pygments_lexer(filename):
return glff('a.py')
return None
_pyg_map = None


def pygments_map():
    '''
    Return the dictionary mapping pygments token types to the highlight names
    that are used as keys into the editor theme. The dictionary is built (and
    pygments.token imported) on the first call and re-used afterwards.
    '''
    global _pyg_map
    if _pyg_map is not None:
        return _pyg_map
    from pygments.token import Token
    table = {
        Token: None,
        Token.Comment: 'Comment',
        Token.Comment.Preproc: 'PreProc',
        Token.String: 'String',
        Token.Number: 'Number',
        Token.Keyword.Type: 'Type',
        Token.Keyword: 'Keyword',
        Token.Name.Builtin: 'Identifier',
        Token.Operator: 'Statement',
        Token.Name.Function: 'Function',
        Token.Literal: 'Constant',
        Token.Error: 'Error',
    }
    _pyg_map = table
    return _pyg_map
def format_for_token(theme, cache, token):
    '''
    Return the format to use for the pygments token type `token`.

    :param theme: Mapping from highlight name (the values of pygments_map()) to format
    :param cache: Dictionary used to memoize results, keyed by token type
    :param token: The pygments token type to look up

    The token hierarchy is walked upwards (via token.parent) until a token type
    present in pygments_map() is found. If none is found NULL_FMT is returned.
    '''
    try:
        return cache[token]
    except KeyError:
        pass
    pmap = pygments_map()
    ans = NULL_FMT
    # Use a separate variable for the walk, so that the result is cached under
    # the token type that was actually requested. Otherwise child token types
    # would never hit the cache.
    candidate = token
    while candidate is not None:
        try:
            name = pmap[candidate]
        except KeyError:
            candidate = candidate.parent
            continue
        ans = theme[name]
        break
    cache[token] = ans
    return ans
class PygmentsHighlighter(object):
def __init__(self, text, lexer):

View File

@ -0,0 +1,191 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
from functools import partial
from PyQt5.Qt import QTextCharFormat, QTextBlockUserData
from calibre.gui2.tweak_book.editor.syntax.base import SyntaxHighlighter
from pygments.lexer import _TokenType, Text, Error
from pygments.lexers import PythonLexer
# Identifier of the (only) parse state, used as the key in
# PythonHighlighter.state_map and as the initial value of State.parse
NORMAL = 0
# Shared default format, returned for tokens that have no format in the theme
NULL_FMT = QTextCharFormat()
class QtLexer(PythonLexer):

    '''
    Subclass the pygments PythonLexer to store state on the lexer itself,
    allowing for efficient integration into Qt
    '''

    def __init__(self):
        PythonLexer.__init__(self)
        # The state stack to start lexing from. While lexing is in progress
        # this is the live stack, so after lexing it holds the final state.
        self.saved_state_stack = None

    def get_tokens_unprocessed(self, text, stack=('root',)):
        '''
        Yield (position, token type, text) tuples for `text`.

        :param text: The text to lex
        :param stack: Initial state stack, used only when saved_state_stack is None
        '''
        # Method is overridden to store state on the lexer itself
        pos = 0
        tokendefs = self._tokens
        # statestack and self.saved_state_stack are the same list object, all
        # changes to the stack below must therefore be made in place
        statestack = self.saved_state_stack = list(stack if self.saved_state_stack is None else self.saved_state_stack)
        statetokens = tokendefs[statestack[-1]]
        while 1:
            for rexmatch, action, new_state in statetokens:
                m = rexmatch(text, pos)
                if m:
                    if action is not None:
                        if type(action) is _TokenType:
                            yield pos, action, m.group()
                        else:
                            for item in action(self, m):
                                yield item
                    pos = m.end()
                    if new_state is not None:
                        # state transition
                        if isinstance(new_state, tuple):
                            for state in new_state:
                                if state == '#pop':
                                    statestack.pop()
                                elif state == '#push':
                                    statestack.append(statestack[-1])
                                else:
                                    statestack.append(state)
                        elif isinstance(new_state, int):
                            # pop
                            del statestack[new_state:]
                        elif new_state == '#push':
                            statestack.append(statestack[-1])
                        else:
                            assert False, "wrong state def: %r" % new_state
                        statetokens = tokendefs[statestack[-1]]
                    break
            else:
                try:
                    if text[pos] == '\n':
                        # at EOL, reset state to "root". This is done in place
                        # as rebinding the name would leave the stale stack in
                        # self.saved_state_stack
                        statestack[:] = ['root']
                        statetokens = tokendefs['root']
                        yield pos, Text, u'\n'
                        pos += 1
                        continue
                    yield pos, Error, text[pos]
                    pos += 1
                except IndexError:
                    # pos is past the end of the text, lexing is finished
                    break
# The single lexer instance used by normal(), which sets saved_state_stack on
# it before lexing and resets it to None afterwards so that it can be re-used
lexer = QtLexer()
_pyg_map = None


def pygments_map():
    '''
    Return the dictionary mapping pygments token types to the highlight names
    that are used as keys into the editor theme. The dictionary is built (and
    pygments.token imported) on the first call and re-used afterwards.
    '''
    global _pyg_map
    if _pyg_map is not None:
        return _pyg_map
    from pygments.token import Token
    table = {
        Token: None,
        Token.Comment: 'Comment',
        Token.Comment.Preproc: 'PreProc',
        Token.String: 'String',
        Token.Number: 'Number',
        Token.Keyword.Type: 'Type',
        Token.Keyword: 'Keyword',
        Token.Name.Builtin: 'Identifier',
        Token.Operator: 'Statement',
        Token.Name.Function: 'Function',
        Token.Literal: 'Constant',
        Token.Error: 'Error',
    }
    _pyg_map = table
    return _pyg_map
def format_for_token(theme, cache, token):
    '''
    Return the format to use for the pygments token type `token`.

    :param theme: Mapping from highlight name (the values of pygments_map()) to format
    :param cache: Dictionary used to memoize results, keyed by token type
    :param token: The pygments token type to look up

    The token hierarchy is walked upwards (via token.parent) until a token type
    present in pygments_map() is found. If none is found NULL_FMT is returned.
    '''
    try:
        return cache[token]
    except KeyError:
        pass
    pmap = pygments_map()
    ans = NULL_FMT
    # Use a separate variable for the walk, so that the result is cached under
    # the token type that was actually requested. Otherwise child token types
    # would never hit the cache.
    candidate = token
    while candidate is not None:
        try:
            name = pmap[candidate]
        except KeyError:
            candidate = candidate.parent
            continue
        ans = theme[name]
        break
    cache[token] = ans
    return ans
class State(object):

    '''
    The highlighter state for a block of text: the parse state and the
    pygments state stack (None until a stack has been stored).
    '''

    __slots__ = ('parse', 'pygments_stack')

    def __init__(self):
        self.parse = NORMAL
        self.pygments_stack = None

    def copy(self):
        ''' Return an independent copy of this state (the stack list is duplicated). '''
        s = State()
        # parse must be copied as well, otherwise the copy silently reverts to
        # the default parse state
        s.parse = self.parse
        s.pygments_stack = None if self.pygments_stack is None else list(self.pygments_stack)
        return s

    def __eq__(self, other):
        # The getattr() defaults can never equal a real value, so comparing
        # with an object that is not a State gives False instead of raising
        return self.parse == getattr(other, 'parse', -1) and \
            self.pygments_stack == getattr(other, 'pygments_stack', False)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        # One placeholder per value, a single %r with a two item tuple raises
        # TypeError
        return "PythonState(%r, %r)" % (self.parse, self.pygments_stack)
    __str__ = __repr__
class PythonUserData(QTextBlockUserData):

    ''' Data attached to a text block: a State and a document name. '''

    def __init__(self):
        QTextBlockUserData.__init__(self)
        self.doc_name = None
        self.state = State()

    def clear(self, state=None, doc_name=None):
        ''' Replace the stored state (a fresh State when none is given) and the document name. '''
        if state is None:
            state = State()
        self.state = state
        self.doc_name = doc_name
def normal(state, text, i, formats_map, user_data):
    '''
    Lex `text` from offset `i` onwards with the module level lexer, starting
    from the pygments stack stored in `state`. Returns a list of
    (length, format) tuples, one per non-empty token, and stores the stack
    left behind by the lexer back into `state`.
    '''
    # Hand the saved stack to the lexer before lexing with Pygments
    lexer.saved_state_stack = state.pygments_stack
    remaining = text[i:] if i > 0 else text
    formats = [
        (len(chunk), formats_map(tok))
        for tok, chunk in lexer.get_tokens(remaining) if chunk
    ]
    final_stack = lexer.saved_state_stack
    if final_stack is not None:
        state.pygments_stack = final_stack
    # Reset the lexer so that it can be re-used
    lexer.saved_state_stack = None
    return formats
def create_formats(highlighter):
    '''
    Return a callable that maps a pygments token type to a format, based on a
    copy of the theme of `highlighter`, with results memoized in a new cache.
    '''
    themed = highlighter.theme.copy()
    # pygments_map() maps the root token type to None
    themed[None] = NULL_FMT
    return partial(format_for_token, themed, {})
class PythonHighlighter(SyntaxHighlighter):

    ''' Syntax highlighter for Python, with the lexing done by pygments '''

    user_data_factory = PythonUserData
    create_formats_func = create_formats
    state_map = {NORMAL: normal}
if __name__ == '__main__':
    # Open this very file in the editor, highlighted as python
    from calibre.gui2.tweak_book.editor.widget import launch_editor
    import os
    this_file = os.path.abspath(__file__)
    launch_editor(this_file, syntax='python')

View File

@ -36,7 +36,13 @@ PARAGRAPH_SEPARATOR = '\u2029'
# Matches text of the form &name; or &#name; where name is 1 to 8 ASCII
# letters or digits, for example &amp; or &#160;
entity_pat = re.compile(r'&(#{0,1}[a-zA-Z0-9]{1,8});')
def get_highlighter(syntax):
    '''
    Return the highlighter class to use for `syntax`. SyntaxHighlighter is
    returned for any syntax that has no dedicated highlighter.
    '''
    ans = {'html':HTMLHighlighter, 'css':CSSHighlighter, 'xml':XMLHighlighter}.get(syntax, SyntaxHighlighter)
    if ans is SyntaxHighlighter:
        # Load these highlighters only on demand
        if syntax == 'python':
            from calibre.gui2.tweak_book.editor.syntax.python import PythonHighlighter
            ans = PythonHighlighter
    return ans
_dff = None
def default_font_family():