Refactor the pygments-based highlighter so that the plumbing connecting pygments to Qt can be reused

2025-07-31 14:33:54 -04:00 · 2014-11-21 10:31:54 +05:30 · 2014-11-21 10:31:54 +05:30 · ca534b9919
commit ca534b9919
parent 6f539330f4
2 changed files with 149 additions and 138 deletions
--- a/src/calibre/gui2/tweak_book/editor/syntax/pygments_highlighter.py
+++ b/src/calibre/gui2/tweak_book/editor/syntax/pygments_highlighter.py
@ -0,0 +1,147 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
+
+from functools import partial
+
+from PyQt5.Qt import QTextBlockUserData
+from pygments.lexer import _TokenType, Text, Error
+
+from calibre.gui2.tweak_book.editor.syntax.base import SyntaxHighlighter
+from calibre.gui2.tweak_book.editor.syntax.utils import format_for_pygments_token, NULL_FMT
+
+NORMAL = 0  # the only parse mode defined in this module: default of State.parse and the key used in the highlighter's state_map
+
+def create_lexer(base_class):  # base_class: lexer class to subclass; returns a new class named 'Qt' + base_class.__name__
+    '''
+    Subclass the pygments RegexLexer to store state on the lexer itself,
+    allowing for efficient integration into Qt
+    '''
+
+    def get_tokens_unprocessed(self, text, stack=('root',)):  # generator: yields (pos, token type, matched text), or whatever items a callback action produces
+        # Method is overridden to store state on the lexer itself
+        pos = 0  # offset in text at which the next match is attempted
+        tokendefs = self._tokens
+        statestack = self.saved_state_stack = list(stack if self.saved_state_stack is None else self.saved_state_stack)  # resume from the saved stack when one is set, else start from `stack`; the same new list is kept on self
+        statetokens = tokendefs[statestack[-1]]  # rules belonging to the state on top of the stack
+        while 1:
+            for rexmatch, action, new_state in statetokens:
+                m = rexmatch(text, pos)
+                if m:
+                    if action is not None:
+                        if type(action) is _TokenType:
+                            yield pos, action, m.group()
+                        else:  # not a plain token type: call it with (lexer, match) and pass its items through unchanged
+                            for item in action(self, m):
+                                yield item
+                    pos = m.end()
+                    if new_state is not None:
+                        # state transition
+                        if isinstance(new_state, tuple):
+                            for state in new_state:
+                                if state == '#pop':
+                                    statestack.pop()
+                                elif state == '#push':
+                                    statestack.append(statestack[-1])
+                                else:
+                                    statestack.append(state)
+                        elif isinstance(new_state, int):
+                            # pop
+                            del statestack[new_state:]
+                        elif new_state == '#push':
+                            statestack.append(statestack[-1])
+                        else:
+                            assert False, "wrong state def: %r" % new_state
+                        statetokens = tokendefs[statestack[-1]]
+                    break  # first matching rule wins; rescan the rule list from the new position
+            else:  # reached only when no rule of the current state matched at pos
+                try:
+                    if text[pos] == '\n':
+                        # at EOL, reset state to "root"
+                        statestack = ['root']  # NOTE(review): rebinds the local name only; self.saved_state_stack still refers to the previous list - confirm this is intended
+                        statetokens = tokendefs['root']
+                        yield pos, Text, u'\n'
+                        pos += 1
+                        continue
+                    yield pos, Error, text[pos]  # unmatched character: emit it as an Error token and step past it
+                    pos += 1
+                except IndexError:  # text[pos] is past the end of text, so lexing is finished
+                    break
+
+    def lex_a_line(self, state, text, i, formats_map, user_data):  # returns a list of (length, format) pairs; user_data is not used in this body
+        ' Get formats for a single block (line) '
+        self.saved_state_stack = state.pygments_stack  # the stack that get_tokens_unprocessed resumes from (None means start fresh)
+
+        # Lex the text using Pygments
+        formats = []
+        if i > 0:
+            text = text[i:]  # lex only from offset i onwards
+        for token, txt in self.get_tokens(text):
+            if txt:  # tokens with empty text contribute no format entry
+                formats.append((len(txt), formats_map(token)))
+
+        ss = self.saved_state_stack
+        if ss is not None:
+            state.pygments_stack = ss  # store the stack left on the lexer back onto the state object
+            # Clean up the lexer so that it can be re-used
+            self.saved_state_stack = None
+        return formats
+
+    return type(str('Qt'+base_class.__name__), (base_class,), {  # build the subclass dynamically so the same overrides can wrap any base_class
+        'saved_state_stack': None,  # class-level default; lex_a_line assigns the per-instance value
+        'get_tokens_unprocessed': get_tokens_unprocessed,
+        'lex_a_line':lex_a_line,
+    })
+
+class State(object):  # highlighter state record: a parse mode plus a pygments state stack
+
+    __slots__ = ('parse', 'pygments_stack')
+
+    def __init__(self):
+        self.parse = NORMAL
+        self.pygments_stack = None  # None until a stack is assigned from outside this class
+
+    def copy(self):  # new State holding its own copy of the stack list; parse is not copied and stays at the __init__ default
+        s = State()
+        s.pygments_stack = None if self.pygments_stack is None else list(self.pygments_stack)
+        return s
+
+    def __eq__(self, other):  # equal when both parse and pygments_stack match
+        return self.parse == getattr(other, 'parse', -1) and \
+            self.pygments_stack == getattr(other, 'pygments_stack', False)  # the -1 / False fallbacks stand in for attributes missing on other
+
+    def __ne__(self, other):  # explicit inverse of __eq__
+        return not self.__eq__(other)
+
+    def __repr__(self):
+        return "PythonState(%r)" % self.pygments_stack  # NOTE(review): label still reads "PythonState" although the class is named State
+    __str__ = __repr__
+
+
+class PygmentsUserData(QTextBlockUserData):  # QTextBlockUserData subclass carrying a State and a doc_name
+
+    def __init__(self):
+        QTextBlockUserData.__init__(self)
+        self.state = State()
+        self.doc_name = None
+
+    def clear(self, state=None, doc_name=None):  # reset to the given state/doc_name; defaults give a fresh State() and None
+        self.state = State() if state is None else state
+        self.doc_name = doc_name
+
+def create_formats(highlighter):  # returns format_for_pygments_token with theme and cache pre-bound as its first two arguments
+    cache = {}  # one dict per create_formats call, passed to every invocation of the returned partial
+    theme = highlighter.theme.copy()  # work on a copy so the None entry below is not added to highlighter.theme (assumes copy() returns a separate mapping)
+    theme[None] = NULL_FMT
+    return partial(format_for_pygments_token, theme, cache)
+
+def create_highlighter(name, lexer_class):  # returns a new SyntaxHighlighter subclass called `name` that lexes with lexer_class
+    return type(str(name), (SyntaxHighlighter,), {
+        'state_map': {NORMAL:create_lexer(lexer_class)().lex_a_line},  # bound method of one lexer instance, created here once per generated class
+        'create_formats_func': create_formats,
+        'user_data_factory': PygmentsUserData,
+    })
--- a/src/calibre/gui2/tweak_book/editor/syntax/python.py
+++ b/src/calibre/gui2/tweak_book/editor/syntax/python.py
@ -6,147 +6,11 @@ from __future__ import (unicode_literals, division, absolute_import,
 __license__ = 'GPL v3'
 __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

-from functools import partial
-
-from PyQt5.Qt import QTextBlockUserData
-
-from calibre.gui2.tweak_book.editor.syntax.base import SyntaxHighlighter
-from calibre.gui2.tweak_book.editor.syntax.utils import format_for_pygments_token, NULL_FMT
-
-from pygments.lexer import _TokenType, Text, Error
 from pygments.lexers import PythonLexer

-NORMAL = 0
+from calibre.gui2.tweak_book.editor.syntax.pygments_highlighter import create_highlighter

-
-class QtLexer(PythonLexer):
-
-    '''
-    Subclass the pygments PythonLexer to store state on the lexer itself,
-    allowing for efficient integration into Qt
-    '''
-
-    def __init__(self):
-        PythonLexer.__init__(self)
-        self.saved_state_stack = None
-
-    def get_tokens_unprocessed(self, text, stack=('root',)):
-        # Method is overriden to store state on the lexer itself
-        pos = 0
-        tokendefs = self._tokens
-        statestack = self.saved_state_stack = list(stack if self.saved_state_stack is None else self.saved_state_stack)
-        statetokens = tokendefs[statestack[-1]]
-        while 1:
-            for rexmatch, action, new_state in statetokens:
-                m = rexmatch(text, pos)
-                if m:
-                    if action is not None:
-                        if type(action) is _TokenType:
-                            yield pos, action, m.group()
-                        else:
-                            for item in action(self, m):
-                                yield item
-                    pos = m.end()
-                    if new_state is not None:
-                        # state transition
-                        if isinstance(new_state, tuple):
-                            for state in new_state:
-                                if state == '#pop':
-                                    statestack.pop()
-                                elif state == '#push':
-                                    statestack.append(statestack[-1])
-                                else:
-                                    statestack.append(state)
-                        elif isinstance(new_state, int):
-                            # pop
-                            del statestack[new_state:]
-                        elif new_state == '#push':
-                            statestack.append(statestack[-1])
-                        else:
-                            assert False, "wrong state def: %r" % new_state
-                        statetokens = tokendefs[statestack[-1]]
-                    break
-            else:
-                try:
-                    if text[pos] == '\n':
-                        # at EOL, reset state to "root"
-                        statestack = ['root']
-                        statetokens = tokendefs['root']
-                        yield pos, Text, u'\n'
-                        pos += 1
-                        continue
-                    yield pos, Error, text[pos]
-                    pos += 1
-                except IndexError:
-                    break
-
-lexer = QtLexer()
-
-class State(object):
-
-    __slots__ = ('parse', 'pygments_stack')
-
-    def __init__(self):
-        self.parse = NORMAL
-        self.pygments_stack = None
-
-    def copy(self):
-        s = State()
-        s.pygments_stack = None if self.pygments_stack is None else list(self.pygments_stack)
-        return s
-
-    def __eq__(self, other):
-        return self.parse == getattr(other, 'parse', -1) and \
-            self.pygments_stack == getattr(other, 'pygments_stack', False)
-
-    def __ne__(self, other):
-        return not self.__eq__(other)
-
-    def __repr__(self):
-        return "PythonState(%r)" % self.pygments_stack
-    __str__ = __repr__
-
-
-class PythonUserData(QTextBlockUserData):
-
-    def __init__(self):
-        QTextBlockUserData.__init__(self)
-        self.state = State()
-        self.doc_name = None
-
-    def clear(self, state=None, doc_name=None):
-        self.state = State() if state is None else state
-        self.doc_name = doc_name
-
-def normal(state, text, i, formats_map, user_data):
-    lexer.saved_state_stack = state.pygments_stack
-
-    # Lex the text using Pygments
-    formats = []
-    if i > 0:
-        text = text[i:]
-    for token, txt in lexer.get_tokens(text):
-        if txt:
-            formats.append((len(txt), formats_map(token)))
-
-    ss = lexer.saved_state_stack
-    if ss is not None:
-        state.pygments_stack = ss
-        # Clean up the lexer so that it can be re-used
-        lexer.saved_state_stack = None
-    return formats
-
-def create_formats(highlighter):
-    cache = {}
-    theme = highlighter.theme.copy()
-    theme[None] = NULL_FMT
-    return partial(format_for_pygments_token, theme, cache)
-
-class PythonHighlighter(SyntaxHighlighter):
-
-    state_map = {NORMAL:normal}
-    create_formats_func = create_formats
-    user_data_factory = PythonUserData
+PythonHighlighter = create_highlighter('PythonHighlighter', PythonLexer)  # generated by the shared factory in place of the hand-written class removed above

 if __name__ == '__main__':
    import os