Improvements to program mode template language:

- added if ... then ... else ... fi expressions. The else part is optional. An 'if' expression has a value, so an assignment such as a = if ... then ... fi works (see the usage sketch below)
- added infix string relational operators ==, !=, <, <=, >, >=
- added infix numeric relational operators ==#, !=#, <#, <=#, >#, >=#
- numerous performance improvements. In particular, the 'field' function is now evaluated inline
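
As a rough usage sketch (not part of the diff below), the new constructs could be combined in a program mode template along these lines; the lookup names 'series' and '#read_count' are placeholder column names, not something this commit adds:

    program:
        s = field('series');
        if s == 'Foo' then
            'In the Foo series'
        else
            if field('#read_count') ># 0 then 'read' else 'unread' fi
        fi

The unsuffixed operators compare their operands as strings via strcmp, while the #-suffixed forms convert both sides with float() first (and evaluate to False if either operand is empty), so numeric-looking values in text fields can compare differently under the two families.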
Charles Haley 2020-09-06 11:18:34 +01:00
parent 74a2957693
commit 0a317f6ecc
2 changed files with 199 additions and 79 deletions

View File

@@ -44,7 +44,7 @@ class TemplateHighlighter(QSyntaxHighlighter):
     Formats = {}
     BN_FACTOR = 1000

-    KEYWORDS = ["program"]
+    KEYWORDS = ["program", 'if', 'then', 'else', 'fi']

     def __init__(self, parent=None):
         super(TemplateHighlighter, self).__init__(parent)

View File

@@ -14,24 +14,27 @@ import re, string, traceback, numbers
 from calibre import prints
 from calibre.constants import DEBUG
 from calibre.utils.formatter_functions import formatter_functions
+from calibre.utils.icu import strcmp
 from polyglot.builtins import unicode_type, error_message


 class _Parser(object):
     LEX_OP = 1
     LEX_ID = 2
-    LEX_STR = 3
-    LEX_NUM = 4
-    LEX_EOF = 5
-
-    LEX_CONSTANTS = frozenset((LEX_STR, LEX_NUM))
+    LEX_CONST = 3
+    LEX_EOF = 4
+    LEX_INFIX = 5
+    LEX_IF = 6
+    LEX_THEN = 7
+    LEX_ELSE = 8
+    LEX_FI = 9

     def __init__(self, val, prog, funcs, parent):
         self.lex_pos = 0
         self.prog = prog[0]
         self.prog_len = len(self.prog)
         if prog[1] != '':
-            self.error(_('failed to scan program. Invalid input {0}').format(prog[1]))
+            self.error(_('Failed to scan program. Invalid input {0}').format(prog[1]))
         self.parent = parent
         self.parent_kwargs = parent.kwargs
         self.parent_book = parent.book

@@ -39,141 +42,252 @@ class _Parser(object)
         self.funcs = funcs

     def error(self, message):
-        m = 'Formatter: ' + message + _(' near ')
+        try:
+            tval = "'" + self.prog[self.lex_pos-1][1] + "'"
+        except:
+            tval = _('Unknown')
+        m = 'Formatter: ' + message + _(' near')
         if self.lex_pos > 0:
-            m = '{0} {1}'.format(m, self.prog[self.lex_pos-1][1])
+            m = '{0} {1}'.format(m, tval)
         elif self.lex_pos < self.prog_len:
-            m = '{0} {1}'.format(m, self.prog[self.lex_pos+1][1])
+            m = '{0} {1}'.format(m, tval)
         else:
             m = '{0} {1}'.format(m, _('end of program'))
         raise ValueError(m)

     def token(self):
-        if self.lex_pos >= self.prog_len:
-            return None
-        token = self.prog[self.lex_pos][1]
-        self.lex_pos += 1
-        return token
+        try:
+            token = self.prog[self.lex_pos][1]
+            self.lex_pos += 1
+            return token
+        except:
+            return None

     def consume(self):
         self.lex_pos += 1

-    def token_op_is_a_equals(self):
-        if self.lex_pos >= self.prog_len:
-            return False
-        token = self.prog[self.lex_pos]
-        return token[0] == self.LEX_OP and token[1] == '='
+    def token_op_is_equals(self):
+        try:
+            token = self.prog[self.lex_pos]
+            return token[1] == '=' and token[0] == self.LEX_OP
+        except:
+            return False

-    def token_op_is_a_lparen(self):
-        if self.lex_pos >= self.prog_len:
-            return False
-        token = self.prog[self.lex_pos]
-        return token[0] == self.LEX_OP and token[1] == '('
+    def token_op_is_infix_compare(self):
+        try:
+            return self.prog[self.lex_pos][0] == self.LEX_INFIX
+        except:
+            return False

-    def token_op_is_a_rparen(self):
-        if self.lex_pos >= self.prog_len:
-            return False
-        token = self.prog[self.lex_pos]
-        return token[0] == self.LEX_OP and token[1] == ')'
+    def token_op_is_lparen(self):
+        try:
+            token = self.prog[self.lex_pos]
+            return token[1] == '(' and token[0] == self.LEX_OP
+        except:
+            return False

-    def token_op_is_a_comma(self):
-        if self.lex_pos >= self.prog_len:
-            return False
-        token = self.prog[self.lex_pos]
-        return token[0] == self.LEX_OP and token[1] == ','
+    def token_op_is_rparen(self):
+        try:
+            token = self.prog[self.lex_pos]
+            return token[1] == ')' and token[0] == self.LEX_OP
+        except:
+            return False

-    def token_op_is_a_semicolon(self):
-        if self.lex_pos >= self.prog_len:
-            return False
-        token = self.prog[self.lex_pos]
-        return token[0] == self.LEX_OP and token[1] == ';'
+    def token_op_is_comma(self):
+        try:
+            token = self.prog[self.lex_pos]
+            return token[1] == ',' and token[0] == self.LEX_OP
+        except:
+            return False
+
+    def token_op_is_semicolon(self):
+        try:
+            token = self.prog[self.lex_pos]
+            return token[1] == ';' and token[0] == self.LEX_OP
+        except:
+            return False

     def token_is_id(self):
-        if self.lex_pos >= self.prog_len:
-            return False
-        return self.prog[self.lex_pos][0] == self.LEX_ID
+        try:
+            return self.prog[self.lex_pos][0] == self.LEX_ID
+        except:
+            return False
+
+    def token_is_if(self):
+        try:
+            return self.prog[self.lex_pos][0] == self.LEX_IF
+        except:
+            return False
+
+    def token_is_then(self):
+        try:
+            return self.prog[self.lex_pos][0] == self.LEX_THEN
+        except:
+            return False
+
+    def token_is_else(self):
+        try:
+            return self.prog[self.lex_pos][0] == self.LEX_ELSE
+        except:
+            return False
+
+    def token_is_fi(self):
+        try:
+            return self.prog[self.lex_pos][0] == self.LEX_FI
+        except:
+            return False

     def token_is_constant(self):
-        if self.lex_pos >= self.prog_len:
-            return False
-        return self.prog[self.lex_pos][0] in self.LEX_CONSTANTS
+        try:
+            return self.prog[self.lex_pos][0] == self.LEX_CONST
+        except:
+            return False

     def token_is_eof(self):
-        if self.lex_pos >= self.prog_len:
-            return True
-        token = self.prog[self.lex_pos]
-        return token[0] == self.LEX_EOF
+        try:
+            return self.prog[self.lex_pos][0] == self.LEX_EOF
+        except:
+            return True

     def program(self):
         val = self.statement()
         if not self.token_is_eof():
-            self.error(_('syntax error - program ends before EOF'))
+            self.error(_('Syntax error - program ends before EOF'))
         return val

     def statement(self):
-        while True:
-            val = self.expr()
-            if self.token_is_eof():
-                return val
-            if not self.token_op_is_a_semicolon():
-                return val
+        val = ''
+        while not self.token_is_eof():
+            val = self.infix_expr()
+            if not self.token_op_is_semicolon():
+                break
             self.consume()
-            if self.token_is_eof():
-                return val
+        return val

+    def consume_if(self):
+        self.consume()
+        while not self.token_is_fi():
+            if self.token_is_if():
+                self.consume_if()
+            self.consume()
+
+    def consume_then_branch(self):
+        while not (self.token_is_eof() or self.token_is_fi() or self.token_is_else()):
+            if self.token_is_if():
+                self.consume_if()
+            self.consume()
+
+    def consume_else_branch(self):
+        while not (self.token_is_eof() or self.token_is_fi()):
+            if self.token_is_if():
+                self.consume_if()
+            self.consume()
+
+    def if_expression(self):
+        self.consume()
+        val = ''
+        test_part = self.infix_expr()
+        if not self.token_is_then():
+            self.error(_("Missing 'then' in if statement"))
+        if test_part:
+            self.consume()
+            val = self.statement()
+            if not (self.token_is_else() or self.token_is_fi()):
+                self.error(_("Missing 'else' or 'fi' in if statement"))
+            self.consume_else_branch()
+        else:
+            self.consume_then_branch()
+            if self.token_is_else():
+                self.consume()
+                val = self.statement()
+        if not self.token_is_fi():
+            self.error(_("Missing 'fi' in if statement"))
+        self.consume()
+        return val

+    INFIX_OPS = {
+        "==": lambda x, y: strcmp(x, y) == 0,
+        "!=": lambda x, y: strcmp(x, y) != 0,
+        "<": lambda x, y: strcmp(x, y) < 0,
+        "<=": lambda x, y: strcmp(x, y) <= 0,
+        ">": lambda x, y: strcmp(x, y) > 0,
+        ">=": lambda x, y: strcmp(x, y) >= 0,
+        "==#": lambda x, y: float(x) == float(y) if x and y else False,
+        "!=#": lambda x, y: float(x) != float(y) if x and y else False,
+        "<#": lambda x, y: float(x) < float(y) if x and y else False,
+        "<=#": lambda x, y: float(x) <= float(y) if x and y else False,
+        ">#": lambda x, y: float(x) > float(y) if x and y else False,
+        ">=#": lambda x, y: float(x) >= float(y) if x and y else False,
+    }
+
+    def infix_expr(self):
+        left = self.expr()
+        if self.token_op_is_infix_compare():
+            t = self.token()
+            right = self.expr()
+            return '1' if self.INFIX_OPS[t](left, right) else ''
+        return left

     def expr(self):
+        if self.token_is_if():
+            return self.if_expression()
         if self.token_is_id():
             # We have an identifier. Determine if it is a function
-            id = self.token()
-            if not self.token_op_is_a_lparen():
-                if self.token_op_is_a_equals():
+            id_ = self.token()
+            if not self.token_op_is_lparen():
+                if self.token_op_is_equals():
                     # classic assignment statement
                     self.consume()
                     cls = self.funcs['assign']
                     return cls.eval_(self.parent, self.parent_kwargs,
-                                     self.parent_book, self.locals, id, self.expr())
-                val = self.locals.get(id, None)
+                                     self.parent_book, self.locals, id_, self.infix_expr())
+                val = self.locals.get(id_, None)
                 if val is None:
-                    self.error(_('Unknown identifier ') + id)
+                    self.error(_('Unknown identifier {0}').format(id_))
                 return val
             # We have a function.
             # Check if it is a known one. We do this here so error reporting is
             # better, as it can identify the tokens near the problem.
-            id = id.strip()
-            if id not in self.funcs:
-                self.error(_('unknown function {0}').format(id))
+            id_ = id_.strip()
+            if id_ not in self.funcs:
+                self.error(_('Unknown function {0}').format(id_))
             # Eat the paren
             self.consume()
             args = list()
-            while not self.token_op_is_a_rparen():
-                if id == 'assign' and len(args) == 0:
+            while not self.token_op_is_rparen():
+                if id_ == 'assign' and len(args) == 0:
                     # Must handle the lvalue semantics of the assign function.
                     # The first argument is the name of the destination, not
                     # the value.
                     if not self.token_is_id():
-                        self.error('assign requires the first parameter be an id')
+                        self.error(_("'Assign' requires the first parameter be an id"))
                     args.append(self.token())
                 else:
                     # evaluate the argument (recursive call)
-                    args.append(self.statement())
-                if not self.token_op_is_a_comma():
+                    args.append(self.infix_expr())
+                if not self.token_op_is_comma():
                     break
                 self.consume()
             if self.token() != ')':
-                self.error(_('missing closing parenthesis'))
-            # Evaluate the function
-            cls = self.funcs[id]
+                self.error(_('Missing closing parenthesis'))
+            # Evaluate the function.
+            if id_ == 'field':
+                # Evaluate the 'field' function inline for performance
+                if len(args) != 1:
+                    self.error(_('Incorrect number of arguments for function {0}').format(id_))
+                return self.parent.get_value(args[0], [], self.parent_kwargs)
+            cls = self.funcs[id_]
             if cls.arg_count != -1 and len(args) != cls.arg_count:
-                self.error('incorrect number of arguments for function {}'.format(id))
+                self.error(_('Incorrect number of arguments for function {0}').format(id_))
             return cls.eval_(self.parent, self.parent_kwargs,
                              self.parent_book, self.locals, *args)
         elif self.token_is_constant():
             # String or number
             return self.token()
         else:
-            self.error(_('expression is not function or constant'))
+            self.error(_('Expression is not function or constant'))


 class TemplateFormatter(string.Formatter):

@@ -241,14 +355,20 @@ class TemplateFormatter(string.Formatter):
     # ################# 'Functional' template language ######################

     lex_scanner = re.Scanner([
-        (r'[(),=;]', lambda x,t: (1, t)),
-        (r'-?[\d\.]+', lambda x,t: (3, t)),
-        (r'\$', lambda x,t: (2, t)),
-        (r'\w+', lambda x,t: (2, t)),
-        (r'".*?((?<!\\)")', lambda x,t: (3, t[1:-1])),
-        (r'\'.*?((?<!\\)\')', lambda x,t: (3, t[1:-1])),
-        (r'\n#.*?(?:(?=\n)|$)', None),
-        (r'\s', None)
+        (r'(==#|!=#|<=#|<#|>=#|>#|==|!=|<=|<|>=|>)',
+         lambda x,t: (_Parser.LEX_INFIX, t)),
+        (r'if\b', lambda x,t: (_Parser.LEX_IF, t)),
+        (r'then\b', lambda x,t: (_Parser.LEX_THEN, t)),
+        (r'else\b', lambda x,t: (_Parser.LEX_ELSE, t)),
+        (r'fi\b', lambda x,t: (_Parser.LEX_FI, t)),
+        (r'[(),=;]', lambda x,t: (_Parser.LEX_OP, t)),
+        (r'-?[\d\.]+', lambda x,t: (_Parser.LEX_CONST, t)),
+        (r'\$', lambda x,t: (_Parser.LEX_ID, t)),
+        (r'\w+', lambda x,t: (_Parser.LEX_ID, t)),
+        (r'".*?((?<!\\)")', lambda x,t: (_Parser.LEX_CONST, t[1:-1])),
+        (r'\'.*?((?<!\\)\')', lambda x,t: (_Parser.LEX_CONST, t[1:-1])),
+        (r'\n#.*?(?:(?=\n)|$)', None),
+        (r'\s', None),
     ], flags=re.DOTALL)

     def _eval_program(self, val, prog, column_name):
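
As a rough illustration of the new scanner rules (an assumed tokenization, not part of the diff): the #-suffixed and two-character operators are listed before their one-character prefixes so the longest form matches first, and the \b guards keep identifiers such as 'field' from being lexed as the 'fi' keyword. A fragment like

    if field('#pages') >=# 250 then 'long' else 'short' fi

would scan approximately as LEX_IF, LEX_ID (field), LEX_OP, LEX_CONST (#pages), LEX_OP, LEX_INFIX (>=#), LEX_CONST (250), LEX_THEN, LEX_CONST (long), LEX_ELSE, LEX_CONST (short), LEX_FI; here '#pages' is a placeholder column name.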