Performance improvements when evaluating templates, in particular General Program Mode (GPM) templates

This commit is contained in:
Charles Haley 2011-11-07 12:45:29 +01:00
parent d624474d4a
commit 4ec538475a
6 changed files with 101 additions and 65 deletions

View File

@ -45,9 +45,9 @@ class SafeFormat(TemplateFormatter):
def get_value(self, orig_key, args, kwargs): def get_value(self, orig_key, args, kwargs):
if not orig_key: if not orig_key:
return '' return ''
orig_key = orig_key.lower() key = orig_key = orig_key.lower()
key = orig_key if key != 'title_sort' and key not in TOP_LEVEL_IDENTIFIERS and \
if key != 'title_sort' and key not in TOP_LEVEL_IDENTIFIERS: key not in ALL_METADATA_FIELDS:
key = field_metadata.search_term_to_field_key(key) key = field_metadata.search_term_to_field_key(key)
if key is None or (self.book and if key is None or (self.book and
key not in self.book.all_field_keys()): key not in self.book.all_field_keys()):
@ -59,9 +59,8 @@ class SafeFormat(TemplateFormatter):
b = self.book.get_user_metadata(key, False) b = self.book.get_user_metadata(key, False)
except: except:
b = None b = None
if b and b['datatype'] == 'int' and self.book.get(key, 0) == 0: if b and ((b['datatype'] == 'int' and self.book.get(key, 0) == 0) or
v = '' (b['datatype'] == 'float' and self.book.get(key, 0.0) == 0.0)):
elif b and b['datatype'] == 'float' and self.book.get(key, 0.0) == 0.0:
v = '' v = ''
else: else:
v = self.book.format_field(key, series_with_index=False)[1] v = self.book.format_field(key, series_with_index=False)[1]
@ -95,7 +94,7 @@ class Metadata(object):
becomes a reserved field name. becomes a reserved field name.
''' '''
def __init__(self, title, authors=(_('Unknown'),), other=None): def __init__(self, title, authors=(_('Unknown'),), other=None, template_cache=None):
''' '''
@param title: title or ``_('Unknown')`` @param title: title or ``_('Unknown')``
@param authors: List of strings or [] @param authors: List of strings or []
@ -114,6 +113,7 @@ class Metadata(object):
self.author = list(authors) if authors else []# Needed for backward compatibility self.author = list(authors) if authors else []# Needed for backward compatibility
self.authors = list(authors) if authors else [] self.authors = list(authors) if authors else []
self.formatter = SafeFormat() self.formatter = SafeFormat()
self.template_cache = template_cache
def is_null(self, field): def is_null(self, field):
''' '''
@ -159,7 +159,8 @@ class Metadata(object):
d['display']['composite_template'], d['display']['composite_template'],
self, self,
_('TEMPLATE ERROR'), _('TEMPLATE ERROR'),
self).strip() self, column_name=field,
template_cache=self.template_cache).strip()
return val return val
if field.startswith('#') and field.endswith('_index'): if field.startswith('#') and field.endswith('_index'):
try: try:

View File

@ -936,6 +936,9 @@ class ResultCache(SearchQueryParser): # {{{
item.refresh_composites() item.refresh_composites()
def refresh(self, db, field=None, ascending=True): def refresh(self, db, field=None, ascending=True):
# reinitialize the template cache in case a composite column has changed
db.initialize_template_cache()
temp = db.conn.get('SELECT * FROM meta2') temp = db.conn.get('SELECT * FROM meta2')
self._data = list(itertools.repeat(None, temp[-1][0]+2)) if temp else [] self._data = list(itertools.repeat(None, temp[-1][0]+2)) if temp else []
for r in temp: for r in temp:

View File

@ -216,8 +216,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.create_custom_column(f['label'], f['name'], f['datatype'], self.create_custom_column(f['label'], f['name'], f['datatype'],
f['is_multiple'] is not None and len(f['is_multiple']) > 0, f['is_multiple'] is not None and len(f['is_multiple']) > 0,
f['is_editable'], f['display']) f['is_editable'], f['display'])
self.initialize_template_cache()
self.initialize_dynamic() self.initialize_dynamic()
def initialize_template_cache(self):
self.formatter_template_cache = {}
def get_property(self, idx, index_is_id=False, loc=-1): def get_property(self, idx, index_is_id=False, loc=-1):
row = self.data._data[idx] if index_is_id else self.data[idx] row = self.data._data[idx] if index_is_id else self.data[idx]
if row is not None: if row is not None:
@ -897,7 +901,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
''' '''
row = self.data._data[idx] if index_is_id else self.data[idx] row = self.data._data[idx] if index_is_id else self.data[idx]
fm = self.FIELD_MAP fm = self.FIELD_MAP
mi = Metadata(None) mi = Metadata(None, template_cache=self.formatter_template_cache)
aut_list = row[fm['au_map']] aut_list = row[fm['au_map']]
if aut_list: if aut_list:
@ -955,6 +959,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
mi.set_identifiers(self.get_identifiers(id, index_is_id=True)) mi.set_identifiers(self.get_identifiers(id, index_is_id=True))
mi.application_id = id mi.application_id = id
mi.id = id mi.id = id
for key, meta in self.field_metadata.custom_iteritems(): for key, meta in self.field_metadata.custom_iteritems():
mi.set_user_metadata(key, meta) mi.set_user_metadata(key, meta)
if meta['datatype'] == 'composite': if meta['datatype'] == 'composite':

View File

@ -635,9 +635,7 @@ class FieldMetadata(dict):
self._search_term_map[t] = key self._search_term_map[t] = key
def search_term_to_field_key(self, term): def search_term_to_field_key(self, term):
if term in self._search_term_map: return self._search_term_map.get(term, term)
return self._search_term_map[term]
return term
def searchable_fields(self): def searchable_fields(self):
return [k for k in self._tb_cats.keys() return [k for k in self._tb_cats.keys()

View File

@ -20,53 +20,84 @@ class _Parser(object):
LEX_NUM = 4 LEX_NUM = 4
LEX_EOF = 5 LEX_EOF = 5
LEX_CONSTANTS = frozenset([LEX_STR, LEX_NUM])
def __init__(self, val, prog, parent): def __init__(self, val, prog, parent):
self.lex_pos = 0 self.lex_pos = 0
self.prog = prog[0] self.prog = prog[0]
self.prog_len = len(self.prog)
if prog[1] != '': if prog[1] != '':
self.error(_('failed to scan program. Invalid input {0}').format(prog[1])) self.error(_('failed to scan program. Invalid input {0}').format(prog[1]))
self.parent = parent self.parent = parent
self.parent.locals = {'$':val} parent.locals = {'$':val}
self.parent_kwargs = parent.kwargs
self.parent_book = parent.book
self.parent_locals = parent.locals
def error(self, message): def error(self, message):
m = 'Formatter: ' + message + _(' near ') m = 'Formatter: ' + message + _(' near ')
if self.lex_pos > 0: if self.lex_pos > 0:
m = '{0} {1}'.format(m, self.prog[self.lex_pos-1][1]) m = '{0} {1}'.format(m, self.prog[self.lex_pos-1][1])
elif self.lex_pos < len(self.prog): elif self.lex_pos < self.prog_len:
m = '{0} {1}'.format(m, self.prog[self.lex_pos+1][1]) m = '{0} {1}'.format(m, self.prog[self.lex_pos+1][1])
else: else:
m = '{0} {1}'.format(m, _('end of program')) m = '{0} {1}'.format(m, _('end of program'))
raise ValueError(m) raise ValueError(m)
def token(self): def token(self):
if self.lex_pos >= len(self.prog): if self.lex_pos >= self.prog_len:
return None return None
token = self.prog[self.lex_pos] token = self.prog[self.lex_pos][1]
self.lex_pos += 1 self.lex_pos += 1
return token[1] return token
def lookahead(self):
if self.lex_pos >= len(self.prog):
return (self.LEX_EOF, '')
return self.prog[self.lex_pos]
def consume(self): def consume(self):
self.lex_pos += 1 self.lex_pos += 1
def token_op_is_a(self, val): def token_op_is_a_equals(self):
token = self.lookahead() if self.lex_pos >= self.prog_len:
return token[0] == self.LEX_OP and token[1] == val return False
token = self.prog[self.lex_pos]
return token[0] == self.LEX_OP and token[1] == '='
def token_op_is_a_lparen(self):
if self.lex_pos >= self.prog_len:
return False
token = self.prog[self.lex_pos]
return token[0] == self.LEX_OP and token[1] == '('
def token_op_is_a_rparen(self):
if self.lex_pos >= self.prog_len:
return False
token = self.prog[self.lex_pos]
return token[0] == self.LEX_OP and token[1] == ')'
def token_op_is_a_comma(self):
if self.lex_pos >= self.prog_len:
return False
token = self.prog[self.lex_pos]
return token[0] == self.LEX_OP and token[1] == ','
def token_op_is_a_semicolon(self):
if self.lex_pos >= self.prog_len:
return False
token = self.prog[self.lex_pos]
return token[0] == self.LEX_OP and token[1] == ';'
def token_is_id(self): def token_is_id(self):
token = self.lookahead() if self.lex_pos >= self.prog_len:
return token[0] == self.LEX_ID return False
return self.prog[self.lex_pos][0] == self.LEX_ID
def token_is_constant(self): def token_is_constant(self):
token = self.lookahead() if self.lex_pos >= self.prog_len:
return token[0] == self.LEX_STR or token[0] == self.LEX_NUM return False
return self.prog[self.lex_pos][0] in self.LEX_CONSTANTS
def token_is_eof(self): def token_is_eof(self):
token = self.lookahead() if self.lex_pos >= self.prog_len:
return True
token = self.prog[self.lex_pos]
return token[0] == self.LEX_EOF return token[0] == self.LEX_EOF
def program(self): def program(self):
@ -80,7 +111,7 @@ class _Parser(object):
val = self.expr() val = self.expr()
if self.token_is_eof(): if self.token_is_eof():
return val return val
if not self.token_op_is_a(';'): if not self.token_op_is_a_semicolon():
return val return val
self.consume() self.consume()
if self.token_is_eof(): if self.token_is_eof():
@ -91,13 +122,13 @@ class _Parser(object):
funcs = formatter_functions().get_functions() funcs = formatter_functions().get_functions()
# We have an identifier. Determine if it is a function # We have an identifier. Determine if it is a function
id = self.token() id = self.token()
if not self.token_op_is_a('('): if not self.token_op_is_a_lparen():
if self.token_op_is_a('='): if self.token_op_is_a_equals():
# classic assignment statement # classic assignment statement
self.consume() self.consume()
cls = funcs['assign'] cls = funcs['assign']
return cls.eval_(self.parent, self.parent.kwargs, return cls.eval_(self.parent, self.parent_kwargs,
self.parent.book, self.parent.locals, id, self.expr()) self.parent_book, self.parent_locals, id, self.expr())
val = self.parent.locals.get(id, None) val = self.parent.locals.get(id, None)
if val is None: if val is None:
self.error(_('Unknown identifier ') + id) self.error(_('Unknown identifier ') + id)
@ -105,13 +136,13 @@ class _Parser(object):
# We have a function. # We have a function.
# Check if it is a known one. We do this here so error reporting is # Check if it is a known one. We do this here so error reporting is
# better, as it can identify the tokens near the problem. # better, as it can identify the tokens near the problem.
if id not in funcs: if id not in funcs:
self.error(_('unknown function {0}').format(id)) self.error(_('unknown function {0}').format(id))
# Eat the paren # Eat the paren
self.consume() self.consume()
args = list() args = list()
while not self.token_op_is_a(')'): while not self.token_op_is_a_rparen():
if id == 'assign' and len(args) == 0: if id == 'assign' and len(args) == 0:
# Must handle the lvalue semantics of the assign function. # Must handle the lvalue semantics of the assign function.
# The first argument is the name of the destination, not # The first argument is the name of the destination, not
@ -122,25 +153,18 @@ class _Parser(object):
else: else:
# evaluate the argument (recursive call) # evaluate the argument (recursive call)
args.append(self.statement()) args.append(self.statement())
if not self.token_op_is_a(','): if not self.token_op_is_a_comma():
break break
self.consume() self.consume()
if self.token() != ')': if self.token() != ')':
self.error(_('missing closing parenthesis')) self.error(_('missing closing parenthesis'))
# Evaluate the function # Evaluate the function
if id in funcs:
cls = funcs[id] cls = funcs[id]
if cls.arg_count != -1 and len(args) != cls.arg_count: if cls.arg_count != -1 and len(args) != cls.arg_count:
self.error('incorrect number of arguments for function {}'.format(id)) self.error('incorrect number of arguments for function {}'.format(id))
return cls.eval_(self.parent, self.parent.kwargs, return cls.eval_(self.parent, self.parent_kwargs,
self.parent.book, self.parent.locals, *args) self.parent_book, self.parent_locals, *args)
else:
f = self.parent.functions[id]
if f[0] != -1 and len(args) != f[0]+1:
self.error('incorrect number of arguments for function {}'.format(id))
return f[1](self.parent, *args)
# can't get here
elif self.token_is_constant(): elif self.token_is_constant():
# String or number # String or number
return self.token() return self.token()
@ -163,7 +187,6 @@ class TemplateFormatter(string.Formatter):
string.Formatter.__init__(self) string.Formatter.__init__(self)
self.book = None self.book = None
self.kwargs = None self.kwargs = None
self.program_cache = {}
self.locals = {} self.locals = {}
def _do_format(self, val, fmt): def _do_format(self, val, fmt):
@ -222,14 +245,17 @@ class TemplateFormatter(string.Formatter):
(r'\s', None) (r'\s', None)
], flags=re.DOTALL) ], flags=re.DOTALL)
def _eval_program(self, val, prog): def _eval_program(self, val, prog, column_name):
# keep a cache of the lex'ed program under the theory that re-lexing # keep a cache of the lex'ed program under the theory that re-lexing
# is much more expensive than the cache lookup. This is certainly true # is much more expensive than the cache lookup. This is certainly true
# for more than a few tokens, but it isn't clear for simple programs. # for more than a few tokens, but it isn't clear for simple programs.
lprog = self.program_cache.get(prog, None) if column_name is not None and self.template_cache is not None:
lprog = self.template_cache.get(column_name, None)
if not lprog: if not lprog:
lprog = self.lex_scanner.scan(prog) lprog = self.lex_scanner.scan(prog)
self.program_cache[prog] = lprog self.template_cache[column_name] = lprog
else:
lprog = self.lex_scanner.scan(prog)
parser = _Parser(val, lprog, self) parser = _Parser(val, lprog, self)
return parser.program() return parser.program()
@ -257,7 +283,7 @@ class TemplateFormatter(string.Formatter):
if p >= 0: if p >= 0:
p += 1 p += 1
if p >= 0 and fmt[-1] == '\'': if p >= 0 and fmt[-1] == '\'':
val = self._eval_program(val, fmt[p+1:-1]) val = self._eval_program(val, fmt[p+1:-1], None)
colon = fmt[0:p].find(':') colon = fmt[0:p].find(':')
if colon < 0: if colon < 0:
dispfmt = '' dispfmt = ''
@ -303,11 +329,11 @@ class TemplateFormatter(string.Formatter):
return '' return ''
return prefix + val + suffix return prefix + val + suffix
def vformat(self, fmt, args, kwargs): def evaluate(self, fmt, args, kwargs):
if fmt.startswith('program:'): if fmt.startswith('program:'):
ans = self._eval_program(None, fmt[8:]) ans = self._eval_program(None, fmt[8:], self.column_name)
else: else:
ans = string.Formatter.vformat(self, fmt, args, kwargs) ans = self.vformat(fmt, args, kwargs)
return self.compress_spaces.sub(' ', ans).strip() return self.compress_spaces.sub(' ', ans).strip()
########## a formatter that throws exceptions ############ ########## a formatter that throws exceptions ############
@ -317,17 +343,20 @@ class TemplateFormatter(string.Formatter):
self.book = book self.book = book
self.composite_values = {} self.composite_values = {}
self.locals = {} self.locals = {}
return self.vformat(fmt, [], kwargs).strip() return self.evaluate(fmt, [], kwargs).strip()
########## a formatter guaranteed not to throw an exception ############ ########## a formatter guaranteed not to throw an exception ############
def safe_format(self, fmt, kwargs, error_value, book): def safe_format(self, fmt, kwargs, error_value, book,
column_name=None, template_cache=None):
self.column_name = column_name
self.template_cache = template_cache
self.kwargs = kwargs self.kwargs = kwargs
self.book = book self.book = book
self.composite_values = {} self.composite_values = {}
self.locals = {} self.locals = {}
try: try:
ans = self.vformat(fmt, [], kwargs).strip() ans = self.evaluate(fmt, [], kwargs).strip()
except Exception as e: except Exception as e:
# if DEBUG: # if DEBUG:
# traceback.print_exc() # traceback.print_exc()

View File

@ -85,10 +85,10 @@ class FormatterFunction(object):
ret = self.evaluate(formatter, kwargs, mi, locals, *args) ret = self.evaluate(formatter, kwargs, mi, locals, *args)
if isinstance(ret, (str, unicode)): if isinstance(ret, (str, unicode)):
return ret return ret
if isinstance(ret, (int, float, bool)):
return unicode(ret)
if isinstance(ret, list): if isinstance(ret, list):
return ','.join(list) return ','.join(list)
if isinstance(ret, (int, float, bool)):
return unicode(ret)
class BuiltinFormatterFunction(FormatterFunction): class BuiltinFormatterFunction(FormatterFunction):
def __init__(self): def __init__(self):