Template language: Speed up evaluation of general program mode templates by pre-compiling them to Python. If you experience errors with this optimization, you can turn it off via Preferences->Tweaks. This commit also includes other miscellaneous optimizations in evaluating templates with composite columns.

This commit is contained in:
Kovid Goyal 2012-03-15 13:26:59 +05:30
commit b2c1d46ba6
7 changed files with 248 additions and 46 deletions

View File

@ -502,3 +502,13 @@ tweak_book_prefer = 'epub'
# negative number to increase or decrease the font size.
change_book_details_font_size_by = 0
#: Compile General Program Mode templates to Python
# Compiled general program mode templates are significantly faster than
# interpreted templates. Setting this tweak to True causes calibre to compile
# (in most cases) general program mode templates. Setting it to False causes
# calibre to use the old behavior -- interpreting the templates. Set the tweak
# to False if some compiled templates produce incorrect values.
# Default: compile_gpm_templates = True
# No compile: compile_gpm_templates = False
compile_gpm_templates = True

View File

@ -9,16 +9,19 @@ import copy, traceback
from calibre import prints
from calibre.constants import DEBUG
from calibre.ebooks.metadata.book import SC_COPYABLE_FIELDS
from calibre.ebooks.metadata.book import SC_FIELDS_COPY_NOT_NULL
from calibre.ebooks.metadata.book import STANDARD_METADATA_FIELDS
from calibre.ebooks.metadata.book import TOP_LEVEL_IDENTIFIERS
from calibre.ebooks.metadata.book import ALL_METADATA_FIELDS
from calibre.ebooks.metadata.book import (SC_COPYABLE_FIELDS,
SC_FIELDS_COPY_NOT_NULL, STANDARD_METADATA_FIELDS,
TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS)
from calibre.library.field_metadata import FieldMetadata
from calibre.utils.date import isoformat, format_date
from calibre.utils.icu import sort_key
from calibre.utils.formatter import TemplateFormatter
# Special sets used to optimize the performance of getting and setting
# attributes on Metadata objects
# SIMPLE_GET: standard fields that are not top-level identifiers.
# Metadata.__getattribute__ tests membership in this set first and answers
# such fields directly with _data.get(field, None).
SIMPLE_GET = frozenset(STANDARD_METADATA_FIELDS - TOP_LEVEL_IDENTIFIERS)
# SIMPLE_SET: the SIMPLE_GET fields except 'identifiers'.
# Metadata.__setattr__ stores these straight into _data, replacing a None
# value with a copy of the field's NULL_VALUES entry.
# NOTE(review): 'identifiers' is presumably excluded because assigning it
# needs extra handling elsewhere -- confirm.
SIMPLE_SET = frozenset(SIMPLE_GET - {'identifiers'})
def human_readable(size, precision=2):
    """
    Format a size given in bytes as a megabyte string such as ``'1.00MB'``.

    ``precision`` is the number of digits printed after the decimal point.
    """
    megabytes = size / (1024. * 1024.)
    # Build the format string first, e.g. '%.2fMB' for precision=2.
    template = '%.' + str(precision) + 'f' + 'MB'
    return template % (megabytes,)
@ -136,6 +139,8 @@ class Metadata(object):
def __getattribute__(self, field):
_data = object.__getattribute__(self, '_data')
if field in SIMPLE_GET:
return _data.get(field, None)
if field in TOP_LEVEL_IDENTIFIERS:
return _data.get('identifiers').get(field, None)
if field == 'language':
@ -143,8 +148,6 @@ class Metadata(object):
return _data.get('languages', [])[0]
except:
return NULL_VALUES['language']
if field in STANDARD_METADATA_FIELDS:
return _data.get(field, None)
try:
return object.__getattribute__(self, field)
except AttributeError:
@ -173,7 +176,11 @@ class Metadata(object):
def __setattr__(self, field, val, extra=None):
_data = object.__getattribute__(self, '_data')
if field in TOP_LEVEL_IDENTIFIERS:
if field in SIMPLE_SET:
if val is None:
val = copy.copy(NULL_VALUES.get(field, None))
_data[field] = val
elif field in TOP_LEVEL_IDENTIFIERS:
field, val = self._clean_identifier(field, val)
identifiers = _data['identifiers']
identifiers.pop(field, None)
@ -188,10 +195,6 @@ class Metadata(object):
if val and val.lower() != 'und':
langs = [val]
_data['languages'] = langs
elif field in STANDARD_METADATA_FIELDS:
if val is None:
val = copy.copy(NULL_VALUES.get(field, None))
_data[field] = val
elif field in _data['user_metadata'].iterkeys():
_data['user_metadata'][field]['#value#'] = val
_data['user_metadata'][field]['#extra#'] = extra
@ -404,9 +407,19 @@ class Metadata(object):
'''
if metadata is None:
traceback.print_stack()
return
um = {}
for key, meta in metadata.iteritems():
m = meta.copy()
if '#value#' not in m:
if m['datatype'] == 'text' and m['is_multiple']:
m['#value#'] = []
else:
for key in metadata:
self.set_user_metadata(key, metadata[key])
m['#value#'] = None
um[key] = m
_data = object.__getattribute__(self, '_data')
_data['user_metadata'].update(um)
def set_user_metadata(self, field, metadata):
'''
@ -420,9 +433,11 @@ class Metadata(object):
if metadata is None:
traceback.print_stack()
return
m = {}
for k in metadata:
m[k] = copy.copy(metadata[k])
m = dict(metadata)
# Copying the elements should not be necessary. The objects referenced
# in the dict should not change. Of course, they can be replaced.
# for k,v in metadata.iteritems():
# m[k] = copy.copy(v)
if '#value#' not in m:
if m['datatype'] == 'text' and m['is_multiple']:
m['#value#'] = []

View File

@ -227,6 +227,25 @@ class CustomColumns(object):
return self.conn.get('''SELECT extra FROM %s
WHERE book=?'''%lt, (idx,), all=False)
def get_custom_and_extra(self, idx, label=None, num=None, index_is_id=False):
    '''
    Return ``(value, extra)`` for one custom column of one book.

    The column is selected by ``label`` or by ``num``; at least one of them
    must be given, and ``num`` wins when both are supplied.

    :param idx: the book id if ``index_is_id`` is True, otherwise an index
                that is converted to a book id with ``self.id()``
    :param label: key into ``self.custom_column_label_map``
    :param num: key into ``self.custom_column_num_map``
    :param index_is_id: whether ``idx`` already is a book id
    :return: a two-tuple. For multiple-valued text columns the value is a
             list (empty when the cached value is empty), sorted
             case-insensitively when the column's ``sort_alpha`` display
             option is set. ``extra`` is looked up in the database only for
             columns of datatype ``'series'``; it is None for all others.
    '''
    if label is not None:
        data = self.custom_column_label_map[label]
    if num is not None:
        data = self.custom_column_num_map[num]
    idx = idx if index_is_id else self.id(idx)
    row = self.data._data[idx]
    ans = row[self.FIELD_MAP[data['num']]]
    if data['is_multiple'] and data['datatype'] == 'text':
        # The cache stores multiple values as one separator-joined string.
        ans = ans.split(data['multiple_seps']['cache_to_list']) if ans else []
        if data['display'].get('sort_alpha', False):
            # A key function lower-cases each item once instead of on every
            # comparison, and also works where the cmp argument is gone.
            ans.sort(key=lambda x: x.lower())
    if data['datatype'] != 'series':
        return (ans, None)
    ign, lt = self.custom_table_names(data['num'])
    extra = self.conn.get('''SELECT extra FROM %s
WHERE book=?'''%lt, (idx,), all=False)
    return (ans, extra)
# convenience methods for tag editing
def get_custom_items_with_ids(self, label=None, num=None):
if label is not None:

View File

@ -910,7 +910,15 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
Convenience method to return metadata as a :class:`Metadata` object.
Note that the list of formats is not verified.
'''
row = self.data._data[idx] if index_is_id else self.data[idx]
idx = idx if index_is_id else self.id(idx)
try:
row = self.data._data[idx]
except:
row = None
if row is None:
raise ValueError('No book with id: %d'%idx)
fm = self.FIELD_MAP
mi = Metadata(None, template_cache=self.formatter_template_cache)
@ -948,14 +956,13 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
mi.book_size = row[fm['size']]
mi.ondevice_col= row[fm['ondevice']]
mi.last_modified = row[fm['last_modified']]
id = idx if index_is_id else self.id(idx)
formats = row[fm['formats']]
mi.format_metadata = {}
if not formats:
good_formats = None
else:
formats = sorted(formats.split(','))
mi.format_metadata = FormatMetadata(self, id, formats)
mi.format_metadata = FormatMetadata(self, idx, formats)
good_formats = FormatsList(formats, mi.format_metadata)
mi.formats = good_formats
tags = row[fm['tags']]
@ -968,19 +975,18 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
if mi.series:
mi.series_index = row[fm['series_index']]
mi.rating = row[fm['rating']]
mi.set_identifiers(self.get_identifiers(id, index_is_id=True))
mi.application_id = id
mi.id = id
mi.set_identifiers(self.get_identifiers(idx, index_is_id=True))
mi.application_id = idx
mi.id = idx
mi.set_all_user_metadata(self.field_metadata.custom_field_metadata())
for key, meta in self.field_metadata.custom_iteritems():
mi.set_user_metadata(key, meta)
if meta['datatype'] == 'composite':
mi.set(key, val=row[meta['rec_index']])
else:
mi.set(key, val=self.get_custom(idx, label=meta['label'],
index_is_id=index_is_id),
extra=self.get_custom_extra(idx, label=meta['label'],
index_is_id=index_is_id))
val, extra = self.get_custom_and_extra(idx, label=meta['label'],
index_is_id=True)
mi.set(key, val=val, extra=extra)
user_cats = self.prefs['user_categories']
user_cat_vals = {}
@ -999,12 +1005,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
if get_cover:
if cover_as_data:
cdata = self.cover(id, index_is_id=True)
cdata = self.cover(idx, index_is_id=True)
if cdata:
mi.cover_data = ('jpeg', cdata)
else:
mi.cover = self.cover(id, index_is_id=True, as_path=True)
mi.has_cover = _('Yes') if self.has_cover(id) else ''
mi.cover = self.cover(idx, index_is_id=True, as_path=True)
mi.has_cover = _('Yes') if self.has_cover(idx) else ''
return mi
def has_book(self, mi):

View File

@ -388,6 +388,7 @@ class FieldMetadata(dict):
def __init__(self):
self._field_metadata = copy.deepcopy(self._field_metadata_prototype)
self._tb_cats = OrderedDict()
self._tb_custom_fields = {}
self._search_term_map = {}
self.custom_label_to_key_map = {}
for k,v in self._field_metadata:
@ -477,10 +478,8 @@ class FieldMetadata(dict):
yield (key, self._tb_cats[key])
def custom_iteritems(self):
for key in self._tb_cats:
fm = self._tb_cats[key]
if fm['is_custom']:
yield (key, self._tb_cats[key])
for key, meta in self._tb_custom_fields.iteritems():
yield (key, meta)
def items(self):
    '''Return everything yielded by ``self.iteritems()`` as a list.'''
    return list(self.iteritems())
@ -516,6 +515,8 @@ class FieldMetadata(dict):
return l
def custom_field_metadata(self, include_composites=True):
if include_composites:
return self._tb_custom_fields
l = {}
for k in self.custom_field_keys(include_composites):
l[k] = self._tb_cats[k]
@ -537,6 +538,7 @@ class FieldMetadata(dict):
'is_custom':True, 'is_category':is_category,
'link_column':'value','category_sort':'value',
'is_csp' : is_csp, 'is_editable': is_editable,}
self._tb_custom_fields[key] = self._tb_cats[key]
self._add_search_terms_to_map(key, [key])
self.custom_label_to_key_map[label] = key
if datatype == 'series':

View File

@ -11,7 +11,8 @@ __docformat__ = 'restructuredtext en'
import re, string, traceback
from calibre.constants import DEBUG
from calibre.utils.formatter_functions import formatter_functions
from calibre.utils.formatter_functions import formatter_functions, compile_user_function
from calibre.utils.config import tweaks
class _Parser(object):
LEX_OP = 1
@ -172,6 +173,130 @@ class _Parser(object):
self.error(_('expression is not function or constant'))
class _CompileParser(_Parser):
    '''
    Parser for general program mode templates that evaluates the program
    and, when asked to, emits equivalent Python source while doing so.

    If ``compile_text`` is a non-empty string (the header of the function
    being generated), each parse step appends tab-indented statements to it
    and ``program()`` leaves the finished source in ``self.compile_text``.
    The generated code keeps one list per nesting level in ``args``:
    ``args[n]`` collects the values produced at level ``n`` and the
    function returns ``args[0][0]``. If ``compile_text`` is empty or None
    the program is only evaluated and no source is produced.
    '''

    def __init__(self, val, prog, parent, compile_text):
        '''
        :param val: value bound to the ``$`` template local
        :param prog: pair ``(tokens, remainder)``; a non-empty remainder is
                     reported as a scan failure
        :param parent: the object whose ``kwargs``, ``book`` and ``locals``
                       are handed to the template functions; its ``locals``
                       attribute is replaced with a fresh dict
        :param compile_text: header of the function to generate, or a false
                             value to evaluate without generating code
        '''
        self.lex_pos = 0
        self.prog = prog[0]
        self.prog_len = len(self.prog)
        if prog[1] != '':
            self.error(_('failed to scan program. Invalid input {0}').format(prog[1]))
        self.parent = parent
        parent.locals = {'$':val}
        self.parent_kwargs = parent.kwargs
        self.parent_book = parent.book
        self.parent_locals = parent.locals
        self.compile_text = compile_text

    def program(self):
        '''
        Parse and evaluate the whole program and return its value.

        When compiling, the statements emitted during parsing are wrapped
        with the ``args`` initialisation (one slot per nesting level seen)
        and the final ``return``.
        '''
        if self.compile_text:
            # Park the header; statements accumulate in compile_text and the
            # header is put back in front once max_level is known.
            t = self.compile_text
            self.compile_text = '\n'
        self.max_level = 0
        val = self.statement()
        if not self.token_is_eof():
            self.error(_('syntax error - program ends before EOF'))
        if self.compile_text:
            t += "\targs=[[]" + ", None" * self.max_level + ']'
            self.compile_text = t + self.compile_text + "\treturn args[0][0]\n"
        return val

    def statement(self, level=0):
        '''
        Evaluate a semicolon-separated list of expressions and return the
        value of the last one evaluated.
        '''
        while True:
            val = self.expr(level)
            if self.token_is_eof():
                return val
            if not self.token_op_is_a_semicolon():
                return val
            if self.compile_text:
                # Only the last expression's value survives a semicolon, so
                # the generated code discards what this level has collected.
                self.compile_text += "\targs[%d] = list()\n"%(level,)
            self.consume()
            if self.token_is_eof():
                return val

    def expr(self, level):
        '''
        Evaluate one expression: an assignment, an identifier reference, a
        function call or a constant. ``level`` is the nesting depth and
        selects the ``args`` slot that generated code appends the result to.
        '''
        if self.compile_text:
            self.max_level = max(level, self.max_level)

        if self.token_is_id():
            funcs = formatter_functions().get_functions()
            # We have an identifier. Determine if it is a function
            name = self.token()
            if not self.token_op_is_a_lparen():
                if self.token_op_is_a_equals():
                    # classic assignment statement
                    self.consume()
                    cls = funcs['assign']
                    if self.compile_text:
                        self.compile_text += '\targs[%d] = list()\n'%(level+1,)
                    # The right-hand side must be parsed after the line
                    # above has been emitted, so keep this order.
                    rhs = self.expr(level+1)
                    val = cls.eval_(self.parent, self.parent_kwargs,
                                    self.parent_book, self.parent_locals,
                                    name, rhs)
                    if self.compile_text:
                        self.compile_text += "\tlocals['%s'] = args[%d][0]\n"%(name, level+1)
                        self.compile_text += "\targs[%d].append(args[%d][0])\n"%(level, level+1)
                    return val
                val = self.parent.locals.get(name, None)
                if val is None:
                    self.error(_('Unknown identifier ') + name)
                if self.compile_text:
                    self.compile_text += "\targs[%d].append(locals.get('%s'))\n"%(level, name)
                return val
            # We have a function.
            # Check if it is a known one. We do this here so error reporting is
            # better, as it can identify the tokens near the problem.
            if name not in funcs:
                self.error(_('unknown function {0}').format(name))
            # Eat the paren
            self.consume()
            args = list()
            if self.compile_text:
                self.compile_text += '\targs[%d] = list()\n'%(level+1, )
            while not self.token_op_is_a_rparen():
                if name == 'assign' and len(args) == 0:
                    # Must handle the lvalue semantics of the assign function.
                    # The first argument is the name of the destination, not
                    # the value.
                    if not self.token_is_id():
                        self.error('assign requires the first parameter be an id')
                    t = self.token()
                    args.append(t)
                    if self.compile_text:
                        self.compile_text += "\targs[%d].append('%s')\n"%(level+1, t)
                else:
                    # evaluate the argument (recursive call)
                    args.append(self.statement(level=level+1))
                if not self.token_op_is_a_comma():
                    break
                self.consume()
            if self.token() != ')':
                self.error(_('missing closing parenthesis'))

            # Evaluate the function
            cls = funcs[name]
            if cls.arg_count != -1 and len(args) != cls.arg_count:
                self.error('incorrect number of arguments for function {}'.format(name))
            if self.compile_text:
                self.compile_text += (
                    "\targs[%d].append(self.__funcs__['%s']"
                    ".evaluate(formatter, kwargs, book, locals, *args[%d]))\n")%(level, name, level+1)
            return cls.eval_(self.parent, self.parent_kwargs,
                             self.parent_book, self.parent_locals, *args)
        elif self.token_is_constant():
            # String or number
            v = self.token()
            if self.compile_text:
                # Emit the constant as a Python literal via repr(). Escaping
                # only backslashes and quotes would leave raw newlines and
                # tabs in the generated source, where line-based
                # post-processing of that source would corrupt the constant.
                self.compile_text += "\targs[%d].append(%r)\n"%(level, v)
            return v
        else:
            self.error(_('expression is not function or constant'))
compile_counter = 0
class TemplateFormatter(string.Formatter):
'''
Provides a format function that substitutes '' for any missing value
@ -249,6 +374,26 @@ class TemplateFormatter(string.Formatter):
# keep a cache of the lex'ed program under the theory that re-lexing
# is much more expensive than the cache lookup. This is certainly true
# for more than a few tokens, but it isn't clear for simple programs.
if tweaks['compile_gpm_templates']:
if column_name is not None and self.template_cache is not None:
lprog = self.template_cache.get(column_name, None)
if lprog:
return lprog.evaluate(self, self.kwargs, self.book, self.locals)
lprog = self.lex_scanner.scan(prog)
compile_text = ('__funcs__ = formatter_functions().get_functions()\n'
'def evaluate(self, formatter, kwargs, book, locals):\n'
)
else:
lprog = self.lex_scanner.scan(prog)
compile_text = None
parser = _CompileParser(val, lprog, self, compile_text)
val = parser.program()
if parser.compile_text:
global compile_counter
compile_counter += 1
f = compile_user_function("__A" + str(compile_counter), 'doc', -1, parser.compile_text)
self.template_cache[column_name] = f
else:
if column_name is not None and self.template_cache is not None:
lprog = self.template_cache.get(column_name, None)
if not lprog:
@ -257,7 +402,8 @@ class TemplateFormatter(string.Formatter):
else:
lprog = self.lex_scanner.scan(prog)
parser = _Parser(val, lprog, self)
return parser.program()
val = parser.program()
return val
################## Override parent classes methods #####################

View File

@ -11,6 +11,7 @@ __docformat__ = 'restructuredtext en'
import inspect, re, traceback
from calibre import human_readable
from calibre.constants import DEBUG
from calibre.utils.titlecase import titlecase
from calibre.utils.icu import capitalize, strcmp, sort_key
from calibre.utils.date import parse_date, format_date, now, UNDEFINED_DATE
@ -1156,11 +1157,14 @@ def compile_user_function(name, doc, arg_count, eval_func):
for line in eval_func.splitlines()])
prog = '''
from calibre.utils.formatter_functions import FormatterUserFunction
from calibre.utils.formatter_functions import formatter_functions
class UserFunction(FormatterUserFunction):
''' + func
locals = {}
exec prog in locals
cls = locals['UserFunction'](name, doc, arg_count, eval_func)
locals_ = {}
if DEBUG:
print prog
exec prog in locals_
cls = locals_['UserFunction'](name, doc, arg_count, eval_func)
return cls
def load_user_template_functions(funcs):