mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Template language: Speed up evaluation of general program mode templates by pre-compiling them to Python. If you experience errors with this optimization, you can turn it off via Preferences->Tweaks. This commit also includes other miscellaneous optimizations for evaluating templates with composite columns.
This commit is contained in:
commit
b2c1d46ba6
@ -502,3 +502,13 @@ tweak_book_prefer = 'epub'
|
||||
# negative number to increase or decrease the font size.
|
||||
change_book_details_font_size_by = 0
|
||||
|
||||
#: Compile General Program Mode templates to Python
|
||||
# Compiled general program mode templates are significantly faster than
|
||||
# interpreted templates. Setting this tweak to True causes calibre to compile
|
||||
# (in most cases) general program mode templates. Setting it to False causes
|
||||
# calibre to use the old behavior -- interpreting the templates. Set the tweak
|
||||
# to False if some compiled templates produce incorrect values.
|
||||
# Default: compile_gpm_templates = True
|
||||
# No compile: compile_gpm_templates = False
|
||||
compile_gpm_templates = True
|
||||
|
||||
|
@ -9,16 +9,19 @@ import copy, traceback
|
||||
|
||||
from calibre import prints
|
||||
from calibre.constants import DEBUG
|
||||
from calibre.ebooks.metadata.book import SC_COPYABLE_FIELDS
|
||||
from calibre.ebooks.metadata.book import SC_FIELDS_COPY_NOT_NULL
|
||||
from calibre.ebooks.metadata.book import STANDARD_METADATA_FIELDS
|
||||
from calibre.ebooks.metadata.book import TOP_LEVEL_IDENTIFIERS
|
||||
from calibre.ebooks.metadata.book import ALL_METADATA_FIELDS
|
||||
from calibre.ebooks.metadata.book import (SC_COPYABLE_FIELDS,
|
||||
SC_FIELDS_COPY_NOT_NULL, STANDARD_METADATA_FIELDS,
|
||||
TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS)
|
||||
from calibre.library.field_metadata import FieldMetadata
|
||||
from calibre.utils.date import isoformat, format_date
|
||||
from calibre.utils.icu import sort_key
|
||||
from calibre.utils.formatter import TemplateFormatter
|
||||
|
||||
# Special sets used to optimize the performance of getting and setting
# attributes on Metadata objects. Both are built once at import time so that
# attribute access only needs a single frozenset membership test.

# Standard metadata fields excluding the top-level identifier names.
SIMPLE_GET = frozenset(STANDARD_METADATA_FIELDS - TOP_LEVEL_IDENTIFIERS)

# The SIMPLE_GET fields with 'identifiers' itself removed.
SIMPLE_SET = frozenset(SIMPLE_GET - {'identifiers'})
|
||||
|
||||
def human_readable(size, precision=2):
    """ Format a byte count as megabytes, e.g. 1048576 -> '1.00MB' """
    # One megabyte is taken to be 1024 * 1024 bytes; the float literals keep
    # the division a true division for integer sizes as well.
    bytes_per_megabyte = 1024. * 1024.
    megabytes = size / bytes_per_megabyte
    # Build a '%.<precision>fMB' format string from the requested precision.
    template = '%.' + str(precision) + 'f' + 'MB'
    return template % (megabytes,)
|
||||
@ -136,6 +139,8 @@ class Metadata(object):
|
||||
|
||||
def __getattribute__(self, field):
|
||||
_data = object.__getattribute__(self, '_data')
|
||||
if field in SIMPLE_GET:
|
||||
return _data.get(field, None)
|
||||
if field in TOP_LEVEL_IDENTIFIERS:
|
||||
return _data.get('identifiers').get(field, None)
|
||||
if field == 'language':
|
||||
@ -143,8 +148,6 @@ class Metadata(object):
|
||||
return _data.get('languages', [])[0]
|
||||
except:
|
||||
return NULL_VALUES['language']
|
||||
if field in STANDARD_METADATA_FIELDS:
|
||||
return _data.get(field, None)
|
||||
try:
|
||||
return object.__getattribute__(self, field)
|
||||
except AttributeError:
|
||||
@ -173,7 +176,11 @@ class Metadata(object):
|
||||
|
||||
def __setattr__(self, field, val, extra=None):
|
||||
_data = object.__getattribute__(self, '_data')
|
||||
if field in TOP_LEVEL_IDENTIFIERS:
|
||||
if field in SIMPLE_SET:
|
||||
if val is None:
|
||||
val = copy.copy(NULL_VALUES.get(field, None))
|
||||
_data[field] = val
|
||||
elif field in TOP_LEVEL_IDENTIFIERS:
|
||||
field, val = self._clean_identifier(field, val)
|
||||
identifiers = _data['identifiers']
|
||||
identifiers.pop(field, None)
|
||||
@ -188,10 +195,6 @@ class Metadata(object):
|
||||
if val and val.lower() != 'und':
|
||||
langs = [val]
|
||||
_data['languages'] = langs
|
||||
elif field in STANDARD_METADATA_FIELDS:
|
||||
if val is None:
|
||||
val = copy.copy(NULL_VALUES.get(field, None))
|
||||
_data[field] = val
|
||||
elif field in _data['user_metadata'].iterkeys():
|
||||
_data['user_metadata'][field]['#value#'] = val
|
||||
_data['user_metadata'][field]['#extra#'] = extra
|
||||
@ -404,9 +407,19 @@ class Metadata(object):
|
||||
'''
|
||||
if metadata is None:
|
||||
traceback.print_stack()
|
||||
return
|
||||
|
||||
um = {}
|
||||
for key, meta in metadata.iteritems():
|
||||
m = meta.copy()
|
||||
if '#value#' not in m:
|
||||
if m['datatype'] == 'text' and m['is_multiple']:
|
||||
m['#value#'] = []
|
||||
else:
|
||||
for key in metadata:
|
||||
self.set_user_metadata(key, metadata[key])
|
||||
m['#value#'] = None
|
||||
um[key] = m
|
||||
_data = object.__getattribute__(self, '_data')
|
||||
_data['user_metadata'].update(um)
|
||||
|
||||
def set_user_metadata(self, field, metadata):
|
||||
'''
|
||||
@ -420,9 +433,11 @@ class Metadata(object):
|
||||
if metadata is None:
|
||||
traceback.print_stack()
|
||||
return
|
||||
m = {}
|
||||
for k in metadata:
|
||||
m[k] = copy.copy(metadata[k])
|
||||
m = dict(metadata)
|
||||
# Copying the elements should not be necessary. The objects referenced
|
||||
# in the dict should not change. Of course, they can be replaced.
|
||||
# for k,v in metadata.iteritems():
|
||||
# m[k] = copy.copy(v)
|
||||
if '#value#' not in m:
|
||||
if m['datatype'] == 'text' and m['is_multiple']:
|
||||
m['#value#'] = []
|
||||
|
@ -227,6 +227,25 @@ class CustomColumns(object):
|
||||
return self.conn.get('''SELECT extra FROM %s
|
||||
WHERE book=?'''%lt, (idx,), all=False)
|
||||
|
||||
def get_custom_and_extra(self, idx, label=None, num=None, index_is_id=False):
    '''
    Return a ``(value, extra)`` tuple for one custom column of one book.

    :param idx: Book id when ``index_is_id`` is True, otherwise an index
                that is converted to an id with ``self.id(idx)``.
    :param label: Key into ``self.custom_column_label_map``.
    :param num: Key into ``self.custom_column_num_map``. When both ``label``
                and ``num`` are given, ``num`` wins.
    :param index_is_id: Whether ``idx`` is already a book id.
    :return: ``(value, extra)``. ``extra`` is None unless the column's
             datatype is ``'series'``, in which case it is whatever the
             ``SELECT extra`` query on the column's link table returns.
    :raises ValueError: if neither ``label`` nor ``num`` is supplied.
    '''
    if label is None and num is None:
        # Without this guard the lookups below are both skipped and the
        # first use of ``data`` fails with an opaque UnboundLocalError.
        raise ValueError('Either label or num must be specified')
    if label is not None:
        data = self.custom_column_label_map[label]
    if num is not None:
        data = self.custom_column_num_map[num]
    idx = idx if index_is_id else self.id(idx)
    row = self.data._data[idx]
    ans = row[self.FIELD_MAP[data['num']]]
    if data['is_multiple'] and data['datatype'] == 'text':
        # Multi-valued text columns are stored as one separator-joined
        # string; an empty/None value becomes an empty list.
        ans = ans.split(data['multiple_seps']['cache_to_list']) if ans else []
        if data['display'].get('sort_alpha', False):
            # Case-insensitive, stable sort. A key function gives the same
            # ordering as the former cmp= comparator and is also valid on
            # Python 3, where list.sort() no longer accepts cmp.
            ans.sort(key=lambda x: x.lower())
    if data['datatype'] != 'series':
        return (ans, None)
    # Only series columns carry an "extra" value; it is read from the
    # second table name returned by custom_table_names().
    ign, lt = self.custom_table_names(data['num'])
    extra = self.conn.get('''SELECT extra FROM %s
                             WHERE book=?'''%lt, (idx,), all=False)
    return (ans, extra)
|
||||
|
||||
# convenience methods for tag editing
|
||||
def get_custom_items_with_ids(self, label=None, num=None):
|
||||
if label is not None:
|
||||
|
@ -910,7 +910,15 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
||||
Convenience method to return metadata as a :class:`Metadata` object.
|
||||
Note that the list of formats is not verified.
|
||||
'''
|
||||
row = self.data._data[idx] if index_is_id else self.data[idx]
|
||||
idx = idx if index_is_id else self.id(idx)
|
||||
try:
|
||||
row = self.data._data[idx]
|
||||
except:
|
||||
row = None
|
||||
|
||||
if row is None:
|
||||
raise ValueError('No book with id: %d'%idx)
|
||||
|
||||
fm = self.FIELD_MAP
|
||||
mi = Metadata(None, template_cache=self.formatter_template_cache)
|
||||
|
||||
@ -948,14 +956,13 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
||||
mi.book_size = row[fm['size']]
|
||||
mi.ondevice_col= row[fm['ondevice']]
|
||||
mi.last_modified = row[fm['last_modified']]
|
||||
id = idx if index_is_id else self.id(idx)
|
||||
formats = row[fm['formats']]
|
||||
mi.format_metadata = {}
|
||||
if not formats:
|
||||
good_formats = None
|
||||
else:
|
||||
formats = sorted(formats.split(','))
|
||||
mi.format_metadata = FormatMetadata(self, id, formats)
|
||||
mi.format_metadata = FormatMetadata(self, idx, formats)
|
||||
good_formats = FormatsList(formats, mi.format_metadata)
|
||||
mi.formats = good_formats
|
||||
tags = row[fm['tags']]
|
||||
@ -968,19 +975,18 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
||||
if mi.series:
|
||||
mi.series_index = row[fm['series_index']]
|
||||
mi.rating = row[fm['rating']]
|
||||
mi.set_identifiers(self.get_identifiers(id, index_is_id=True))
|
||||
mi.application_id = id
|
||||
mi.id = id
|
||||
mi.set_identifiers(self.get_identifiers(idx, index_is_id=True))
|
||||
mi.application_id = idx
|
||||
mi.id = idx
|
||||
|
||||
mi.set_all_user_metadata(self.field_metadata.custom_field_metadata())
|
||||
for key, meta in self.field_metadata.custom_iteritems():
|
||||
mi.set_user_metadata(key, meta)
|
||||
if meta['datatype'] == 'composite':
|
||||
mi.set(key, val=row[meta['rec_index']])
|
||||
else:
|
||||
mi.set(key, val=self.get_custom(idx, label=meta['label'],
|
||||
index_is_id=index_is_id),
|
||||
extra=self.get_custom_extra(idx, label=meta['label'],
|
||||
index_is_id=index_is_id))
|
||||
val, extra = self.get_custom_and_extra(idx, label=meta['label'],
|
||||
index_is_id=True)
|
||||
mi.set(key, val=val, extra=extra)
|
||||
|
||||
user_cats = self.prefs['user_categories']
|
||||
user_cat_vals = {}
|
||||
@ -999,12 +1005,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
||||
|
||||
if get_cover:
|
||||
if cover_as_data:
|
||||
cdata = self.cover(id, index_is_id=True)
|
||||
cdata = self.cover(idx, index_is_id=True)
|
||||
if cdata:
|
||||
mi.cover_data = ('jpeg', cdata)
|
||||
else:
|
||||
mi.cover = self.cover(id, index_is_id=True, as_path=True)
|
||||
mi.has_cover = _('Yes') if self.has_cover(id) else ''
|
||||
mi.cover = self.cover(idx, index_is_id=True, as_path=True)
|
||||
mi.has_cover = _('Yes') if self.has_cover(idx) else ''
|
||||
return mi
|
||||
|
||||
def has_book(self, mi):
|
||||
|
@ -388,6 +388,7 @@ class FieldMetadata(dict):
|
||||
def __init__(self):
|
||||
self._field_metadata = copy.deepcopy(self._field_metadata_prototype)
|
||||
self._tb_cats = OrderedDict()
|
||||
self._tb_custom_fields = {}
|
||||
self._search_term_map = {}
|
||||
self.custom_label_to_key_map = {}
|
||||
for k,v in self._field_metadata:
|
||||
@ -477,10 +478,8 @@ class FieldMetadata(dict):
|
||||
yield (key, self._tb_cats[key])
|
||||
|
||||
def custom_iteritems(self):
|
||||
for key in self._tb_cats:
|
||||
fm = self._tb_cats[key]
|
||||
if fm['is_custom']:
|
||||
yield (key, self._tb_cats[key])
|
||||
for key, meta in self._tb_custom_fields.iteritems():
|
||||
yield (key, meta)
|
||||
|
||||
def items(self):
|
||||
return list(self.iteritems())
|
||||
@ -516,6 +515,8 @@ class FieldMetadata(dict):
|
||||
return l
|
||||
|
||||
def custom_field_metadata(self, include_composites=True):
|
||||
if include_composites:
|
||||
return self._tb_custom_fields
|
||||
l = {}
|
||||
for k in self.custom_field_keys(include_composites):
|
||||
l[k] = self._tb_cats[k]
|
||||
@ -537,6 +538,7 @@ class FieldMetadata(dict):
|
||||
'is_custom':True, 'is_category':is_category,
|
||||
'link_column':'value','category_sort':'value',
|
||||
'is_csp' : is_csp, 'is_editable': is_editable,}
|
||||
self._tb_custom_fields[key] = self._tb_cats[key]
|
||||
self._add_search_terms_to_map(key, [key])
|
||||
self.custom_label_to_key_map[label] = key
|
||||
if datatype == 'series':
|
||||
|
@ -11,7 +11,8 @@ __docformat__ = 'restructuredtext en'
|
||||
import re, string, traceback
|
||||
|
||||
from calibre.constants import DEBUG
|
||||
from calibre.utils.formatter_functions import formatter_functions
|
||||
from calibre.utils.formatter_functions import formatter_functions, compile_user_function
|
||||
from calibre.utils.config import tweaks
|
||||
|
||||
class _Parser(object):
|
||||
LEX_OP = 1
|
||||
@ -172,6 +173,130 @@ class _Parser(object):
|
||||
self.error(_('expression is not function or constant'))
|
||||
|
||||
|
||||
class _CompileParser(_Parser):
    '''
    Variant of :class:`_Parser` that evaluates a general program mode
    template and, when ``compile_text`` is non-empty, simultaneously emits
    Python source text that performs the same evaluation.

    The emitted code keeps one list per nesting level in a list named
    ``args``: ``args[level]`` collects the values produced at that level and
    ``args[level+1]`` collects the arguments of a function call made at
    ``level``. The emitted lines refer to the names ``self``, ``formatter``,
    ``kwargs``, ``book`` and ``locals``; these are expected to be defined by
    the header text the caller passes in as ``compile_text``.
    '''

    def __init__(self, val, prog, parent, compile_text):
        '''
        :param val: Initial value, stored in the parent's locals under ``'$'``.
        :param prog: Pair of (lexed program, unscanned remainder); a
                     non-empty remainder is reported via ``self.error``.
        :param parent: The formatter; its ``kwargs``, ``book`` and ``locals``
                       are passed to every function evaluation.
        :param compile_text: Text to which the generated function body is
                             appended, or a false value to disable code
                             generation.
        '''
        self.lex_pos = 0
        self.prog = prog[0]
        self.prog_len = len(self.prog)
        if prog[1] != '':
            self.error(_('failed to scan program. Invalid input {0}').format(prog[1]))
        self.parent = parent
        # Note: this replaces the parent's locals dict for every program run.
        parent.locals = {'$':val}
        self.parent_kwargs = parent.kwargs
        self.parent_book = parent.book
        self.parent_locals = parent.locals
        self.compile_text = compile_text

    def program(self):
        '''
        Evaluate the whole program and return its value. When compiling, on
        return ``self.compile_text`` holds the original header followed by
        the ``args`` initialisation, the generated body and a final
        ``return args[0][0]`` line.
        '''
        if self.compile_text:
            # Set the header aside and accumulate the body separately, since
            # the size of ``args`` is only known after parsing. The body is
            # seeded with '\n' so compile_text stays truthy while parsing.
            t = self.compile_text
            self.compile_text = '\n'
        self.max_level = 0
        val = self.statement()
        if not self.token_is_eof():
            self.error(_('syntax error - program ends before EOF'))
        if self.compile_text:
            # args[0] starts as an empty list; one None placeholder is added
            # for each deeper nesting level that was reached.
            t += "\targs=[[]"
            for i in range(0, self.max_level):
                t += ", None"
            t += ']'
            self.compile_text = t + self.compile_text + "\treturn args[0][0]\n"
        return val

    def statement(self, level=0):
        '''
        Evaluate a semicolon-separated sequence of expressions at ``level``
        and return the value of the last one evaluated.
        '''
        while True:
            val = self.expr(level)
            if self.token_is_eof():
                return val
            if not self.token_op_is_a_semicolon():
                return val
            if self.compile_text:
                # A semicolon follows: reset this level's list so the value
                # just appended is discarded in the generated code.
                self.compile_text += "\targs[%d] = list()\n"%(level,)
            self.consume()
            if self.token_is_eof():
                return val

    def expr(self, level):
        '''
        Evaluate one expression (assignment, identifier reference, function
        call or constant) and return its value. When compiling, also emit
        code that appends the same value to ``args[level]``.
        '''
        if self.compile_text:
            # Track the deepest level referenced so program() can size args.
            self.max_level = max(level, self.max_level)

        if self.token_is_id():
            funcs = formatter_functions().get_functions()
            # We have an identifier. Determine if it is a function
            id = self.token()
            if not self.token_op_is_a_lparen():
                if self.token_op_is_a_equals():
                    # classic assignment statement
                    self.consume()
                    cls = funcs['assign']
                    if self.compile_text:
                        self.compile_text += '\targs[%d] = list()\n'%(level+1,)
                    # The right-hand side is evaluated one level deeper, so
                    # its generated value ends up in args[level+1][0].
                    val = cls.eval_(self.parent, self.parent_kwargs,
                                    self.parent_book, self.parent_locals, id, self.expr(level+1))
                    if self.compile_text:
                        self.compile_text += "\tlocals['%s'] = args[%d][0]\n"%(id, level+1)
                        self.compile_text += "\targs[%d].append(args[%d][0])\n"%(level, level+1)
                    return val
                # Plain identifier: look up a previously assigned local.
                val = self.parent.locals.get(id, None)
                if val is None:
                    self.error(_('Unknown identifier ') + id)
                if self.compile_text:
                    # NOTE(review): the identifier is interpolated unescaped
                    # into a quoted string -- presumably the lexer restricts
                    # identifier characters; confirm.
                    self.compile_text += "\targs[%d].append(locals.get('%s'))\n"%(level, id)
                return val
            # We have a function.
            # Check if it is a known one. We do this here so error reporting is
            # better, as it can identify the tokens near the problem.
            if id not in funcs:
                self.error(_('unknown function {0}').format(id))

            # Eat the paren
            self.consume()
            args = list()
            if self.compile_text:
                # Fresh argument list for this call in the generated code.
                self.compile_text += '\targs[%d] = list()\n'%(level+1, )
            while not self.token_op_is_a_rparen():
                if id == 'assign' and len(args) == 0:
                    # Must handle the lvalue semantics of the assign function.
                    # The first argument is the name of the destination, not
                    # the value.
                    if not self.token_is_id():
                        self.error('assign requires the first parameter be an id')
                    t = self.token()
                    args.append(t)
                    if self.compile_text:
                        self.compile_text += "\targs[%d].append('%s')\n"%(level+1, t)
                else:
                    # evaluate the argument (recursive call)
                    args.append(self.statement(level=level+1))
                if not self.token_op_is_a_comma():
                    break
                self.consume()
            if self.token() != ')':
                self.error(_('missing closing parenthesis'))

            # Evaluate the function
            cls = funcs[id]
            # An arg_count of -1 means the argument count is not checked.
            if cls.arg_count != -1 and len(args) != cls.arg_count:
                self.error('incorrect number of arguments for function {}'.format(id))
            if self.compile_text:
                # The generated call looks the function up by name in
                # self.__funcs__ and passes the arguments collected in
                # args[level+1].
                self.compile_text += (
                    "\targs[%d].append(self.__funcs__['%s']"
                    ".evaluate(formatter, kwargs, book, locals, *args[%d]))\n")%(level, id, level+1)
            return cls.eval_(self.parent, self.parent_kwargs,
                             self.parent_book, self.parent_locals, *args)
        elif self.token_is_constant():
            # String or number
            v = self.token()
            if self.compile_text:
                # Escape backslashes first, then single quotes, so the
                # constant can be embedded in a single-quoted literal.
                tv = v.replace("\\", "\\\\")
                tv = tv.replace("'", "\\'")
                self.compile_text += "\targs[%d].append('%s')\n"%(level, tv)
            return v
        else:
            self.error(_('expression is not function or constant'))
|
||||
|
||||
compile_counter = 0
|
||||
|
||||
class TemplateFormatter(string.Formatter):
|
||||
'''
|
||||
Provides a format function that substitutes '' for any missing value
|
||||
@ -249,6 +374,26 @@ class TemplateFormatter(string.Formatter):
|
||||
# keep a cache of the lex'ed program under the theory that re-lexing
|
||||
# is much more expensive than the cache lookup. This is certainly true
|
||||
# for more than a few tokens, but it isn't clear for simple programs.
|
||||
if tweaks['compile_gpm_templates']:
|
||||
if column_name is not None and self.template_cache is not None:
|
||||
lprog = self.template_cache.get(column_name, None)
|
||||
if lprog:
|
||||
return lprog.evaluate(self, self.kwargs, self.book, self.locals)
|
||||
lprog = self.lex_scanner.scan(prog)
|
||||
compile_text = ('__funcs__ = formatter_functions().get_functions()\n'
|
||||
'def evaluate(self, formatter, kwargs, book, locals):\n'
|
||||
)
|
||||
else:
|
||||
lprog = self.lex_scanner.scan(prog)
|
||||
compile_text = None
|
||||
parser = _CompileParser(val, lprog, self, compile_text)
|
||||
val = parser.program()
|
||||
if parser.compile_text:
|
||||
global compile_counter
|
||||
compile_counter += 1
|
||||
f = compile_user_function("__A" + str(compile_counter), 'doc', -1, parser.compile_text)
|
||||
self.template_cache[column_name] = f
|
||||
else:
|
||||
if column_name is not None and self.template_cache is not None:
|
||||
lprog = self.template_cache.get(column_name, None)
|
||||
if not lprog:
|
||||
@ -257,7 +402,8 @@ class TemplateFormatter(string.Formatter):
|
||||
else:
|
||||
lprog = self.lex_scanner.scan(prog)
|
||||
parser = _Parser(val, lprog, self)
|
||||
return parser.program()
|
||||
val = parser.program()
|
||||
return val
|
||||
|
||||
################## Override parent classes methods #####################
|
||||
|
||||
|
@ -11,6 +11,7 @@ __docformat__ = 'restructuredtext en'
|
||||
import inspect, re, traceback
|
||||
|
||||
from calibre import human_readable
|
||||
from calibre.constants import DEBUG
|
||||
from calibre.utils.titlecase import titlecase
|
||||
from calibre.utils.icu import capitalize, strcmp, sort_key
|
||||
from calibre.utils.date import parse_date, format_date, now, UNDEFINED_DATE
|
||||
@ -1156,11 +1157,14 @@ def compile_user_function(name, doc, arg_count, eval_func):
|
||||
for line in eval_func.splitlines()])
|
||||
prog = '''
|
||||
from calibre.utils.formatter_functions import FormatterUserFunction
|
||||
from calibre.utils.formatter_functions import formatter_functions
|
||||
class UserFunction(FormatterUserFunction):
|
||||
''' + func
|
||||
locals = {}
|
||||
exec prog in locals
|
||||
cls = locals['UserFunction'](name, doc, arg_count, eval_func)
|
||||
locals_ = {}
|
||||
if DEBUG:
|
||||
print prog
|
||||
exec prog in locals_
|
||||
cls = locals_['UserFunction'](name, doc, arg_count, eval_func)
|
||||
return cls
|
||||
|
||||
def load_user_template_functions(funcs):
|
||||
|
Loading…
x
Reference in New Issue
Block a user