Template language: Speed up evaluation of general program mode templates by pre-compiling them to Python. If you experience errors with this optimization, you can turn it off via Preferences->Tweaks. Also includes other miscellaneous optimizations in evaluating templates with composite columns.

2025-07-09 03:04:10 -04:00 · 2012-03-15 13:26:59 +05:30 · 2012-03-15 13:26:59 +05:30 · b2c1d46ba6
commit b2c1d46ba6
parent b781ff54d0 01b55581a7
7 changed files with 248 additions and 46 deletions
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@@ -502,3 +502,13 @@ tweak_book_prefer = 'epub'
 # negative number to increase or decrease the font size.
 change_book_details_font_size_by = 0

+#: Compile General Program Mode templates to Python
+# Compiled general program mode templates are significantly faster than
+# interpreted templates. Setting this tweak to True causes calibre to compile
+# (in most cases) general program mode templates. Setting it to False causes
+# calibre to use the old behavior -- interpreting the templates. Set the tweak
+# to False if some compiled templates produce incorrect values.
+# Default:    compile_gpm_templates = True
+# No compile: compile_gpm_templates = False
+compile_gpm_templates = True
+
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@@ -9,16 +9,19 @@ import copy, traceback

 from calibre import prints
 from calibre.constants import DEBUG
-from calibre.ebooks.metadata.book import SC_COPYABLE_FIELDS
-from calibre.ebooks.metadata.book import SC_FIELDS_COPY_NOT_NULL
-from calibre.ebooks.metadata.book import STANDARD_METADATA_FIELDS
-from calibre.ebooks.metadata.book import TOP_LEVEL_IDENTIFIERS
-from calibre.ebooks.metadata.book import ALL_METADATA_FIELDS
+from calibre.ebooks.metadata.book import (SC_COPYABLE_FIELDS,
+        SC_FIELDS_COPY_NOT_NULL, STANDARD_METADATA_FIELDS,
+        TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS)
 from calibre.library.field_metadata import FieldMetadata
 from calibre.utils.date import isoformat, format_date
 from calibre.utils.icu import sort_key
 from calibre.utils.formatter import TemplateFormatter

+# Special sets used to optimize the performance of getting and setting
+# attributes on Metadata objects
+SIMPLE_GET = frozenset(STANDARD_METADATA_FIELDS - TOP_LEVEL_IDENTIFIERS)
+SIMPLE_SET = frozenset(SIMPLE_GET - {'identifiers'})
+
 def human_readable(size, precision=2):
    """ Convert a size in bytes into megabytes """
    return ('%.'+str(precision)+'f'+ 'MB') % ((size/(1024.*1024.)),)
@@ -136,6 +139,8 @@ class Metadata(object):

    def __getattribute__(self, field):
        _data = object.__getattribute__(self, '_data')
+        if field in SIMPLE_GET:
+            return _data.get(field, None)
        if field in TOP_LEVEL_IDENTIFIERS:
            return _data.get('identifiers').get(field, None)
        if field == 'language':
@@ -143,8 +148,6 @@ class Metadata(object):
                return _data.get('languages', [])[0]
            except:
                return NULL_VALUES['language']
-        if field in STANDARD_METADATA_FIELDS:
-            return _data.get(field, None)
        try:
            return object.__getattribute__(self, field)
        except AttributeError:
@@ -173,7 +176,11 @@ class Metadata(object):

    def __setattr__(self, field, val, extra=None):
        _data = object.__getattribute__(self, '_data')
-        if field in TOP_LEVEL_IDENTIFIERS:
+        if field in SIMPLE_SET:
+            if val is None:
+                val = copy.copy(NULL_VALUES.get(field, None))
+            _data[field] = val
+        elif field in TOP_LEVEL_IDENTIFIERS:
            field, val = self._clean_identifier(field, val)
            identifiers = _data['identifiers']
            identifiers.pop(field, None)
@@ -188,10 +195,6 @@ class Metadata(object):
            if val and val.lower() != 'und':
                langs = [val]
            _data['languages'] = langs
-        elif field in STANDARD_METADATA_FIELDS:
-            if val is None:
-                val = copy.copy(NULL_VALUES.get(field, None))
-            _data[field] = val
        elif field in _data['user_metadata'].iterkeys():
            _data['user_metadata'][field]['#value#'] = val
            _data['user_metadata'][field]['#extra#'] = extra
@@ -404,9 +407,19 @@ class Metadata(object):
        '''
        if metadata is None:
            traceback.print_stack()
+            return
+
+        um = {}
+        for key, meta in metadata.iteritems():
+            m = meta.copy()
+            if '#value#' not in m:
+                if m['datatype'] == 'text' and m['is_multiple']:
+                    m['#value#'] = []
                else:
-            for key in metadata:
-                self.set_user_metadata(key, metadata[key])
+                    m['#value#'] = None
+            um[key] = m
+        _data = object.__getattribute__(self, '_data')
+        _data['user_metadata'].update(um)

    def set_user_metadata(self, field, metadata):
        '''
@@ -420,9 +433,11 @@ class Metadata(object):
            if metadata is None:
                traceback.print_stack()
                return
-            m = {}
-            for k in metadata:
-                m[k] = copy.copy(metadata[k])
+            m = dict(metadata)
+            # Copying the elements should not be necessary. The objects referenced
+            # in the dict should not change. Of course, they can be replaced.
+            # for k,v in metadata.iteritems():
+            #     m[k] = copy.copy(v)
            if '#value#' not in m:
                if m['datatype'] == 'text' and m['is_multiple']:
                    m['#value#'] = []
--- a/src/calibre/library/custom_columns.py
+++ b/src/calibre/library/custom_columns.py
@@ -227,6 +227,25 @@ class CustomColumns(object):
        return self.conn.get('''SELECT extra FROM %s
                                WHERE book=?'''%lt, (idx,), all=False)

+    def get_custom_and_extra(self, idx, label=None, num=None, index_is_id=False):
+        if label is not None:
+            data = self.custom_column_label_map[label]
+        if num is not None:
+            data = self.custom_column_num_map[num]
+        idx = idx if index_is_id else self.id(idx)
+        row = self.data._data[idx]
+        ans = row[self.FIELD_MAP[data['num']]]
+        if data['is_multiple'] and data['datatype'] == 'text':
+            ans = ans.split(data['multiple_seps']['cache_to_list']) if ans else []
+            if data['display'].get('sort_alpha', False):
+                ans.sort(cmp=lambda x,y:cmp(x.lower(), y.lower()))
+        if data['datatype'] != 'series':
+            return (ans, None)
+        ign,lt = self.custom_table_names(data['num'])
+        extra = self.conn.get('''SELECT extra FROM %s
+                                 WHERE book=?'''%lt, (idx,), all=False)
+        return (ans, extra)
+
    # convenience methods for tag editing
    def get_custom_items_with_ids(self, label=None, num=None):
        if label is not None:
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -910,7 +910,15 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        Convenience method to return metadata as a :class:`Metadata` object.
        Note that the list of formats is not verified.
        '''
-        row = self.data._data[idx] if index_is_id else self.data[idx]
+        idx = idx if index_is_id else self.id(idx)
+        try:
+            row = self.data._data[idx]
+        except:
+            row = None
+
+        if row is None:
+            raise ValueError('No book with id: %d'%idx)
+
        fm = self.FIELD_MAP
        mi = Metadata(None, template_cache=self.formatter_template_cache)

@@ -948,14 +956,13 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        mi.book_size   = row[fm['size']]
        mi.ondevice_col= row[fm['ondevice']]
        mi.last_modified = row[fm['last_modified']]
-        id = idx if index_is_id else self.id(idx)
        formats = row[fm['formats']]
        mi.format_metadata = {}
        if not formats:
            good_formats = None
        else:
            formats = sorted(formats.split(','))
-            mi.format_metadata = FormatMetadata(self, id, formats)
+            mi.format_metadata = FormatMetadata(self, idx, formats)
            good_formats = FormatsList(formats, mi.format_metadata)
        mi.formats = good_formats
        tags = row[fm['tags']]
@@ -968,19 +975,18 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        if mi.series:
            mi.series_index = row[fm['series_index']]
        mi.rating = row[fm['rating']]
-        mi.set_identifiers(self.get_identifiers(id, index_is_id=True))
-        mi.application_id = id
-        mi.id = id
+        mi.set_identifiers(self.get_identifiers(idx, index_is_id=True))
+        mi.application_id = idx
+        mi.id = idx

+        mi.set_all_user_metadata(self.field_metadata.custom_field_metadata())
        for key, meta in self.field_metadata.custom_iteritems():
-            mi.set_user_metadata(key, meta)
            if meta['datatype'] == 'composite':
                mi.set(key, val=row[meta['rec_index']])
            else:
-                mi.set(key, val=self.get_custom(idx, label=meta['label'],
-                                            index_is_id=index_is_id),
-                        extra=self.get_custom_extra(idx, label=meta['label'],
-                                                    index_is_id=index_is_id))
+                val, extra = self.get_custom_and_extra(idx, label=meta['label'],
+                                                       index_is_id=True)
+                mi.set(key, val=val, extra=extra)

        user_cats = self.prefs['user_categories']
        user_cat_vals = {}
@@ -999,12 +1005,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

        if get_cover:
            if cover_as_data:
-                cdata = self.cover(id, index_is_id=True)
+                cdata = self.cover(idx, index_is_id=True)
                if cdata:
                    mi.cover_data = ('jpeg', cdata)
            else:
-                mi.cover = self.cover(id, index_is_id=True, as_path=True)
-        mi.has_cover = _('Yes') if self.has_cover(id) else ''
+                mi.cover = self.cover(idx, index_is_id=True, as_path=True)
+        mi.has_cover = _('Yes') if self.has_cover(idx) else ''
        return mi

    def has_book(self, mi):
--- a/src/calibre/library/field_metadata.py
+++ b/src/calibre/library/field_metadata.py
@@ -388,6 +388,7 @@ class FieldMetadata(dict):
    def __init__(self):
        self._field_metadata = copy.deepcopy(self._field_metadata_prototype)
        self._tb_cats = OrderedDict()
+        self._tb_custom_fields = {}
        self._search_term_map = {}
        self.custom_label_to_key_map = {}
        for k,v in self._field_metadata:
@@ -477,10 +478,8 @@ class FieldMetadata(dict):
            yield (key, self._tb_cats[key])

    def custom_iteritems(self):
-        for key in self._tb_cats:
-            fm = self._tb_cats[key]
-            if fm['is_custom']:
-                yield (key, self._tb_cats[key])
+        for key, meta in self._tb_custom_fields.iteritems():
+            yield (key, meta)

    def items(self):
        return list(self.iteritems())
@@ -516,6 +515,8 @@ class FieldMetadata(dict):
        return l

    def custom_field_metadata(self, include_composites=True):
+        if include_composites:
+            return self._tb_custom_fields
        l = {}
        for k in self.custom_field_keys(include_composites):
            l[k] = self._tb_cats[k]
@@ -537,6 +538,7 @@ class FieldMetadata(dict):
                             'is_custom':True,     'is_category':is_category,
                             'link_column':'value','category_sort':'value',
                             'is_csp' : is_csp,     'is_editable': is_editable,}
+        self._tb_custom_fields[key] = self._tb_cats[key]
        self._add_search_terms_to_map(key, [key])
        self.custom_label_to_key_map[label] = key
        if datatype == 'series':
--- a/src/calibre/utils/formatter.py
+++ b/src/calibre/utils/formatter.py
@@ -11,7 +11,8 @@ __docformat__ = 'restructuredtext en'
 import re, string, traceback

 from calibre.constants import DEBUG
-from calibre.utils.formatter_functions import formatter_functions
+from calibre.utils.formatter_functions import formatter_functions, compile_user_function
+from calibre.utils.config import tweaks

 class _Parser(object):
    LEX_OP  = 1
@@ -172,6 +173,130 @@ class _Parser(object):
            self.error(_('expression is not function or constant'))


+class _CompileParser(_Parser):
+    def __init__(self, val, prog, parent, compile_text):
+        self.lex_pos = 0
+        self.prog = prog[0]
+        self.prog_len = len(self.prog)
+        if prog[1] != '':
+            self.error(_('failed to scan program. Invalid input {0}').format(prog[1]))
+        self.parent = parent
+        parent.locals = {'$':val}
+        self.parent_kwargs = parent.kwargs
+        self.parent_book = parent.book
+        self.parent_locals = parent.locals
+        self.compile_text = compile_text
+
+    def program(self):
+        if self.compile_text:
+            t = self.compile_text
+            self.compile_text = '\n'
+        self.max_level = 0
+        val = self.statement()
+        if not self.token_is_eof():
+            self.error(_('syntax error - program ends before EOF'))
+        if self.compile_text:
+            t += "\targs=[[]"
+            for i in range(0, self.max_level):
+                t += ", None"
+            t += ']'
+            self.compile_text = t + self.compile_text + "\treturn args[0][0]\n"
+        return val
+
+    def statement(self, level=0):
+        while True:
+            val = self.expr(level)
+            if self.token_is_eof():
+                return val
+            if not self.token_op_is_a_semicolon():
+                return val
+            if self.compile_text:
+                self.compile_text += "\targs[%d] = list()\n"%(level,)
+            self.consume()
+            if self.token_is_eof():
+                return val
+
+    def expr(self, level):
+        if self.compile_text:
+            self.max_level = max(level, self.max_level)
+
+        if self.token_is_id():
+            funcs = formatter_functions().get_functions()
+            # We have an identifier. Determine if it is a function
+            id = self.token()
+            if not self.token_op_is_a_lparen():
+                if self.token_op_is_a_equals():
+                    # classic assignment statement
+                    self.consume()
+                    cls = funcs['assign']
+                    if self.compile_text:
+                        self.compile_text += '\targs[%d] = list()\n'%(level+1,)
+                    val = cls.eval_(self.parent, self.parent_kwargs,
+                                    self.parent_book, self.parent_locals, id, self.expr(level+1))
+                    if self.compile_text:
+                        self.compile_text += "\tlocals['%s'] = args[%d][0]\n"%(id, level+1)
+                        self.compile_text += "\targs[%d].append(args[%d][0])\n"%(level, level+1)
+                    return val
+                val = self.parent.locals.get(id, None)
+                if val is None:
+                    self.error(_('Unknown identifier ') + id)
+                if self.compile_text:
+                    self.compile_text += "\targs[%d].append(locals.get('%s'))\n"%(level, id)
+                return val
+            # We have a function.
+            # Check if it is a known one. We do this here so error reporting is
+            # better, as it can identify the tokens near the problem.
+            if id not in funcs:
+                self.error(_('unknown function {0}').format(id))
+
+            # Eat the paren
+            self.consume()
+            args = list()
+            if self.compile_text:
+                self.compile_text += '\targs[%d] = list()\n'%(level+1, )
+            while not self.token_op_is_a_rparen():
+                if id == 'assign' and len(args) == 0:
+                    # Must handle the lvalue semantics of the assign function.
+                    # The first argument is the name of the destination, not
+                    # the value.
+                    if not self.token_is_id():
+                        self.error('assign requires the first parameter be an id')
+                    t = self.token()
+                    args.append(t)
+                    if self.compile_text:
+                        self.compile_text += "\targs[%d].append('%s')\n"%(level+1, t)
+                else:
+                    # evaluate the argument (recursive call)
+                    args.append(self.statement(level=level+1))
+                if not self.token_op_is_a_comma():
+                    break
+                self.consume()
+            if self.token() != ')':
+                self.error(_('missing closing parenthesis'))
+
+            # Evaluate the function
+            cls = funcs[id]
+            if cls.arg_count != -1 and len(args) != cls.arg_count:
+                self.error('incorrect number of arguments for function {}'.format(id))
+            if self.compile_text:
+                self.compile_text += (
+                    "\targs[%d].append(self.__funcs__['%s']"
+                    ".evaluate(formatter, kwargs, book, locals, *args[%d]))\n")%(level, id, level+1)
+            return cls.eval_(self.parent, self.parent_kwargs,
+                            self.parent_book, self.parent_locals, *args)
+        elif self.token_is_constant():
+            # String or number
+            v = self.token()
+            if self.compile_text:
+                tv = v.replace("\\", "\\\\")
+                tv = tv.replace("'", "\\'")
+                self.compile_text += "\targs[%d].append('%s')\n"%(level, tv)
+            return v
+        else:
+            self.error(_('expression is not function or constant'))
+
+compile_counter = 0
+
 class TemplateFormatter(string.Formatter):
    '''
    Provides a format function that substitutes '' for any missing value
@@ -249,6 +374,26 @@ class TemplateFormatter(string.Formatter):
        # keep a cache of the lex'ed program under the theory that re-lexing
        # is much more expensive than the cache lookup. This is certainly true
        # for more than a few tokens, but it isn't clear for simple programs.
+        if tweaks['compile_gpm_templates']:
+            if column_name is not None and self.template_cache is not None:
+                lprog = self.template_cache.get(column_name, None)
+                if lprog:
+                    return lprog.evaluate(self, self.kwargs, self.book, self.locals)
+                lprog = self.lex_scanner.scan(prog)
+                compile_text = ('__funcs__ = formatter_functions().get_functions()\n'
+                                'def evaluate(self, formatter, kwargs, book, locals):\n'
+                                )
+            else:
+                lprog = self.lex_scanner.scan(prog)
+                compile_text = None
+            parser = _CompileParser(val, lprog, self, compile_text)
+            val = parser.program()
+            if parser.compile_text:
+                global compile_counter
+                compile_counter += 1
+                f = compile_user_function("__A" + str(compile_counter), 'doc', -1, parser.compile_text)
+                self.template_cache[column_name] = f
+        else:
            if column_name is not None and self.template_cache is not None:
                lprog = self.template_cache.get(column_name, None)
                if not lprog:
@@ -257,7 +402,8 @@ class TemplateFormatter(string.Formatter):
            else:
                lprog = self.lex_scanner.scan(prog)
            parser = _Parser(val, lprog, self)
-        return parser.program()
+            val = parser.program()
+        return val

    ################## Override parent classes methods #####################

--- a/src/calibre/utils/formatter_functions.py
+++ b/src/calibre/utils/formatter_functions.py
@@ -11,6 +11,7 @@ __docformat__ = 'restructuredtext en'
 import inspect, re, traceback

 from calibre import human_readable
+from calibre.constants import DEBUG
 from calibre.utils.titlecase import titlecase
 from calibre.utils.icu import capitalize, strcmp, sort_key
 from calibre.utils.date import parse_date, format_date, now, UNDEFINED_DATE
@@ -1156,11 +1157,14 @@ def compile_user_function(name, doc, arg_count, eval_func):
                                   for line in eval_func.splitlines()])
    prog = '''
 from calibre.utils.formatter_functions import FormatterUserFunction
+from calibre.utils.formatter_functions import formatter_functions
 class UserFunction(FormatterUserFunction):
 ''' + func
-    locals = {}
-    exec prog in locals
-    cls = locals['UserFunction'](name, doc, arg_count, eval_func)
+    locals_ = {}
+    if DEBUG:
+        print prog
+    exec prog in locals_
+    cls = locals_['UserFunction'](name, doc, arg_count, eval_func)
    return cls

 def load_user_template_functions(funcs):