From 06904f92a5bbdc38add27b0078a054b365f47378 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Wed, 14 Mar 2012 15:34:40 +0100
Subject: [PATCH 1/3] Compile General Program Mode templates to Python,
 controlled by the tweak "compile_gpm_templates" that defaults to True.

Several performance optimizations related to custom columns and field metadata.
---
 resources/default_tweaks.py                  |  10 ++
 src/calibre/ebooks/metadata/book/__init__.py |   6 +
 src/calibre/ebooks/metadata/book/base.py     |  39 +++--
 src/calibre/library/custom_columns.py        |  19 +++
 src/calibre/library/database2.py             |  14 +-
 src/calibre/library/field_metadata.py        |  10 +-
 src/calibre/utils/formatter.py               | 160 ++++++++++++++++++-
 src/calibre/utils/formatter_functions.py     |  10 +-
 8 files changed, 234 insertions(+), 34 deletions(-)

diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py
index ee2e07f412..33561f50b9 100644
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@@ -496,3 +496,13 @@ gui_view_history_size = 15
 # prefer HTMLZ to EPUB for tweaking, change this to 'htmlz'
 tweak_book_prefer = 'epub'
 
+#: Compile General Program Mode templates to Python
+
+# Compiled general program mode templates are significantly faster than
+# interpreted templates. Setting this tweak to True causes calibre to compile
+# (in most cases) general program mode templates. Setting it to False causes
+# calibre to use the old behavior -- interpreting the templates. Set the tweak
+# to False if some compiled templates produce incorrect values.
+# Default:    compile_gpm_templates = True
+# No compile: compile_gpm_templates = False
+compile_gpm_templates = True
diff --git a/src/calibre/ebooks/metadata/book/__init__.py b/src/calibre/ebooks/metadata/book/__init__.py
index 38a824374c..b7ab91c26f 100644
--- a/src/calibre/ebooks/metadata/book/__init__.py
+++ b/src/calibre/ebooks/metadata/book/__init__.py
@@ -131,3 +131,9 @@ SERIALIZABLE_FIELDS =      SOCIAL_METADATA_FIELDS.union(
                            frozenset(['device_collections', 'formats',
                                'cover_data'])
                            # these are rebuilt when needed
+
+# A special set used to optimize the performance of Metadata.__setattr__
+ATTR_NORMAL_FIELDS =    frozenset(STANDARD_METADATA_FIELDS -
+                                     TOP_LEVEL_IDENTIFIERS -
+                                     set('identifiers') -
+                                     set('languages'))
diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py
index 0312a7db6a..71ebde8603 100644
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@@ -14,6 +14,7 @@ from calibre.ebooks.metadata.book import SC_FIELDS_COPY_NOT_NULL
 from calibre.ebooks.metadata.book import STANDARD_METADATA_FIELDS
 from calibre.ebooks.metadata.book import TOP_LEVEL_IDENTIFIERS
 from calibre.ebooks.metadata.book import ALL_METADATA_FIELDS
+from calibre.ebooks.metadata.book import ATTR_NORMAL_FIELDS
 from calibre.library.field_metadata import FieldMetadata
 from calibre.utils.date import isoformat, format_date
 from calibre.utils.icu import sort_key
@@ -136,6 +137,8 @@ class Metadata(object):
 
     def __getattribute__(self, field):
         _data = object.__getattribute__(self, '_data')
+        if field in ATTR_NORMAL_FIELDS:
+            return _data.get(field, None)
         if field in TOP_LEVEL_IDENTIFIERS:
             return _data.get('identifiers').get(field, None)
         if field == 'language':
@@ -143,8 +146,6 @@ class Metadata(object):
                 return _data.get('languages', [])[0]
             except:
                 return NULL_VALUES['language']
-        if field in STANDARD_METADATA_FIELDS:
-            return _data.get(field, None)
         try:
             return object.__getattribute__(self, field)
         except AttributeError:
@@ -173,7 +174,11 @@ class Metadata(object):
 
     def __setattr__(self, field, val, extra=None):
         _data = object.__getattribute__(self, '_data')
-        if field in TOP_LEVEL_IDENTIFIERS:
+        if field in ATTR_NORMAL_FIELDS:
+            if val is None:
+                val = copy.copy(NULL_VALUES.get(field, None))
+            _data[field] = val
+        elif field in TOP_LEVEL_IDENTIFIERS:
             field, val = self._clean_identifier(field, val)
             identifiers = _data['identifiers']
             identifiers.pop(field, None)
@@ -188,10 +193,6 @@ class Metadata(object):
             if val and val.lower() != 'und':
                 langs = [val]
             _data['languages'] = langs
-        elif field in STANDARD_METADATA_FIELDS:
-            if val is None:
-                val = copy.copy(NULL_VALUES.get(field, None))
-            _data[field] = val
         elif field in _data['user_metadata'].iterkeys():
             _data['user_metadata'][field]['#value#'] = val
             _data['user_metadata'][field]['#extra#'] = extra
@@ -404,9 +405,19 @@ class Metadata(object):
         '''
         if metadata is None:
             traceback.print_stack()
-        else:
-            for key in metadata:
-                self.set_user_metadata(key, metadata[key])
+            return
+
+        um = {}
+        for key, meta in metadata.iteritems():
+            m = meta.copy()
+            if '#value#' not in m:
+                if m['datatype'] == 'text' and m['is_multiple']:
+                    m['#value#'] = []
+                else:
+                    m['#value#'] = None
+            um[key] = m
+        _data = object.__getattribute__(self, '_data')
+        _data['user_metadata'].update(um)
 
     def set_user_metadata(self, field, metadata):
         '''
@@ -420,9 +431,11 @@ class Metadata(object):
             if metadata is None:
                 traceback.print_stack()
                 return
-            m = {}
-            for k in metadata:
-                m[k] = copy.copy(metadata[k])
+            m = dict(metadata)
+            # Copying the elements should not be necessary. The objects referenced
+            # in the dict should not change. Of course, they can be replaced.
+            # for k,v in metadata.iteritems():
+            #     m[k] = copy.copy(v)
             if '#value#' not in m:
                 if m['datatype'] == 'text' and m['is_multiple']:
                     m['#value#'] = []
diff --git a/src/calibre/library/custom_columns.py b/src/calibre/library/custom_columns.py
index 4c5ade37b0..453f03f38a 100644
--- a/src/calibre/library/custom_columns.py
+++ b/src/calibre/library/custom_columns.py
@@ -227,6 +227,25 @@ class CustomColumns(object):
         return self.conn.get('''SELECT extra FROM %s
                                 WHERE book=?'''%lt, (idx,), all=False)
 
+    def get_custom_and_extra(self, idx, label=None, num=None, index_is_id=False):
+        if label is not None:
+            data = self.custom_column_label_map[label]
+        if num is not None:
+            data = self.custom_column_num_map[num]
+        idx = idx if index_is_id else self.id(idx)
+        row = self.data._data[idx]
+        ans = row[self.FIELD_MAP[data['num']]]
+        if data['is_multiple'] and data['datatype'] == 'text':
+            ans = ans.split(data['multiple_seps']['cache_to_list']) if ans else []
+            if data['display'].get('sort_alpha', False):
+                ans.sort(cmp=lambda x,y:cmp(x.lower(), y.lower()))
+        if data['datatype'] != 'series':
+            return (ans, None)
+        ign,lt = self.custom_table_names(data['num'])
+        extra = self.conn.get('''SELECT extra FROM %s
+                                 WHERE book=?'''%lt, (idx,), all=False)
+        return (ans, extra)
+
     # convenience methods for tag editing
     def get_custom_items_with_ids(self, label=None, num=None):
         if label is not None:
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index d3475ffa75..2560f2e77f 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -909,7 +909,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         Convenience method to return metadata as a :class:`Metadata` object.
         Note that the list of formats is not verified.
         '''
-        row = self.data._data[idx] if index_is_id else self.data[idx]
+        idx = idx if index_is_id else self.id(idx)
+        row = self.data._data[idx]
         fm = self.FIELD_MAP
         mi = Metadata(None, template_cache=self.formatter_template_cache)
 
@@ -947,7 +948,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         mi.book_size   = row[fm['size']]
         mi.ondevice_col= row[fm['ondevice']]
         mi.last_modified = row[fm['last_modified']]
-        id = idx if index_is_id else self.id(idx)
+        id = idx
         formats = row[fm['formats']]
         mi.format_metadata = {}
         if not formats:
@@ -971,15 +972,14 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         mi.application_id = id
         mi.id = id
 
+        mi.set_all_user_metadata(self.field_metadata.custom_field_metadata())
         for key, meta in self.field_metadata.custom_iteritems():
-            mi.set_user_metadata(key, meta)
             if meta['datatype'] == 'composite':
                 mi.set(key, val=row[meta['rec_index']])
             else:
-                mi.set(key, val=self.get_custom(idx, label=meta['label'],
-                                            index_is_id=index_is_id),
-                        extra=self.get_custom_extra(idx, label=meta['label'],
-                                                    index_is_id=index_is_id))
+                val, extra = self.get_custom_and_extra(idx, label=meta['label'],
+                                                       index_is_id=True)
+                mi.set(key, val=val, extra=extra)
 
         user_cats = self.prefs['user_categories']
         user_cat_vals = {}
diff --git a/src/calibre/library/field_metadata.py b/src/calibre/library/field_metadata.py
index de95eabd40..c3517378f7 100644
--- a/src/calibre/library/field_metadata.py
+++ b/src/calibre/library/field_metadata.py
@@ -388,6 +388,7 @@ class FieldMetadata(dict):
     def __init__(self):
         self._field_metadata = copy.deepcopy(self._field_metadata_prototype)
         self._tb_cats = OrderedDict()
+        self._tb_custom_fields = {}
         self._search_term_map = {}
         self.custom_label_to_key_map = {}
         for k,v in self._field_metadata:
@@ -477,10 +478,8 @@ class FieldMetadata(dict):
             yield (key, self._tb_cats[key])
 
     def custom_iteritems(self):
-        for key in self._tb_cats:
-            fm = self._tb_cats[key]
-            if fm['is_custom']:
-                yield (key, self._tb_cats[key])
+        for key, meta in self._tb_custom_fields.iteritems():
+            yield (key, meta)
 
     def items(self):
         return list(self.iteritems())
@@ -516,6 +515,8 @@ class FieldMetadata(dict):
         return l
 
     def custom_field_metadata(self, include_composites=True):
+        if include_composites:
+            return self._tb_custom_fields
         l = {}
         for k in self.custom_field_keys(include_composites):
             l[k] = self._tb_cats[k]
@@ -537,6 +538,7 @@ class FieldMetadata(dict):
                              'is_custom':True,     'is_category':is_category,
                              'link_column':'value','category_sort':'value',
                              'is_csp' : is_csp,     'is_editable': is_editable,}
+        self._tb_custom_fields[key] = self._tb_cats[key]
         self._add_search_terms_to_map(key, [key])
         self.custom_label_to_key_map[label] = key
         if datatype == 'series':
diff --git a/src/calibre/utils/formatter.py b/src/calibre/utils/formatter.py
index b1224de3da..be17b01111 100644
--- a/src/calibre/utils/formatter.py
+++ b/src/calibre/utils/formatter.py
@@ -11,7 +11,8 @@ __docformat__ = 'restructuredtext en'
 import re, string, traceback
 
 from calibre.constants import DEBUG
-from calibre.utils.formatter_functions import formatter_functions
+from calibre.utils.formatter_functions import formatter_functions, compile_user_function
+from calibre.utils.config import tweaks
 
 class _Parser(object):
     LEX_OP  = 1
@@ -172,6 +173,130 @@ class _Parser(object):
             self.error(_('expression is not function or constant'))
 
 
+class _CompileParser(_Parser):
+    def __init__(self, val, prog, parent, compile_text):
+        self.lex_pos = 0
+        self.prog = prog[0]
+        self.prog_len = len(self.prog)
+        if prog[1] != '':
+            self.error(_('failed to scan program. Invalid input {0}').format(prog[1]))
+        self.parent = parent
+        parent.locals = {'$':val}
+        self.parent_kwargs = parent.kwargs
+        self.parent_book = parent.book
+        self.parent_locals = parent.locals
+        self.compile_text = compile_text
+
+    def program(self):
+        if self.compile_text:
+            t = self.compile_text
+            self.compile_text = '\n'
+        self.max_level = 0
+        val = self.statement()
+        if not self.token_is_eof():
+            self.error(_('syntax error - program ends before EOF'))
+        if self.compile_text:
+            t += "\targs=[[]"
+            for i in range(0, self.max_level):
+                t += ", None"
+            t += ']'
+            self.compile_text = t + self.compile_text + "\treturn args[0][0]\n"
+        return val
+
+    def statement(self, level=0):
+        while True:
+            val = self.expr(level)
+            if self.token_is_eof():
+                return val
+            if not self.token_op_is_a_semicolon():
+                return val
+            if self.compile_text:
+                self.compile_text += "\targs[%d] = list()\n"%(level,)
+            self.consume()
+            if self.token_is_eof():
+                return val
+
+    def expr(self, level):
+        if self.compile_text:
+            self.max_level = max(level, self.max_level)
+
+        if self.token_is_id():
+            funcs = formatter_functions().get_functions()
+            # We have an identifier. Determine if it is a function
+            id = self.token()
+            if not self.token_op_is_a_lparen():
+                if self.token_op_is_a_equals():
+                    # classic assignment statement
+                    self.consume()
+                    cls = funcs['assign']
+                    if self.compile_text:
+                        self.compile_text += '\targs[%d] = list()\n'%(level+1,)
+                    val = cls.eval_(self.parent, self.parent_kwargs,
+                                    self.parent_book, self.parent_locals, id, self.expr(level+1))
+                    if self.compile_text:
+                        self.compile_text += "\tlocals['%s'] = args[%d][0]\n"%(id, level+1)
+                        self.compile_text += "\targs[%d].append(args[%d][0])\n"%(level, level+1)
+                    return val
+                val = self.parent.locals.get(id, None)
+                if val is None:
+                    self.error(_('Unknown identifier ') + id)
+                if self.compile_text:
+                    self.compile_text += "\targs[%d].append(locals.get('%s'))\n"%(level, id)
+                return val
+            # We have a function.
+            # Check if it is a known one. We do this here so error reporting is
+            # better, as it can identify the tokens near the problem.
+            if id not in funcs:
+                self.error(_('unknown function {0}').format(id))
+
+            # Eat the paren
+            self.consume()
+            args = list()
+            if self.compile_text:
+                self.compile_text += '\targs[%d] = list()\n'%(level+1, )
+            while not self.token_op_is_a_rparen():
+                if id == 'assign' and len(args) == 0:
+                    # Must handle the lvalue semantics of the assign function.
+                    # The first argument is the name of the destination, not
+                    # the value.
+                    if not self.token_is_id():
+                        self.error('assign requires the first parameter be an id')
+                    t = self.token()
+                    args.append(t)
+                    if self.compile_text:
+                        self.compile_text += "\targs[%d].append('%s')\n"%(level+1, t)
+                else:
+                    # evaluate the argument (recursive call)
+                    args.append(self.statement(level=level+1))
+                if not self.token_op_is_a_comma():
+                    break
+                self.consume()
+            if self.token() != ')':
+                self.error(_('missing closing parenthesis'))
+
+            # Evaluate the function
+            cls = funcs[id]
+            if cls.arg_count != -1 and len(args) != cls.arg_count:
+                self.error('incorrect number of arguments for function {}'.format(id))
+            if self.compile_text:
+                self.compile_text += (
+                    "\targs[%d].append(self.__funcs__['%s']"
+                    ".evaluate(formatter, kwargs, book, locals, *args[%d]))\n")%(level, id, level+1)
+            return cls.eval_(self.parent, self.parent_kwargs,
+                            self.parent_book, self.parent_locals, *args)
+        elif self.token_is_constant():
+            # String or number
+            v = self.token()
+            if self.compile_text:
+                tv = v.replace("\\", "\\\\")
+                tv = tv.replace("'", "\\'")
+                self.compile_text += "\targs[%d].append('%s')\n"%(level, tv)
+            return v
+        else:
+            self.error(_('expression is not function or constant'))
+
+compile_counter = 0
+
 class TemplateFormatter(string.Formatter):
     '''
     Provides a format function that substitutes '' for any missing value
@@ -249,15 +374,36 @@ class TemplateFormatter(string.Formatter):
         # keep a cache of the lex'ed program under the theory that re-lexing
         # is much more expensive than the cache lookup. This is certainly true
         # for more than a few tokens, but it isn't clear for simple programs.
-        if column_name is not None and self.template_cache is not None:
-            lprog = self.template_cache.get(column_name, None)
-            if not lprog:
+        if tweaks['compile_gpm_templates']:
+            if column_name is not None and self.template_cache is not None:
+                lprog = self.template_cache.get(column_name, None)
+                if lprog:
+                    return lprog.evaluate(self, self.kwargs, self.book, self.locals)
                 lprog = self.lex_scanner.scan(prog)
-                self.template_cache[column_name] = lprog
+                compile_text = ('__funcs__ = formatter_functions().get_functions()\n'
+                                'def evaluate(self, formatter, kwargs, book, locals):\n'
+                                )
+            else:
+                lprog = self.lex_scanner.scan(prog)
+                compile_text = None
+            parser = _CompileParser(val, lprog, self, compile_text)
+            val = parser.program()
+            if parser.compile_text:
+                global compile_counter
+                compile_counter += 1
+                f = compile_user_function("__A" + str(compile_counter), 'doc', -1, parser.compile_text)
+                self.template_cache[column_name] = f
         else:
+            if column_name is not None and self.template_cache is not None:
+                lprog = self.template_cache.get(column_name, None)
+                if not lprog:
+                    lprog = self.lex_scanner.scan(prog)
+                    self.template_cache[column_name] = lprog
+            else:
                 lprog = self.lex_scanner.scan(prog)
-        parser = _Parser(val, lprog, self)
-        return parser.program()
+            parser = _Parser(val, lprog, self)
+            val = parser.program()
+        return val
 
     ################## Override parent classes methods #####################
 
diff --git a/src/calibre/utils/formatter_functions.py b/src/calibre/utils/formatter_functions.py
index ec887887db..fcff101ad2 100644
--- a/src/calibre/utils/formatter_functions.py
+++ b/src/calibre/utils/formatter_functions.py
@@ -11,6 +11,7 @@ __docformat__ = 'restructuredtext en'
 import inspect, re, traceback
 
 from calibre import human_readable
+from calibre.constants import DEBUG
 from calibre.utils.titlecase import titlecase
 from calibre.utils.icu import capitalize, strcmp, sort_key
 from calibre.utils.date import parse_date, format_date, now, UNDEFINED_DATE
@@ -1156,11 +1157,14 @@ def compile_user_function(name, doc, arg_count, eval_func):
                                    for line in eval_func.splitlines()])
     prog = '''
 from calibre.utils.formatter_functions import FormatterUserFunction
+from calibre.utils.formatter_functions import formatter_functions
 class UserFunction(FormatterUserFunction):
 ''' + func
-    locals = {}
-    exec prog in locals
-    cls = locals['UserFunction'](name, doc, arg_count, eval_func)
+    locals_ = {}
+    if DEBUG:
+        print prog
+    exec prog in locals_
+    cls = locals_['UserFunction'](name, doc, arg_count, eval_func)
     return cls
 
 def load_user_template_functions(funcs):

From 19af7031b38c3d560880b6930099ac84bbeda0ab Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Wed, 14 Mar 2012 16:15:04 +0100
Subject: [PATCH 2/3] Apparently default_tweaks.py requires an empty line at
 the end.

---
 resources/default_tweaks.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py
index 5b09e3f583..7f258afdc9 100644
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@@ -511,3 +511,4 @@ change_book_details_font_size_by = 0
 # Default:    compile_gpm_templates = True
 # No compile: compile_gpm_templates = False
 compile_gpm_templates = True
+

From 01b55581a7499c341ef5a0a6359370148dd9a6b8 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Thu, 15 Mar 2012 07:20:50 +0100
Subject: [PATCH 3/3] Fix improper exclusion of "languages" from the new
 ATTR_NORMAL_FIELDS metadata set.

---
 src/calibre/ebooks/metadata/book/__init__.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/metadata/book/__init__.py b/src/calibre/ebooks/metadata/book/__init__.py
index b7ab91c26f..00bbc68f29 100644
--- a/src/calibre/ebooks/metadata/book/__init__.py
+++ b/src/calibre/ebooks/metadata/book/__init__.py
@@ -135,5 +135,4 @@ SERIALIZABLE_FIELDS =      SOCIAL_METADATA_FIELDS.union(
 # A special set used to optimize the performance of Metadata.__setattr__
 ATTR_NORMAL_FIELDS =    frozenset(STANDARD_METADATA_FIELDS -
                                      TOP_LEVEL_IDENTIFIERS -
-                                     set('identifiers') -
-                                     set('languages'))
+                                     set(['identifiers']))  # the field name itself; set('identifiers') would be a set of its letters