From 06904f92a5bbdc38add27b0078a054b365f47378 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Wed, 14 Mar 2012 15:34:40 +0100 Subject: [PATCH 1/3] Compile General Program Mode templates to python, controlled by the tweak "compile_gpm_templates" that defaults to True. Several performance optimizations related to custom columns and field metadata. --- resources/default_tweaks.py | 10 ++ src/calibre/ebooks/metadata/book/__init__.py | 6 + src/calibre/ebooks/metadata/book/base.py | 39 +++-- src/calibre/library/custom_columns.py | 19 +++ src/calibre/library/database2.py | 14 +- src/calibre/library/field_metadata.py | 10 +- src/calibre/utils/formatter.py | 160 ++++++++++++++++++- src/calibre/utils/formatter_functions.py | 10 +- 8 files changed, 234 insertions(+), 34 deletions(-) diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index ee2e07f412..33561f50b9 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -496,3 +496,13 @@ gui_view_history_size = 15 # prefer HTMLZ to EPUB for tweaking, change this to 'htmlz' tweak_book_prefer = 'epub' +#: Compile General Program Mode templates to Python + +# Compiled general program mode templates are significantly faster than +# interpreted templates. Setting this tweak to True causes calibre to compile +# (in most cases) general program mode templates. Setting it to False causes +# calibre to use the old behavior -- interpreting the templates. Set the tweak +# to False if some compiled templates produce incorrect values. +# Default: compile_gpm_templates = True +# No compile: compile_gpm_templates = False +compile_gpm_templates = True diff --git a/src/calibre/ebooks/metadata/book/__init__.py b/src/calibre/ebooks/metadata/book/__init__.py index 38a824374c..b7ab91c26f 100644 --- a/src/calibre/ebooks/metadata/book/__init__.py +++ b/src/calibre/ebooks/metadata/book/__init__.py @@ -131,3 +131,9 @@ SERIALIZABLE_FIELDS = SOCIAL_METADATA_FIELDS.union( frozenset(['device_collections', 'formats', 'cover_data']) # these are rebuilt when needed + +# A special set used to optimize the performance of Metadata.__setattr__ +ATTR_NORMAL_FIELDS = frozenset(STANDARD_METADATA_FIELDS - + TOP_LEVEL_IDENTIFIERS - + set('identifiers') - + set('languages')) diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index 0312a7db6a..71ebde8603 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -14,6 +14,7 @@ from calibre.ebooks.metadata.book import SC_FIELDS_COPY_NOT_NULL from calibre.ebooks.metadata.book import STANDARD_METADATA_FIELDS from calibre.ebooks.metadata.book import TOP_LEVEL_IDENTIFIERS from calibre.ebooks.metadata.book import ALL_METADATA_FIELDS +from calibre.ebooks.metadata.book import ATTR_NORMAL_FIELDS from calibre.library.field_metadata import FieldMetadata from calibre.utils.date import isoformat, format_date from calibre.utils.icu import sort_key @@ -136,6 +137,8 @@ class Metadata(object): def __getattribute__(self, field): _data = object.__getattribute__(self, '_data') + if field in ATTR_NORMAL_FIELDS: + return _data.get(field, None) if field in TOP_LEVEL_IDENTIFIERS: return _data.get('identifiers').get(field, None) if field == 'language': @@ -143,8 +146,6 @@ class Metadata(object): return _data.get('languages', [])[0] except: return NULL_VALUES['language'] - if field in STANDARD_METADATA_FIELDS: - return _data.get(field, None) try: return object.__getattribute__(self, field) except AttributeError: @@ -173,7 +174,11 @@ class Metadata(object): def __setattr__(self, field, val, extra=None): _data = object.__getattribute__(self, '_data') - if field in TOP_LEVEL_IDENTIFIERS: + if field in ATTR_NORMAL_FIELDS: + if val is None: + val = copy.copy(NULL_VALUES.get(field, None)) + _data[field] = val + elif field in TOP_LEVEL_IDENTIFIERS: field, val = self._clean_identifier(field, val) identifiers = _data['identifiers'] identifiers.pop(field, None) @@ -188,10 +193,6 @@ class Metadata(object): if val and val.lower() != 'und': langs = [val] _data['languages'] = langs - elif field in STANDARD_METADATA_FIELDS: - if val is None: - val = copy.copy(NULL_VALUES.get(field, None)) - _data[field] = val elif field in _data['user_metadata'].iterkeys(): _data['user_metadata'][field]['#value#'] = val _data['user_metadata'][field]['#extra#'] = extra @@ -404,9 +405,19 @@ class Metadata(object): ''' if metadata is None: traceback.print_stack() - else: - for key in metadata: - self.set_user_metadata(key, metadata[key]) + return + + um = {} + for key, meta in metadata.iteritems(): + m = meta.copy() + if '#value#' not in m: + if m['datatype'] == 'text' and m['is_multiple']: + m['#value#'] = [] + else: + m['#value#'] = None + um[key] = m + _data = object.__getattribute__(self, '_data') + _data['user_metadata'].update(um) def set_user_metadata(self, field, metadata): ''' @@ -420,9 +431,11 @@ class Metadata(object): if metadata is None: traceback.print_stack() return - m = {} - for k in metadata: - m[k] = copy.copy(metadata[k]) + m = dict(metadata) + # Copying the elements should not be necessary. The objects referenced + # in the dict should not change. Of course, they can be replaced. + # for k,v in metadata.iteritems(): + # m[k] = copy.copy(v) if '#value#' not in m: if m['datatype'] == 'text' and m['is_multiple']: m['#value#'] = [] diff --git a/src/calibre/library/custom_columns.py b/src/calibre/library/custom_columns.py index 4c5ade37b0..453f03f38a 100644 --- a/src/calibre/library/custom_columns.py +++ b/src/calibre/library/custom_columns.py @@ -227,6 +227,25 @@ class CustomColumns(object): return self.conn.get('''SELECT extra FROM %s WHERE book=?'''%lt, (idx,), all=False) + def get_custom_and_extra(self, idx, label=None, num=None, index_is_id=False): + if label is not None: + data = self.custom_column_label_map[label] + if num is not None: + data = self.custom_column_num_map[num] + idx = idx if index_is_id else self.id(idx) + row = self.data._data[idx] + ans = row[self.FIELD_MAP[data['num']]] + if data['is_multiple'] and data['datatype'] == 'text': + ans = ans.split(data['multiple_seps']['cache_to_list']) if ans else [] + if data['display'].get('sort_alpha', False): + ans.sort(cmp=lambda x,y:cmp(x.lower(), y.lower())) + if data['datatype'] != 'series': + return (ans, None) + ign,lt = self.custom_table_names(data['num']) + extra = self.conn.get('''SELECT extra FROM %s + WHERE book=?'''%lt, (idx,), all=False) + return (ans, extra) + # convenience methods for tag editing def get_custom_items_with_ids(self, label=None, num=None): if label is not None: diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index d3475ffa75..2560f2e77f 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -909,7 +909,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): Convenience method to return metadata as a :class:`Metadata` object. Note that the list of formats is not verified. ''' - row = self.data._data[idx] if index_is_id else self.data[idx] + idx = idx if index_is_id else self.id(idx) + row = self.data._data[idx] fm = self.FIELD_MAP mi = Metadata(None, template_cache=self.formatter_template_cache) @@ -947,7 +948,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): mi.book_size = row[fm['size']] mi.ondevice_col= row[fm['ondevice']] mi.last_modified = row[fm['last_modified']] - id = idx if index_is_id else self.id(idx) + id = idx formats = row[fm['formats']] mi.format_metadata = {} if not formats: @@ -971,15 +972,14 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): mi.application_id = id mi.id = id + mi.set_all_user_metadata(self.field_metadata.custom_field_metadata()) for key, meta in self.field_metadata.custom_iteritems(): - mi.set_user_metadata(key, meta) if meta['datatype'] == 'composite': mi.set(key, val=row[meta['rec_index']]) else: - mi.set(key, val=self.get_custom(idx, label=meta['label'], - index_is_id=index_is_id), - extra=self.get_custom_extra(idx, label=meta['label'], - index_is_id=index_is_id)) + val, extra = self.get_custom_and_extra(idx, label=meta['label'], + index_is_id=True) + mi.set(key, val=val, extra=extra) user_cats = self.prefs['user_categories'] user_cat_vals = {} diff --git a/src/calibre/library/field_metadata.py b/src/calibre/library/field_metadata.py index de95eabd40..c3517378f7 100644 --- a/src/calibre/library/field_metadata.py +++ b/src/calibre/library/field_metadata.py @@ -388,6 +388,7 @@ class FieldMetadata(dict): def __init__(self): self._field_metadata = copy.deepcopy(self._field_metadata_prototype) self._tb_cats = OrderedDict() + self._tb_custom_fields = {} self._search_term_map = {} self.custom_label_to_key_map = {} for k,v in self._field_metadata: @@ -477,10 +478,8 @@ class FieldMetadata(dict): yield (key, self._tb_cats[key]) def custom_iteritems(self): - for key in self._tb_cats: - fm = self._tb_cats[key] - if fm['is_custom']: - yield (key, self._tb_cats[key]) + for key, meta in self._tb_custom_fields.iteritems(): + yield (key, meta) def items(self): return list(self.iteritems()) @@ -516,6 +515,8 @@ class FieldMetadata(dict): return l def custom_field_metadata(self, include_composites=True): + if include_composites: + return self._tb_custom_fields l = {} for k in self.custom_field_keys(include_composites): l[k] = self._tb_cats[k] @@ -537,6 +538,7 @@ class FieldMetadata(dict): 'is_custom':True, 'is_category':is_category, 'link_column':'value','category_sort':'value', 'is_csp' : is_csp, 'is_editable': is_editable,} + self._tb_custom_fields[key] = self._tb_cats[key] self._add_search_terms_to_map(key, [key]) self.custom_label_to_key_map[label] = key if datatype == 'series': diff --git a/src/calibre/utils/formatter.py b/src/calibre/utils/formatter.py index b1224de3da..be17b01111 100644 --- a/src/calibre/utils/formatter.py +++ b/src/calibre/utils/formatter.py @@ -11,7 +11,8 @@ __docformat__ = 'restructuredtext en' import re, string, traceback from calibre.constants import DEBUG -from calibre.utils.formatter_functions import formatter_functions +from calibre.utils.formatter_functions import formatter_functions, compile_user_function +from calibre.utils.config import tweaks class _Parser(object): LEX_OP = 1 @@ -172,6 +173,130 @@ class _Parser(object): self.error(_('expression is not function or constant')) +class _CompileParser(_Parser): + def __init__(self, val, prog, parent, compile_text): + self.lex_pos = 0 + self.prog = prog[0] + self.prog_len = len(self.prog) + if prog[1] != '': + self.error(_('failed to scan program. Invalid input {0}').format(prog[1])) + self.parent = parent + parent.locals = {'$':val} + self.parent_kwargs = parent.kwargs + self.parent_book = parent.book + self.parent_locals = parent.locals + self.compile_text = compile_text + + def program(self): + if self.compile_text: + t = self.compile_text + self.compile_text = '\n' + self.max_level = 0 + val = self.statement() + if not self.token_is_eof(): + self.error(_('syntax error - program ends before EOF')) + if self.compile_text: + t += "\targs=[[]" + for i in range(0, self.max_level): + t += ", None" + t += ']' + self.compile_text = t + self.compile_text + "\treturn args[0][0]\n" + return val + + def statement(self, level=0): + while True: + val = self.expr(level) + if self.token_is_eof(): + return val + if not self.token_op_is_a_semicolon(): + return val + if self.compile_text: + self.compile_text += "\targs[%d] = list()\n"%(level,) + self.consume() + if self.token_is_eof(): + return val + + def expr(self, level): + if self.compile_text: + self.max_level = max(level, self.max_level) + + if self.token_is_id(): + funcs = formatter_functions().get_functions() + # We have an identifier. Determine if it is a function + id = self.token() + if not self.token_op_is_a_lparen(): + if self.token_op_is_a_equals(): + # classic assignment statement + self.consume() + cls = funcs['assign'] + if self.compile_text: + self.compile_text += '\targs[%d] = list()\n'%(level+1,) + val = cls.eval_(self.parent, self.parent_kwargs, + self.parent_book, self.parent_locals, id, self.expr(level+1)) + if self.compile_text: + self.compile_text += "\tlocals['%s'] = args[%d][0]\n"%(id, level+1) + self.compile_text += "\targs[%d].append(args[%d][0])\n"%(level, level+1) + return val + val = self.parent.locals.get(id, None) + if val is None: + self.error(_('Unknown identifier ') + id) + if self.compile_text: + self.compile_text += "\targs[%d].append(locals.get('%s'))\n"%(level, id) + return val + # We have a function. + # Check if it is a known one. We do this here so error reporting is + # better, as it can identify the tokens near the problem. + if id not in funcs: + self.error(_('unknown function {0}').format(id)) + + # Eat the paren + self.consume() + args = list() + if self.compile_text: + self.compile_text += '\targs[%d] = list()\n'%(level+1, ) + while not self.token_op_is_a_rparen(): + if id == 'assign' and len(args) == 0: + # Must handle the lvalue semantics of the assign function. + # The first argument is the name of the destination, not + # the value. + if not self.token_is_id(): + self.error('assign requires the first parameter be an id') + t = self.token() + args.append(t) + if self.compile_text: + self.compile_text += "\targs[%d].append('%s')\n"%(level+1, t) + else: + # evaluate the argument (recursive call) + args.append(self.statement(level=level+1)) + if not self.token_op_is_a_comma(): + break + self.consume() + if self.token() != ')': + self.error(_('missing closing parenthesis')) + + # Evaluate the function + cls = funcs[id] + if cls.arg_count != -1 and len(args) != cls.arg_count: + self.error('incorrect number of arguments for function {}'.format(id)) + if self.compile_text: + self.compile_text += ( + "\targs[%d].append(self.__funcs__['%s']" + ".evaluate(formatter, kwargs, book, locals, *args[%d]))\n")%(level, id, level+1) + return cls.eval_(self.parent, self.parent_kwargs, + self.parent_book, self.parent_locals, *args) + elif self.token_is_constant(): + # String or number + v = self.token() + if self.compile_text: + tv = v.replace("\\", "\\\\") + tv = tv.replace("'", "\\'") + self.compile_text += "\targs[%d].append('%s')\n"%(level, tv) + return v + else: + self.error(_('expression is not function or constant')) + +compile_counter = 0 + class TemplateFormatter(string.Formatter): ''' Provides a format function that substitutes '' for any missing value @@ -249,15 +374,36 @@ class TemplateFormatter(string.Formatter): # keep a cache of the lex'ed program under the theory that re-lexing # is much more expensive than the cache lookup. This is certainly true # for more than a few tokens, but it isn't clear for simple programs. - if column_name is not None and self.template_cache is not None: - lprog = self.template_cache.get(column_name, None) - if not lprog: + if tweaks['compile_gpm_templates']: + if column_name is not None and self.template_cache is not None: + lprog = self.template_cache.get(column_name, None) + if lprog: + return lprog.evaluate(self, self.kwargs, self.book, self.locals) lprog = self.lex_scanner.scan(prog) - self.template_cache[column_name] = lprog + compile_text = ('__funcs__ = formatter_functions().get_functions()\n' + 'def evaluate(self, formatter, kwargs, book, locals):\n' + ) + else: + lprog = self.lex_scanner.scan(prog) + compile_text = None + parser = _CompileParser(val, lprog, self, compile_text) + val = parser.program() + if parser.compile_text: + global compile_counter + compile_counter += 1 + f = compile_user_function("__A" + str(compile_counter), 'doc', -1, parser.compile_text) + self.template_cache[column_name] = f else: + if column_name is not None and self.template_cache is not None: + lprog = self.template_cache.get(column_name, None) + if not lprog: + lprog = self.lex_scanner.scan(prog) + self.template_cache[column_name] = lprog + else: lprog = self.lex_scanner.scan(prog) - parser = _Parser(val, lprog, self) - return parser.program() + parser = _Parser(val, lprog, self) + val = parser.program() + return val ################## Override parent classes methods ##################### diff --git a/src/calibre/utils/formatter_functions.py b/src/calibre/utils/formatter_functions.py index ec887887db..fcff101ad2 100644 --- a/src/calibre/utils/formatter_functions.py +++ b/src/calibre/utils/formatter_functions.py @@ -11,6 +11,7 @@ __docformat__ = 'restructuredtext en' import inspect, re, traceback from calibre import human_readable +from calibre.constants import DEBUG from calibre.utils.titlecase import titlecase from calibre.utils.icu import capitalize, strcmp, sort_key from calibre.utils.date import parse_date, format_date, now, UNDEFINED_DATE @@ -1156,11 +1157,14 @@ def compile_user_function(name, doc, arg_count, eval_func): for line in eval_func.splitlines()]) prog = ''' from calibre.utils.formatter_functions import FormatterUserFunction +from calibre.utils.formatter_functions import formatter_functions class UserFunction(FormatterUserFunction): ''' + func - locals = {} - exec prog in locals - cls = locals['UserFunction'](name, doc, arg_count, eval_func) + locals_ = {} + if DEBUG: + print prog + exec prog in locals_ + cls = locals_['UserFunction'](name, doc, arg_count, eval_func) return cls def load_user_template_functions(funcs): From 19af7031b38c3d560880b6930099ac84bbeda0ab Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Wed, 14 Mar 2012 16:15:04 +0100 Subject: [PATCH 2/3] Apparently default_tweaks.py requires an empty line at the end. --- resources/default_tweaks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index 5b09e3f583..7f258afdc9 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -511,3 +511,4 @@ change_book_details_font_size_by = 0 # Default: compile_gpm_templates = True # No compile: compile_gpm_templates = False compile_gpm_templates = True + From 01b55581a7499c341ef5a0a6359370148dd9a6b8 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Thu, 15 Mar 2012 07:20:50 +0100 Subject: [PATCH 3/3] Fix improper exclusion of "languages" from the new metadata 'process_first' set. --- src/calibre/ebooks/metadata/book/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/calibre/ebooks/metadata/book/__init__.py b/src/calibre/ebooks/metadata/book/__init__.py index b7ab91c26f..00bbc68f29 100644 --- a/src/calibre/ebooks/metadata/book/__init__.py +++ b/src/calibre/ebooks/metadata/book/__init__.py @@ -135,5 +135,4 @@ SERIALIZABLE_FIELDS = SOCIAL_METADATA_FIELDS.union( # A special set used to optimize the performance of Metadata.__setattr__ ATTR_NORMAL_FIELDS = frozenset(STANDARD_METADATA_FIELDS - TOP_LEVEL_IDENTIFIERS - - set('identifiers') - - set('languages')) + set('identifiers'))