diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index a15cb62aaf..7f258afdc9 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -502,3 +502,13 @@ tweak_book_prefer = 'epub' # negative number to increase or decrease the font size. change_book_details_font_size_by = 0 +#: Compile General Program Mode templates to Python +# Compiled general program mode templates are significantly faster than +# interpreted templates. Setting this tweak to True causes calibre to compile +# (in most cases) general program mode templates. Setting it to False causes +# calibre to use the old behavior -- interpreting the templates. Set the tweak +# to False if some compiled templates produce incorrect values. +# Default: compile_gpm_templates = True +# No compile: compile_gpm_templates = False +compile_gpm_templates = True + diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index 754bcbfa66..ce80486af8 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -9,16 +9,19 @@ import copy, traceback from calibre import prints from calibre.constants import DEBUG -from calibre.ebooks.metadata.book import SC_COPYABLE_FIELDS -from calibre.ebooks.metadata.book import SC_FIELDS_COPY_NOT_NULL -from calibre.ebooks.metadata.book import STANDARD_METADATA_FIELDS -from calibre.ebooks.metadata.book import TOP_LEVEL_IDENTIFIERS -from calibre.ebooks.metadata.book import ALL_METADATA_FIELDS +from calibre.ebooks.metadata.book import (SC_COPYABLE_FIELDS, + SC_FIELDS_COPY_NOT_NULL, STANDARD_METADATA_FIELDS, + TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS) from calibre.library.field_metadata import FieldMetadata from calibre.utils.date import isoformat, format_date from calibre.utils.icu import sort_key from calibre.utils.formatter import TemplateFormatter +# Special sets used to optimize the performance of getting and setting +# attributes on Metadata objects +SIMPLE_GET = frozenset(STANDARD_METADATA_FIELDS - TOP_LEVEL_IDENTIFIERS) +SIMPLE_SET = frozenset(SIMPLE_GET - {'identifiers'}) + def human_readable(size, precision=2): """ Convert a size in bytes into megabytes """ return ('%.'+str(precision)+'f'+ 'MB') % ((size/(1024.*1024.)),) @@ -136,6 +139,8 @@ class Metadata(object): def __getattribute__(self, field): _data = object.__getattribute__(self, '_data') + if field in SIMPLE_GET: + return _data.get(field, None) if field in TOP_LEVEL_IDENTIFIERS: return _data.get('identifiers').get(field, None) if field == 'language': @@ -143,8 +148,6 @@ class Metadata(object): return _data.get('languages', [])[0] except: return NULL_VALUES['language'] - if field in STANDARD_METADATA_FIELDS: - return _data.get(field, None) try: return object.__getattribute__(self, field) except AttributeError: @@ -173,7 +176,11 @@ class Metadata(object): def __setattr__(self, field, val, extra=None): _data = object.__getattribute__(self, '_data') - if field in TOP_LEVEL_IDENTIFIERS: + if field in SIMPLE_SET: + if val is None: + val = copy.copy(NULL_VALUES.get(field, None)) + _data[field] = val + elif field in TOP_LEVEL_IDENTIFIERS: field, val = self._clean_identifier(field, val) identifiers = _data['identifiers'] identifiers.pop(field, None) @@ -188,10 +195,6 @@ class Metadata(object): if val and val.lower() != 'und': langs = [val] _data['languages'] = langs - elif field in STANDARD_METADATA_FIELDS: - if val is None: - val = copy.copy(NULL_VALUES.get(field, None)) - _data[field] = val elif field in _data['user_metadata'].iterkeys(): _data['user_metadata'][field]['#value#'] = val _data['user_metadata'][field]['#extra#'] = extra @@ -404,9 +407,19 @@ class Metadata(object): ''' if metadata is None: traceback.print_stack() - else: - for key in metadata: - self.set_user_metadata(key, metadata[key]) + return + + um = {} + for key, meta in metadata.iteritems(): + m = meta.copy() + if '#value#' not in m: + if m['datatype'] == 'text' and m['is_multiple']: + m['#value#'] = [] + else: + m['#value#'] = None + um[key] = m + _data = object.__getattribute__(self, '_data') + _data['user_metadata'].update(um) def set_user_metadata(self, field, metadata): ''' @@ -420,9 +433,11 @@ class Metadata(object): if metadata is None: traceback.print_stack() return - m = {} - for k in metadata: - m[k] = copy.copy(metadata[k]) + m = dict(metadata) + # Copying the elements should not be necessary. The objects referenced + # in the dict should not change. Of course, they can be replaced. + # for k,v in metadata.iteritems(): + # m[k] = copy.copy(v) if '#value#' not in m: if m['datatype'] == 'text' and m['is_multiple']: m['#value#'] = [] diff --git a/src/calibre/library/custom_columns.py b/src/calibre/library/custom_columns.py index 4c5ade37b0..453f03f38a 100644 --- a/src/calibre/library/custom_columns.py +++ b/src/calibre/library/custom_columns.py @@ -227,6 +227,25 @@ class CustomColumns(object): return self.conn.get('''SELECT extra FROM %s WHERE book=?'''%lt, (idx,), all=False) + def get_custom_and_extra(self, idx, label=None, num=None, index_is_id=False): + if label is not None: + data = self.custom_column_label_map[label] + if num is not None: + data = self.custom_column_num_map[num] + idx = idx if index_is_id else self.id(idx) + row = self.data._data[idx] + ans = row[self.FIELD_MAP[data['num']]] + if data['is_multiple'] and data['datatype'] == 'text': + ans = ans.split(data['multiple_seps']['cache_to_list']) if ans else [] + if data['display'].get('sort_alpha', False): + ans.sort(cmp=lambda x,y:cmp(x.lower(), y.lower())) + if data['datatype'] != 'series': + return (ans, None) + ign,lt = self.custom_table_names(data['num']) + extra = self.conn.get('''SELECT extra FROM %s + WHERE book=?'''%lt, (idx,), all=False) + return (ans, extra) + # convenience methods for tag editing def get_custom_items_with_ids(self, label=None, num=None): if label is not None: diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 555d91b7df..bcc4b05399 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -910,7 +910,15 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): Convenience method to return metadata as a :class:`Metadata` object. Note that the list of formats is not verified. ''' - row = self.data._data[idx] if index_is_id else self.data[idx] + idx = idx if index_is_id else self.id(idx) + try: + row = self.data._data[idx] + except: + row = None + + if row is None: + raise ValueError('No book with id: %d'%idx) + fm = self.FIELD_MAP mi = Metadata(None, template_cache=self.formatter_template_cache) @@ -948,14 +956,13 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): mi.book_size = row[fm['size']] mi.ondevice_col= row[fm['ondevice']] mi.last_modified = row[fm['last_modified']] - id = idx if index_is_id else self.id(idx) formats = row[fm['formats']] mi.format_metadata = {} if not formats: good_formats = None else: formats = sorted(formats.split(',')) - mi.format_metadata = FormatMetadata(self, id, formats) + mi.format_metadata = FormatMetadata(self, idx, formats) good_formats = FormatsList(formats, mi.format_metadata) mi.formats = good_formats tags = row[fm['tags']] @@ -968,19 +975,18 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): if mi.series: mi.series_index = row[fm['series_index']] mi.rating = row[fm['rating']] - mi.set_identifiers(self.get_identifiers(id, index_is_id=True)) - mi.application_id = id - mi.id = id + mi.set_identifiers(self.get_identifiers(idx, index_is_id=True)) + mi.application_id = idx + mi.id = idx + mi.set_all_user_metadata(self.field_metadata.custom_field_metadata()) for key, meta in self.field_metadata.custom_iteritems(): - mi.set_user_metadata(key, meta) if meta['datatype'] == 'composite': mi.set(key, val=row[meta['rec_index']]) else: - mi.set(key, val=self.get_custom(idx, label=meta['label'], - index_is_id=index_is_id), - extra=self.get_custom_extra(idx, label=meta['label'], - index_is_id=index_is_id)) + val, extra = self.get_custom_and_extra(idx, label=meta['label'], + index_is_id=True) + mi.set(key, val=val, extra=extra) user_cats = self.prefs['user_categories'] user_cat_vals = {} @@ -999,12 +1005,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): if get_cover: if cover_as_data: - cdata = self.cover(id, index_is_id=True) + cdata = self.cover(idx, index_is_id=True) if cdata: mi.cover_data = ('jpeg', cdata) else: - mi.cover = self.cover(id, index_is_id=True, as_path=True) - mi.has_cover = _('Yes') if self.has_cover(id) else '' + mi.cover = self.cover(idx, index_is_id=True, as_path=True) + mi.has_cover = _('Yes') if self.has_cover(idx) else '' return mi def has_book(self, mi): diff --git a/src/calibre/library/field_metadata.py b/src/calibre/library/field_metadata.py index 5b81f4a6a6..58914e7572 100644 --- a/src/calibre/library/field_metadata.py +++ b/src/calibre/library/field_metadata.py @@ -388,6 +388,7 @@ class FieldMetadata(dict): def __init__(self): self._field_metadata = copy.deepcopy(self._field_metadata_prototype) self._tb_cats = OrderedDict() + self._tb_custom_fields = {} self._search_term_map = {} self.custom_label_to_key_map = {} for k,v in self._field_metadata: @@ -477,10 +478,8 @@ class FieldMetadata(dict): yield (key, self._tb_cats[key]) def custom_iteritems(self): - for key in self._tb_cats: - fm = self._tb_cats[key] - if fm['is_custom']: - yield (key, self._tb_cats[key]) + for key, meta in self._tb_custom_fields.iteritems(): + yield (key, meta) def items(self): return list(self.iteritems()) @@ -516,6 +515,8 @@ class FieldMetadata(dict): return l def custom_field_metadata(self, include_composites=True): + if include_composites: + return self._tb_custom_fields l = {} for k in self.custom_field_keys(include_composites): l[k] = self._tb_cats[k] @@ -537,6 +538,7 @@ class FieldMetadata(dict): 'is_custom':True, 'is_category':is_category, 'link_column':'value','category_sort':'value', 'is_csp' : is_csp, 'is_editable': is_editable,} + self._tb_custom_fields[key] = self._tb_cats[key] self._add_search_terms_to_map(key, [key]) self.custom_label_to_key_map[label] = key if datatype == 'series': diff --git a/src/calibre/utils/formatter.py b/src/calibre/utils/formatter.py index b1224de3da..be17b01111 100644 --- a/src/calibre/utils/formatter.py +++ b/src/calibre/utils/formatter.py @@ -11,7 +11,8 @@ __docformat__ = 'restructuredtext en' import re, string, traceback from calibre.constants import DEBUG -from calibre.utils.formatter_functions import formatter_functions +from calibre.utils.formatter_functions import formatter_functions, compile_user_function +from calibre.utils.config import tweaks class _Parser(object): LEX_OP = 1 @@ -172,6 +173,130 @@ class _Parser(object): self.error(_('expression is not function or constant')) +class _CompileParser(_Parser): + def __init__(self, val, prog, parent, compile_text): + self.lex_pos = 0 + self.prog = prog[0] + self.prog_len = len(self.prog) + if prog[1] != '': + self.error(_('failed to scan program. Invalid input {0}').format(prog[1])) + self.parent = parent + parent.locals = {'$':val} + self.parent_kwargs = parent.kwargs + self.parent_book = parent.book + self.parent_locals = parent.locals + self.compile_text = compile_text + + def program(self): + if self.compile_text: + t = self.compile_text + self.compile_text = '\n' + self.max_level = 0 + val = self.statement() + if not self.token_is_eof(): + self.error(_('syntax error - program ends before EOF')) + if self.compile_text: + t += "\targs=[[]" + for i in range(0, self.max_level): + t += ", None" + t += ']' + self.compile_text = t + self.compile_text + "\treturn args[0][0]\n" + return val + + def statement(self, level=0): + while True: + val = self.expr(level) + if self.token_is_eof(): + return val + if not self.token_op_is_a_semicolon(): + return val + if self.compile_text: + self.compile_text += "\targs[%d] = list()\n"%(level,) + self.consume() + if self.token_is_eof(): + return val + + def expr(self, level): + if self.compile_text: + self.max_level = max(level, self.max_level) + + if self.token_is_id(): + funcs = formatter_functions().get_functions() + # We have an identifier. Determine if it is a function + id = self.token() + if not self.token_op_is_a_lparen(): + if self.token_op_is_a_equals(): + # classic assignment statement + self.consume() + cls = funcs['assign'] + if self.compile_text: + self.compile_text += '\targs[%d] = list()\n'%(level+1,) + val = cls.eval_(self.parent, self.parent_kwargs, + self.parent_book, self.parent_locals, id, self.expr(level+1)) + if self.compile_text: + self.compile_text += "\tlocals['%s'] = args[%d][0]\n"%(id, level+1) + self.compile_text += "\targs[%d].append(args[%d][0])\n"%(level, level+1) + return val + val = self.parent.locals.get(id, None) + if val is None: + self.error(_('Unknown identifier ') + id) + if self.compile_text: + self.compile_text += "\targs[%d].append(locals.get('%s'))\n"%(level, id) + return val + # We have a function. + # Check if it is a known one. We do this here so error reporting is + # better, as it can identify the tokens near the problem. + if id not in funcs: + self.error(_('unknown function {0}').format(id)) + + # Eat the paren + self.consume() + args = list() + if self.compile_text: + self.compile_text += '\targs[%d] = list()\n'%(level+1, ) + while not self.token_op_is_a_rparen(): + if id == 'assign' and len(args) == 0: + # Must handle the lvalue semantics of the assign function. + # The first argument is the name of the destination, not + # the value. + if not self.token_is_id(): + self.error('assign requires the first parameter be an id') + t = self.token() + args.append(t) + if self.compile_text: + self.compile_text += "\targs[%d].append('%s')\n"%(level+1, t) + else: + # evaluate the argument (recursive call) + args.append(self.statement(level=level+1)) + if not self.token_op_is_a_comma(): + break + self.consume() + if self.token() != ')': + self.error(_('missing closing parenthesis')) + + # Evaluate the function + cls = funcs[id] + if cls.arg_count != -1 and len(args) != cls.arg_count: + self.error('incorrect number of arguments for function {}'.format(id)) + if self.compile_text: + self.compile_text += ( + "\targs[%d].append(self.__funcs__['%s']" + ".evaluate(formatter, kwargs, book, locals, *args[%d]))\n")%(level, id, level+1) + return cls.eval_(self.parent, self.parent_kwargs, + self.parent_book, self.parent_locals, *args) + elif self.token_is_constant(): + # String or number + v = self.token() + if self.compile_text: + tv = v.replace("\\", "\\\\") + tv = tv.replace("'", "\\'") + self.compile_text += "\targs[%d].append('%s')\n"%(level, tv) + return v + else: + self.error(_('expression is not function or constant')) + +compile_counter = 0 + class TemplateFormatter(string.Formatter): ''' Provides a format function that substitutes '' for any missing value @@ -249,15 +374,36 @@ class TemplateFormatter(string.Formatter): # keep a cache of the lex'ed program under the theory that re-lexing # is much more expensive than the cache lookup. This is certainly true # for more than a few tokens, but it isn't clear for simple programs. - if column_name is not None and self.template_cache is not None: - lprog = self.template_cache.get(column_name, None) - if not lprog: + if tweaks['compile_gpm_templates']: + if column_name is not None and self.template_cache is not None: + lprog = self.template_cache.get(column_name, None) + if lprog: + return lprog.evaluate(self, self.kwargs, self.book, self.locals) lprog = self.lex_scanner.scan(prog) - self.template_cache[column_name] = lprog + compile_text = ('__funcs__ = formatter_functions().get_functions()\n' + 'def evaluate(self, formatter, kwargs, book, locals):\n' + ) + else: + lprog = self.lex_scanner.scan(prog) + compile_text = None + parser = _CompileParser(val, lprog, self, compile_text) + val = parser.program() + if parser.compile_text: + global compile_counter + compile_counter += 1 + f = compile_user_function("__A" + str(compile_counter), 'doc', -1, parser.compile_text) + self.template_cache[column_name] = f else: + if column_name is not None and self.template_cache is not None: + lprog = self.template_cache.get(column_name, None) + if not lprog: + lprog = self.lex_scanner.scan(prog) + self.template_cache[column_name] = lprog + else: lprog = self.lex_scanner.scan(prog) - parser = _Parser(val, lprog, self) - return parser.program() + parser = _Parser(val, lprog, self) + val = parser.program() + return val ################## Override parent classes methods ##################### diff --git a/src/calibre/utils/formatter_functions.py b/src/calibre/utils/formatter_functions.py index ec887887db..fcff101ad2 100644 --- a/src/calibre/utils/formatter_functions.py +++ b/src/calibre/utils/formatter_functions.py @@ -11,6 +11,7 @@ __docformat__ = 'restructuredtext en' import inspect, re, traceback from calibre import human_readable +from calibre.constants import DEBUG from calibre.utils.titlecase import titlecase from calibre.utils.icu import capitalize, strcmp, sort_key from calibre.utils.date import parse_date, format_date, now, UNDEFINED_DATE @@ -1156,11 +1157,14 @@ def compile_user_function(name, doc, arg_count, eval_func): for line in eval_func.splitlines()]) prog = ''' from calibre.utils.formatter_functions import FormatterUserFunction +from calibre.utils.formatter_functions import formatter_functions class UserFunction(FormatterUserFunction): ''' + func - locals = {} - exec prog in locals - cls = locals['UserFunction'](name, doc, arg_count, eval_func) + locals_ = {} + if DEBUG: + print prog + exec prog in locals_ + cls = locals_['UserFunction'](name, doc, arg_count, eval_func) return cls def load_user_template_functions(funcs):