From 23307b3aa50a2e5ebc68ec2a02e5af4258745a6e Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Thu, 30 Jun 2011 18:09:18 +0100 Subject: [PATCH 1/5] Add format_metadata to get_metadata using a cache. Add formatter functions to deal with the information. --- src/calibre/library/database2.py | 17 +++++++++-- src/calibre/utils/formatter_functions.py | 37 ++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 3ebd63afde..530e5d8adf 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -8,6 +8,7 @@ The database used to store ebook metadata ''' import os, sys, shutil, cStringIO, glob, time, functools, traceback, re, \ json, uuid, tempfile, hashlib +from collections import defaultdict import threading, random from itertools import repeat from math import ceil @@ -487,6 +488,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.refresh_ondevice = functools.partial(self.data.refresh_ondevice, self) self.refresh() self.last_update_check = self.last_modified() + self.format_metadata_cache = defaultdict(dict) def break_cycles(self): self.data.break_cycles() @@ -914,11 +916,15 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): mi.book_size = row[fm['size']] mi.ondevice_col= row[fm['ondevice']] mi.last_modified = row[fm['last_modified']] + id = idx if index_is_id else self.id(idx) formats = row[fm['formats']] + mi.format_metadata = {} if not formats: formats = None else: formats = formats.split(',') + for f in formats: + mi.format_metadata[f] = self.format_metadata(id, f, allow_cache=True) mi.formats = formats tags = row[fm['tags']] if tags: @@ -927,7 +933,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): if mi.series: mi.series_index = row[fm['series_index']] mi.rating = row[fm['rating']] - id = idx if index_is_id else self.id(idx) 
mi.set_identifiers(self.get_identifiers(id, index_is_id=True)) mi.application_id = id mi.id = id @@ -1126,13 +1131,16 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): if m: return m['mtime'] - def format_metadata(self, id_, fmt): + def format_metadata(self, id_, fmt, allow_cache=True): + if allow_cache and fmt in self.format_metadata_cache.get(id_, {}): + return self.format_metadata_cache[id_][fmt] path = self.format_abspath(id_, fmt, index_is_id=True) ans = {} if path is not None: stat = os.stat(path) ans['size'] = stat.st_size ans['mtime'] = utcfromtimestamp(stat.st_mtime) + self.format_metadata_cache[id_][fmt] = ans return ans def format_hash(self, id_, fmt): @@ -1254,6 +1262,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): ret.name = f.name else: ret = f.read() + try: + self.format_metadata(index if index_is_id else self.id(index), + format, allow_cache=False) + except: + traceback.print_exc() return ret def add_format_with_hooks(self, index, format, fpath, index_is_id=False, diff --git a/src/calibre/utils/formatter_functions.py b/src/calibre/utils/formatter_functions.py index 1684b9f85b..f3d8370895 100644 --- a/src/calibre/utils/formatter_functions.py +++ b/src/calibre/utils/formatter_functions.py @@ -519,6 +519,41 @@ class BuiltinSelect(BuiltinFormatterFunction): return v[len(key)+1:] return '' +class BuiltinFormatsModtimes(BuiltinFormatterFunction): + name = 'formats_modtimes' + arg_count = 0 + category = 'Get values from metadata' + __doc__ = doc = _('formats_modtimes() -- return a comma-separated list of ' + 'colon_separated items representing modification times ' + 'for the formats of a book. You can use the select ' + 'function to get the mod time for a specific ' + 'format. Note that format names are always uppercase, ' + 'as in EPUB.' 
+ ) + + def evaluate(self, formatter, kwargs, mi, locals): + fmt_data = mi.get('format_metadata', {}) + print fmt_data + return ','.join(k.upper()+':'+format_date(v['mtime'], 'iso') + for k,v in fmt_data.iteritems()) + +class BuiltinFormatsSizes(BuiltinFormatterFunction): + name = 'formats_sizes' + arg_count = 0 + category = 'Get values from metadata' + __doc__ = doc = _('formats_sizes() -- return a comma-separated list of ' + 'colon_separated items representing sizes ' + 'of the formats of a book. You can use the select ' + 'function to get the size for a specific ' + 'format. Note that format names are always uppercase, ' + 'as in EPUB.' + ) + + def evaluate(self, formatter, kwargs, mi, locals): + fmt_data = mi.get('format_metadata', {}) + print fmt_data + return ','.join(k.upper()+':'+str(v['size']) for k,v in fmt_data.iteritems()) + class BuiltinSublist(BuiltinFormatterFunction): name = 'sublist' arg_count = 4 @@ -814,6 +849,8 @@ builtin_eval = BuiltinEval() builtin_first_non_empty = BuiltinFirstNonEmpty() builtin_field = BuiltinField() builtin_format_date = BuiltinFormatDate() +builtin_formats_modt= BuiltinFormatsModtimes() +builtin_formats_size= BuiltinFormatsSizes() builtin_identifier_in_list = BuiltinIdentifierInList() builtin_ifempty = BuiltinIfempty() builtin_in_list = BuiltinInList() From 137093ebb9121b7a0f5bb8864b4c57b5ad68d9d5 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Thu, 30 Jun 2011 19:24:18 +0100 Subject: [PATCH 2/5] More changes for formats_ and numeric sorting --- src/calibre/library/caches.py | 9 +++- src/calibre/utils/formatter_functions.py | 66 +++++++++++++++++++----- 2 files changed, 62 insertions(+), 13 deletions(-) diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index b9dd2f3ed7..dad9ce0bae 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -1024,7 +1024,14 @@ class SortKeyGenerator(object): dt = 'datetime' elif sb == 'number': try: - val = float(val) + val = 
val.replace(',', '').strip() + p = 1 + for i, candidate in enumerate( + (' B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB')): + if val.endswith(candidate): + p = 1024**(i) + val = val[:-len(candidate)] + val = float(val) * p except: val = 0.0 dt = 'float' diff --git a/src/calibre/utils/formatter_functions.py b/src/calibre/utils/formatter_functions.py index f3d8370895..f484c617b9 100644 --- a/src/calibre/utils/formatter_functions.py +++ b/src/calibre/utils/formatter_functions.py @@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en' import inspect, re, traceback +from calibre import human_readable from calibre.utils.titlecase import titlecase from calibre.utils.icu import capitalize, strcmp, sort_key from calibre.utils.date import parse_date, format_date, now, UNDEFINED_DATE @@ -521,20 +522,21 @@ class BuiltinSelect(BuiltinFormatterFunction): class BuiltinFormatsModtimes(BuiltinFormatterFunction): name = 'formats_modtimes' - arg_count = 0 + arg_count = 1 category = 'Get values from metadata' - __doc__ = doc = _('formats_modtimes() -- return a comma-separated list of ' - 'colon_separated items representing modification times ' - 'for the formats of a book. You can use the select ' - 'function to get the mod time for a specific ' - 'format. Note that format names are always uppercase, ' - 'as in EPUB.' + __doc__ = doc = _('formats_modtimes(date_format) -- return a comma-separated ' + 'list of colon_separated items representing modification times ' + 'for the formats of a book. The date_format parameter ' + 'specifies how the date is to be formatted. See the ' + 'date_format function for details. You can use the select ' + 'function to get the mod time for a specific ' + 'format. Note that format names are always uppercase, ' + 'as in EPUB.' 
) - def evaluate(self, formatter, kwargs, mi, locals): + def evaluate(self, formatter, kwargs, mi, locals, fmt): fmt_data = mi.get('format_metadata', {}) - print fmt_data - return ','.join(k.upper()+':'+format_date(v['mtime'], 'iso') + return ','.join(k.upper()+':'+format_date(v['mtime'], fmt) for k,v in fmt_data.iteritems()) class BuiltinFormatsSizes(BuiltinFormatterFunction): @@ -551,9 +553,47 @@ class BuiltinFormatsSizes(BuiltinFormatterFunction): def evaluate(self, formatter, kwargs, mi, locals): fmt_data = mi.get('format_metadata', {}) - print fmt_data return ','.join(k.upper()+':'+str(v['size']) for k,v in fmt_data.iteritems()) +class BuiltinHumanReadable(BuiltinFormatterFunction): + name = 'human_readable' + arg_count = 1 + category = 'Formatting values' + __doc__ = doc = _('human_readable(v) -- return a string ' + 'representing the number v in KB, MB, GB, etc.' + ) + + def evaluate(self, formatter, kwargs, mi, locals, val): + try: + return human_readable(long(val)) + except: + return '' + +class BuiltinFormatNumber(BuiltinFormatterFunction): + name = 'format_number' + arg_count = 2 + category = 'Formatting values' + __doc__ = doc = _('format_number(v, template) -- format the number v using ' + 'a python formatting template such as "{0:5.2f}" or ' + '"{0:,d}" or "${0:5,.2f}". The field_name part of the ' + 'template must be a 0 (zero), as shown in the examples. See ' + 'the template language and python documentation for more ' + 'examples. Returns the empty string if formatting fails.' 
+ ) + + def evaluate(self, formatter, kwargs, mi, locals, val, template): + if val == '' or val == 'None': + return '' + try: + return template.format(float(val)) + except: + pass + try: + return template.format(int(val)) + except: + pass + return '' + class BuiltinSublist(BuiltinFormatterFunction): name = 'sublist' arg_count = 4 @@ -626,7 +666,7 @@ class BuiltinSubitems(BuiltinFormatterFunction): class BuiltinFormatDate(BuiltinFormatterFunction): name = 'format_date' arg_count = 2 - category = 'Date functions' + category = 'Formatting values' __doc__ = doc = _('format_date(val, format_string) -- format the value, ' 'which must be a date, using the format_string, returning a string. ' 'The formatting codes are: ' @@ -849,8 +889,10 @@ builtin_eval = BuiltinEval() builtin_first_non_empty = BuiltinFirstNonEmpty() builtin_field = BuiltinField() builtin_format_date = BuiltinFormatDate() +builtin_format_numb = BuiltinFormatNumber() builtin_formats_modt= BuiltinFormatsModtimes() builtin_formats_size= BuiltinFormatsSizes() +builtin_human_rable = BuiltinHumanReadable() builtin_identifier_in_list = BuiltinIdentifierInList() builtin_ifempty = BuiltinIfempty() builtin_in_list = BuiltinInList() From d967da0b30b04b7c352eb9210c805dbcad606e37 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Fri, 1 Jul 2011 11:07:46 +0100 Subject: [PATCH 3/5] Clear the format metadata cache for a book when a format is deleted. 
--- src/calibre/library/database2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 530e5d8adf..23642bcec7 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -1333,6 +1333,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): def remove_format(self, index, format, index_is_id=False, notify=True, commit=True, db_only=False): id = index if index_is_id else self.id(index) + del self.format_metadata_cache[id] name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False) if name: if not db_only: From 13d17b1f11b195dc7648dd5dc75a98dcf5235a0c Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Fri, 1 Jul 2011 11:22:52 +0100 Subject: [PATCH 4/5] Documentation of the new formatter functions --- src/calibre/manual/template_lang.rst | 4 ++++ src/calibre/utils/formatter_functions.py | 12 ++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/calibre/manual/template_lang.rst b/src/calibre/manual/template_lang.rst index f9824187e5..52bf095d01 100644 --- a/src/calibre/manual/template_lang.rst +++ b/src/calibre/manual/template_lang.rst @@ -124,6 +124,8 @@ The functions available are listed below. Note that the definitive documentation * ``capitalize()`` -- return the value with the first letter upper case and the rest lower case. * ``contains(pattern, text if match, text if not match)`` -- checks if field contains matches for the regular expression `pattern`. Returns `text if match` if matches are found, otherwise it returns `text if no match`. * ``count(separator)`` -- interprets the value as a list of items separated by `separator`, returning the number of items in the list. Most lists use a comma as the separator, but authors uses an ampersand. 
Examples: `{tags:count(,)}`, `{authors:count(&)}` + * ``format_number(template)`` -- interprets the value as a number and formats that number using a python formatting template such as "{0:5.2f}" or "{0:,d}" or "${0:5,.2f}". The field_name part of the template must be a 0 (zero) (the "{0:" in the above examples). See the template language and python documentation for more examples. Returns the empty string if formatting fails. + * ``human_readable()`` -- expects the value to be a number and returns a string representing that number in KB, MB, GB, etc. * ``ifempty(text)`` -- if the field is not empty, return the value of the field. Otherwise return `text`. * ``in_list(separator, pattern, found_val, not_found_val)`` -- interpret the field as a list of items separated by `separator`, comparing the `pattern` against each value in the list. If the pattern matches a value, return `found_val`, otherwise return `not_found_val`. * ``list_item(index, separator)`` -- interpret the field as a list of items separated by `separator`, returning the `index`th item. The first item is number zero. The last item can be returned using `list_item(-1,separator)`. If the item is not in the list, then the empty value is returned. The separator has the same meaning as in the `count` function. @@ -257,6 +259,8 @@ The following functions are available in addition to those described in single-f iso : the date with time and timezone. Must be the only format present. * ``eval(string)`` -- evaluates the string as a program, passing the local variables (those ``assign`` ed to). This permits using the template processor to construct complex results from local variables. + * ``formats_modtimes(date_format)`` -- return a comma-separated list of colon_separated items representing modification times for the formats of a book. The date_format parameter specifies how the date is to be formatted. See the format_date function for details. 
You can use the select function to get the mod time for a specific format. Note that format names are always uppercase, as in EPUB. + * ``formats_sizes()`` -- return a comma-separated list of colon_separated items representing sizes in bytes of the formats of a book. You can use the select function to get the size for a specific format. Note that format names are always uppercase, as in EPUB. * ``not(value)`` -- returns the string "1" if the value is empty, otherwise returns the empty string. This function works well with test or first_non_empty. You can have as many values as you want. * ``merge_lists(list1, list2, separator)`` -- return a list made by merging the items in list1 and list2, removing duplicate items using a case-insensitive compare. If items differ in case, the one in list1 is used. The items in list1 and list2 are separated by separator, as are the items in the returned list. * ``multiply(x, y)`` -- returns x * y. Throws an exception if either x or y are not numbers. diff --git a/src/calibre/utils/formatter_functions.py b/src/calibre/utils/formatter_functions.py index f484c617b9..654e171339 100644 --- a/src/calibre/utils/formatter_functions.py +++ b/src/calibre/utils/formatter_functions.py @@ -544,7 +544,7 @@ class BuiltinFormatsSizes(BuiltinFormatterFunction): arg_count = 0 category = 'Get values from metadata' __doc__ = doc = _('formats_sizes() -- return a comma-separated list of ' - 'colon_separated items representing sizes ' + 'colon_separated items representing sizes in bytes ' 'of the formats of a book. You can use the select ' 'function to get the size for a specific ' 'format. Note that format names are always uppercase, ' @@ -574,11 +574,11 @@ class BuiltinFormatNumber(BuiltinFormatterFunction): arg_count = 2 category = 'Formatting values' __doc__ = doc = _('format_number(v, template) -- format the number v using ' - 'a python formatting template such as "{0:5.2f}" or ' - '"{0:,d}" or "${0:5,.2f}". 
The field_name part of the ' - 'template must be a 0 (zero), as shown in the examples. See ' - 'the template language and python documentation for more ' - 'examples. Returns the empty string if formatting fails.' + 'a python formatting template such as "{0:5.2f}" or ' + '"{0:,d}" or "${0:5,.2f}". The field_name part of the ' + 'template must be a 0 (zero) (the "{0:" in the above examples). ' + 'See the template language and python documentation for more ' + 'examples. Returns the empty string if formatting fails.' ) def evaluate(self, formatter, kwargs, mi, locals, val, template): From 92ad849bc6c971a6935921d8946e7c696a651a75 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sat, 2 Jul 2011 17:47:48 +0100 Subject: [PATCH 5/5] Minor cleanup of the function list. Get rid of the useless variable declarations. --- src/calibre/utils/formatter_functions.py | 66 ++++++------------------ 1 file changed, 16 insertions(+), 50 deletions(-) diff --git a/src/calibre/utils/formatter_functions.py b/src/calibre/utils/formatter_functions.py index fa15498d1f..6916b0903a 100644 --- a/src/calibre/utils/formatter_functions.py +++ b/src/calibre/utils/formatter_functions.py @@ -886,56 +886,22 @@ class BuiltinDaysBetween(BuiltinFormatterFunction): i = d1 - d2 return str('%d.%d'%(i.days, i.seconds/8640)) - -builtin_add = BuiltinAdd() -builtin_and = BuiltinAnd() -builtin_assign = BuiltinAssign() -builtin_booksize = BuiltinBooksize() -builtin_capitalize = BuiltinCapitalize() -builtin_cmp = BuiltinCmp() -builtin_contains = BuiltinContains() -builtin_count = BuiltinCount() -builtin_days_between= BuiltinDaysBetween() -builtin_divide = BuiltinDivide() -builtin_eval = BuiltinEval() -builtin_first_non_empty = BuiltinFirstNonEmpty() -builtin_field = BuiltinField() -builtin_format_date = BuiltinFormatDate() -builtin_format_numb = BuiltinFormatNumber() -builtin_formats_modt= BuiltinFormatsModtimes() -builtin_formats_size= BuiltinFormatsSizes() -builtin_has_cover = BuiltinHasCover() 
-builtin_human_rable = BuiltinHumanReadable() -builtin_identifier_in_list = BuiltinIdentifierInList() -builtin_ifempty = BuiltinIfempty() -builtin_in_list = BuiltinInList() -builtin_list_item = BuiltinListitem() -builtin_lookup = BuiltinLookup() -builtin_lowercase = BuiltinLowercase() -builtin_merge_lists = BuiltinMergeLists() -builtin_multiply = BuiltinMultiply() -builtin_not = BuiltinNot() -builtin_ondevice = BuiltinOndevice() -builtin_or = BuiltinOr() -builtin_print = BuiltinPrint() -builtin_raw_field = BuiltinRawField() -builtin_re = BuiltinRe() -builtin_select = BuiltinSelect() -builtin_shorten = BuiltinShorten() -builtin_strcat = BuiltinStrcat() -builtin_strcmp = BuiltinStrcmp() -builtin_str_in_list = BuiltinStrInList() -builtin_subitems = BuiltinSubitems() -builtin_sublist = BuiltinSublist() -builtin_substr = BuiltinSubstr() -builtin_subtract = BuiltinSubtract() -builtin_swaparound = BuiltinSwapAroundComma() -builtin_switch = BuiltinSwitch() -builtin_template = BuiltinTemplate() -builtin_test = BuiltinTest() -builtin_titlecase = BuiltinTitlecase() -builtin_today = BuiltinToday() -builtin_uppercase = BuiltinUppercase() +formatter_builtins = [ + BuiltinAdd(), BuiltinAnd(), BuiltinAssign(), BuiltinBooksize(), + BuiltinCapitalize(), BuiltinCmp(), BuiltinContains(), BuiltinCount(), + BuiltinDaysBetween(), BuiltinDivide(), BuiltinEval(), + BuiltinFirstNonEmpty(), BuiltinField(), BuiltinFormatDate(), + BuiltinFormatNumber(), BuiltinFormatsModtimes(), BuiltinFormatsSizes(), + BuiltinHasCover(), BuiltinHumanReadable(), BuiltinIdentifierInList(), + BuiltinIfempty(), BuiltinInList(), BuiltinListitem(), BuiltinLookup(), + BuiltinLowercase(), BuiltinMergeLists(), BuiltinMultiply(), BuiltinNot(), + BuiltinOndevice(), BuiltinOr(), BuiltinPrint(), BuiltinRawField(), + BuiltinRe(), BuiltinSelect(), BuiltinShorten(), BuiltinStrcat(), + BuiltinStrcmp(), BuiltinStrInList(), BuiltinSubitems(), BuiltinSublist(), + BuiltinSubstr(), BuiltinSubtract(), BuiltinSwapAroundComma(), + 
BuiltinSwitch(), BuiltinTemplate(), BuiltinTest(), BuiltinTitlecase(), + BuiltinToday(), BuiltinUppercase(), +] class FormatterUserFunction(FormatterFunction): def __init__(self, name, doc, arg_count, program_text):