Lazy sort keys for calibre.db multisort.

Field.sort_keys_for_books() now returns a callable mapping book_id to a
sort key instead of a dict precomputed for every book id, and
Cache.multisort() evaluates sub-sort keys only when a comparison reaches
them.

NOTE(review): line breaks and indentation of this patch were reconstructed
from a whitespace-collapsed copy. Blank-line placement was chosen so that
each hunk matches its header counts; confirm against the upstream commit
before applying.

diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py
index dbc28a4200..18ef989988 100644
--- a/src/calibre/db/cache.py
+++ b/src/calibre/db/cache.py
@@ -10,7 +10,8 @@ __docformat__ = 'restructuredtext en'
 import os, traceback, random, shutil, re
 from io import BytesIO
 from collections import defaultdict
-from functools import wraps, partial
+from functools import wraps
+from future_builtins import zip
 
 from calibre import isbytestring
 from calibre.constants import iswindows, preferred_encoding
@@ -19,7 +20,7 @@ from calibre.db import SPOOL_SIZE, _get_next_series_num_for_list
 from calibre.db.categories import get_categories
 from calibre.db.locking import create_locks
 from calibre.db.errors import NoSuchFormat
-from calibre.db.fields import create_field
+from calibre.db.fields import create_field, IDENTITY
 from calibre.db.search import Search
 from calibre.db.tables import VirtualTable
 from calibre.db.write import get_series_values
@@ -804,42 +805,59 @@ class Cache(object):
         ascending=True or False). The most significant field is the first
         2-tuple.
         '''
-        all_book_ids = frozenset(self._all_book_ids() if ids_to_sort is None
-                else ids_to_sort)
-        ids_to_sort = all_book_ids if ids_to_sort is None else ids_to_sort
+        ids_to_sort = self._all_book_ids() if ids_to_sort is None else ids_to_sort
         get_metadata = self._get_proxy_metadata
         lang_map = self.fields['languages'].book_value_map
         virtual_fields = virtual_fields or {}
 
         fm = {'title':'sort', 'authors':'author_sort'}
 
-        def sort_key(field):
-            'Handle series type fields'
+        def sort_key_func(field):
+            'Handle series type fields, virtual fields and the id field'
             idx = field + '_index'
             is_series = idx in self.fields
             try:
-                ans = self.fields[fm.get(field, field)].sort_keys_for_books(
-                    get_metadata, lang_map, all_book_ids)
+                func = self.fields[fm.get(field, field)].sort_keys_for_books(get_metadata, lang_map)
             except KeyError:
                 if field == 'id':
-                    ans = {bid:bid for bid in all_book_ids}
+                    return IDENTITY
                 else:
-                    ans = virtual_fields[fm.get(field, field)].sort_keys_for_books(
-                        get_metadata, lang_map, all_book_ids)
+                    return virtual_fields[fm.get(field, field)].sort_keys_for_books(get_metadata, lang_map)
             if is_series:
-                idx_ans = self.fields[idx].sort_keys_for_books(
-                    get_metadata, lang_map, all_book_ids)
-                ans = {k:(v, idx_ans[k]) for k, v in ans.iteritems()}
-            return ans
+                idx_func = self.fields[idx].sort_keys_for_books(get_metadata, lang_map)
+                def skf(book_id):
+                    return (func(book_id), idx_func(book_id))
+                return skf
+            return func
 
-        sort_keys = tuple(sort_key(field[0]) for field in fields)
+        if len(fields) == 1:
+            return sorted(ids_to_sort, key=sort_key_func(fields[0][0]),
+                          reverse=not fields[0][1])
+        sort_key_funcs = tuple(sort_key_func(field) for field, order in fields)
+        orders = tuple(1 if order else -1 for _, order in fields)
+        Lazy = object() # Lazy load the sort keys for sub-sort fields
 
-        if len(sort_keys) == 1:
-            sk = sort_keys[0]
-            return sorted(ids_to_sort, key=lambda i:sk[i], reverse=not
-                    fields[0][1])
-        else:
-            return sorted(ids_to_sort, key=partial(SortKey, fields, sort_keys))
+        class SortKey(object):
+
+            __slots__ = ('book_id', 'sort_key')
+
+            def __init__(self, book_id):
+                self.book_id = book_id
+                # Calculate only the first sub-sort key since that will always be used
+                self.sort_key = [key(book_id) if i == 0 else Lazy for i, key in enumerate(sort_key_funcs)]
+
+            def __cmp__(self, other):
+                for i, (order, self_key, other_key) in enumerate(zip(orders, self.sort_key, other.sort_key)):
+                    if self_key is Lazy:
+                        self_key = self.sort_key[i] = sort_key_funcs[i](self.book_id)
+                    if other_key is Lazy:
+                        other_key = other.sort_key[i] = sort_key_funcs[i](other.book_id)
+                    ans = cmp(self_key, other_key)
+                    if ans != 0:
+                        return ans * order
+                return 0
+
+        return sorted(ids_to_sort, key=SortKey)
 
     @read_api
     def search(self, query, restriction='', virtual_fields=None, book_ids=None):
@@ -1713,17 +1731,3 @@ class Cache(object):
 
     # }}}
 
-class SortKey(object): # {{{
-
-    def __init__(self, fields, sort_keys, book_id):
-        self.orders = tuple(1 if f[1] else -1 for f in fields)
-        self.sort_key = tuple(sk[book_id] for sk in sort_keys)
-
-    def __cmp__(self, other):
-        for i, order in enumerate(self.orders):
-            ans = cmp(self.sort_key[i], other.sort_key[i])
-            if ans != 0:
-                return ans * order
-        return 0
-# }}}
-
diff --git a/src/calibre/db/fields.py b/src/calibre/db/fields.py
index fc7bee2c51..b59fc58608 100644
--- a/src/calibre/db/fields.py
+++ b/src/calibre/db/fields.py
@@ -25,6 +25,8 @@ from calibre.utils.localization import calibre_langcode_to_name
 def bool_sort_key(bools_are_tristate):
     return (lambda x:{True: 1, False: 2, None: 3}.get(x, 3)) if bools_are_tristate else lambda x:{True: 1, False: 2, None: 2}.get(x, 2)
 
+IDENTITY = lambda x: x
+
 class Field(object):
 
     is_many = False
@@ -36,7 +38,7 @@ class Field(object):
         dt = self.metadata['datatype']
         self.has_text_data = dt in {'text', 'comments', 'series', 'enumeration'}
         self.table_type = self.table.table_type
-        self._sort_key = (sort_key if dt in ('text', 'series', 'enumeration') else lambda x: x)
+        self._sort_key = (sort_key if dt in ('text', 'series', 'enumeration') else IDENTITY)
 
         # This will be compared to the output of sort_key() which is a
         # bytestring, therefore it is safer to have it be a bytestring.
@@ -112,12 +114,11 @@ class Field(object):
         '''
         return iter(())
 
-    def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
+    def sort_keys_for_books(self, get_metadata, lang_map):
         '''
-        Return a mapping of book_id -> sort_key. The sort key is suitable for
+        Return a function that maps book_id to sort_key. The sort key is suitable for
         use in sorting the list of all books by this field, via the python cmp
-        method. all_book_ids is the list/set of book ids for which sort_keys
-        should be generated.
+        method.
         '''
         raise NotImplementedError()
 
@@ -165,9 +166,13 @@ class OneToOneField(Field):
     def __iter__(self):
         return self.table.book_col_map.iterkeys()
 
-    def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
-        return {id_: self._sort_key(self.table.book_col_map.get(id_,
-            self._default_sort_key)) for id_ in all_book_ids}
+    def sort_keys_for_books(self, get_metadata, lang_map):
+        bcmg = self.table.book_col_map.get
+        dk = self._default_sort_key
+        sk = self._sort_key
+        if sk is IDENTITY:
+            return lambda book_id:bcmg(book_id, dk)
+        return lambda book_id:sk(bcmg(book_id, dk))
 
     def iter_searchable_values(self, get_metadata, candidates, default_value=None):
         cbm = self.table.book_col_map
@@ -263,9 +268,12 @@ class CompositeField(OneToOneField):
                 self._render_cache[book_id] = ans
         return ans
 
-    def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
-        return {id_: self._sort_key(self.get_value_with_cache(id_, get_metadata)) for id_ in
-                all_book_ids}
+    def sort_keys_for_books(self, get_metadata, lang_map):
+        gv = self.get_value_with_cache
+        sk = self._sort_key
+        if sk is IDENTITY:
+            return lambda book_id:gv(book_id, get_metadata)
+        return lambda book_id:sk(gv(book_id, get_metadata))
 
     def iter_searchable_values(self, get_metadata, candidates, default_value=None):
         val_map = defaultdict(set)
@@ -362,9 +370,8 @@ class OnDeviceField(OneToOneField):
     def __iter__(self):
         return iter(())
 
-    def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
-        return {id_: self.for_book(id_) for id_ in
-                all_book_ids}
+    def sort_keys_for_books(self, get_metadata, lang_map):
+        return self.for_book
 
     def iter_searchable_values(self, get_metadata, candidates, default_value=None):
         val_map = defaultdict(set)
@@ -373,6 +380,27 @@ class OnDeviceField(OneToOneField):
         for val, book_ids in val_map.iteritems():
             yield val, book_ids
 
+class LazySortMap(object):
+
+    __slots__ = ('default_sort_key', 'sort_key_func', 'id_map', 'cache')
+
+    def __init__(self, default_sort_key, sort_key_func, id_map):
+        self.default_sort_key = default_sort_key
+        self.sort_key_func = sort_key_func
+        self.id_map = id_map
+        self.cache = {None:default_sort_key}
+
+    def __call__(self, item_id):
+        try:
+            return self.cache[item_id]
+        except KeyError:
+            try:
+                val = self.cache[item_id] = self.sort_key_func(self.id_map[item_id])
+            except KeyError:
+                val = self.cache[item_id] = self.default_sort_key
+            return val
+
+
 class ManyToOneField(Field):
 
     is_many = True
@@ -397,13 +425,10 @@ class ManyToOneField(Field):
     def __iter__(self):
         return self.table.id_map.iterkeys()
 
-    def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
-        ans = {id_: self.table.book_col_map.get(id_, None)
-                for id_ in all_book_ids}
-        sk_map = {cid: (self._default_sort_key if cid is None else
-                self._sort_key(self.table.id_map[cid]))
-                for cid in ans.itervalues()}
-        return {id_: sk_map[cid] for id_, cid in ans.iteritems()}
+    def sort_keys_for_books(self, get_metadata, lang_map):
+        sk_map = LazySortMap(self._default_sort_key, self._sort_key, self.table.id_map)
+        bcmg = self.table.book_col_map.get
+        return lambda book_id:sk_map(bcmg(book_id, None))
 
     def iter_searchable_values(self, get_metadata, candidates, default_value=None):
         cbm = self.table.col_book_map
@@ -447,17 +472,17 @@ class ManyToManyField(Field):
     def __iter__(self):
         return self.table.id_map.iterkeys()
 
-    def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
-        ans = {id_: self.table.book_col_map.get(id_, ())
-                for id_ in all_book_ids}
-        all_cids = set()
-        for cids in ans.itervalues():
-            all_cids = all_cids.union(set(cids))
-        sk_map = {cid: self._sort_key(self.table.id_map[cid]) for cid in all_cids}
-        sort_func = (lambda x:tuple(sorted(x))) if self.sort_sort_key else tuple
-        return {id_: (sort_func(sk_map[cid] for cid in cids) if cids else
-            (self._default_sort_key,))
-            for id_, cids in ans.iteritems()}
+    def sort_keys_for_books(self, get_metadata, lang_map):
+        sk_map = LazySortMap(self._default_sort_key, self._sort_key, self.table.id_map)
+        bcmg = self.table.book_col_map.get
+        dsk = (self._default_sort_key,)
+        if self.sort_sort_key:
+            def sk(book_id):
+                return tuple(sorted(sk_map(x) for x in bcmg(book_id, ()))) or dsk
+        else:
+            def sk(book_id):
+                return tuple(sk_map(x) for x in bcmg(book_id, ())) or dsk
+        return sk
 
     def iter_searchable_values(self, get_metadata, candidates, default_value=None):
         cbm = self.table.col_book_map
@@ -491,13 +516,11 @@ class IdentifiersField(ManyToManyField):
             ids = default_value
         return ids
 
-    def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
+    def sort_keys_for_books(self, get_metadata, lang_map):
         'Sort by identifier keys'
-        ans = {id_: self.table.book_col_map.get(id_, ())
-                for id_ in all_book_ids}
-        return {id_: (tuple(sorted(cids.iterkeys())) if cids else
-            (self._default_sort_key,))
-            for id_, cids in ans.iteritems()}
+        bcmg = self.table.book_col_map.get
+        dv = {self._default_sort_key:None}
+        return lambda book_id: tuple(sorted(bcmg(book_id, dv).iterkeys()))
 
     def iter_searchable_values(self, get_metadata, candidates, default_value=()):
         bcm = self.table.book_col_map
@@ -566,22 +589,43 @@ class FormatsField(ManyToManyField):
                 ans.append(c)
         return ans
 
+class LazySeriesSortMap(object):
+
+    __slots__ = ('default_sort_key', 'sort_key_func', 'id_map', 'cache')
+
+    def __init__(self, default_sort_key, sort_key_func, id_map):
+        self.default_sort_key = default_sort_key
+        self.sort_key_func = sort_key_func
+        self.id_map = id_map
+        self.cache = {}
+
+    def __call__(self, item_id, lang):
+        try:
+            return self.cache[(item_id, lang)]
+        except KeyError:
+            try:
+                val = self.cache[(item_id, lang)] = self.sort_key_func(self.id_map[item_id], lang)
+            except KeyError:
+                val = self.cache[(item_id, lang)] = self.default_sort_key
+            return val
+
 class SeriesField(ManyToOneField):
 
-    def sort_key_for_series(self, book_id, lang_map, series_sort_order):
-        sid = self.table.book_col_map.get(book_id, None)
-        if sid is None:
-            return self._default_sort_key
-        lang = lang_map.get(book_id, None) or None
-        if lang:
-            lang = lang[0]
-        return self._sort_key(title_sort(self.table.id_map[sid],
-                                         order=series_sort_order, lang=lang))
-
-    def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
+    def sort_keys_for_books(self, get_metadata, lang_map):
         sso = tweaks['title_series_sorting']
-        return {book_id:self.sort_key_for_series(book_id, lang_map, sso) for book_id
-                in all_book_ids}
+        ssk = self._sort_key
+        ts = title_sort
+        def sk(val, lang):
+            return ssk(ts(val, order=sso, lang=lang))
+        sk_map = LazySeriesSortMap(self._default_sort_key, sk, self.table.id_map)
+        bcmg = self.table.book_col_map.get
+        lang_map = {k:v[0] if v else None for k, v in lang_map.iteritems()}
+
+        def key(book_id):
+            lang = lang_map.get(book_id, None)
+            return sk_map(bcmg(book_id, None), lang)
+
+        return key
 
     def category_sort_value(self, item_id, book_ids, lang_map):
         lang = None
diff --git a/src/calibre/db/tests/reading.py b/src/calibre/db/tests/reading.py
index 2dcd519cad..101d1b12cc 100644
--- a/src/calibre/db/tests/reading.py
+++ b/src/calibre/db/tests/reading.py
@@ -126,6 +126,7 @@ class ReadingTest(BaseTest):
     def test_sorting(self): # {{{
         'Test sorting'
         cache = self.init_cache()
+        ae = self.assertEqual
         for field, order in {
             'title' : [2, 1, 3],
             'authors': [2, 1, 3],
@@ -151,49 +152,63 @@ class ReadingTest(BaseTest):
             '#comments':[3, 2, 1],
         }.iteritems():
             x = list(reversed(order))
-            self.assertEqual(order, cache.multisort([(field, True)],
+            ae(order, cache.multisort([(field, True)],
                 ids_to_sort=x), 'Ascending sort of %s failed'%field)
-            self.assertEqual(x, cache.multisort([(field, False)],
+            ae(x, cache.multisort([(field, False)],
                 ids_to_sort=order), 'Descending sort of %s failed'%field)
 
-        # Test subsorting
-        self.assertEqual([3, 2, 1], cache.multisort([('identifiers', True),
-            ('title', True)]), 'Subsort failed')
-
         # Test sorting of is_multiple fields.
 
         # Author like fields should be sorted by generating sort names from the
         # actual values in entry order
         for field in ('authors', '#authors'):
-            self.assertEqual(
+            ae(
                 cache.set_field(field, {1:('aa bb', 'bb cc', 'cc dd'), 2:('bb aa', 'xx yy'), 3: ('aa bb', 'bb aa')}),
                 {1, 2, 3})
-            self.assertEqual([2, 3, 1], cache.multisort([(field, True)], ids_to_sort=(1, 2, 3)))
-            self.assertEqual([1, 3, 2], cache.multisort([(field, False)], ids_to_sort=(1, 2, 3)))
+            ae([2, 3, 1], cache.multisort([(field, True)], ids_to_sort=(1, 2, 3)))
+            ae([1, 3, 2], cache.multisort([(field, False)], ids_to_sort=(1, 2, 3)))
 
         # All other is_multiple fields should be sorted by sorting the values
         # for each book and using that as the sort key
         for field in ('tags', '#tags'):
-            self.assertEqual(
+            ae(
                 cache.set_field(field, {1:('b', 'a'), 2:('c', 'y'), 3: ('b', 'z')}),
                 {1, 2, 3})
-            self.assertEqual([1, 3, 2], cache.multisort([(field, True)], ids_to_sort=(1, 2, 3)))
-            self.assertEqual([2, 3, 1], cache.multisort([(field, False)], ids_to_sort=(1, 2, 3)))
+            ae([1, 3, 2], cache.multisort([(field, True)], ids_to_sort=(1, 2, 3)))
+            ae([2, 3, 1], cache.multisort([(field, False)], ids_to_sort=(1, 2, 3)))
 
         # Test tweak to sort dates by visible format
         from calibre.utils.date import parse_only_date as p
         from calibre.utils.config_base import Tweak
-        self.assertEqual(cache.set_field('pubdate', {1:p('2001-3-3'), 2:p('2002-2-3'), 3:p('2003-1-3')}), {1, 2, 3})
-        self.assertEqual([1, 2, 3], cache.multisort([('pubdate', True)]))
+        ae(cache.set_field('pubdate', {1:p('2001-3-3'), 2:p('2002-2-3'), 3:p('2003-1-3')}), {1, 2, 3})
+        ae([1, 2, 3], cache.multisort([('pubdate', True)]))
         with Tweak('gui_pubdate_display_format', 'MMM'), Tweak('sort_dates_using_visible_fields', True):
             c2 = self.init_cache()
-            self.assertEqual([3, 2, 1], c2.multisort([('pubdate', True)]))
+            ae([3, 2, 1], c2.multisort([('pubdate', True)]))
 
         # Test bool sorting when not tristate
         cache.set_pref('bools_are_tristate', False)
         c2 = self.init_cache()
-        self.assertEqual([2, 3, 1], c2.multisort([('#yesno', True), ('id', False)]))
+        ae([2, 3, 1], c2.multisort([('#yesno', True), ('id', False)]))
 
+        # Test subsorting
+        ae([3, 2, 1], cache.multisort([('identifiers', True),
+            ('title', True)]), 'Subsort failed')
+        from calibre.ebooks.metadata.book.base import Metadata
+        for i in xrange(7):
+            cache.create_book_entry(Metadata('title%d' % i), apply_import_tags=False)
+        cache.create_custom_column('one', 'CC1', 'int', False)
+        cache.create_custom_column('two', 'CC2', 'int', False)
+        cache.create_custom_column('three', 'CC3', 'int', False)
+        cache.close()
+        cache = self.init_cache()
+        cache.set_field('#one', {(i+(5*m)):m for m in (0, 1) for i in xrange(1, 6)})
+        cache.set_field('#two', {i+(m*3):m for m in (0, 1, 2) for i in (1, 2, 3)})
+        cache.set_field('#two', {10:2})
+        cache.set_field('#three', {i:i for i in xrange(1, 11)})
+        ae(list(xrange(1, 11)), cache.multisort([('#one', True), ('#two', True)], ids_to_sort=sorted(cache.all_book_ids())))
+        ae([4, 5, 1, 2, 3, 7,8, 9, 10, 6], cache.multisort([('#one', True), ('#two', False)], ids_to_sort=sorted(cache.all_book_ids())))
+        ae([5, 4, 3, 2, 1, 10, 9, 8, 7, 6], cache.multisort([('#one', True), ('#two', False), ('#three', False)], ids_to_sort=sorted(cache.all_book_ids())))
     # }}}
 
     def test_get_metadata(self): # {{{
diff --git a/src/calibre/db/view.py b/src/calibre/db/view.py
index 43243318d5..04f205f21f 100644
--- a/src/calibre/db/view.py
+++ b/src/calibre/db/view.py
@@ -30,8 +30,9 @@ class MarkedVirtualField(object):
         for book_id in candidates:
             yield self.marked_ids.get(book_id, default_value), {book_id}
 
-    def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
-        return {bid:self.marked_ids.get(bid, None) for bid in all_book_ids}
+    def sort_keys_for_books(self, get_metadata, lang_map):
+        g = self.marked_ids.get
+        return lambda book_id:g(book_id, None)
 
 
 class TableRow(object):