New db backend: Sorting implemented with tests

This commit is contained in:
Kovid Goyal 2011-09-07 16:07:37 -06:00
parent fe27e08a83
commit 26a9197cfb
4 changed files with 148 additions and 25 deletions

View File

@ -200,7 +200,12 @@ class Cache(object):
``book_id``. If no such book exists or it has no defined value for the ``book_id``. If no such book exists or it has no defined value for the
field ``name`` or no such field exists, then ``default_value`` is returned. field ``name`` or no such field exists, then ``default_value`` is returned.
The returned value for is_multiple fields are always tuples. default_values is not used for title, title_sort, authors, author_sort
and series_index. This is because these always have values in the db.
default_value is used for all custom columns.
The returned value for is_multiple fields are always tuples, unless
default_value is returned.
''' '''
if self.composites and name in self.composites: if self.composites and name in self.composites:
return self.composite_for(name, book_id, return self.composite_for(name, book_id,
@ -254,7 +259,7 @@ class Cache(object):
''' '''
Frozen set of all known book ids. Frozen set of all known book ids.
''' '''
return frozenset(self.fields['uuid'].iter_book_ids()) return frozenset(self.fields['uuid'])
@read_api @read_api
def all_field_ids(self, name): def all_field_ids(self, name):
@ -348,17 +353,37 @@ class Cache(object):
@read_api
def multisort(self, fields, ids_to_sort=None):
    '''
    Return a list of sorted book ids. If ids_to_sort is None, all book ids
    are returned.

    fields must be a list of 2-tuples of the form (field_name,
    ascending=True or False). The most significant field is the first
    2-tuple.

    When exactly one field is given, the books are sorted directly on that
    field's sort keys. Otherwise the comparison is delegated to SortKey,
    which receives ``fields`` and the per-field sort key mappings.
    '''
    if ids_to_sort is None:
        ids_to_sort = self._all_book_ids()
    all_book_ids = frozenset(ids_to_sort)
    get_metadata = partial(self._get_metadata, get_user_categories=False)

    # title and authors are sorted via their companion sort fields
    sort_field_for = {'title':'sort', 'authors':'author_sort'}

    def keys_for(name):
        'Handle series type fields'
        source = self.fields[sort_field_for.get(name, name)]
        keys = source.sort_keys_for_books(get_metadata, all_book_ids)
        index_name = name + '_index'
        if index_name not in self.fields:
            return keys
        # A field with a matching <name>_index field sorts on the pair
        # (value key, index key)
        index_keys = self.fields[index_name].sort_keys_for_books(
                get_metadata, all_book_ids)
        return {book_id: (keys[book_id], index_keys[book_id])
                for book_id in keys}

    sort_keys = tuple(keys_for(spec[0]) for spec in fields)

    if len(sort_keys) != 1:
        return sorted(all_book_ids, key=partial(SortKey, fields, sort_keys))

    only_keys = sort_keys[0]
    descending = not fields[0][1]
    return sorted(all_book_ids, key=lambda book_id: only_keys[book_id],
            reverse=descending)

View File

@ -2,7 +2,7 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import, from __future__ import (unicode_literals, division, absolute_import,
print_function) print_function)
from future_builtins import map #from future_builtins import map
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
@ -12,6 +12,8 @@ from threading import Lock
from calibre.db.tables import ONE_ONE, MANY_ONE, MANY_MANY from calibre.db.tables import ONE_ONE, MANY_ONE, MANY_MANY
from calibre.utils.icu import sort_key from calibre.utils.icu import sort_key
from calibre.utils.date import UNDEFINED_DATE
from calibre.utils.localization import calibre_langcode_to_name
class Field(object): class Field(object):
@ -21,7 +23,16 @@ class Field(object):
'series', 'enumeration') 'series', 'enumeration')
self.table_type = self.table.table_type self.table_type = self.table.table_type
dt = self.metadata['datatype'] dt = self.metadata['datatype']
self._sort_key = (sort_key if dt == 'text' else lambda x: x) self._sort_key = (sort_key if dt in ('text', 'series', 'enumeration') else lambda x: x)
self._default_sort_key = ''
if self.metadata['datatype'] in ('int', 'float', 'rating'):
self._default_sort_key = 0
elif self.metadata['datatype'] == 'bool':
self._default_sort_key = None
elif self.metadata['datatype'] == 'datetime':
self._default_sort_key = UNDEFINED_DATE
if self.name == 'languages':
self._sort_key = lambda x:sort_key(calibre_langcode_to_name(x))
@property @property
def metadata(self): def metadata(self):
@ -63,7 +74,8 @@ class Field(object):
''' '''
Return a mapping of book_id -> sort_key. The sort key is suitable for Return a mapping of book_id -> sort_key. The sort key is suitable for
use in sorting the list of all books by this field, via the python cmp use in sorting the list of all books by this field, via the python cmp
method. method. all_book_ids is the list/set of book ids for which sort_keys
should be generated.
''' '''
raise NotImplementedError() raise NotImplementedError()
@ -83,8 +95,8 @@ class OneToOneField(Field):
return self.table.book_col_map.iterkeys() return self.table.book_col_map.iterkeys()
def sort_keys_for_books(self, get_metadata, all_book_ids):
    '''
    Return a mapping of book_id -> sort key for every id in all_book_ids.

    The value stored for the book in the table's book_col_map is passed
    through self._sort_key. Books with no stored value use
    self._default_sort_key as the value instead. get_metadata is accepted
    for interface compatibility and is not used here.
    '''
    values = self.table.book_col_map
    make_key = self._sort_key
    missing = self._default_sort_key
    result = {}
    for book_id in all_book_ids:
        result[book_id] = make_key(values.get(book_id, missing))
    return result
class CompositeField(OneToOneField): class CompositeField(OneToOneField):
@ -182,10 +194,12 @@ class ManyToOneField(Field):
return self.table.id_map.iterkeys() return self.table.id_map.iterkeys()
def sort_keys_for_books(self, get_metadata, all_book_ids):
    '''
    Return a mapping of book_id -> sort key for every id in all_book_ids.

    Each book maps to at most one item id via the table's book_col_map.
    The sort key of a book is self._sort_key applied to that item's value
    in the table's id_map. Books with no item get self._default_sort_key
    unchanged. get_metadata is accepted for interface compatibility and is
    not used here.

    Raises KeyError if a book refers to an item id missing from id_map.
    '''
    book_col_map = self.table.book_col_map
    id_map = self.table.id_map
    item_for_book = {id_: book_col_map.get(id_, None) for id_ in all_book_ids}
    # Compute each sort key once per distinct item, not once per book: many
    # books typically share the same item.
    sk_map = {cid: (self._default_sort_key if cid is None else
                    self._sort_key(id_map[cid]))
              for cid in set(item_for_book.values())}
    return {id_: sk_map[item_for_book[id_]] for id_ in item_for_book}
class ManyToManyField(Field): class ManyToManyField(Field):
@ -211,16 +225,17 @@ class ManyToManyField(Field):
return self.table.id_map.iterkeys() return self.table.id_map.iterkeys()
def sort_keys_for_books(self, get_metadata, all_book_ids):
    '''
    Return a mapping of book_id -> sort key for every id in all_book_ids.

    The sort key of a book is a tuple with one entry per item the book has
    in the table's book_col_map, in the order stored there. Each entry is
    self._sort_key applied to the item's value in the table's id_map. Books
    with no items get the 1-tuple (self._default_sort_key,). get_metadata
    is accepted for interface compatibility and is not used here.

    Raises KeyError if a book refers to an item id missing from id_map.
    '''
    book_col_map = self.table.book_col_map
    id_map = self.table.id_map
    ans = {id_: book_col_map.get(id_, ()) for id_ in all_book_ids}

    # Collect the distinct item ids in place; rebuilding the set with
    # union() for every book would copy it each time.
    all_cids = set()
    for cids in ans.values():
        all_cids.update(cids)

    # Each item's sort key is computed once and shared by all its books
    sk_map = {cid: self._sort_key(id_map[cid]) for cid in all_cids}
    default = (self._default_sort_key,)

    result = {}
    for id_ in ans:
        cids = ans[id_]
        result[id_] = tuple(sk_map[cid] for cid in cids) if cids else default
    return result
class IdentifiersField(ManyToManyField): class IdentifiersField(ManyToManyField):
@ -230,6 +245,15 @@ class IdentifiersField(ManyToManyField):
ids = default_value ids = default_value
return ids return ids
def sort_keys_for_books(self, get_metadata, all_book_ids):
    '''
    Sort by identifier keys.

    Return a mapping of book_id -> sort key for every id in all_book_ids.
    The sort key is the sorted tuple of the keys of the book's entry in the
    table's book_col_map. Books with no entry, or an empty one, get the
    1-tuple (self._default_sort_key,). get_metadata is accepted for
    interface compatibility and is not used here.
    '''
    book_col_map = self.table.book_col_map
    default = (self._default_sort_key,)
    ans = {}
    for id_ in all_book_ids:
        identifiers = book_col_map.get(id_, ())
        # Iterating a mapping yields its keys, so sorted() gives the sorted
        # identifier keys directly
        ans[id_] = tuple(sorted(identifiers)) if identifiers else default
    return ans
class AuthorsField(ManyToManyField): class AuthorsField(ManyToManyField):
def author_data(self, author_id): def author_data(self, author_id):

Binary file not shown.

View File

@ -39,8 +39,40 @@ class ReadingTest(unittest.TestCase):
shutil.rmtree(self.library_path) shutil.rmtree(self.library_path)
def test_read(self): # {{{ def test_read(self): # {{{
'Test the reading of data from the database'
cache = init_cache(self.library_path) cache = init_cache(self.library_path)
tests = { tests = {
3 : {
'title': 'Unknown',
'sort': 'Unknown',
'authors': ('Unknown',),
'author_sort': 'Unknown',
'series' : None,
'series_index': 1.0,
'rating': None,
'tags': None,
'identifiers': None,
'timestamp': datetime.datetime(2011, 9, 7, 13, 54, 41,
tzinfo=local_tz),
'pubdate': datetime.datetime(2011, 9, 7, 13, 54, 41,
tzinfo=local_tz),
'last_modified': datetime.datetime(2011, 9, 7, 13, 54, 41,
tzinfo=local_tz),
'publisher': None,
'languages': None,
'comments': None,
'#enum': None,
'#authors':None,
'#date':None,
'#rating':None,
'#series':None,
'#series_index': None,
'#tags':None,
'#yesno':None,
'#comments': None,
},
2 : { 2 : {
'title': 'Title One', 'title': 'Title One',
'sort': 'One', 'sort': 'One',
@ -74,10 +106,10 @@ class ReadingTest(unittest.TestCase):
'sort': 'Title Two', 'sort': 'Title Two',
'authors': ('Author Two', 'Author One'), 'authors': ('Author Two', 'Author One'),
'author_sort': 'Two, Author & One, Author', 'author_sort': 'Two, Author & One, Author',
'series' : 'Series Two', 'series' : 'Series One',
'series_index': 2.0, 'series_index': 2.0,
'rating': 6.0, 'rating': 6.0,
'tags': ('Tag Two',), 'tags': ('Tag One',),
'identifiers': {'test':'two'}, 'identifiers': {'test':'two'},
'timestamp': datetime.datetime(2011, 9, 6, 0, 0, 'timestamp': datetime.datetime(2011, 9, 6, 0, 0,
tzinfo=local_tz), tzinfo=local_tz),
@ -105,6 +137,48 @@ class ReadingTest(unittest.TestCase):
cache.field_for(field, book_id)) cache.field_for(field, book_id))
# }}} # }}}
def test_sorting(self): # {{{
    '''
    Test sorting

    For every field, check that an ascending single-field sort yields the
    expected order and that a descending sort yields its reverse. Then
    check a two-field sort over all books.
    '''
    cache = init_cache(self.library_path)
    expected_ascending = {
        'title' : [2, 1, 3],
        'authors': [2, 1, 3],
        'series' : [3, 2, 1],
        'tags' : [3, 1, 2],
        'rating' : [3, 2, 1],
        # 'identifiers': [3, 2, 1], There is no stable sort since 1 and
        # 2 have the same identifier keys
        # TODO: Add an empty book to the db and ensure that empty
        # fields sort the same as they do in db2
        'timestamp': [2, 1, 3],
        'pubdate' : [1, 2, 3],
        'publisher': [3, 2, 1],
        'last_modified': [2, 1, 3],
        'languages': [3, 2, 1],
        'comments': [3, 2, 1],
        '#enum' : [3, 2, 1],
        '#authors' : [3, 2, 1],
        '#date': [3, 1, 2],
        '#rating':[3, 2, 1],
        '#series':[3, 2, 1],
        '#tags':[3, 2, 1],
        '#yesno':[3, 1, 2],
        '#comments':[3, 2, 1],
    }
    for field in expected_ascending:
        ascending = expected_ascending[field]
        descending = list(reversed(ascending))
        # Feed each sort the ids in the opposite order, so an unsorted
        # pass-through cannot satisfy the assertion
        got = cache.multisort([(field, True)], ids_to_sort=descending)
        self.assertEqual(ascending, got,
                'Ascending sort of %s failed'%field)
        got = cache.multisort([(field, False)], ids_to_sort=ascending)
        self.assertEqual(descending, got,
                'Descending sort of %s failed'%field)

    # Test subsorting
    subsorted = cache.multisort([('identifiers', True), ('title', True)])
    self.assertEqual([3, 2, 1], subsorted, 'Subsort failed')
    # }}}
def tests():
    'Return a test suite built from the test methods of ReadingTest.'
    loader = unittest.TestLoader()
    return loader.loadTestsFromTestCase(ReadingTest)