Further sorting speedup and have GUI use new multisort when resorting.

This commit is contained in:
Kovid Goyal 2010-09-12 08:44:52 -06:00
commit 5208e1a713
5 changed files with 51 additions and 31 deletions

View File

@ -114,3 +114,11 @@ add_new_book_tags_when_importing_books = False
# Set the maximum number of tags to show per book in the content server # Set the maximum number of tags to show per book in the content server
max_content_server_tags_shown=5 max_content_server_tags_shown=5
# Set the maximum number of sort 'levels' that calibre will use to re-sort the
# library after certain operations such as searches or device insertion. Each
# sort level adds a performance penalty. If the database is large (thousands of
# books) the penalty might be noticeable. If you are not concerned about
# multi-level sorts and you are seeing a slowdown, reduce the value of this
# tweak.
maximum_resort_levels = 5

View File

@ -247,7 +247,7 @@ class BooksModel(QAbstractTableModel): # {{{
# the search and count records for restrictions # the search and count records for restrictions
self.searched.emit(True) self.searched.emit(True)
def sort(self, col, order, reset=True, update_history=True): def sort(self, col, order, reset=True):
if not self.db: if not self.db:
return return
self.about_to_be_sorted.emit(self.db.id) self.about_to_be_sorted.emit(self.db.id)
@ -258,21 +258,17 @@ class BooksModel(QAbstractTableModel): # {{{
self.clear_caches() self.clear_caches()
self.reset() self.reset()
self.sorted_on = (label, order) self.sorted_on = (label, order)
if update_history: self.sort_history.insert(0, self.sorted_on)
self.sort_history.insert(0, self.sorted_on)
self.sorting_done.emit(self.db.index) self.sorting_done.emit(self.db.index)
def refresh(self, reset=True): def refresh(self, reset=True):
self.db.refresh(field=None) self.db.refresh(field=None)
self.resort(reset=reset) self.resort(reset=reset)
def resort(self, reset=True, history=5): # Bug report needed history=4 :) def resort(self, reset=True):
for col,ord in reversed(self.sort_history[:history]): if not self.db:
try: return
col = self.column_map.index(col) self.db.multisort(self.sort_history[:tweaks['maximum_resort_levels']])
except ValueError:
col = 0
self.sort(col, ord, reset=False, update_history=False)
if reset: if reset:
self.reset() self.reset()
@ -1028,6 +1024,11 @@ class DeviceBooksModel(BooksModel): # {{{
if reset: if reset:
self.reset() self.reset()
# Re-apply the sort recorded in self.sorted_on by calling self.sort() again.
# When self.sorted_on is falsy nothing happens at all (sort() is not called,
# so `reset` has no effect in that case).
# NOTE(review): sorted_on is presumably a (column label, order) pair, as
# assigned in BooksModel.sort -- confirm for this subclass.
def resort(self, reset=True):
if self.sorted_on:
# sort() takes a column index, so the stored label is translated back via
# column_map. NOTE(review): the .index() lookup is unguarded here;
# presumably it raises ValueError if the label is no longer in column_map.
self.sort(self.column_map.index(self.sorted_on[0]),
self.sorted_on[1], reset=reset)
def columnCount(self, parent): def columnCount(self, parent):
if parent and parent.isValid(): if parent and parent.isValid():
return 0 return 0

View File

@ -112,7 +112,7 @@ class ResultCache(SearchQueryParser):
''' '''
def __init__(self, FIELD_MAP, field_metadata): def __init__(self, FIELD_MAP, field_metadata):
self.FIELD_MAP = FIELD_MAP self.FIELD_MAP = FIELD_MAP
self._map = self._map_filtered = self._data = [] self._map = self._data = self._map_filtered = []
self.first_sort = True self.first_sort = True
self.search_restriction = '' self.search_restriction = ''
self.field_metadata = field_metadata self.field_metadata = field_metadata
@ -480,8 +480,11 @@ class ResultCache(SearchQueryParser):
q = u'%s (%s)' % (search_restriction, query) q = u'%s (%s)' % (search_restriction, query)
if not q: if not q:
return list(self._map) return list(self._map)
matches = sorted(self.parse(q)) matches = self.parse(q)
return [id for id in self._map if id in matches] tmap = list(itertools.repeat(False, len(self._data)))
for x in matches:
tmap[x] = True
return [x for x in self._map if tmap[x]]
def set_search_restriction(self, s): def set_search_restriction(self, s):
self.search_restriction = s self.search_restriction = s
@ -490,10 +493,14 @@ class ResultCache(SearchQueryParser):
def remove(self, id): def remove(self, id):
self._data[id] = None self._data[id] = None
if id in self._map: try:
self._map.remove(id) self._map.remove(id)
if id in self._map_filtered: except ValueError:
pass
try:
self._map_filtered.remove(id) self._map_filtered.remove(id)
except ValueError:
pass
def set(self, row, col, val, row_is_id=False): def set(self, row, col, val, row_is_id=False):
id = row if row_is_id else self._map_filtered[row] id = row if row_is_id else self._map_filtered[row]
@ -548,9 +555,7 @@ class ResultCache(SearchQueryParser):
def books_deleted(self, ids): def books_deleted(self, ids):
for id in ids: for id in ids:
self._data[id] = None self.remove(id)
if id in self._map: self._map.remove(id)
if id in self._map_filtered: self._map_filtered.remove(id)
def count(self): def count(self):
return len(self._map) return len(self._map)
@ -644,28 +649,32 @@ class ResultCache(SearchQueryParser):
self.FIELD_MAP['series_index'], self.FIELD_MAP['series_index'],
library_order=tweaks['title_series_sorting'] == 'library_order') library_order=tweaks['title_series_sorting'] == 'library_order')
else: else:
fcmp = functools.partial(self.cmp, self.FIELD_MAP[field], fcmp = functools.partial(self.cmp, self.field_metadata[field]['rec_index'],
subsort=subsort, asstr=as_string) subsort=subsort, asstr=as_string)
self._map.sort(cmp=fcmp, reverse=not ascending) self._map.sort(cmp=fcmp, reverse=not ascending)
self._map_filtered = [id for id in self._map if id in self._map_filtered] tmap = list(itertools.repeat(False, len(self._data)))
for x in self._map_filtered:
tmap[x] = True
self._map_filtered = [x for x in self._map if tmap[x]]
def multisort(self, fields=[], subsort=False): def multisort(self, fields=[], subsort=False):
fields = [(self.sanitize_field_name(x), bool(y)) for x, y in fields] fields = [(self.sanitize_field_name(x), bool(y)) for x, y in fields]
keys = self.field_metadata.field_keys()
fields = [x for x in fields if x[0] in keys]
if subsort and 'sort' not in [x[0] for x in fields]: if subsort and 'sort' not in [x[0] for x in fields]:
fields += [('sort', True)] fields += [('sort', True)]
if not fields: if not fields:
fields = [('timestamp', False)] fields = [('timestamp', False)]
keys = self.field_metadata.keys()
for f, order in fields:
if f not in keys:
raise ValueError(f + ' not an existing field name')
keyg = SortKeyGenerator(fields, self.field_metadata, self._data) keyg = SortKeyGenerator(fields, self.field_metadata, self._data)
if len(fields) == 1: if len(fields) == 1:
self._map.sort(key=keyg, reverse=not fields[0][1]) self._map.sort(key=keyg, reverse=not fields[0][1])
else: else:
self._map.sort(key=keyg) self._map.sort(key=keyg)
self._map_filtered = [id for id in self._map if id in self._map_filtered] tmap = list(itertools.repeat(False, len(self._data)))
for x in self._map_filtered:
tmap[x] = True
self._map_filtered = [x for x in self._map if tmap[x]]
class SortKey(object): class SortKey(object):
@ -677,16 +686,14 @@ class SortKey(object):
for i, ascending in enumerate(self.orders): for i, ascending in enumerate(self.orders):
ans = cmp(self.values[i], other.values[i]) ans = cmp(self.values[i], other.values[i])
if ans != 0: if ans != 0:
if not ascending: return ans * ascending
ans *= -1
return ans
return 0 return 0
class SortKeyGenerator(object): class SortKeyGenerator(object):
def __init__(self, fields, field_metadata, data): def __init__(self, fields, field_metadata, data):
self.field_metadata = field_metadata self.field_metadata = field_metadata
self.orders = [x[1] for x in fields] self.orders = [-1 if x[1] else 1 for x in fields]
self.entries = [(x[0], field_metadata[x[0]]) for x in fields] self.entries = [(x[0], field_metadata[x[0]]) for x in fields]
self.library_order = tweaks['title_series_sorting'] == 'library_order' self.library_order = tweaks['title_series_sorting'] == 'library_order'
self.data = data self.data = data
@ -735,7 +742,7 @@ if __name__ == '__main__':
db.refresh() db.refresh()
fields = db.field_metadata.keys() fields = db.field_metadata.field_keys()
print fields print fields
@ -765,7 +772,7 @@ if __name__ == '__main__':
print 'Running single sort differentials' print 'Running single sort differentials'
for field in fields: for field in fields:
if field in ('search', 'id', 'news', 'flags'): continue if field in ('search', 'id', 'news', 'flags'): continue
print '\t', field print '\t', field, db.field_metadata[field]['datatype']
old, new = test_single_sort(field) old, new = test_single_sort(field)
if old[1] != new[1] or old[2] != new[2]: if old[1] != new[1] or old[2] != new[2]:
print '\t\t', 'Sort failure!' print '\t\t', 'Sort failure!'

View File

@ -311,6 +311,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.search_getting_ids = self.data.search_getting_ids self.search_getting_ids = self.data.search_getting_ids
self.refresh = functools.partial(self.data.refresh, self) self.refresh = functools.partial(self.data.refresh, self)
self.sort = self.data.sort self.sort = self.data.sort
self.multisort = self.data.multisort
self.index = self.data.index self.index = self.data.index
self.refresh_ids = functools.partial(self.data.refresh_ids, self) self.refresh_ids = functools.partial(self.data.refresh_ids, self)
self.row = self.data.row self.row = self.data.row

View File

@ -335,6 +335,9 @@ class FieldMetadata(dict):
def keys(self): def keys(self):
return self._tb_cats.keys() return self._tb_cats.keys()
# Return a list of the keys in self._tb_cats whose entry has its 'kind'
# item equal to 'field'; entries of any other kind are left out.
def field_keys(self):
return [k for k in self._tb_cats.keys() if self._tb_cats[k]['kind']=='field']
def iterkeys(self): def iterkeys(self):
for key in self._tb_cats: for key in self._tb_cats:
yield key yield key