mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Much faster sorting code
This commit is contained in:
parent
e531b51767
commit
7382552d18
@ -607,16 +607,22 @@ class ResultCache(SearchQueryParser):
|
|||||||
y = UNDEFINED_DATE
|
y = UNDEFINED_DATE
|
||||||
return cmp(x, y)
|
return cmp(x, y)
|
||||||
if subsort and ans == 0:
|
if subsort and ans == 0:
|
||||||
return cmp(self._data[x][11].lower(), self._data[y][11].lower())
|
idx = self.FIELD_MAP['sort']
|
||||||
|
return cmp(self._data[x][idx].lower(), self._data[y][idx].lower())
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def sort(self, field, ascending, subsort=False):
|
def sanitize_field_name(self, field):
|
||||||
field = field.lower().strip()
|
field = field.lower().strip()
|
||||||
if field in ('author', 'tag', 'comment'):
|
if field not in self.field_metadata.iterkeys():
|
||||||
field += 's'
|
if field in ('author', 'tag', 'comment'):
|
||||||
if field == 'date': field = 'timestamp'
|
field += 's'
|
||||||
elif field == 'title': field = 'sort'
|
if field == 'date': field = 'timestamp'
|
||||||
elif field == 'authors': field = 'author_sort'
|
elif field == 'title': field = 'sort'
|
||||||
|
elif field == 'authors': field = 'author_sort'
|
||||||
|
return field
|
||||||
|
|
||||||
|
def sort(self, field, ascending, subsort=False):
|
||||||
|
field = self.sanitize_field_name(field)
|
||||||
as_string = field not in ('size', 'rating', 'timestamp')
|
as_string = field not in ('size', 'rating', 'timestamp')
|
||||||
|
|
||||||
if self.first_sort:
|
if self.first_sort:
|
||||||
@ -643,6 +649,164 @@ class ResultCache(SearchQueryParser):
|
|||||||
self._map.sort(cmp=fcmp, reverse=not ascending)
|
self._map.sort(cmp=fcmp, reverse=not ascending)
|
||||||
self._map_filtered = [id for id in self._map if id in self._map_filtered]
|
self._map_filtered = [id for id in self._map if id in self._map_filtered]
|
||||||
|
|
||||||
|
def multisort(self, fields=(), subsort=False):
    '''
    Sort the id map in place by several fields in a single pass.

    :param fields: Iterable of ``(field_name, ascending)`` pairs, most
        significant first. Each name is passed through
        :meth:`sanitize_field_name` and each flag is coerced to bool. If no
        sort fields result, the map is sorted by ``timestamp``, descending.
    :param subsort: If True and ``sort`` is not already one of the
        requested fields, an ascending ``sort`` key is appended as the
        last sort field.
    :raises ValueError: If a sanitized field name is not one of the keys of
        ``self.field_metadata``.
    '''
    fields = [(self.sanitize_field_name(x), bool(y)) for x, y in fields]
    if subsort and 'sort' not in [x[0] for x in fields]:
        fields += [('sort', True)]
    if not fields:
        fields = [('timestamp', False)]
    keys = self.field_metadata.keys()
    for f, order in fields:
        if f not in keys:
            raise ValueError(f + ' not an existing field name')

    keyg = SortKeyGenerator(fields, self.field_metadata, self._data)
    if len(fields) == 1:
        # With a single field the key generator returns the bare value, so
        # the direction has to be applied by the sort itself.
        self._map.sort(key=keyg, reverse=not fields[0][1])
    else:
        # With several fields the per-field direction is handled by the
        # key objects the generator returns.
        self._map.sort(key=keyg)

    # Re-derive the filtered view in the new order. Membership is tested
    # against a set built once instead of scanning the old list for every
    # id. NOTE(review): assumes the ids in _map_filtered are hashable
    # (they are used as indices into self._data) -- confirm.
    filtered = set(self._map_filtered)
    self._map_filtered = [book_id for book_id in self._map
                          if book_id in filtered]
|
||||||
|
|
||||||
|
|
||||||
|
class SortKey(object):
    '''
    Composite sort key: a sequence of values compared position by position,
    each position having its own ascending/descending flag.
    '''

    def __init__(self, orders, values):
        # orders[i] is truthy when position i sorts ascending.
        self.orders = orders
        # values[i] is the value compared at position i.
        self.values = values

    def __cmp__(self, other):
        # The first position whose values differ decides the result; its
        # sign is flipped when that position sorts descending.
        for pos, is_ascending in enumerate(self.orders):
            result = cmp(self.values[pos], other.values[pos])
            if result == 0:
                continue
            return result if is_ascending else -result
        # Every position compared equal.
        return 0
|
||||||
|
|
||||||
|
class SortKeyGenerator(object):
    '''
    Callable suitable as the ``key`` argument of ``list.sort``: maps an
    index into ``data`` to the sort key for that record.
    '''

    def __init__(self, fields, field_metadata, data):
        # fields: sequence of (field name, ascending flag) entries.
        self.field_metadata = field_metadata
        self.orders = [entry[1] for entry in fields]
        self.entries = [(entry[0], field_metadata[entry[0]])
                        for entry in fields]
        self.library_order = (
            tweaks['title_series_sorting'] == 'library_order')
        self.data = data

    def __call__(self, record):
        values = tuple(self.itervals(self.data[record]))
        # A single field needs no wrapper object: the bare value is the key.
        return values[0] if len(values) == 1 else SortKey(self.orders, values)

    def itervals(self, record):
        '''
        Yield one comparable value per configured field for ``record``,
        normalised according to the field's ``datatype``.
        '''
        for field_name, meta in self.entries:
            datatype = meta['datatype']
            value = record[meta['rec_index']]

            if datatype == 'datetime':
                # Missing dates are replaced by the UNDEFINED_DATE sentinel.
                value = UNDEFINED_DATE if value is None else value

            elif datatype == 'series':
                if value is None:
                    value = ('', 1)
                else:
                    series = value.lower()
                    if self.library_order:
                        series = title_sort(series)
                    # Pair the series name with the value of the companion
                    # '<name>_index' field of the same record.
                    index_meta = self.field_metadata[field_name + '_index']
                    value = (series, record[index_meta['rec_index']])

            elif datatype in ('text', 'comments'):
                # Lower-cased comparison; a missing value becomes ''.
                value = ('' if value is None else value).lower()

            yield value
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
# Testing.timing for new multi-sort {{{
|
||||||
|
import time
|
||||||
|
|
||||||
|
from calibre.library import db
|
||||||
|
db = db()
|
||||||
|
|
||||||
|
db.refresh()
|
||||||
|
|
||||||
|
fields = db.field_metadata.keys()
|
||||||
|
|
||||||
|
print fields
|
||||||
|
|
||||||
|
|
||||||
|
def do_single_sort(meth, field, order):
|
||||||
|
if meth == 'old':
|
||||||
|
db.data.sort(field, order)
|
||||||
|
else:
|
||||||
|
db.data.multisort([(field, order)])
|
||||||
|
|
||||||
|
def test_single_sort(field):
|
||||||
|
for meth in ('old', 'new'):
|
||||||
|
ttime = 0
|
||||||
|
NUM = 10
|
||||||
|
asc = desc = None
|
||||||
|
for i in range(NUM):
|
||||||
|
db.data.sort('id', False)
|
||||||
|
st = time.time()
|
||||||
|
do_single_sort(meth, field, True)
|
||||||
|
asc = db.data._map
|
||||||
|
do_single_sort(meth, field, False)
|
||||||
|
desc = db.data._map
|
||||||
|
ttime += time.time() - st
|
||||||
|
yield (ttime/NUM, asc, desc)
|
||||||
|
|
||||||
|
|
||||||
|
print 'Running single sort differentials'
|
||||||
|
for field in fields:
|
||||||
|
if field in ('search', 'id', 'news', 'flags'): continue
|
||||||
|
print '\t', field
|
||||||
|
old, new = test_single_sort(field)
|
||||||
|
if old[1] != new[1] or old[2] != new[2]:
|
||||||
|
print '\t\t', 'Sort failure!'
|
||||||
|
raise SystemExit(1)
|
||||||
|
print '\t\t', 'Old:', old[0], 'New:', new[0], 'Ratio: %.2f'%(new[0]/old[0])
|
||||||
|
|
||||||
|
def do_multi_sort(meth, ms):
|
||||||
|
if meth == 'new':
|
||||||
|
db.data.multisort(ms)
|
||||||
|
else:
|
||||||
|
for s in reversed(ms):
|
||||||
|
db.data.sort(*s)
|
||||||
|
|
||||||
|
def test_multi_sort(ms):
|
||||||
|
for meth in ('old', 'new'):
|
||||||
|
ttime = 0
|
||||||
|
NUM = 10
|
||||||
|
for i in range(NUM):
|
||||||
|
db.data.sort('id', False)
|
||||||
|
st = time.time()
|
||||||
|
do_multi_sort(meth, ms)
|
||||||
|
ttime += time.time() - st
|
||||||
|
yield (ttime/NUM, db.data._map)
|
||||||
|
|
||||||
|
print 'Running multi-sort differentials'
|
||||||
|
|
||||||
|
for ms in [
|
||||||
|
[('timestamp', False), ('author', True), ('title', False)],
|
||||||
|
[('size', True), ('tags', True), ('author', False)],
|
||||||
|
[('series', False), ('title', True)],
|
||||||
|
[('size', True), ('tags', True), ('author', False), ('pubdate',
|
||||||
|
True), ('tags', False), ('formats', False), ('uuid', True)],
|
||||||
|
|
||||||
|
]:
|
||||||
|
print '\t', ms
|
||||||
|
db.data.sort('id', False)
|
||||||
|
old, new = test_multi_sort(ms)
|
||||||
|
if old[1] != new[1]:
|
||||||
|
print '\t\t', 'Sort failure!'
|
||||||
|
raise SystemExit()
|
||||||
|
print '\t\t', 'Old:', old[0], 'New:', new[0], 'Ratio: %.2f'%(new[0]/old[0])
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user