Use the new sorting code in the content server as well.

This commit is contained in:
Kovid Goyal 2010-09-12 09:32:16 -06:00
parent 5208e1a713
commit 78874a9117
2 changed files with 18 additions and 173 deletions

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re, itertools, functools import re, itertools
from itertools import repeat from itertools import repeat
from datetime import timedelta from datetime import timedelta
from threading import Thread, RLock from threading import Thread, RLock
@ -584,39 +584,7 @@ class ResultCache(SearchQueryParser):
# Sorting functions {{{ # Sorting functions {{{
def seriescmp(self, sidx, siidx, x, y, library_order=None): def sanitize_sort_field_name(self, field):
try:
if library_order:
ans = cmp(title_sort(self._data[x][sidx].lower()),
title_sort(self._data[y][sidx].lower()))
else:
ans = cmp(self._data[x][sidx].lower(),
self._data[y][sidx].lower())
except AttributeError: # Some entries may be None
ans = cmp(self._data[x][sidx], self._data[y][sidx])
if ans != 0: return ans
return cmp(self._data[x][siidx], self._data[y][siidx])
def cmp(self, loc, x, y, asstr=True, subsort=False):
try:
ans = cmp(self._data[x][loc].lower(), self._data[y][loc].lower()) if \
asstr else cmp(self._data[x][loc], self._data[y][loc])
except AttributeError: # Some entries may be None
ans = cmp(self._data[x][loc], self._data[y][loc])
except TypeError: ## raised when a datetime is None
x = self._data[x][loc]
if x is None:
x = UNDEFINED_DATE
y = self._data[y][loc]
if y is None:
y = UNDEFINED_DATE
return cmp(x, y)
if subsort and ans == 0:
idx = self.FIELD_MAP['sort']
return cmp(self._data[x][idx].lower(), self._data[y][idx].lower())
return ans
def sanitize_field_name(self, field):
field = field.lower().strip() field = field.lower().strip()
if field not in self.field_metadata.iterkeys(): if field not in self.field_metadata.iterkeys():
if field in ('author', 'tag', 'comment'): if field in ('author', 'tag', 'comment'):
@ -627,38 +595,10 @@ class ResultCache(SearchQueryParser):
return field return field
def sort(self, field, ascending, subsort=False): def sort(self, field, ascending, subsort=False):
field = self.sanitize_field_name(field) self.multisort([(field, ascending)])
as_string = field not in ('size', 'rating', 'timestamp')
if self.first_sort:
subsort = True
self.first_sort = False
if self.field_metadata[field]['is_custom']:
if self.field_metadata[field]['datatype'] == 'series':
fcmp = functools.partial(self.seriescmp,
self.field_metadata[field]['rec_index'],
self.field_metadata.cc_series_index_column_for(field),
library_order=tweaks['title_series_sorting'] == 'library_order')
else:
as_string = self.field_metadata[field]['datatype'] in ('comments', 'text')
field = self.field_metadata[field]['colnum']
fcmp = functools.partial(self.cmp, self.FIELD_MAP[field],
subsort=subsort, asstr=as_string)
elif field == 'series':
fcmp = functools.partial(self.seriescmp, self.FIELD_MAP['series'],
self.FIELD_MAP['series_index'],
library_order=tweaks['title_series_sorting'] == 'library_order')
else:
fcmp = functools.partial(self.cmp, self.field_metadata[field]['rec_index'],
subsort=subsort, asstr=as_string)
self._map.sort(cmp=fcmp, reverse=not ascending)
tmap = list(itertools.repeat(False, len(self._data)))
for x in self._map_filtered:
tmap[x] = True
self._map_filtered = [x for x in self._map if tmap[x]]
def multisort(self, fields=[], subsort=False): def multisort(self, fields=[], subsort=False):
fields = [(self.sanitize_field_name(x), bool(y)) for x, y in fields] fields = [(self.sanitize_sort_field_name(x), bool(y)) for x, y in fields]
keys = self.field_metadata.field_keys() keys = self.field_metadata.field_keys()
fields = [x for x in fields if x[0] in keys] fields = [x for x in fields if x[0] in keys]
if subsort and 'sort' not in [x[0] for x in fields]: if subsort and 'sort' not in [x[0] for x in fields]:
@ -671,6 +611,7 @@ class ResultCache(SearchQueryParser):
self._map.sort(key=keyg, reverse=not fields[0][1]) self._map.sort(key=keyg, reverse=not fields[0][1])
else: else:
self._map.sort(key=keyg) self._map.sort(key=keyg)
tmap = list(itertools.repeat(False, len(self._data))) tmap = list(itertools.repeat(False, len(self._data)))
for x in self._map_filtered: for x in self._map_filtered:
tmap[x] = True tmap[x] = True
@ -733,87 +674,3 @@ class SortKeyGenerator(object):
# }}} # }}}
if __name__ == '__main__':
# Testing.timing for new multi-sort {{{
import time
from calibre.library import db
db = db()
db.refresh()
fields = db.field_metadata.field_keys()
print fields
def do_single_sort(meth, field, order):
if meth == 'old':
db.data.sort(field, order)
else:
db.data.multisort([(field, order)])
def test_single_sort(field):
for meth in ('old', 'new'):
ttime = 0
NUM = 10
asc = desc = None
for i in range(NUM):
db.data.sort('id', False)
st = time.time()
do_single_sort(meth, field, True)
asc = db.data._map
do_single_sort(meth, field, False)
desc = db.data._map
ttime += time.time() - st
yield (ttime/NUM, asc, desc)
print 'Running single sort differentials'
for field in fields:
if field in ('search', 'id', 'news', 'flags'): continue
print '\t', field, db.field_metadata[field]['datatype']
old, new = test_single_sort(field)
if old[1] != new[1] or old[2] != new[2]:
print '\t\t', 'Sort failure!'
raise SystemExit(1)
print '\t\t', 'Old:', old[0], 'New:', new[0], 'Ratio: %.2f'%(new[0]/old[0])
def do_multi_sort(meth, ms):
if meth == 'new':
db.data.multisort(ms)
else:
for s in reversed(ms):
db.data.sort(*s)
def test_multi_sort(ms):
for meth in ('old', 'new'):
ttime = 0
NUM = 10
for i in range(NUM):
db.data.sort('id', False)
st = time.time()
do_multi_sort(meth, ms)
ttime += time.time() - st
yield (ttime/NUM, db.data._map)
print 'Running multi-sort differentials'
for ms in [
[('timestamp', False), ('author', True), ('title', False)],
[('size', True), ('tags', True), ('author', False)],
[('series', False), ('title', True)],
[('size', True), ('tags', True), ('author', False), ('pubdate',
True), ('tags', False), ('formats', False), ('uuid', True)],
]:
print '\t', ms
db.data.sort('id', False)
old, new = test_multi_sort(ms)
if old[1] != new[1]:
print '\t\t', 'Sort failure!'
raise SystemExit()
print '\t\t', 'Old:', old[0], 'New:', new[0], 'Ratio: %.2f'%(new[0]/old[0])
# }}}

View File

@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re, os, cStringIO, operator import re, os, cStringIO
import cherrypy import cherrypy
try: try:
@ -16,7 +16,15 @@ except ImportError:
from calibre import fit_image, guess_type from calibre import fit_image, guess_type
from calibre.utils.date import fromtimestamp from calibre.utils.date import fromtimestamp
from calibre.ebooks.metadata import title_sort from calibre.library.caches import SortKeyGenerator
class CSSortKeyGenerator(SortKeyGenerator):
def __init__(self, fields, fm):
SortKeyGenerator.__init__(self, fields, fm, None)
def __call__(self, record):
return self.itervals(record).next()
class ContentServer(object): class ContentServer(object):
@ -47,32 +55,12 @@ class ContentServer(object):
def sort(self, items, field, order): def sort(self, items, field, order):
field = field.lower().strip() field = self.db.data.sanitize_sort_field_name(field)
if field == 'author':
field = 'authors'
if field == 'date':
field = 'timestamp'
if field not in ('title', 'authors', 'rating', 'timestamp', 'tags', 'size', 'series'): if field not in ('title', 'authors', 'rating', 'timestamp', 'tags', 'size', 'series'):
raise cherrypy.HTTPError(400, '%s is not a valid sort field'%field) raise cherrypy.HTTPError(400, '%s is not a valid sort field'%field)
cmpf = cmp if field in ('rating', 'size', 'timestamp') else \ keyg = CSSortKeyGenerator([(field, order)], self.db.field_metadata)
lambda x, y: cmp(x.lower() if x else '', y.lower() if y else '') items.sort(key=keyg, reverse=not order)
if field == 'series':
items.sort(cmp=self.seriescmp, reverse=not order)
else:
lookup = 'sort' if field == 'title' else field
lookup = 'author_sort' if field == 'authors' else field
field = self.db.FIELD_MAP[lookup]
getter = operator.itemgetter(field)
items.sort(cmp=lambda x, y: cmpf(getter(x), getter(y)), reverse=not order)
def seriescmp(self, x, y):
si = self.db.FIELD_MAP['series']
try:
ans = cmp(title_sort(x[si].lower()), title_sort(y[si].lower()))
except AttributeError: # Some entries may be None
ans = cmp(x[si], y[si])
if ans != 0: return ans
return cmp(x[self.db.FIELD_MAP['series_index']], y[self.db.FIELD_MAP['series_index']])
# }}} # }}}