mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #8007 (Search performance on multiple words)
This commit is contained in:
parent
95a9ee5b76
commit
8bcdb0fed7
@ -181,7 +181,7 @@ class ResultCache(SearchQueryParser): # {{{
|
||||
self.search_restriction = ''
|
||||
self.field_metadata = field_metadata
|
||||
self.all_search_locations = field_metadata.get_search_terms()
|
||||
SearchQueryParser.__init__(self, self.all_search_locations)
|
||||
SearchQueryParser.__init__(self, self.all_search_locations, optimize=True)
|
||||
self.build_date_relop_dict()
|
||||
self.build_numeric_relop_dict()
|
||||
|
||||
@ -264,7 +264,7 @@ class ResultCache(SearchQueryParser): # {{{
|
||||
'<=':[2, relop_le]
|
||||
}
|
||||
|
||||
def get_dates_matches(self, location, query):
|
||||
def get_dates_matches(self, location, query, candidates):
|
||||
matches = set([])
|
||||
if len(query) < 2:
|
||||
return matches
|
||||
@ -274,13 +274,15 @@ class ResultCache(SearchQueryParser): # {{{
|
||||
loc = self.field_metadata[location]['rec_index']
|
||||
|
||||
if query == 'false':
|
||||
for item in self._data:
|
||||
for id_ in candidates:
|
||||
item = self._data[id_]
|
||||
if item is None: continue
|
||||
if item[loc] is None or item[loc] <= UNDEFINED_DATE:
|
||||
matches.add(item[0])
|
||||
return matches
|
||||
if query == 'true':
|
||||
for item in self._data:
|
||||
for id_ in candidates:
|
||||
item = self._data[id_]
|
||||
if item is None: continue
|
||||
if item[loc] is not None and item[loc] > UNDEFINED_DATE:
|
||||
matches.add(item[0])
|
||||
@ -319,7 +321,8 @@ class ResultCache(SearchQueryParser): # {{{
|
||||
field_count = query.count('-') + 1
|
||||
else:
|
||||
field_count = query.count('/') + 1
|
||||
for item in self._data:
|
||||
for id_ in candidates:
|
||||
item = self._data[id_]
|
||||
if item is None or item[loc] is None: continue
|
||||
if relop(item[loc], qd, field_count):
|
||||
matches.add(item[0])
|
||||
@ -335,7 +338,7 @@ class ResultCache(SearchQueryParser): # {{{
|
||||
'<=':[2, lambda r, q: r <= q]
|
||||
}
|
||||
|
||||
def get_numeric_matches(self, location, query, val_func = None):
|
||||
def get_numeric_matches(self, location, query, candidates, val_func = None):
|
||||
matches = set([])
|
||||
if len(query) == 0:
|
||||
return matches
|
||||
@ -381,7 +384,8 @@ class ResultCache(SearchQueryParser): # {{{
|
||||
except:
|
||||
return matches
|
||||
|
||||
for item in self._data:
|
||||
for id_ in candidates:
|
||||
item = self._data[id_]
|
||||
if item is None:
|
||||
continue
|
||||
v = val_func(item)
|
||||
@ -393,8 +397,13 @@ class ResultCache(SearchQueryParser): # {{{
|
||||
matches.add(item[0])
|
||||
return matches
|
||||
|
||||
def get_matches(self, location, query, allow_recursion=True):
|
||||
def get_matches(self, location, query, allow_recursion=True, candidates=None):
|
||||
matches = set([])
|
||||
if candidates is None:
|
||||
candidates = self.universal_set()
|
||||
if len(candidates) == 0:
|
||||
return matches
|
||||
|
||||
if query and query.strip():
|
||||
# get metadata key associated with the search term. Eliminates
|
||||
# dealing with plurals and other aliases
|
||||
@ -476,7 +485,8 @@ class ResultCache(SearchQueryParser): # {{{
|
||||
else:
|
||||
q = query
|
||||
|
||||
for item in self._data:
|
||||
for id_ in candidates:
|
||||
item = self._data[id_]
|
||||
if item is None: continue
|
||||
|
||||
if col_datatype[loc] == 'bool': # complexity caused by the two-/three-value tweak
|
||||
|
@ -118,8 +118,9 @@ class SearchQueryParser(object):
|
||||
failed.append(test[0])
|
||||
return failed
|
||||
|
||||
def __init__(self, locations, test=False):
|
||||
def __init__(self, locations, test=False, optimize=False):
|
||||
self._tests_failed = False
|
||||
self.optimize = optimize
|
||||
# Define a token
|
||||
standard_locations = map(lambda x : CaselessLiteral(x)+Suppress(':'),
|
||||
locations)
|
||||
@ -182,38 +183,52 @@ class SearchQueryParser(object):
|
||||
# empty the list of searches used for recursion testing
|
||||
self.recurse_level = 0
|
||||
self.searches_seen = set([])
|
||||
return self._parse(query)
|
||||
candidates = self.universal_set()
|
||||
return self._parse(query, candidates)
|
||||
|
||||
# this parse is used internally because it doesn't clear the
|
||||
# recursive search test list. However, we permit seeing the
|
||||
# same search a few times because the search might appear within
|
||||
# another search.
|
||||
def _parse(self, query):
|
||||
def _parse(self, query, candidates=None):
|
||||
self.recurse_level += 1
|
||||
res = self._parser.parseString(query)[0]
|
||||
t = self.evaluate(res)
|
||||
if candidates is None:
|
||||
candidates = self.universal_set()
|
||||
t = self.evaluate(res, candidates)
|
||||
self.recurse_level -= 1
|
||||
return t
|
||||
|
||||
def method(self, group_name):
|
||||
return getattr(self, 'evaluate_'+group_name)
|
||||
|
||||
def evaluate(self, parse_result):
|
||||
return self.method(parse_result.getName())(parse_result)
|
||||
def evaluate(self, parse_result, candidates):
|
||||
return self.method(parse_result.getName())(parse_result, candidates)
|
||||
|
||||
def evaluate_and(self, argument):
|
||||
return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
|
||||
def evaluate_and(self, argument, candidates):
|
||||
# RHS checks only those items matched by LHS
|
||||
# returns result of RHS check: RHmatches(LHmatches(c))
|
||||
# return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
|
||||
l = self.evaluate(argument[0], candidates)
|
||||
return l.intersection(self.evaluate(argument[1], l))
|
||||
|
||||
def evaluate_or(self, argument):
|
||||
return self.evaluate(argument[0]).union(self.evaluate(argument[1]))
|
||||
def evaluate_or(self, argument, candidates):
|
||||
# RHS checks only those elements not matched by LHS
|
||||
# returns LHS union RHS: LHmatches(c) + RHmatches(c-LHmatches(c))
|
||||
# return self.evaluate(argument[0]).union(self.evaluate(argument[1]))
|
||||
l = self.evaluate(argument[0], candidates)
|
||||
return l.union(self.evaluate(argument[1], candidates.difference(l)))
|
||||
|
||||
def evaluate_not(self, argument):
|
||||
return self.universal_set().difference(self.evaluate(argument[0]))
|
||||
def evaluate_not(self, argument, candidates):
|
||||
# unary op checks only candidates. Result: list of items matching
|
||||
# returns: c - matches(c)
|
||||
# return self.universal_set().difference(self.evaluate(argument[0]))
|
||||
return candidates.difference(self.evaluate(argument[0], candidates))
|
||||
|
||||
def evaluate_parenthesis(self, argument):
|
||||
return self.evaluate(argument[0])
|
||||
def evaluate_parenthesis(self, argument, candidates):
|
||||
return self.evaluate(argument[0], candidates)
|
||||
|
||||
def evaluate_token(self, argument):
|
||||
def evaluate_token(self, argument, candidates):
|
||||
location = argument[0]
|
||||
query = argument[1]
|
||||
if location.lower() == 'search':
|
||||
@ -224,17 +239,27 @@ class SearchQueryParser(object):
|
||||
raise ParseException(query, len(query), 'undefined saved search', self)
|
||||
if self.recurse_level > 5:
|
||||
self.searches_seen.add(query)
|
||||
return self._parse(saved_searches().lookup(query))
|
||||
return self._parse(saved_searches().lookup(query), candidates)
|
||||
except: # convert all exceptions (e.g., missing key) to a parse error
|
||||
raise ParseException(query, len(query), 'undefined saved search', self)
|
||||
return self._get_matches(location, query, candidates)
|
||||
|
||||
def _get_matches(self, location, query, candidates):
|
||||
if self.optimize:
|
||||
return self.get_matches(location, query, candidates=candidates)
|
||||
else:
|
||||
return self.get_matches(location, query)
|
||||
|
||||
def get_matches(self, location, query):
|
||||
def get_matches(self, location, query, candidates=None):
|
||||
'''
|
||||
Should return the set of matches for :param:`location` and :param:`query`.
|
||||
|
||||
The search must be performed over all entries if :param:`candidates` is
|
||||
None otherwise only over the items in candidates.
|
||||
|
||||
:param:`location` is one of the items in :member:`SearchQueryParser.DEFAULT_LOCATIONS`.
|
||||
:param:`query` is a string literal.
|
||||
:param:`candidates` is None or a subset of the set returned by :meth:`universal_set`.
|
||||
'''
|
||||
return set([])
|
||||
|
||||
@ -561,7 +586,7 @@ class Tester(SearchQueryParser):
|
||||
def universal_set(self):
|
||||
return self._universal_set
|
||||
|
||||
def get_matches(self, location, query):
|
||||
def get_matches(self, location, query, candidates=None):
|
||||
location = location.lower()
|
||||
if location in self.fields.keys():
|
||||
getter = operator.itemgetter(self.fields[location])
|
||||
@ -573,6 +598,11 @@ class Tester(SearchQueryParser):
|
||||
if not query:
|
||||
return set([])
|
||||
query = query.lower()
|
||||
if candidates:
|
||||
return set(key for key, val in self.texts.items() \
|
||||
if key in candidates and query and query
|
||||
in getattr(getter(val), 'lower', lambda : '')())
|
||||
else:
|
||||
return set(key for key, val in self.texts.items() \
|
||||
if query and query in getattr(getter(val), 'lower', lambda : '')())
|
||||
|
||||
@ -592,6 +622,7 @@ class Tester(SearchQueryParser):
|
||||
|
||||
|
||||
def main(args=sys.argv):
|
||||
print 'testing unoptimized'
|
||||
tester = Tester(['authors', 'author', 'series', 'formats', 'format',
|
||||
'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover',
|
||||
'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read',
|
||||
@ -601,6 +632,16 @@ def main(args=sys.argv):
|
||||
print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
|
||||
return 1
|
||||
|
||||
print '\n\ntesting optimized'
|
||||
tester = Tester(['authors', 'author', 'series', 'formats', 'format',
|
||||
'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover',
|
||||
'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read',
|
||||
'all', 'search'], test=True, optimize=True)
|
||||
failed = tester.run_tests()
|
||||
if tester._tests_failed or failed:
|
||||
print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
|
||||
return 1
|
||||
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
Loading…
x
Reference in New Issue
Block a user