mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #8007 (Search performance on multiple words)
This commit is contained in:
parent
95a9ee5b76
commit
8bcdb0fed7
@ -181,7 +181,7 @@ class ResultCache(SearchQueryParser): # {{{
|
|||||||
self.search_restriction = ''
|
self.search_restriction = ''
|
||||||
self.field_metadata = field_metadata
|
self.field_metadata = field_metadata
|
||||||
self.all_search_locations = field_metadata.get_search_terms()
|
self.all_search_locations = field_metadata.get_search_terms()
|
||||||
SearchQueryParser.__init__(self, self.all_search_locations)
|
SearchQueryParser.__init__(self, self.all_search_locations, optimize=True)
|
||||||
self.build_date_relop_dict()
|
self.build_date_relop_dict()
|
||||||
self.build_numeric_relop_dict()
|
self.build_numeric_relop_dict()
|
||||||
|
|
||||||
@ -264,7 +264,7 @@ class ResultCache(SearchQueryParser): # {{{
|
|||||||
'<=':[2, relop_le]
|
'<=':[2, relop_le]
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_dates_matches(self, location, query):
|
def get_dates_matches(self, location, query, candidates):
|
||||||
matches = set([])
|
matches = set([])
|
||||||
if len(query) < 2:
|
if len(query) < 2:
|
||||||
return matches
|
return matches
|
||||||
@ -274,13 +274,15 @@ class ResultCache(SearchQueryParser): # {{{
|
|||||||
loc = self.field_metadata[location]['rec_index']
|
loc = self.field_metadata[location]['rec_index']
|
||||||
|
|
||||||
if query == 'false':
|
if query == 'false':
|
||||||
for item in self._data:
|
for id_ in candidates:
|
||||||
|
item = self._data[id_]
|
||||||
if item is None: continue
|
if item is None: continue
|
||||||
if item[loc] is None or item[loc] <= UNDEFINED_DATE:
|
if item[loc] is None or item[loc] <= UNDEFINED_DATE:
|
||||||
matches.add(item[0])
|
matches.add(item[0])
|
||||||
return matches
|
return matches
|
||||||
if query == 'true':
|
if query == 'true':
|
||||||
for item in self._data:
|
for id_ in candidates:
|
||||||
|
item = self._data[id_]
|
||||||
if item is None: continue
|
if item is None: continue
|
||||||
if item[loc] is not None and item[loc] > UNDEFINED_DATE:
|
if item[loc] is not None and item[loc] > UNDEFINED_DATE:
|
||||||
matches.add(item[0])
|
matches.add(item[0])
|
||||||
@ -319,7 +321,8 @@ class ResultCache(SearchQueryParser): # {{{
|
|||||||
field_count = query.count('-') + 1
|
field_count = query.count('-') + 1
|
||||||
else:
|
else:
|
||||||
field_count = query.count('/') + 1
|
field_count = query.count('/') + 1
|
||||||
for item in self._data:
|
for id_ in candidates:
|
||||||
|
item = self._data[id_]
|
||||||
if item is None or item[loc] is None: continue
|
if item is None or item[loc] is None: continue
|
||||||
if relop(item[loc], qd, field_count):
|
if relop(item[loc], qd, field_count):
|
||||||
matches.add(item[0])
|
matches.add(item[0])
|
||||||
@ -335,7 +338,7 @@ class ResultCache(SearchQueryParser): # {{{
|
|||||||
'<=':[2, lambda r, q: r <= q]
|
'<=':[2, lambda r, q: r <= q]
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_numeric_matches(self, location, query, val_func = None):
|
def get_numeric_matches(self, location, query, candidates, val_func = None):
|
||||||
matches = set([])
|
matches = set([])
|
||||||
if len(query) == 0:
|
if len(query) == 0:
|
||||||
return matches
|
return matches
|
||||||
@ -381,7 +384,8 @@ class ResultCache(SearchQueryParser): # {{{
|
|||||||
except:
|
except:
|
||||||
return matches
|
return matches
|
||||||
|
|
||||||
for item in self._data:
|
for id_ in candidates:
|
||||||
|
item = self._data[id_]
|
||||||
if item is None:
|
if item is None:
|
||||||
continue
|
continue
|
||||||
v = val_func(item)
|
v = val_func(item)
|
||||||
@ -393,8 +397,13 @@ class ResultCache(SearchQueryParser): # {{{
|
|||||||
matches.add(item[0])
|
matches.add(item[0])
|
||||||
return matches
|
return matches
|
||||||
|
|
||||||
def get_matches(self, location, query, allow_recursion=True):
|
def get_matches(self, location, query, allow_recursion=True, candidates=None):
|
||||||
matches = set([])
|
matches = set([])
|
||||||
|
if candidates is None:
|
||||||
|
candidates = self.universal_set()
|
||||||
|
if len(candidates) == 0:
|
||||||
|
return matches
|
||||||
|
|
||||||
if query and query.strip():
|
if query and query.strip():
|
||||||
# get metadata key associated with the search term. Eliminates
|
# get metadata key associated with the search term. Eliminates
|
||||||
# dealing with plurals and other aliases
|
# dealing with plurals and other aliases
|
||||||
@ -476,7 +485,8 @@ class ResultCache(SearchQueryParser): # {{{
|
|||||||
else:
|
else:
|
||||||
q = query
|
q = query
|
||||||
|
|
||||||
for item in self._data:
|
for id_ in candidates:
|
||||||
|
item = self._data[id_]
|
||||||
if item is None: continue
|
if item is None: continue
|
||||||
|
|
||||||
if col_datatype[loc] == 'bool': # complexity caused by the two-/three-value tweak
|
if col_datatype[loc] == 'bool': # complexity caused by the two-/three-value tweak
|
||||||
|
@ -118,8 +118,9 @@ class SearchQueryParser(object):
|
|||||||
failed.append(test[0])
|
failed.append(test[0])
|
||||||
return failed
|
return failed
|
||||||
|
|
||||||
def __init__(self, locations, test=False):
|
def __init__(self, locations, test=False, optimize=False):
|
||||||
self._tests_failed = False
|
self._tests_failed = False
|
||||||
|
self.optimize = optimize
|
||||||
# Define a token
|
# Define a token
|
||||||
standard_locations = map(lambda x : CaselessLiteral(x)+Suppress(':'),
|
standard_locations = map(lambda x : CaselessLiteral(x)+Suppress(':'),
|
||||||
locations)
|
locations)
|
||||||
@ -182,38 +183,52 @@ class SearchQueryParser(object):
|
|||||||
# empty the list of searches used for recursion testing
|
# empty the list of searches used for recursion testing
|
||||||
self.recurse_level = 0
|
self.recurse_level = 0
|
||||||
self.searches_seen = set([])
|
self.searches_seen = set([])
|
||||||
return self._parse(query)
|
candidates = self.universal_set()
|
||||||
|
return self._parse(query, candidates)
|
||||||
|
|
||||||
# this parse is used internally because it doesn't clear the
|
# this parse is used internally because it doesn't clear the
|
||||||
# recursive search test list. However, we permit seeing the
|
# recursive search test list. However, we permit seeing the
|
||||||
# same search a few times because the search might appear within
|
# same search a few times because the search might appear within
|
||||||
# another search.
|
# another search.
|
||||||
def _parse(self, query):
|
def _parse(self, query, candidates=None):
|
||||||
self.recurse_level += 1
|
self.recurse_level += 1
|
||||||
res = self._parser.parseString(query)[0]
|
res = self._parser.parseString(query)[0]
|
||||||
t = self.evaluate(res)
|
if candidates is None:
|
||||||
|
candidates = self.universal_set()
|
||||||
|
t = self.evaluate(res, candidates)
|
||||||
self.recurse_level -= 1
|
self.recurse_level -= 1
|
||||||
return t
|
return t
|
||||||
|
|
||||||
def method(self, group_name):
|
def method(self, group_name):
|
||||||
return getattr(self, 'evaluate_'+group_name)
|
return getattr(self, 'evaluate_'+group_name)
|
||||||
|
|
||||||
def evaluate(self, parse_result):
|
def evaluate(self, parse_result, candidates):
|
||||||
return self.method(parse_result.getName())(parse_result)
|
return self.method(parse_result.getName())(parse_result, candidates)
|
||||||
|
|
||||||
def evaluate_and(self, argument):
|
def evaluate_and(self, argument, candidates):
|
||||||
return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
|
# RHS checks only those items matched by LHS
|
||||||
|
# returns result of RHS check: RHmatches(LHmatches(c))
|
||||||
|
# return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
|
||||||
|
l = self.evaluate(argument[0], candidates)
|
||||||
|
return l.intersection(self.evaluate(argument[1], l))
|
||||||
|
|
||||||
def evaluate_or(self, argument):
|
def evaluate_or(self, argument, candidates):
|
||||||
return self.evaluate(argument[0]).union(self.evaluate(argument[1]))
|
# RHS checks only those elements not matched by LHS
|
||||||
|
# returns LHS union RHS: LHmatches(c) + RHmatches(c-LHmatches(c))
|
||||||
|
# return self.evaluate(argument[0]).union(self.evaluate(argument[1]))
|
||||||
|
l = self.evaluate(argument[0], candidates)
|
||||||
|
return l.union(self.evaluate(argument[1], candidates.difference(l)))
|
||||||
|
|
||||||
def evaluate_not(self, argument):
|
def evaluate_not(self, argument, candidates):
|
||||||
return self.universal_set().difference(self.evaluate(argument[0]))
|
# unary op checks only candidates. Result: the candidates that do not match
|
||||||
|
# returns: c - matches(c)
|
||||||
|
# return self.universal_set().difference(self.evaluate(argument[0]))
|
||||||
|
return candidates.difference(self.evaluate(argument[0], candidates))
|
||||||
|
|
||||||
def evaluate_parenthesis(self, argument):
|
def evaluate_parenthesis(self, argument, candidates):
|
||||||
return self.evaluate(argument[0])
|
return self.evaluate(argument[0], candidates)
|
||||||
|
|
||||||
def evaluate_token(self, argument):
|
def evaluate_token(self, argument, candidates):
|
||||||
location = argument[0]
|
location = argument[0]
|
||||||
query = argument[1]
|
query = argument[1]
|
||||||
if location.lower() == 'search':
|
if location.lower() == 'search':
|
||||||
@ -224,17 +239,27 @@ class SearchQueryParser(object):
|
|||||||
raise ParseException(query, len(query), 'undefined saved search', self)
|
raise ParseException(query, len(query), 'undefined saved search', self)
|
||||||
if self.recurse_level > 5:
|
if self.recurse_level > 5:
|
||||||
self.searches_seen.add(query)
|
self.searches_seen.add(query)
|
||||||
return self._parse(saved_searches().lookup(query))
|
return self._parse(saved_searches().lookup(query), candidates)
|
||||||
except: # convert all exceptions (e.g., missing key) to a parse error
|
except: # convert all exceptions (e.g., missing key) to a parse error
|
||||||
raise ParseException(query, len(query), 'undefined saved search', self)
|
raise ParseException(query, len(query), 'undefined saved search', self)
|
||||||
return self.get_matches(location, query)
|
return self._get_matches(location, query, candidates)
|
||||||
|
|
||||||
def get_matches(self, location, query):
|
def _get_matches(self, location, query, candidates):
|
||||||
|
if self.optimize:
|
||||||
|
return self.get_matches(location, query, candidates=candidates)
|
||||||
|
else:
|
||||||
|
return self.get_matches(location, query)
|
||||||
|
|
||||||
|
def get_matches(self, location, query, candidates=None):
|
||||||
'''
|
'''
|
||||||
Should return the set of matches for :param:`location` and :param:`query`.
|
Should return the set of matches for :param:`location` and :param:`query`.
|
||||||
|
|
||||||
|
The search must be performed over all entries if :param:`candidates` is
|
||||||
|
None, otherwise only over the items in candidates.
|
||||||
|
|
||||||
:param:`location` is one of the items in :member:`SearchQueryParser.DEFAULT_LOCATIONS`.
|
:param:`location` is one of the items in :member:`SearchQueryParser.DEFAULT_LOCATIONS`.
|
||||||
:param:`query` is a string literal.
|
:param:`query` is a string literal.
|
||||||
|
:param:`candidates` is None or a subset of the set returned by :meth:`universal_set`.
|
||||||
'''
|
'''
|
||||||
return set([])
|
return set([])
|
||||||
|
|
||||||
@ -561,7 +586,7 @@ class Tester(SearchQueryParser):
|
|||||||
def universal_set(self):
|
def universal_set(self):
|
||||||
return self._universal_set
|
return self._universal_set
|
||||||
|
|
||||||
def get_matches(self, location, query):
|
def get_matches(self, location, query, candidates=None):
|
||||||
location = location.lower()
|
location = location.lower()
|
||||||
if location in self.fields.keys():
|
if location in self.fields.keys():
|
||||||
getter = operator.itemgetter(self.fields[location])
|
getter = operator.itemgetter(self.fields[location])
|
||||||
@ -573,8 +598,13 @@ class Tester(SearchQueryParser):
|
|||||||
if not query:
|
if not query:
|
||||||
return set([])
|
return set([])
|
||||||
query = query.lower()
|
query = query.lower()
|
||||||
return set(key for key, val in self.texts.items() \
|
if candidates:
|
||||||
if query and query in getattr(getter(val), 'lower', lambda : '')())
|
return set(key for key, val in self.texts.items() \
|
||||||
|
if key in candidates and query and query
|
||||||
|
in getattr(getter(val), 'lower', lambda : '')())
|
||||||
|
else:
|
||||||
|
return set(key for key, val in self.texts.items() \
|
||||||
|
if query and query in getattr(getter(val), 'lower', lambda : '')())
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -592,6 +622,7 @@ class Tester(SearchQueryParser):
|
|||||||
|
|
||||||
|
|
||||||
def main(args=sys.argv):
|
def main(args=sys.argv):
|
||||||
|
print 'testing unoptimized'
|
||||||
tester = Tester(['authors', 'author', 'series', 'formats', 'format',
|
tester = Tester(['authors', 'author', 'series', 'formats', 'format',
|
||||||
'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover',
|
'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover',
|
||||||
'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read',
|
'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read',
|
||||||
@ -601,6 +632,16 @@ def main(args=sys.argv):
|
|||||||
print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
|
print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
print '\n\ntesting optimized'
|
||||||
|
tester = Tester(['authors', 'author', 'series', 'formats', 'format',
|
||||||
|
'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover',
|
||||||
|
'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read',
|
||||||
|
'all', 'search'], test=True, optimize=True)
|
||||||
|
failed = tester.run_tests()
|
||||||
|
if tester._tests_failed or failed:
|
||||||
|
print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
|
||||||
|
return 1
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Loading…
x
Reference in New Issue
Block a user