diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index 980c9f1fa9..7caeeabda8 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -181,7 +181,7 @@ class ResultCache(SearchQueryParser): # {{{ self.search_restriction = '' self.field_metadata = field_metadata self.all_search_locations = field_metadata.get_search_terms() - SearchQueryParser.__init__(self, self.all_search_locations) + SearchQueryParser.__init__(self, self.all_search_locations, optimize=True) self.build_date_relop_dict() self.build_numeric_relop_dict() @@ -264,7 +264,7 @@ class ResultCache(SearchQueryParser): # {{{ '<=':[2, relop_le] } - def get_dates_matches(self, location, query): + def get_dates_matches(self, location, query, candidates): matches = set([]) if len(query) < 2: return matches @@ -274,13 +274,15 @@ class ResultCache(SearchQueryParser): # {{{ loc = self.field_metadata[location]['rec_index'] if query == 'false': - for item in self._data: + for id_ in candidates: + item = self._data[id_] if item is None: continue if item[loc] is None or item[loc] <= UNDEFINED_DATE: matches.add(item[0]) return matches if query == 'true': - for item in self._data: + for id_ in candidates: + item = self._data[id_] if item is None: continue if item[loc] is not None and item[loc] > UNDEFINED_DATE: matches.add(item[0]) @@ -319,7 +321,8 @@ class ResultCache(SearchQueryParser): # {{{ field_count = query.count('-') + 1 else: field_count = query.count('/') + 1 - for item in self._data: + for id_ in candidates: + item = self._data[id_] if item is None or item[loc] is None: continue if relop(item[loc], qd, field_count): matches.add(item[0]) @@ -335,7 +338,7 @@ class ResultCache(SearchQueryParser): # {{{ '<=':[2, lambda r, q: r <= q] } - def get_numeric_matches(self, location, query, val_func = None): + def get_numeric_matches(self, location, query, candidates, val_func = None): matches = set([]) if len(query) == 0: return matches @@ -381,7 +384,8 @@ class 
ResultCache(SearchQueryParser): # {{{ except: return matches - for item in self._data: + for id_ in candidates: + item = self._data[id_] if item is None: continue v = val_func(item) @@ -393,8 +397,13 @@ class ResultCache(SearchQueryParser): # {{{ matches.add(item[0]) return matches - def get_matches(self, location, query, allow_recursion=True): + def get_matches(self, location, query, allow_recursion=True, candidates=None): matches = set([]) + if candidates is None: + candidates = self.universal_set() + if len(candidates) == 0: + return matches + if query and query.strip(): # get metadata key associated with the search term. Eliminates # dealing with plurals and other aliases @@ -476,7 +485,8 @@ class ResultCache(SearchQueryParser): # {{{ else: q = query - for item in self._data: + for id_ in candidates: + item = self._data[id_] if item is None: continue if col_datatype[loc] == 'bool': # complexity caused by the two-/three-value tweak diff --git a/src/calibre/utils/search_query_parser.py b/src/calibre/utils/search_query_parser.py index db7c7bde5f..447ff8cd14 100644 --- a/src/calibre/utils/search_query_parser.py +++ b/src/calibre/utils/search_query_parser.py @@ -118,8 +118,9 @@ class SearchQueryParser(object): failed.append(test[0]) return failed - def __init__(self, locations, test=False): + def __init__(self, locations, test=False, optimize=False): self._tests_failed = False + self.optimize = optimize # Define a token standard_locations = map(lambda x : CaselessLiteral(x)+Suppress(':'), locations) @@ -182,38 +183,52 @@ class SearchQueryParser(object): # empty the list of searches used for recursion testing self.recurse_level = 0 self.searches_seen = set([]) - return self._parse(query) + candidates = self.universal_set() + return self._parse(query, candidates) # this parse is used internally because it doesn't clear the # recursive search test list. However, we permit seeing the # same search a few times because the search might appear within # another search. 
- def _parse(self, query): + def _parse(self, query, candidates=None): self.recurse_level += 1 res = self._parser.parseString(query)[0] - t = self.evaluate(res) + if candidates is None: + candidates = self.universal_set() + t = self.evaluate(res, candidates) self.recurse_level -= 1 return t def method(self, group_name): return getattr(self, 'evaluate_'+group_name) - def evaluate(self, parse_result): - return self.method(parse_result.getName())(parse_result) + def evaluate(self, parse_result, candidates): + return self.method(parse_result.getName())(parse_result, candidates) - def evaluate_and(self, argument): - return self.evaluate(argument[0]).intersection(self.evaluate(argument[1])) + def evaluate_and(self, argument, candidates): + # RHS checks only those items matched by LHS + # returns result of RHS check: RHmatches(LHmatches(c)) + # intersection is kept because get_matches may ignore candidates (optimize=False) + l = self.evaluate(argument[0], candidates) + return l.intersection(self.evaluate(argument[1], l)) - def evaluate_or(self, argument): - return self.evaluate(argument[0]).union(self.evaluate(argument[1])) + def evaluate_or(self, argument, candidates): + # RHS checks only those elements not matched by LHS + # returns LHS union RHS: LHmatches(c) + RHmatches(c-LHmatches(c)) + # return self.evaluate(argument[0]).union(self.evaluate(argument[1])) + l = self.evaluate(argument[0], candidates) + return l.union(self.evaluate(argument[1], candidates.difference(l))) - def evaluate_not(self, argument): - return self.universal_set().difference(self.evaluate(argument[0])) + def evaluate_not(self, argument, candidates): + # unary op checks only candidates. 
Result: list of items matching + # returns: c - matches(c) + # return self.universal_set().difference(self.evaluate(argument[0])) + return candidates.difference(self.evaluate(argument[0], candidates)) - def evaluate_parenthesis(self, argument): - return self.evaluate(argument[0]) + def evaluate_parenthesis(self, argument, candidates): + return self.evaluate(argument[0], candidates) - def evaluate_token(self, argument): + def evaluate_token(self, argument, candidates): location = argument[0] query = argument[1] if location.lower() == 'search': @@ -224,17 +239,27 @@ class SearchQueryParser(object): raise ParseException(query, len(query), 'undefined saved search', self) if self.recurse_level > 5: self.searches_seen.add(query) - return self._parse(saved_searches().lookup(query)) + return self._parse(saved_searches().lookup(query), candidates) except: # convert all exceptions (e.g., missing key) to a parse error raise ParseException(query, len(query), 'undefined saved search', self) - return self.get_matches(location, query) + return self._get_matches(location, query, candidates) - def get_matches(self, location, query): + def _get_matches(self, location, query, candidates): + if self.optimize: + return self.get_matches(location, query, candidates=candidates) + else: + return self.get_matches(location, query) + + def get_matches(self, location, query, candidates=None): ''' Should return the set of matches for :param:'location` and :param:`query`. + The search must be performed over all entries if :param:`candidates` is + None, otherwise only over the items in candidates. + :param:`location` is one of the items in :member:`SearchQueryParser.DEFAULT_LOCATIONS`. :param:`query` is a string literal. + :param:`candidates` is None or a subset of the set returned by :meth:`universal_set`. 
''' return set([]) @@ -561,7 +586,7 @@ class Tester(SearchQueryParser): def universal_set(self): return self._universal_set - def get_matches(self, location, query): + def get_matches(self, location, query, candidates=None): location = location.lower() if location in self.fields.keys(): getter = operator.itemgetter(self.fields[location]) @@ -573,8 +598,13 @@ class Tester(SearchQueryParser): if not query: return set([]) query = query.lower() - return set(key for key, val in self.texts.items() \ - if query and query in getattr(getter(val), 'lower', lambda : '')()) + if candidates is not None: + return set(key for key, val in self.texts.items() \ + if key in candidates and query and query + in getattr(getter(val), 'lower', lambda : '')()) + else: + return set(key for key, val in self.texts.items() \ + if query and query in getattr(getter(val), 'lower', lambda : '')()) @@ -592,6 +622,7 @@ class Tester(SearchQueryParser): def main(args=sys.argv): + print 'testing unoptimized' tester = Tester(['authors', 'author', 'series', 'formats', 'format', 'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover', 'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read', @@ -601,6 +632,16 @@ def main(args=sys.argv): print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<' return 1 + print '\n\ntesting optimized' + tester = Tester(['authors', 'author', 'series', 'formats', 'format', + 'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover', + 'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read', + 'all', 'search'], test=True, optimize=True) + failed = tester.run_tests() + if tester._tests_failed or failed: + print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<' + return 1 + return 0 if __name__ == '__main__':