diff --git a/src/calibre/utils/search_query_parser.py b/src/calibre/utils/search_query_parser.py index ecf3e11ce8..29573cb9c9 100644 --- a/src/calibre/utils/search_query_parser.py +++ b/src/calibre/utils/search_query_parser.py @@ -97,19 +97,28 @@ def saved_searches(): ''' Parse a search expression into a series of potentially recursive operations. -The syntax is a bit twisted. +Note that the interpreter wants binary operators, not n-ary ops. This is why we +recurse instead of iterating when building sequences of the same op. + +The syntax is more than a bit twisted. In particular, the handling of colons +in the base token requires semantic analysis. + +Also note that the query string is lowercased before analysis. This is OK because +calibre's searches are all case-insensitive. + +Grammar: prog ::= or_expression or_expression ::= and_expression [ 'or' or_expression ] -and_expression ::= not_expression [ ( [ 'and' ] and_expression ) | ( '(' or_expression ')' ) ] +and_expression ::= not_expression [ [ 'and' ] and_expression ] -not_expression ::= [ 'not' ] base_token +not_expression ::= [ 'not' ] location_expression -base_token ::= location_expression | ( '(' or_expression ')' ) +location_expression ::= base_token | ( '(' or_expression ')' ) -location_expression ::= [ word [ ':' word ]* +base_token ::= a sequence of letters and colons, perhaps quoted ''' class Parser(object): @@ -119,20 +128,24 @@ class Parser(object): OPCODE = 1 WORD = 2 - EOF = 3 + QUOTED_WORD = 3 + EOF = 4 # Had to translate named constants to numeric values lex_scanner = re.Scanner([ - (r'[():]', lambda x,t: (1, t)), - (r'[^ "():]+', lambda x,t: (2, unicode(t))), - (r'".*?((? 1 and words[0] in self.locations: + loc = words[0] + words = words[1:] + if len(words) == 1 and self.token_type() == self.QUOTED_WORD: + return ['token', loc, self.token(advance=True)] + return ['token', loc, ':'.join(words)] + + return ['token', 'all', ':'.join(words)] class SearchQueryParser(object): ''' @@ -264,6 +269,7 @@ class SearchQueryParser(object): def __init__(self, locations, test=False, optimize=False): self.sqp_initialize(locations, test=test, optimize=optimize) + self.parser = Parser() def sqp_change_locations(self, locations): self.sqp_initialize(locations, optimize=self.optimize) @@ -287,7 +293,7 @@ class SearchQueryParser(object): def _parse(self, query, candidates=None): self.recurse_level += 1 try: - res = Parser().parse(query, self.locations) + res = self.parser.parse(query, self.locations) except RuntimeError: raise ParseException('Failed to parse query, recursion limit reached: %s'%repr(query)) if candidates is None: