Search query parser: Fix parsing of search terms in which a word other than the first starts with 'and' or 'or' (e.g. 'jack weatherford orc')

This commit is contained in:
Kovid Goyal 2010-07-21 23:36:12 -06:00
parent 6fa16aa3fa
commit aa8eab9d35

View File

@@ -18,10 +18,12 @@ If this module is run, it will perform a series of unit tests.
import sys, string, operator
from calibre.utils.pyparsing import Keyword, Group, Forward, CharsNotIn, Suppress, \
OneOrMore, oneOf, CaselessLiteral, Optional, NoMatch, ParseException
from calibre.utils.pyparsing import CaselessKeyword, Group, Forward, CharsNotIn, Suppress, \
OneOrMore, MatchFirst, CaselessLiteral, Optional, NoMatch, ParseException
from calibre.constants import preferred_encoding
'''
This class manages access to the preference holding the saved search queries.
It exists to ensure that unicode is used throughout, and also to permit
@@ -149,18 +151,19 @@ class SearchQueryParser(object):
Not = Forward()
Not << (Group(
Suppress(Keyword("not", caseless=True)) + Not
Suppress(CaselessKeyword("not")) + Not
).setResultsName("not") | Parenthesis)
And = Forward()
And << (Group(
Not + Suppress(Keyword("and", caseless=True)) + And
Not + Suppress(CaselessKeyword("and")) + And
).setResultsName("and") | Group(
Not + OneOrMore(~oneOf("and or", caseless=True) + And)
Not + OneOrMore(~MatchFirst(list(map(CaselessKeyword,
('and', 'or')))) + And)
).setResultsName("and") | Not)
Or << (Group(
And + Suppress(Keyword("or", caseless=True)) + Or
And + Suppress(CaselessKeyword("or")) + Or
).setResultsName("or") | And)
if test:
@@ -291,7 +294,7 @@ class Tester(SearchQueryParser):
28: [u"Kushiel's Scion", u'Jacqueline Carey', None, u'lrf,rar'],
29: [u'Underworld', u'Don DeLillo', None, u'lrf,rar'],
30: [u'Genghis Khan and The Making of the Modern World',
u'Jack Weatherford',
u'Jack Weatherford Orc',
u'Three Rivers Press',
u'lrf,zip'],
31: [u'The Best and the Brightest',
@@ -543,6 +546,7 @@ class Tester(SearchQueryParser):
'london:thames': set([13]),
'publisher:london:thames': set([13]),
'"(1977)"': set([13]),
'jack weatherford orc': set([30]),
}
fields = {'title':0, 'author':1, 'publisher':2, 'tag':3}