Merge new search query parser from calibre_temp

This commit is contained in:
Charles Haley 2013-04-20 10:13:41 +02:00
commit 4fc41455ea
8 changed files with 180 additions and 3821 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

View File

@ -195,13 +195,13 @@ class DateSearch(object): # {{{
try:
qd = now() - timedelta(int(num))
except:
raise ParseException(query, len(query), 'Number conversion error')
raise ParseException(_('Number conversion error: {0}').format(num))
field_count = 3
else:
try:
qd = parse_date(query, as_utc=False)
except:
raise ParseException(query, len(query), 'Date conversion error')
raise ParseException(_('Date conversion error: {0}').format(query))
if '-' in query:
field_count = query.count('-') + 1
else:
@ -285,8 +285,8 @@ class NumericSearch(object): # {{{
try:
q = cast(query) * mult
except:
raise ParseException(query, len(query),
'Non-numeric value in query: %r'%query)
raise ParseException(
_('Non-numeric value in query: {0}').format(query))
for val, book_ids in field_iter():
if val is None:
@ -351,8 +351,8 @@ class KeyPairSearch(object): # {{{
if ':' in query:
q = [q.strip() for q in query.split(':')]
if len(q) != 2:
raise ParseException(query, len(query),
'Invalid query format for colon-separated search')
raise ParseException(
_('Invalid query format for colon-separated search: {0}').format(query))
keyq, valq = q
keyq_mkind, keyq = _matchkind(keyq)
valq_mkind, valq = _matchkind(valq)
@ -465,7 +465,8 @@ class Parser(SearchQueryParser):
if invert:
matches = self.all_book_ids - matches
return matches
raise ParseException(query, len(query), 'Recursive query group detected')
raise ParseException(
_('Recursive query group detected: {0}').format(query))
# If the user has asked to restrict searching over all field, apply
# that restriction

View File

@ -12,7 +12,7 @@ from PyQt4.Qt import (QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage,
QModelIndex, QVariant, QDateTime, QColor, QPixmap)
from calibre.gui2 import NONE, UNDEFINED_QDATETIME, error_dialog
from calibre.utils.pyparsing import ParseException
from calibre.utils.search_query_parser import ParseException
from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_authors
from calibre.ebooks.metadata.book.base import SafeFormat
from calibre.ptempfile import PersistentTemporaryFile

View File

@ -13,7 +13,7 @@ from PyQt4.Qt import (
from calibre.gui2 import error_dialog, question_dialog
from calibre.gui2.widgets import ComboBoxWithHelp
from calibre.utils.icu import sort_key
from calibre.utils.pyparsing import ParseException
from calibre.utils.search_query_parser import ParseException
from calibre.utils.search_query_parser import saved_searches
class SelectNames(QDialog): # {{{
@ -299,7 +299,7 @@ class SearchRestrictionMixin(object):
def __init__(self):
self.checked = QIcon(I('ok.png'))
self.empty = QIcon(I('empty.png'))
self.empty = QIcon(I('blank.png'))
self.search_based_vl_name = None
self.search_based_vl = None

View File

@ -14,7 +14,7 @@ from threading import Thread
from calibre.utils.config import tweaks, prefs
from calibre.utils.date import parse_date, now, UNDEFINED_DATE, clean_date_for_sort
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.utils.pyparsing import ParseException
from calibre.utils.search_query_parser import ParseException
from calibre.utils.localization import (canonicalize_lang, lang_map, get_udc)
from calibre.db.search import CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH, _match
from calibre.ebooks.metadata import title_sort, author_to_author_sort
@ -366,25 +366,18 @@ class ResultCache(SearchQueryParser): # {{{
elif query in self.local_thismonth:
qd = now()
field_count = 2
elif query.endswith(self.local_daysago):
elif query.endswith(self.local_daysago) or query.endswith(self.untrans_daysago):
num = query[0:-self.local_daysago_len]
try:
qd = now() - timedelta(int(num))
except:
raise ParseException(query, len(query), 'Number conversion error', self)
field_count = 3
elif query.endswith(self.untrans_daysago):
num = query[0:-self.untrans_daysago_len]
try:
qd = now() - timedelta(int(num))
except:
raise ParseException(query, len(query), 'Number conversion error', self)
raise ParseException(_('Number conversion error: {0}').format(num))
field_count = 3
else:
try:
qd = parse_date(query, as_utc=False)
except:
raise ParseException(query, len(query), 'Date conversion error', self)
raise ParseException(_('Date conversion error: {0}').format(query))
if '-' in query:
field_count = query.count('-') + 1
else:
@ -460,8 +453,7 @@ class ResultCache(SearchQueryParser): # {{{
try:
q = cast(query) * mult
except:
raise ParseException(query, len(query),
'Non-numeric value in query', self)
raise ParseException(_('Non-numeric value in query: {0}').format(query))
for id_ in candidates:
item = self._data[id_]
@ -501,12 +493,13 @@ class ResultCache(SearchQueryParser): # {{{
return matches
def get_keypair_matches(self, location, query, candidates):
print query
matches = set([])
if query.find(':') >= 0:
q = [q.strip() for q in query.split(':')]
if len(q) != 2:
raise ParseException(query, len(query),
'Invalid query format for colon-separated search', self)
raise ParseException(
_('Invalid query format for colon-separated search: {0}').format(query))
(keyq, valq) = q
keyq_mkind, keyq = self._matchkind(keyq)
valq_mkind, valq = self._matchkind(valq)
@ -655,7 +648,7 @@ class ResultCache(SearchQueryParser): # {{{
if invert:
matches = self.universal_set() - matches
return matches
raise ParseException(query, len(query), 'Recursive query group detected', self)
raise ParseException(_('Recursive query group detected: {0}').format(query))
# apply the limit if appropriate
if location == 'all' and prefs['limit_search_columns'] and \

File diff suppressed because it is too large Load Diff

View File

@ -16,11 +16,8 @@ methods :method:`SearchQueryParser.universal_set` and
If this module is run, it will perform a series of unit tests.
'''
import sys, operator, weakref
import sys, operator, weakref, re
from calibre.utils.pyparsing import (CaselessKeyword, Group, Forward,
CharsNotIn, Suppress, OneOrMore, MatchFirst, CaselessLiteral,
Optional, NoMatch, ParseException, QuotedString)
from calibre.constants import preferred_encoding
from calibre.utils.icu import sort_key
from calibre import prints
@ -96,6 +93,151 @@ def saved_searches():
global ss
return ss
'''
Parse a search expression into a series of potentially recursive operations.
Note that the interpreter wants binary operators, not n-ary ops. This is why we
recurse instead of iterating when building sequences of the same op.
The syntax is more than a bit twisted. In particular, the handling of colons
in the base token requires semantic analysis.
Also note that the query string is lowercased before analysis. This is OK because
calibre's searches are all case-insensitive.
Grammar:
prog ::= or_expression
or_expression ::= and_expression [ 'or' or_expression ]
and_expression ::= not_expression [ [ 'and' ] and_expression ]
not_expression ::= ( 'not' not_expression ) | location_expression
location_expression ::= base_token | ( '(' or_expression ')' )
base_token ::= a sequence of letters and colons, perhaps quoted
'''
class Parser(object):
    '''
    Recursive-descent parser for search expressions.

    :meth:`parse` lexes an expression and returns a tree of nested lists. Each
    node is one of::

        ['or', lhs, rhs]
        ['and', lhs, rhs]
        ['not', operand]
        ['token', location, query]

    Operators are always binary, so a run of the same operator is built by
    recursing on the right-hand side.
    '''

    # Token types produced by lex_scanner (EOF is synthesised by token_type()).
    OPCODE = 1
    WORD = 2
    QUOTED_WORD = 3
    EOF = 4

    # The lambdas only run at scan time, after the class statement has
    # completed, so they can reach the named constants through the class.
    # Words stop at any whitespace character (not just a space) so that tabs
    # and newlines separate terms instead of becoming part of them.
    lex_scanner = re.Scanner([
            (r'[()]', lambda x, t: (Parser.OPCODE, t)),
            (r'[^\s"()]+', lambda x, t: (Parser.WORD, unicode(t))),
            (r'".*?((?<!\\)")', lambda x, t: (Parser.QUOTED_WORD, t[1:-1])),
            (r'\s+', None)
    ], flags=re.DOTALL)

    def __init__(self):
        self.current_token = 0
        self.tokens = None

    def token(self, advance=False):
        '''
        Return the text of the current token, or None at end of input.
        If advance is True the parser also moves on to the next token.
        '''
        if self.is_eof():
            return None
        res = self.tokens[self.current_token][1]
        if advance:
            self.current_token += 1
        return res

    def token_type(self):
        '''Return the type of the current token, or EOF at end of input.'''
        if self.is_eof():
            return self.EOF
        return self.tokens[self.current_token][0]

    def is_eof(self):
        '''Return True when every token has been consumed.'''
        return self.current_token >= len(self.tokens)

    def advance(self):
        '''Skip the current token.'''
        self.current_token += 1

    def is_open_paren(self):
        '''
        Return True if the current token is the '(' operator. The token type
        is checked as well as the text so that a quoted "(" stays a search
        term.
        '''
        return self.token_type() == self.OPCODE and self.token() == '('

    def parse(self, expr, locations):
        '''
        Parse expr and return the resulting tree.

        :param expr: the search expression. It is lowercased before lexing.
        :param locations: container of valid lookup names, used to decide
            whether the text before a colon is a location.
        :raises ParseException: if the expression is not syntactically valid.
        '''
        self.locations = locations
        # scan() returns (tokens, unmatched_remainder); only the tokens are used.
        # NOTE(review): text the scanner cannot match (e.g. after an unbalanced
        # quote) is therefore dropped silently -- confirm this is intended.
        self.tokens = self.lex_scanner.scan(icu_lower(expr))[0]
        self.current_token = 0
        prog = self.or_expression()
        if not self.is_eof():
            raise ParseException(_('Extra characters at end of search'))
        #prints(self.tokens, '\n', prog)
        return prog

    def or_expression(self):
        '''or_expression ::= and_expression [ 'or' or_expression ]'''
        lhs = self.and_expression()
        if self.token() == 'or':
            self.advance()
            return ['or', lhs, self.or_expression()]
        return lhs

    def and_expression(self):
        '''and_expression ::= not_expression [ [ 'and' ] and_expression ]'''
        lhs = self.not_expression()
        if self.token() == 'and':
            self.advance()
            return ['and', lhs, self.and_expression()]

        # Account for the optional 'and': anything that can start another
        # operand (a word, a quoted word or a parenthesised group) implies it.
        starts_operand = (
            self.token_type() in (self.WORD, self.QUOTED_WORD) or
            self.is_open_paren())
        if starts_operand and self.token() != 'or':
            return ['and', lhs, self.and_expression()]
        return lhs

    def not_expression(self):
        '''not_expression ::= ( 'not' not_expression ) | location_expression'''
        if self.token() == 'not':
            self.advance()
            return ['not', self.not_expression()]
        return self.location_expression()

    def location_expression(self):
        '''location_expression ::= base_token | ( '(' or_expression ')' )'''
        if self.is_open_paren():
            self.advance()
            res = self.or_expression()
            if self.token_type() != self.OPCODE or self.token(advance=True) != ')':
                raise ParseException(_('missing )'))
            return res
        if self.token_type() not in (self.WORD, self.QUOTED_WORD):
            raise ParseException(_('Invalid syntax. Expected a lookup name or a word'))
        return self.base_token()

    def base_token(self):
        '''
        Consume one search term and return ['token', location, query]. The
        location is 'all' unless the term starts with a known lookup name
        followed by a colon.
        '''
        if self.token_type() == self.QUOTED_WORD:
            return ['token', 'all', self.token(advance=True)]

        words = self.token(advance=True).split(':')

        # The complexity here comes from having colon-separated search
        # values. That forces us to check that the first "word" in a colon-
        # separated group is a valid location. If not, then the token must
        # be reconstructed. We also have the problem that locations can be
        # followed by quoted strings that appear as the next token, and that
        # tokens can be a sequence of colons.

        # We have a location if there is more than one word and the first
        # word is in locations. This check could produce a "wrong" answer if
        # the search string is something like 'author: "foo"' because it
        # will be interpreted as 'author:"foo"'. That possible error is
        # accepted. The expression should be written '"author:" foo'
        if len(words) > 1 and words[0] in self.locations:
            loc = words[0]
            words = words[1:]
            # NOTE(review): when a quoted word follows, it replaces whatever
            # single word came after the colon (not only an empty one) --
            # confirm that discarding that word is intended.
            if len(words) == 1 and self.token_type() == self.QUOTED_WORD:
                return ['token', loc, self.token(advance=True)]
            return ['token', loc, ':'.join(words)]

        return ['token', 'all', ':'.join(words)]
class ParseException(Exception):
    '''
    Raised when a search expression cannot be parsed or evaluated.

    The human-readable message is passed as the first positional argument and
    is exposed through the read-only :attr:`msg` property (presumably kept so
    that code written against the pyparsing exception, which this class
    replaces in the imports, keeps working -- verify against callers).
    '''

    @property
    def msg(self):
        '''Return the first constructor argument, or "" if there is none.'''
        return self.args[0] if self.args else ""
class SearchQueryParser(object):
'''
Parses a search query.
@ -134,70 +276,15 @@ class SearchQueryParser(object):
def __init__(self, locations, test=False, optimize=False):
    '''
    Store the search configuration via sqp_initialize() and create the
    Parser instance used to turn query strings into expression trees.
    '''
    self.sqp_initialize(locations, optimize=optimize, test=test)
    self.parser = Parser()
def sqp_change_locations(self, locations):
    '''
    Re-run sqp_initialize() with a new set of locations, carrying over the
    current value of self.optimize.
    '''
    current_optimize = self.optimize
    self.sqp_initialize(locations, optimize=current_optimize)
def sqp_initialize(self, locations, test=False, optimize=False):
self.locations = locations
self._tests_failed = False
self.optimize = optimize
# Define a token
standard_locations = map(lambda x : CaselessLiteral(x)+Suppress(':'),
locations)
location = NoMatch()
for l in standard_locations:
location |= l
location = Optional(location, default='all')
word_query = CharsNotIn(u'\t\r\n\u00a0 ' + u'()')
#quoted_query = Suppress('"')+CharsNotIn('"')+Suppress('"')
quoted_query = QuotedString('"', escChar='\\')
query = quoted_query | word_query
Token = Group(location + query).setResultsName('token')
if test:
print 'Testing Token parser:'
Token.validate()
failed = SearchQueryParser.run_tests(Token, 'token',
(
('tag:asd', ['tag', 'asd']),
(u'ddsä', ['all', u'ddsä']),
('"one \\"two"', ['all', 'one "two']),
('title:"one \\"1.5\\" two"', ['title', 'one "1.5" two']),
('title:abc"def', ['title', 'abc"def']),
)
)
Or = Forward()
Parenthesis = Group(
Suppress('(') + Or + Suppress(')')
).setResultsName('parenthesis') | Token
Not = Forward()
Not << (Group(
Suppress(CaselessKeyword("not")) + Not
).setResultsName("not") | Parenthesis)
And = Forward()
And << (Group(
Not + Suppress(CaselessKeyword("and")) + And
).setResultsName("and") | Group(
Not + OneOrMore(~MatchFirst(list(map(CaselessKeyword,
('and', 'or')))) + And)
).setResultsName("and") | Not)
Or << (Group(
And + Suppress(CaselessKeyword("or")) + Or
).setResultsName("or") | And)
if test:
#Or.validate()
self._tests_failed = bool(failed)
self._parser = Or
self._parser.setDebug(False)
def parse(self, query):
# empty the list of searches used for recursion testing
@ -213,10 +300,9 @@ class SearchQueryParser(object):
def _parse(self, query, candidates=None):
self.recurse_level += 1
try:
res = self._parser.parseString(query)[0]
res = self.parser.parse(query, self.locations)
except RuntimeError:
import repr
raise ParseException('Failed to parse query, recursion limit reached: %s'%repr(query))
raise ParseException(_('Failed to parse query, recursion limit reached: %s')%repr(query))
if candidates is None:
candidates = self.universal_set()
t = self.evaluate(res, candidates)
@ -227,7 +313,7 @@ class SearchQueryParser(object):
return getattr(self, 'evaluate_'+group_name)
def evaluate(self, parse_result, candidates):
return self.method(parse_result.getName())(parse_result, candidates)
return self.method(parse_result[0])(parse_result[1:], candidates)
def evaluate_and(self, argument, candidates):
# RHS checks only those items matched by LHS
@ -249,8 +335,8 @@ class SearchQueryParser(object):
# return self.universal_set().difference(self.evaluate(argument[0]))
return candidates.difference(self.evaluate(argument[0], candidates))
def evaluate_parenthesis(self, argument, candidates):
return self.evaluate(argument[0], candidates)
# def evaluate_parenthesis(self, argument, candidates):
# return self.evaluate(argument[0], candidates)
def evaluate_token(self, argument, candidates):
location = argument[0]
@ -260,12 +346,16 @@ class SearchQueryParser(object):
query = query[1:]
try:
if query in self.searches_seen:
raise ParseException(query, len(query), 'undefined saved search', self)
raise ParseException(_('Recursive saved search: {0}').format(query))
if self.recurse_level > 5:
self.searches_seen.add(query)
return self._parse(saved_searches().lookup(query), candidates)
except ParseException as e:
raise e
except: # convert all exceptions (e.g., missing key) to a parse error
raise ParseException(query, len(query), 'undefined saved search', self)
import traceback
traceback.print_exc()
raise ParseException(_('Unknown error in saved search: {0}').format(query))
return self._get_matches(location, query, candidates)
def _get_matches(self, location, query, candidates):

View File

@ -19,7 +19,7 @@ from calibre.web.feeds.recipes.collection import \
SchedulerConfig, download_builtin_recipe, update_custom_recipe, \
add_custom_recipe, remove_custom_recipe, get_custom_recipe, \
get_builtin_recipe
from calibre.utils.pyparsing import ParseException
from calibre.utils.search_query_parser import ParseException
class NewsTreeItem(object):