Merge new search query parser from calibre_temp

This commit is contained in:
commit 4fc41455ea
Binary file not shown. (Before: 1.1 KiB)
@@ -195,13 +195,13 @@ class DateSearch(object): # {{{
             try:
                 qd = now() - timedelta(int(num))
             except:
-                raise ParseException(query, len(query), 'Number conversion error')
+                raise ParseException(_('Number conversion error: {0}').format(num))
             field_count = 3
         else:
             try:
                 qd = parse_date(query, as_utc=False)
             except:
-                raise ParseException(query, len(query), 'Date conversion error')
+                raise ParseException(_('Date conversion error: {0}').format(query))
             if '-' in query:
                 field_count = query.count('-') + 1
             else:
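
The sketch below (not part of the diff) shows how the replacement exception is meant to be used: unlike pyparsing's ParseException(pstr, loc, msg), the new class added later in this commit takes a single, already-translated message and exposes it through a .msg property. The `_` stand-in here is hypothetical; inside calibre it is the translation function.

    # Standalone illustration of the new exception style
    _ = lambda s: s  # stand-in for calibre's translation function

    class ParseException(Exception):

        @property
        def msg(self):
            if len(self.args) > 0:
                return self.args[0]
            return ""

    try:
        raise ParseException(_('Number conversion error: {0}').format('abc'))
    except ParseException as e:
        print(e.msg)  # -> Number conversion error: abc
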
@@ -285,8 +285,8 @@ class NumericSearch(object): # {{{
         try:
             q = cast(query) * mult
         except:
-            raise ParseException(query, len(query),
-                    'Non-numeric value in query: %r'%query)
+            raise ParseException(
+                    _('Non-numeric value in query: {0}').format(query))
 
         for val, book_ids in field_iter():
             if val is None:
@@ -351,8 +351,8 @@ class KeyPairSearch(object): # {{{
         if ':' in query:
             q = [q.strip() for q in query.split(':')]
             if len(q) != 2:
-                raise ParseException(query, len(query),
-                    'Invalid query format for colon-separated search')
+                raise ParseException(
+                    _('Invalid query format for colon-separated search: {0}').format(query))
             keyq, valq = q
             keyq_mkind, keyq = _matchkind(keyq)
             valq_mkind, valq = _matchkind(valq)
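
For orientation, a small standalone sketch (not part of the diff) of the colon-separated form this class expects: roughly speaking, a key/value search such as identifiers:isbn:9780061120084 arrives here with the location already stripped, and must split into exactly one key and one value, otherwise the translatable exception above is raised. The identifier value is invented for the example.

    query = 'isbn:9780061120084'
    q = [part.strip() for part in query.split(':')]
    if len(q) != 2:
        raise ValueError('Invalid query format for colon-separated search: %s' % query)
    keyq, valq = q
    print((keyq, valq))  # -> ('isbn', '9780061120084')
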
@@ -465,7 +465,8 @@ class Parser(SearchQueryParser):
                     if invert:
                         matches = self.all_book_ids - matches
                     return matches
-                raise ParseException(query, len(query), 'Recursive query group detected')
+                raise ParseException(
+                    _('Recursive query group detected: {0}').format(query))
 
             # If the user has asked to restrict searching over all field, apply
             # that restriction
@@ -12,7 +12,7 @@ from PyQt4.Qt import (QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage,
         QModelIndex, QVariant, QDateTime, QColor, QPixmap)
 
 from calibre.gui2 import NONE, UNDEFINED_QDATETIME, error_dialog
-from calibre.utils.pyparsing import ParseException
+from calibre.utils.search_query_parser import ParseException
 from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_authors
 from calibre.ebooks.metadata.book.base import SafeFormat
 from calibre.ptempfile import PersistentTemporaryFile
@@ -13,7 +13,7 @@ from PyQt4.Qt import (
 from calibre.gui2 import error_dialog, question_dialog
 from calibre.gui2.widgets import ComboBoxWithHelp
 from calibre.utils.icu import sort_key
-from calibre.utils.pyparsing import ParseException
+from calibre.utils.search_query_parser import ParseException
 from calibre.utils.search_query_parser import saved_searches
 
 class SelectNames(QDialog): # {{{
@@ -299,7 +299,7 @@ class SearchRestrictionMixin(object):
 
     def __init__(self):
         self.checked = QIcon(I('ok.png'))
-        self.empty = QIcon(I('empty.png'))
+        self.empty = QIcon(I('blank.png'))
         self.search_based_vl_name = None
         self.search_based_vl = None
 
@@ -14,7 +14,7 @@ from threading import Thread
 from calibre.utils.config import tweaks, prefs
 from calibre.utils.date import parse_date, now, UNDEFINED_DATE, clean_date_for_sort
 from calibre.utils.search_query_parser import SearchQueryParser
-from calibre.utils.pyparsing import ParseException
+from calibre.utils.search_query_parser import ParseException
 from calibre.utils.localization import (canonicalize_lang, lang_map, get_udc)
 from calibre.db.search import CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH, _match
 from calibre.ebooks.metadata import title_sort, author_to_author_sort
@@ -366,25 +366,18 @@ class ResultCache(SearchQueryParser): # {{{
         elif query in self.local_thismonth:
             qd = now()
             field_count = 2
-        elif query.endswith(self.local_daysago):
+        elif query.endswith(self.local_daysago) or query.endswith(self.untrans_daysago):
             num = query[0:-self.local_daysago_len]
             try:
                 qd = now() - timedelta(int(num))
             except:
-                raise ParseException(query, len(query), 'Number conversion error', self)
-            field_count = 3
-        elif query.endswith(self.untrans_daysago):
-            num = query[0:-self.untrans_daysago_len]
-            try:
-                qd = now() - timedelta(int(num))
-            except:
-                raise ParseException(query, len(query), 'Number conversion error', self)
+                raise ParseException(_('Number conversion error: {0}').format(num))
             field_count = 3
         else:
             try:
                 qd = parse_date(query, as_utc=False)
             except:
-                raise ParseException(query, len(query), 'Date conversion error', self)
+                raise ParseException(_('Date conversion error: {0}').format(query))
             if '-' in query:
                 field_count = query.count('-') + 1
             else:
@@ -460,8 +453,7 @@ class ResultCache(SearchQueryParser): # {{{
         try:
             q = cast(query) * mult
         except:
-            raise ParseException(query, len(query),
-                    'Non-numeric value in query', self)
+            raise ParseException(_('Non-numeric value in query: {0}').format(query))
 
         for id_ in candidates:
             item = self._data[id_]
@@ -501,12 +493,13 @@ class ResultCache(SearchQueryParser): # {{{
         return matches
 
     def get_keypair_matches(self, location, query, candidates):
+        print query
         matches = set([])
         if query.find(':') >= 0:
             q = [q.strip() for q in query.split(':')]
             if len(q) != 2:
-                raise ParseException(query, len(query),
-                    'Invalid query format for colon-separated search', self)
+                raise ParseException(
+                    _('Invalid query format for colon-separated search: {0}').format(query))
             (keyq, valq) = q
             keyq_mkind, keyq = self._matchkind(keyq)
             valq_mkind, valq = self._matchkind(valq)
@@ -655,7 +648,7 @@ class ResultCache(SearchQueryParser): # {{{
                     if invert:
                         matches = self.universal_set() - matches
                     return matches
-                raise ParseException(query, len(query), 'Recursive query group detected', self)
+                raise ParseException(_('Recursive query group detected: {0}').format(query))
 
         # apply the limit if appropriate
         if location == 'all' and prefs['limit_search_columns'] and \

File diff suppressed because it is too large
@@ -16,11 +16,8 @@ methods :method:`SearchQueryParser.universal_set` and
 If this module is run, it will perform a series of unit tests.
 '''
 
-import sys, operator, weakref
+import sys, operator, weakref, re
 
-from calibre.utils.pyparsing import (CaselessKeyword, Group, Forward,
-        CharsNotIn, Suppress, OneOrMore, MatchFirst, CaselessLiteral,
-        Optional, NoMatch, ParseException, QuotedString)
 from calibre.constants import preferred_encoding
 from calibre.utils.icu import sort_key
 from calibre import prints
@@ -96,6 +93,151 @@ def saved_searches():
     global ss
     return ss
 
+'''
+Parse a search expression into a series of potentially recursive operations.
+
+Note that the interpreter wants binary operators, not n-ary ops. This is why we
+recurse instead of iterating when building sequences of the same op.
+
+The syntax is more than a bit twisted. In particular, the handling of colons
+in the base token requires semantic analysis.
+
+Also note that the query string is lowercased before analysis. This is OK because
+calibre's searches are all case-insensitive.
+
+Grammar:
+
+prog ::= or_expression
+
+or_expression ::= and_expression [ 'or' or_expression ]
+
+and_expression ::= not_expression [ [ 'and' ] and_expression ]
+
+not_expression ::= [ 'not' ] location_expression
+
+location_expression ::= base_token | ( '(' or_expression ')' )
+
+base_token ::= a sequence of letters and colons, perhaps quoted
+'''
+class Parser(object):
+
+    def __init__(self):
+        self.current_token = 0
+        self.tokens = None
+
+    OPCODE = 1
+    WORD = 2
+    QUOTED_WORD = 3
+    EOF = 4
+
+    # Had to translate named constants to numeric values
+    lex_scanner = re.Scanner([
+            (r'[()]', lambda x,t: (1, t)),
+            (r'[^ "()]+', lambda x,t: (2, unicode(t))),
+            (r'".*?((?<!\\)")', lambda x,t: (3, t[1:-1])),
+            (r'\s', None)
+        ], flags=re.DOTALL)
+
+    def token(self, advance=False):
+        if self.is_eof():
+            return None
+        res = self.tokens[self.current_token][1]
+        if advance:
+            self.current_token += 1
+        return res
+
+    def token_type(self):
+        if self.is_eof():
+            return self.EOF
+        return self.tokens[self.current_token][0]
+
+    def is_eof(self):
+        return self.current_token >= len(self.tokens)
+
+    def advance(self):
+        self.current_token += 1
+
+    def parse(self, expr, locations):
+        self.locations = locations
+        self.tokens = self.lex_scanner.scan(icu_lower(expr))[0]
+        self.current_token = 0
+        prog = self.or_expression()
+        if not self.is_eof():
+            raise ParseException(_('Extra characters at end of search'))
+        #prints(self.tokens, '\n', prog)
+        return prog
+
+    def or_expression(self):
+        lhs = self.and_expression()
+        if self.token() == 'or':
+            self.advance()
+            return ['or', lhs, self.or_expression()]
+        return lhs
+
+    def and_expression(self):
+        lhs = self.not_expression()
+        if self.token() == 'and':
+            self.advance()
+            return ['and', lhs, self.and_expression()]
+
+        # Account for the optional 'and'
+        if self.token_type() in [self.WORD, self.QUOTED_WORD] and self.token() != 'or':
+            return ['and', lhs, self.and_expression()]
+        return lhs
+
+    def not_expression(self):
+        if self.token() == 'not':
+            self.advance()
+            return ['not', self.not_expression()]
+        return self.location_expression()
+
+    def location_expression(self):
+        if self.token() == '(':
+            self.advance()
+            res = self.or_expression()
+            if self.token(advance=True) != ')':
+                raise ParseException(_('missing )'))
+            return res
+        if self.token_type() not in [ self.WORD, self.QUOTED_WORD ]:
+            raise ParseException(_('Invalid syntax. Expected a lookup name or a word'))
+
+        return self.base_token()
+
+    def base_token(self):
+        if self.token_type() == self.QUOTED_WORD:
+            return ['token', 'all', self.token(advance=True)]
+
+        words = self.token(advance=True).split(':')
+
+        # The complexity here comes from having colon-separated search
+        # values. That forces us to check that the first "word" in a colon-
+        # separated group is a valid location. If not, then the token must
+        # be reconstructed. We also have the problem that locations can be
+        # followed by quoted strings that appear as the next token. and that
+        # tokens can be a sequence of colons.
+
+        # We have a location if there is more than one word and the first
+        # word is in locations. This check could produce a "wrong" answer if
+        # the search string is something like 'author: "foo"' because it
+        # will be interpreted as 'author:"foo"'. I am choosing to accept the
+        # possible error. The expression should be written '"author:" foo'
+        if len(words) > 1 and words[0] in self.locations:
+            loc = words[0]
+            words = words[1:]
+            if len(words) == 1 and self.token_type() == self.QUOTED_WORD:
+                return ['token', loc, self.token(advance=True)]
+            return ['token', loc, ':'.join(words)]
+
+        return ['token', 'all', ':'.join(words)]
+
+class ParseException(Exception):
+
+    @property
+    def msg(self):
+        if len(self.args) > 0:
+            return self.args[0]
+        return ""
+
 class SearchQueryParser(object):
     '''
     Parses a search query.
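
A quick standalone illustration (not part of the diff) of what the new recursive-descent parser returns. It assumes the Parser class above is importable from calibre.utils.search_query_parser and is running inside calibre, where icu_lower, _ and the lookup names are available; the query and the locations list are made up for the example.

    from calibre.utils.search_query_parser import Parser

    p = Parser()
    tree = p.parse('author:tolkien and not title:"the hobbit"',
                   ['author', 'title'])
    print(tree)
    # Expected shape, given the methods above:
    # ['and', ['token', 'author', 'tolkien'],
    #         ['not', ['token', 'title', 'the hobbit']]]
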
@@ -134,70 +276,15 @@ class SearchQueryParser(object):
 
     def __init__(self, locations, test=False, optimize=False):
         self.sqp_initialize(locations, test=test, optimize=optimize)
+        self.parser = Parser()
 
     def sqp_change_locations(self, locations):
         self.sqp_initialize(locations, optimize=self.optimize)
 
     def sqp_initialize(self, locations, test=False, optimize=False):
         self.locations = locations
         self._tests_failed = False
         self.optimize = optimize
-        # Define a token
-        standard_locations = map(lambda x : CaselessLiteral(x)+Suppress(':'),
-                locations)
-        location = NoMatch()
-        for l in standard_locations:
-            location |= l
-        location = Optional(location, default='all')
-        word_query = CharsNotIn(u'\t\r\n\u00a0 ' + u'()')
-        #quoted_query = Suppress('"')+CharsNotIn('"')+Suppress('"')
-        quoted_query = QuotedString('"', escChar='\\')
-        query = quoted_query | word_query
-        Token = Group(location + query).setResultsName('token')
-
-        if test:
-            print 'Testing Token parser:'
-            Token.validate()
-            failed = SearchQueryParser.run_tests(Token, 'token',
-                (
-                    ('tag:asd', ['tag', 'asd']),
-                    (u'ddsä', ['all', u'ddsä']),
-                    ('"one \\"two"', ['all', 'one "two']),
-                    ('title:"one \\"1.5\\" two"', ['title', 'one "1.5" two']),
-                    ('title:abc"def', ['title', 'abc"def']),
-                )
-            )
-
-        Or = Forward()
-
-        Parenthesis = Group(
-            Suppress('(') + Or + Suppress(')')
-        ).setResultsName('parenthesis') | Token
-
-
-        Not = Forward()
-        Not << (Group(
-            Suppress(CaselessKeyword("not")) + Not
-        ).setResultsName("not") | Parenthesis)
-
-        And = Forward()
-        And << (Group(
-            Not + Suppress(CaselessKeyword("and")) + And
-        ).setResultsName("and") | Group(
-            Not + OneOrMore(~MatchFirst(list(map(CaselessKeyword,
-                ('and', 'or')))) + And)
-        ).setResultsName("and") | Not)
-
-        Or << (Group(
-            And + Suppress(CaselessKeyword("or")) + Or
-        ).setResultsName("or") | And)
-
-        if test:
-            #Or.validate()
-            self._tests_failed = bool(failed)
-
-        self._parser = Or
-        self._parser.setDebug(False)
-
 
     def parse(self, query):
         # empty the list of searches used for recursion testing
@@ -213,10 +300,9 @@ class SearchQueryParser(object):
     def _parse(self, query, candidates=None):
         self.recurse_level += 1
         try:
-            res = self._parser.parseString(query)[0]
+            res = self.parser.parse(query, self.locations)
        except RuntimeError:
-            import repr
-            raise ParseException('Failed to parse query, recursion limit reached: %s'%repr(query))
+            raise ParseException(_('Failed to parse query, recursion limit reached: %s')%repr(query))
         if candidates is None:
             candidates = self.universal_set()
         t = self.evaluate(res, candidates)
@@ -227,7 +313,7 @@ class SearchQueryParser(object):
         return getattr(self, 'evaluate_'+group_name)
 
     def evaluate(self, parse_result, candidates):
-        return self.method(parse_result.getName())(parse_result, candidates)
+        return self.method(parse_result[0])(parse_result[1:], candidates)
 
     def evaluate_and(self, argument, candidates):
         # RHS checks only those items matched by LHS
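
The change above switches evaluate() from pyparsing's named ParseResults to the plain nested lists produced by the new Parser (['or'|'and'|'not'|'token', ...]). Below is a standalone sketch (not calibre's code) of how such a tree can be walked by dispatching on node[0]; the tiny in-memory "library" and the match_token callback are invented for the example:

    def evaluate(node, candidates, match_token):
        op, args = node[0], node[1:]
        if op == 'and':
            # RHS only needs to check the items already matched by LHS
            lhs = evaluate(args[0], candidates, match_token)
            return lhs & evaluate(args[1], lhs, match_token)
        if op == 'or':
            return evaluate(args[0], candidates, match_token) | \
                   evaluate(args[1], candidates, match_token)
        if op == 'not':
            return candidates - evaluate(args[0], candidates, match_token)
        return match_token(args[0], args[1], candidates)  # op == 'token'

    books = {1: 'tolkien', 2: 'austen'}
    tree = ['and', ['token', 'author', 'tolkien'],
                   ['not', ['token', 'author', 'austen']]]
    print(evaluate(tree, set(books),
                   lambda loc, q, cands: {i for i in cands if q in books[i]}))
    # -> set([1])
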
@@ -249,8 +335,8 @@ class SearchQueryParser(object):
 #        return self.universal_set().difference(self.evaluate(argument[0]))
         return candidates.difference(self.evaluate(argument[0], candidates))
 
-    def evaluate_parenthesis(self, argument, candidates):
-        return self.evaluate(argument[0], candidates)
+    # def evaluate_parenthesis(self, argument, candidates):
+    #     return self.evaluate(argument[0], candidates)
 
     def evaluate_token(self, argument, candidates):
         location = argument[0]
@@ -260,12 +346,16 @@ class SearchQueryParser(object):
                 query = query[1:]
             try:
                 if query in self.searches_seen:
-                    raise ParseException(query, len(query), 'undefined saved search', self)
+                    raise ParseException(_('Recursive saved search: {0}').format(query))
                 if self.recurse_level > 5:
                     self.searches_seen.add(query)
                 return self._parse(saved_searches().lookup(query), candidates)
+            except ParseException as e:
+                raise e
             except: # convert all exceptions (e.g., missing key) to a parse error
-                raise ParseException(query, len(query), 'undefined saved search', self)
+                import traceback
+                traceback.print_exc()
+                raise ParseException(_('Unknown error in saved search: {0}').format(query))
         return self._get_matches(location, query, candidates)
 
     def _get_matches(self, location, query, candidates):
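
A short standalone sketch (not calibre's code) of why the searches_seen guard above exists: a saved search that refers back to itself would otherwise be expanded forever, so the new code reports it with a dedicated, translatable message instead of the old generic 'undefined saved search'. The saved-search table and names here are invented:

    saved = {'fiction': 'tags:fiction or search:"=good-fiction"',
             'good-fiction': 'rating:>3 and search:"=fiction"'}
    seen = set()

    def expand(name):
        if name in seen:
            raise ValueError('Recursive saved search: {0}'.format(name))
        seen.add(name)
        expr = saved[name]
        # a real implementation re-parses expr; this sketch only follows
        # the first search:"=name" reference to show the cycle being caught
        if 'search:"=' in expr:
            ref = expr.split('search:"=', 1)[1].split('"', 1)[0]
            return expand(ref)
        return expr

    expand('fiction')  # -> ValueError: Recursive saved search: fiction
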
@@ -19,7 +19,7 @@ from calibre.web.feeds.recipes.collection import \
         SchedulerConfig, download_builtin_recipe, update_custom_recipe, \
         add_custom_recipe, remove_custom_recipe, get_custom_recipe, \
         get_builtin_recipe
-from calibre.utils.pyparsing import ParseException
+from calibre.utils.search_query_parser import ParseException
 
 class NewsTreeItem(object):
 