Merge new search query parser from calibre_temp

2025-07-09 03:04:10 -04:00 · 2013-04-20 10:13:41 +02:00 · 2013-04-20 10:13:41 +02:00 · 4fc41455ea
commit 4fc41455ea
parent 3631011901 e7d4bec981
8 changed files with 180 additions and 3821 deletions
--- a/resources/images/empty.png
+++ b/resources/images/empty.png
--- a/src/calibre/db/search.py
+++ b/src/calibre/db/search.py
@ -195,13 +195,13 @@ class DateSearch(object):  # {{{
                try:
                    qd = now() - timedelta(int(num))
                except:
-                    raise ParseException(query, len(query), 'Number conversion error')
+                    raise ParseException(_('Number conversion error: {0}').format(num))
                field_count = 3
            else:
                try:
                    qd = parse_date(query, as_utc=False)
                except:
-                    raise ParseException(query, len(query), 'Date conversion error')
+                    raise ParseException(_('Date conversion error: {0}').format(query))
                if '-' in query:
                    field_count = query.count('-') + 1
                else:
@ -285,8 +285,8 @@ class NumericSearch(object):  # {{{
            try:
                q = cast(query) * mult
            except:
-                raise ParseException(query, len(query),
+                raise ParseException(
-                                     'Non-numeric value in query: %r'%query)
+                        _('Non-numeric value in query: {0}').format(query))
        for val, book_ids in field_iter():
            if val is None:
@ -351,8 +351,8 @@ class KeyPairSearch(object):  # {{{
        if ':' in query:
            q = [q.strip() for q in query.split(':')]
            if len(q) != 2:
-                raise ParseException(query, len(query),
+                raise ParseException(
-                        'Invalid query format for colon-separated search')
+                 _('Invalid query format for colon-separated search: {0}').format(query))
            keyq, valq = q
            keyq_mkind, keyq = _matchkind(keyq)
            valq_mkind, valq = _matchkind(valq)
@ -465,7 +465,8 @@ class Parser(SearchQueryParser):
                if invert:
                    matches = self.all_book_ids - matches
                return matches
-            raise ParseException(query, len(query), 'Recursive query group detected')
+            raise ParseException(
                       _('Recursive query group detected: {0}').format(query))
        # If the user has asked to restrict searching over all field, apply
        # that restriction
--- a/src/calibre/gui2/library/models.py
+++ b/src/calibre/gui2/library/models.py
@ -12,7 +12,7 @@ from PyQt4.Qt import (QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage,
        QModelIndex, QVariant, QDateTime, QColor, QPixmap)
 from calibre.gui2 import NONE, UNDEFINED_QDATETIME, error_dialog
-from calibre.utils.pyparsing import ParseException
+from calibre.utils.search_query_parser import ParseException
 from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_authors
 from calibre.ebooks.metadata.book.base import SafeFormat
 from calibre.ptempfile import PersistentTemporaryFile
--- a/src/calibre/gui2/search_restriction_mixin.py
+++ b/src/calibre/gui2/search_restriction_mixin.py
@ -13,7 +13,7 @@ from PyQt4.Qt import (
 from calibre.gui2 import error_dialog, question_dialog
 from calibre.gui2.widgets import ComboBoxWithHelp
 from calibre.utils.icu import sort_key
-from calibre.utils.pyparsing import ParseException
+from calibre.utils.search_query_parser import ParseException
 from calibre.utils.search_query_parser import saved_searches
 class SelectNames(QDialog):  # {{{
@ -299,7 +299,7 @@ class SearchRestrictionMixin(object):
    def __init__(self):
        self.checked = QIcon(I('ok.png'))
-        self.empty = QIcon(I('empty.png'))
+        self.empty = QIcon(I('blank.png'))
        self.search_based_vl_name = None
        self.search_based_vl = None
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@ -14,7 +14,7 @@ from threading import Thread
 from calibre.utils.config import tweaks, prefs
 from calibre.utils.date import parse_date, now, UNDEFINED_DATE, clean_date_for_sort
 from calibre.utils.search_query_parser import SearchQueryParser
-from calibre.utils.pyparsing import ParseException
+from calibre.utils.search_query_parser import ParseException
 from calibre.utils.localization import (canonicalize_lang, lang_map, get_udc)
 from calibre.db.search import CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH, _match
 from calibre.ebooks.metadata import title_sort, author_to_author_sort
@ -366,25 +366,18 @@ class ResultCache(SearchQueryParser): # {{{
        elif query in self.local_thismonth:
            qd = now()
            field_count = 2
-        elif query.endswith(self.local_daysago):
+        elif query.endswith(self.local_daysago) or query.endswith(self.untrans_daysago):
            num = query[0:-self.local_daysago_len]
            try:
                qd = now() - timedelta(int(num))
            except:
-                raise ParseException(query, len(query), 'Number conversion error', self)
+                raise ParseException(_('Number conversion error: {0}').format(num))
            field_count = 3
        elif query.endswith(self.untrans_daysago):
            num = query[0:-self.untrans_daysago_len]
            try:
                qd = now() - timedelta(int(num))
            except:
                raise ParseException(query, len(query), 'Number conversion error', self)
            field_count = 3
        else:
            try:
                qd = parse_date(query, as_utc=False)
            except:
-                raise ParseException(query, len(query), 'Date conversion error', self)
+                raise ParseException(_('Date conversion error: {0}').format(query))
            if '-' in query:
                field_count = query.count('-') + 1
            else:
@ -460,8 +453,7 @@ class ResultCache(SearchQueryParser): # {{{
            try:
                q = cast(query) * mult
            except:
-                raise ParseException(query, len(query),
+                raise ParseException(_('Non-numeric value in query: {0}').format(query))
                                     'Non-numeric value in query', self)
        for id_ in candidates:
            item = self._data[id_]
@ -501,12 +493,13 @@ class ResultCache(SearchQueryParser): # {{{
        return matches
    def get_keypair_matches(self, location, query, candidates):
        print query
        matches = set([])
        if query.find(':') >= 0:
            q = [q.strip() for q in query.split(':')]
            if len(q) != 2:
-                raise ParseException(query, len(query),
+                raise ParseException(
-                        'Invalid query format for colon-separated search', self)
+                 _('Invalid query format for colon-separated search: {0}').format(query))
            (keyq, valq) = q
            keyq_mkind, keyq = self._matchkind(keyq)
            valq_mkind, valq = self._matchkind(valq)
@ -655,7 +648,7 @@ class ResultCache(SearchQueryParser): # {{{
                    if invert:
                        matches = self.universal_set() - matches
                    return matches
-                raise ParseException(query, len(query), 'Recursive query group detected', self)
+                raise ParseException(_('Recursive query group detected: {0}').format(query))
            # apply the limit if appropriate
            if location == 'all' and prefs['limit_search_columns'] and \
--- a/src/calibre/utils/pyparsing.py
+++ b/src/calibre/utils/pyparsing.py
--- a/src/calibre/utils/search_query_parser.py
+++ b/src/calibre/utils/search_query_parser.py
@ -16,11 +16,8 @@ methods :method:`SearchQueryParser.universal_set` and
 If this module is run, it will perform a series of unit tests.
 '''
-import sys, operator, weakref
+import sys, operator, weakref, re
 from calibre.utils.pyparsing import (CaselessKeyword, Group, Forward,
        CharsNotIn, Suppress, OneOrMore, MatchFirst, CaselessLiteral,
        Optional, NoMatch, ParseException, QuotedString)
 from calibre.constants import preferred_encoding
 from calibre.utils.icu import sort_key
 from calibre import prints
@ -96,6 +93,151 @@ def saved_searches():
    global ss
    return ss
 '''
 Parse a search expression into a series of potentially recursive operations.
 Note that the interpreter wants binary operators, not n-ary ops. This is why we
 recurse instead of iterating when building sequences of the same op.
 The syntax is more than a bit twisted. In particular, the handling of colons
 in the base token requires semantic analysis.
 Also note that the query string is lowercased before analysis. This is OK because
 calibre's searches are all case-insensitive.
 Grammar:
 prog ::= or_expression
 or_expression ::= and_expression [ 'or' or_expression ]
 and_expression ::= not_expression [ [ 'and' ] and_expression ]
 not_expression ::= [ 'not' ] location_expression
 location_expression ::= base_token | ( '(' or_expression ')' )
 base_token ::= a sequence of letters and colons, perhaps quoted
 '''
 class Parser(object):
        '''
        Recursive descent parser for search expressions.

        :meth:`parse` turns a query string into a tree of nested lists:

            ['or', lhs, rhs]
            ['and', lhs, rhs]
            ['not', operand]
            ['token', location, text]

        Operators are always binary (or unary for 'not'); sequences of the
        same operator are built by recursion on the right hand side.
        '''
        def __init__(self):
            # Index into self.tokens of the token currently being examined
            self.current_token = 0
            # List of (token_type, text) tuples, set by parse()
            self.tokens = None
        # Token types. EOF is never produced by the scanner, it is only
        # reported by token_type() once all tokens have been consumed.
        OPCODE = 1
        WORD = 2
        QUOTED_WORD = 3
        EOF = 4
        # Had to translate named constants to numeric values
        # (1 == OPCODE, 2 == WORD, 3 == QUOTED_WORD). Quoted words have their
        # surrounding quotes removed; whitespace produces no token.
        lex_scanner = re.Scanner([
                (r'[()]',             lambda x,t: (1, t)),
                (r'[^ "()]+',         lambda x,t: (2, unicode(t))),
                (r'".*?((?<!\\)")',   lambda x,t: (3, t[1:-1])),
                (r'\s',               None)
        ], flags=re.DOTALL)
        def token(self, advance=False):
            '''
            Return the text of the current token, or None at end of input.
            If advance is True, move on to the next token afterwards.
            '''
            if self.is_eof():
                return None
            res = self.tokens[self.current_token][1]
            if advance:
                self.current_token += 1
            return res
        def token_type(self):
            '''Return the type of the current token, EOF at end of input.'''
            if self.is_eof():
                return self.EOF
            return self.tokens[self.current_token][0]
        def is_eof(self):
            '''Return True if all tokens have been consumed.'''
            return self.current_token >= len(self.tokens)
        def advance(self):
            '''Skip the current token.'''
            self.current_token += 1
        def _at_paren(self, paren):
            '''
            Return True if the current token is the unquoted parenthesis
            `paren`. The token type is checked as well as the text so that a
            quoted "(" or ")" is treated as search text, not as grouping.
            '''
            return self.token_type() == self.OPCODE and self.token() == paren
        def parse(self, expr, locations):
            '''
            Parse the search expression `expr` and return the parse tree.

            :param expr: The query string. It is lowercased before scanning.
            :param locations: The names that are accepted as a location
                prefix, as in ``location:text``.

            Raises ParseException if the expression is not valid.
            '''
            self.locations = locations
            # scan() returns (tokens, unmatched remainder); only the tokens
            # are used here.
            self.tokens = self.lex_scanner.scan(icu_lower(expr))[0]
            self.current_token = 0
            prog = self.or_expression()
            if not self.is_eof():
                raise ParseException(_('Extra characters at end of search'))
            #prints(self.tokens, '\n', prog)
            return prog
        def or_expression(self):
            '''or_expression ::= and_expression [ 'or' or_expression ]'''
            lhs = self.and_expression()
            if self.token() == 'or':
                self.advance()
                return ['or', lhs, self.or_expression()]
            return lhs
        def and_expression(self):
            '''and_expression ::= not_expression [ [ 'and' ] and_expression ]'''
            lhs = self.not_expression()
            if self.token() == 'and':
                self.advance()
                return ['and', lhs, self.and_expression()]
            # Account for the optional 'and'. The next operand can start with
            # a word, a quoted word or an opening parenthesis. Without the
            # parenthesis check, 'a (b or c)' is rejected with "Extra
            # characters at end of search" even though the grammar allows it.
            starts_operand = (
                self.token_type() in [self.WORD, self.QUOTED_WORD] or
                self._at_paren('('))
            if starts_operand and self.token() != 'or':
                return ['and', lhs, self.and_expression()]
            return lhs
        def not_expression(self):
            '''not_expression ::= [ 'not' ] location_expression'''
            if self.token() == 'not':
                self.advance()
                return ['not', self.not_expression()]
            return self.location_expression()
        def location_expression(self):
            '''location_expression ::= base_token | ( '(' or_expression ')' )'''
            if self._at_paren('('):
                self.advance()
                res = self.or_expression()
                if not self._at_paren(')'):
                    raise ParseException(_('missing )'))
                self.advance()
                return res
            if self.token_type() not in [ self.WORD, self.QUOTED_WORD ]:
                raise ParseException(_('Invalid syntax. Expected a lookup name or a word'))
            return self.base_token()
        def base_token(self):
            '''
            Consume a word or quoted word and return ['token', location, text].
            The location is 'all' unless the word starts with a name from
            self.locations followed by a colon.
            '''
            if self.token_type() == self.QUOTED_WORD:
                return ['token', 'all', self.token(advance=True)]
            words = self.token(advance=True).split(':')
            # The complexity here comes from having colon-separated search
            # values. That forces us to check that the first "word" in a colon-
            # separated group is a valid location. If not, then the token must
            # be reconstructed. We also have the problem that locations can be
            # followed by quoted strings that appear as the next token, and
            # that tokens can be a sequence of colons.
            # We have a location if there is more than one word and the first
            # word is in locations. This check could produce a "wrong" answer if
            # the search string is something like 'author: "foo"' because it
            # will be interpreted as 'author:"foo"'. I am choosing to accept the
            # possible error. The expression should be written '"author:" foo'
            if len(words) > 1 and words[0] in self.locations:
                loc = words[0]
                words = words[1:]
                # 'location:' immediately followed by a quoted string: the
                # quoted string is the value being searched for
                if len(words) == 1 and self.token_type() == self.QUOTED_WORD:
                    return ['token', loc, self.token(advance=True)]
                return ['token', loc, ':'.join(words)]
            return ['token', 'all', ':'.join(words)]
 class ParseException(Exception):
    '''Raised when a search expression cannot be parsed or evaluated.'''
    @property
    def msg(self):
        '''The first constructor argument, or an empty string if there is none.'''
        return self.args[0] if self.args else ""
 class SearchQueryParser(object):
    '''
    Parses a search query.
@ -134,70 +276,15 @@ class SearchQueryParser(object):
    def __init__(self, locations, test=False, optimize=False):
        self.sqp_initialize(locations, test=test, optimize=optimize)
        self.parser = Parser()
    def sqp_change_locations(self, locations):
        self.sqp_initialize(locations, optimize=self.optimize)
    def sqp_initialize(self, locations, test=False, optimize=False):
        self.locations = locations
        self._tests_failed = False
        self.optimize = optimize
        # Define a token
        standard_locations = map(lambda x : CaselessLiteral(x)+Suppress(':'),
                locations)
        location = NoMatch()
        for l in standard_locations:
            location |= l
        location     = Optional(location, default='all')
        word_query   = CharsNotIn(u'\t\r\n\u00a0 ' + u'()')
        #quoted_query = Suppress('"')+CharsNotIn('"')+Suppress('"')
        quoted_query = QuotedString('"', escChar='\\')
        query        = quoted_query | word_query
        Token        = Group(location + query).setResultsName('token')
        if test:
            print 'Testing Token parser:'
            Token.validate()
            failed = SearchQueryParser.run_tests(Token, 'token',
                (
                 ('tag:asd',           ['tag', 'asd']),
                 (u'ddsä',              ['all', u'ddsä']),
                 ('"one \\"two"',         ['all', 'one "two']),
                 ('title:"one \\"1.5\\" two"',   ['title', 'one "1.5" two']),
                 ('title:abc"def', ['title', 'abc"def']),
                )
            )
        Or = Forward()
        Parenthesis = Group(
                        Suppress('(') + Or + Suppress(')')
                        ).setResultsName('parenthesis') | Token
        Not = Forward()
        Not << (Group(
            Suppress(CaselessKeyword("not")) + Not
        ).setResultsName("not") | Parenthesis)
        And = Forward()
        And << (Group(
            Not + Suppress(CaselessKeyword("and")) + And
        ).setResultsName("and") | Group(
            Not + OneOrMore(~MatchFirst(list(map(CaselessKeyword,
                ('and', 'or')))) + And)
        ).setResultsName("and") | Not)
        Or << (Group(
            And + Suppress(CaselessKeyword("or")) + Or
        ).setResultsName("or") | And)
        if test:
            #Or.validate()
            self._tests_failed = bool(failed)
        self._parser = Or
        self._parser.setDebug(False)
    def parse(self, query):
        # empty the list of searches used for recursion testing
@ -213,10 +300,9 @@ class SearchQueryParser(object):
    def _parse(self, query, candidates=None):
        self.recurse_level += 1
        try:
-            res = self._parser.parseString(query)[0]
+            res = self.parser.parse(query, self.locations)
        except RuntimeError:
-            import repr
+            raise ParseException(_('Failed to parse query, recursion limit reached: %s')%repr(query))
            raise ParseException('Failed to parse query, recursion limit reached: %s'%repr(query))
        if candidates is None:
            candidates = self.universal_set()
        t = self.evaluate(res, candidates)
@ -227,7 +313,7 @@ class SearchQueryParser(object):
        return getattr(self, 'evaluate_'+group_name)
    def evaluate(self, parse_result, candidates):
-        return self.method(parse_result.getName())(parse_result, candidates)
+        return self.method(parse_result[0])(parse_result[1:], candidates)
    def evaluate_and(self, argument, candidates):
        # RHS checks only those items matched by LHS
@ -249,8 +335,8 @@ class SearchQueryParser(object):
        #  return self.universal_set().difference(self.evaluate(argument[0]))
        return candidates.difference(self.evaluate(argument[0], candidates))
-    def evaluate_parenthesis(self, argument, candidates):
+#     def evaluate_parenthesis(self, argument, candidates):
-        return self.evaluate(argument[0], candidates)
+#         return self.evaluate(argument[0], candidates)
    def evaluate_token(self, argument, candidates):
        location = argument[0]
@ -260,12 +346,16 @@ class SearchQueryParser(object):
                query = query[1:]
            try:
                if query in self.searches_seen:
-                    raise ParseException(query, len(query), 'undefined saved search', self)
+                    raise ParseException(_('Recursive saved search: {0}').format(query))
                if self.recurse_level > 5:
                    self.searches_seen.add(query)
                return self._parse(saved_searches().lookup(query), candidates)
            except ParseException as e:
                raise e
            except: # convert all exceptions (e.g., missing key) to a parse error
-                raise ParseException(query, len(query), 'undefined saved search', self)
+                import traceback
                traceback.print_exc()
                raise ParseException(_('Unknown error in saved search: {0}').format(query))
        return self._get_matches(location, query, candidates)
    def _get_matches(self, location, query, candidates):
--- a/src/calibre/web/feeds/recipes/model.py
+++ b/src/calibre/web/feeds/recipes/model.py
@ -19,7 +19,7 @@ from calibre.web.feeds.recipes.collection import \
        SchedulerConfig, download_builtin_recipe, update_custom_recipe, \
        add_custom_recipe, remove_custom_recipe, get_custom_recipe, \
        get_builtin_recipe
-from calibre.utils.pyparsing import ParseException
+from calibre.utils.search_query_parser import ParseException
 class NewsTreeItem(object):