diff --git a/manual/gui.rst b/manual/gui.rst index be1edd8fc9..78d768f451 100644 --- a/manual/gui.rst +++ b/manual/gui.rst @@ -386,7 +386,7 @@ Two variants of equality searches are used for hierarchical items (e.g., A.B.C): *'Regular expression' searches* -Regular expression searches are indicated by prefixing the search string with a tilde (~). Any `Python-compatible regular expression `__ can be used. Backslashes used to escape special characters in regular expressions must be doubled because single backslashes will be removed during query parsing. For example, to match a literal parenthesis you must enter ``\\(``. Regular expression searches are 'contains' searches unless the expression is anchored. Character variants are significant: ``~e`` doesn't match ``é``. +Regular expression searches are indicated by prefixing the search string with a tilde (~). Any `Python-compatible regular expression `__ can be used. Backslashes used to escape special characters in regular expressions must be doubled because single backslashes will be removed during query parsing. For example, to match a literal parenthesis you must enter ``\\(`` or alternatively use `super quotes` (see below). Regular expression searches are 'contains' searches unless the expression is anchored. Character variants are significant: ``~e`` doesn't match ``é``. *'Character variant' searches* @@ -414,6 +414,21 @@ then these character variant searches find: * ``title:"^db"`` matches nothing * ``title:"^,"`` matches #1 (instead of all books) because the comma is significant +*Search Expression Syntax* + +A `search expression` is a sequence of `search terms` optionally separated by the operators ``and`` and ``or``. If two search terms occur without a separating operator, ``and`` is assumed. The ``and`` operator has priority over the ``or`` operator; for example the expression ``a or b and c`` is the same as ``a or (b and c)``. 
You can use parentheses to change the priority; for example, ``(a or b) and c`` makes the ``or`` evaluate before the ``and``. You can use the operator ``not`` to negate (invert) the result of evaluating a search expression. Examples: + + * ``not tag:foo`` finds all books that don't contain the tag ``foo`` + * ``not (author:Asimov or author:Weber)`` finds all books not written by either Asimov or Weber. + +The examples above use `search terms`. A basic `search term` is a sequence of characters not including spaces, quotes (``"``), backslashes (``\``), or parentheses (``( )``). It can be optionally preceded by a column name specifier: the `lookup name` of a column followed by a colon (``:``), for example ``author:Asimov``. If a search term must contain a space then the entire term must be enclosed in quotes, as in ``title:"The Ring"``. If the search term must contain quotes then they must be `escaped` with backslashes. For example, to search for a series named `The "Ball" and The "Chain"`, use:: + + series:"The \"Ball\" and The \"Chain\"" + +If you need an actual backslash, something that happens frequently in `regular expression` searches, use two of them (``\\``). + +It is sometimes hard to get all the escapes right so the result is what you want, especially in `regular expression` and `template` searches. In these cases use the `super-quote`: ``"""sequence of characters"""``. Super-quoted characters are used unchanged: no escape processing is done. + *More information* To search for a string that begins with an equals, tilde, or caret; prefix the string with a backslash. @@ -524,6 +539,7 @@ Examples: * ``template:"program: connected_device_name('main')#@#:t:kindle"`` -- is true when the ``kindle`` device is connected. * ``template:"program: select(formats_sizes(), 'EPUB')#@#:n:>1000000"`` -- finds books with EPUB files larger than 1 MB. 
* ``template:"program: select(formats_modtimes('iso'), 'EPUB')#@#:d:>10daysago"`` -- finds books with EPUB files newer than 10 days ago. + * ``template:"""program: book_count('tags:^"' & $series & '"', 0) != 0#@#:n:1"""`` -- finds all books containing the series name in the tags. This example uses super-quoting because the template uses both single quotes (``'``) and double quotes (``"``) when constructing the search expression. You can build template search queries easily using the :guilabel:`Advanced search dialog` accessed by clicking the button |sbi|. You can test templates on specific books using the calibre :guilabel:`Template tester`, which can be added to the toolbars or menus via :guilabel:`Preferences->Toolbars & menus`. It can also be assigned a keyboard shortcut via :guilabel:`Preferences->Shortcuts`. diff --git a/src/calibre/gui2/dialogs/search.py b/src/calibre/gui2/dialogs/search.py index 35a473cdfb..bc026e7379 100644 --- a/src/calibre/gui2/dialogs/search.py +++ b/src/calibre/gui2/dialogs/search.py @@ -324,6 +324,14 @@ class SearchDialog(QDialog): QDialog.__init__(self, parent) setup_ui(self, db) + # Get metadata of some of the selected books to give to the template + # dialog to help test the template + from calibre.gui2.ui import get_gui + view = get_gui().library_view + rows = view.selectionModel().selectedRows()[0:10] # Maximum of 10 books + mi = [db.new_api.get_proxy_metadata(db.data.index_to_id(x.row())) for x in rows] + self.template_program_box.set_mi(mi) + current_tab = gprefs.get('advanced search dialog current tab', 0) self.tab_widget.setCurrentIndex(current_tab) if current_tab == 1: @@ -393,12 +401,13 @@ class SearchDialog(QDialog): def template_search_string(self): template = str(self.template_program_box.text()) - value = str(self.template_value_box.text()).replace('"', '\\"') + value = str(self.template_value_box.text()) if template and value: cb = self.template_test_type_box op = str(cb.itemData(cb.currentIndex())) l = 
f'{template}#@#:{op}:{value}' - return 'template:"' + l + '"' + # Use docstring quoting (super-quoting) to avoid problems with escaping + return 'template:"""' + l + '"""' return '' def date_search_string(self): diff --git a/src/calibre/utils/search_query_parser.py b/src/calibre/utils/search_query_parser.py index c8b6442244..bac513a1e4 100644 --- a/src/calibre/utils/search_query_parser.py +++ b/src/calibre/utils/search_query_parser.py @@ -22,6 +22,7 @@ import weakref, re from calibre.constants import preferred_encoding from calibre.utils.icu import sort_key from calibre import prints +from polyglot.binary import as_hex_unicode, from_hex_unicode from polyglot.builtins import codepoint_to_chr @@ -150,6 +151,9 @@ class Parser: EOF = 4 REPLACEMENTS = tuple(('\\' + x, codepoint_to_chr(i + 1)) for i, x in enumerate('\\"()')) + # the sep must be a printable character sequence that won't actually appear naturally + docstring_sep = '□ༀ؆' # Unicode white square, Tibetan Om, Arabic-Indic Cube Root + # Had to translate named constants to numeric values lex_scanner = re.Scanner([ (r'[()]', lambda x,t: (Parser.OPCODE, t)), @@ -187,6 +191,11 @@ class Parser: self.current_token += 1 def tokenize(self, expr): + # convert docstrings to hex to avoid all processing. Change the docstring + # indicator to something unique with no characters special to the parser. + expr = re.sub('(""")(..*?)(""")', + lambda mo: self.docstring_sep + as_hex_unicode(mo.group(2)) + self.docstring_sep, expr) + # Strip out escaped backslashes, quotes and parens so that the # lex scanner doesn't get confused. We put them back later. 
for k, v in self.REPLACEMENTS: @@ -194,14 +203,14 @@ class Parser: tokens = self.lex_scanner.scan(expr)[0] def unescape(x): + # recover the docstrings + x = re.sub(f'({self.docstring_sep})(..*?)({self.docstring_sep})', + lambda mo: from_hex_unicode(mo.group(2)), x) for k, v in self.REPLACEMENTS: x = x.replace(v, k[1:]) return x - return [ - (tt, unescape(tv) if tt in (self.WORD, self.QUOTED_WORD) else tv) - for tt, tv in tokens - ] + return [(tt, unescape(tv)) for tt, tv in tokens] def parse(self, expr, locations): self.locations = locations diff --git a/src/calibre/utils/search_query_parser_test.py b/src/calibre/utils/search_query_parser_test.py index abcaa4ddf1..2097fddc40 100644 --- a/src/calibre/utils/search_query_parser_test.py +++ b/src/calibre/utils/search_query_parser_test.py @@ -386,6 +386,14 @@ class TestSQP(unittest.TestCase): t('"a \\" () b"', 'Q', 'a " () b') t('"a“b"', 'Q', 'a“b') t('"a”b"', 'Q', 'a”b') + # docstring tests + t(r'"""a\1b"""', 'W', r'a\1b') + t(r'("""a\1b""" AND """c""" OR d)', + 'O', '(', 'W', r'a\1b', 'W', 'AND', 'W', 'c', 'W', 'OR', 'W', 'd', 'O', ')') + t(r'template:="""a\1b"""', 'W', r'template:=a\1b') + t(r'template:"""=a\1b"""', 'W', r'template:=a\1b') + t(r'template:"""program: return ("\"1\"")#@#n:1"""', 'W', + r'template:program: return ("\"1\"")#@#n:1') def find_tests():