This commit is contained in:
Kovid Goyal 2022-07-14 17:42:54 +05:30
commit 77bdfee72a
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 49 additions and 7 deletions

View File

@ -386,7 +386,7 @@ Two variants of equality searches are used for hierarchical items (e.g., A.B.C):
*'Regular expression' searches*
Regular expression searches are indicated by prefixing the search string with a tilde (~). Any `Python-compatible regular expression <https://docs.python.org/library/re.html>`__ can be used. Backslashes used to escape special characters in regular expressions must be doubled because single backslashes will be removed during query parsing. For example, to match a literal parenthesis you must enter ``\\(``. Regular expression searches are 'contains' searches unless the expression is anchored. Character variants are significant: ``~e`` doesn't match ``é``.
Regular expression searches are indicated by prefixing the search string with a tilde (~). Any `Python-compatible regular expression <https://docs.python.org/library/re.html>`__ can be used. Backslashes used to escape special characters in regular expressions must be doubled because single backslashes will be removed during query parsing. For example, to match a literal parenthesis you must enter ``\\(`` or alternatively use `super-quotes` (see below). Regular expression searches are 'contains' searches unless the expression is anchored. Character variants are significant: ``~e`` doesn't match ``é``.
*'Character variant' searches*
@ -414,6 +414,21 @@ then these character variant searches find:
* ``title:"^db"`` matches nothing
* ``title:"^,"`` matches #1 (instead of all books) because the comma is significant
*Search Expression Syntax*
A `search expression` is a sequence of `search terms` optionally separated by the operators ``and`` and ``or``. If two search terms occur without a separating operator, ``and`` is assumed. The ``and`` operator has priority over the ``or`` operator; for example the expression ``a or b and c`` is the same as ``a or (b and c)``. You can use parentheses to change the priority; for example ``(a or b) and c`` to make the ``or`` evaluate before the ``and``. You can use the operator ``not`` to negate (invert) the result of evaluating a search expression. Examples:
* ``not tag:foo`` finds all books that don't contain the tag ``foo``
* ``not (author:Asimov or author:Weber)`` finds all books not written by either Asimov or Weber.
The examples above contain `search terms`. A basic `search term` is a sequence of characters not including spaces, quotes (``"``), backslashes (``\``), or parentheses (``( )``). It can optionally be preceded by a column name specifier: the `lookup name` of a column followed by a colon (``:``), for example ``author:Asimov``. If a search term must contain a space then the entire term must be enclosed in quotes, as in ``title:"The Ring"``. If the search term must contain quotes then they must be `escaped` with backslashes. For example, to search for a series named `The "Ball" and The "Chain"`, use::
series:"The \"Ball\" and The \"Chain\""
If you need an actual backslash, something that happens frequently in `regular expression` searches, use two of them (``\\``).
It is sometimes hard to get all the escapes right so the result is what you want, especially in `regular expression` and `template` searches. In these cases use the `super-quote`: ``"""sequence of characters"""``. Super-quoted characters are used unchanged: no escape processing is done.
*More information*
To search for a string that begins with an equals sign, tilde, or caret, prefix the string with a backslash.
@ -524,6 +539,7 @@ Examples:
* ``template:"program: connected_device_name('main')#@#:t:kindle"`` -- is true when the ``kindle`` device is connected.
* ``template:"program: select(formats_sizes(), 'EPUB')#@#:n:>1000000"`` -- finds books with EPUB files larger than 1 MB.
* ``template:"program: select(formats_modtimes('iso'), 'EPUB')#@#:d:>10daysago"`` -- finds books with EPUB files newer than 10 days ago.
* ``template:"""program: book_count('tags:^"' & $series & '"', 0) != 0#@#:n:1"""`` -- finds all books containing the series name in the tags. This example uses super-quoting because the template uses both single quotes (``'``) and double quotes (``"``) when constructing the search expression.
You can build template search queries easily using the :guilabel:`Advanced search dialog` accessed by clicking the button |sbi|. You can test templates on specific books using the calibre :guilabel:`Template tester`, which can be added to the toolbars or menus via :guilabel:`Preferences->Toolbars & menus`. It can also be assigned a keyboard shortcut via :guilabel:`Preferences->Shortcuts`.

View File

@ -324,6 +324,14 @@ class SearchDialog(QDialog):
QDialog.__init__(self, parent)
setup_ui(self, db)
# Get metadata of some of the selected books to give to the template
# dialog to help test the template
from calibre.gui2.ui import get_gui
view = get_gui().library_view
rows = view.selectionModel().selectedRows()[0:10] # Maximum of 10 books
mi = [db.new_api.get_proxy_metadata(db.data.index_to_id(x.row())) for x in rows]
self.template_program_box.set_mi(mi)
current_tab = gprefs.get('advanced search dialog current tab', 0)
self.tab_widget.setCurrentIndex(current_tab)
if current_tab == 1:
@ -393,12 +401,13 @@ class SearchDialog(QDialog):
def template_search_string(self):
template = str(self.template_program_box.text())
value = str(self.template_value_box.text()).replace('"', '\\"')
value = str(self.template_value_box.text())
if template and value:
cb = self.template_test_type_box
op = str(cb.itemData(cb.currentIndex()))
l = f'{template}#@#:{op}:{value}'
return 'template:"' + l + '"'
# Use docstring quoting (super-quoting) to avoid problems with escaping
return 'template:"""' + l + '"""'
return ''
def date_search_string(self):

View File

@ -22,6 +22,7 @@ import weakref, re
from calibre.constants import preferred_encoding
from calibre.utils.icu import sort_key
from calibre import prints
from polyglot.binary import as_hex_unicode, from_hex_unicode
from polyglot.builtins import codepoint_to_chr
@ -150,6 +151,9 @@ class Parser:
EOF = 4
REPLACEMENTS = tuple(('\\' + x, codepoint_to_chr(i + 1)) for i, x in enumerate('\\"()'))
# the sep must be a printable character sequence that won't actually appear naturally
docstring_sep = '□ༀ؆' # Unicode white square, Tibetian Om, Arabic-Indic Cube Root
# Had to translate named constants to numeric values
lex_scanner = re.Scanner([
(r'[()]', lambda x,t: (Parser.OPCODE, t)),
@ -187,6 +191,11 @@ class Parser:
self.current_token += 1
def tokenize(self, expr):
# convert docstrings to hex to avoid all processing. Change the docstring
# indicator to something unique with no characters special to the parser.
expr = re.sub('(""")(..*?)(""")',
lambda mo: self.docstring_sep + as_hex_unicode(mo.group(2)) + self.docstring_sep, expr)
# Strip out escaped backslashes, quotes and parens so that the
# lex scanner doesn't get confused. We put them back later.
for k, v in self.REPLACEMENTS:
@ -194,14 +203,14 @@ class Parser:
tokens = self.lex_scanner.scan(expr)[0]
def unescape(x):
# recover the docstrings
x = re.sub(f'({self.docstring_sep})(..*?)({self.docstring_sep})',
lambda mo: from_hex_unicode(mo.group(2)), x)
for k, v in self.REPLACEMENTS:
x = x.replace(v, k[1:])
return x
return [
(tt, unescape(tv) if tt in (self.WORD, self.QUOTED_WORD) else tv)
for tt, tv in tokens
]
return [(tt, unescape(tv)) for tt, tv in tokens]
def parse(self, expr, locations):
self.locations = locations

View File

@ -386,6 +386,14 @@ class TestSQP(unittest.TestCase):
t('"a \\" () b"', 'Q', 'a " () b')
t('"a“b"', 'Q', 'a“b')
t('"a”b"', 'Q', 'a”b')
# docstring tests
t(r'"""a\1b"""', 'W', r'a\1b')
t(r'("""a\1b""" AND """c""" OR d)',
'O', '(', 'W', r'a\1b', 'W', 'AND', 'W', 'c', 'W', 'OR', 'W', 'd', 'O', ')')
t(r'template:="""a\1b"""', 'W', r'template:=a\1b')
t(r'template:"""=a\1b"""', 'W', r'template:=a\1b')
t(r'template:"""program: return ("\"1\"")#@#n:1"""', 'W',
r'template:program: return ("\"1\"")#@#n:1')
def find_tests():