mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge branch 'master' of https://github.com/cbhaley/calibre
This commit is contained in:
commit
77bdfee72a
@ -386,7 +386,7 @@ Two variants of equality searches are used for hierarchical items (e.g., A.B.C):
|
|||||||
|
|
||||||
*'Regular expression' searches*
|
*'Regular expression' searches*
|
||||||
|
|
||||||
Regular expression searches are indicated by prefixing the search string with a tilde (~). Any `Python-compatible regular expression <https://docs.python.org/library/re.html>`__ can be used. Backslashes used to escape special characters in regular expressions must be doubled because single backslashes will be removed during query parsing. For example, to match a literal parenthesis you must enter ``\\(``. Regular expression searches are 'contains' searches unless the expression is anchored. Character variants are significant: ``~e`` doesn't match ``é``.
|
Regular expression searches are indicated by prefixing the search string with a tilde (~). Any `Python-compatible regular expression <https://docs.python.org/library/re.html>`__ can be used. Backslashes used to escape special characters in regular expressions must be doubled because single backslashes will be removed during query parsing. For example, to match a literal parenthesis you must enter ``\\(`` or, alternatively, use `super-quotes` (see below). Regular expression searches are 'contains' searches unless the expression is anchored. Character variants are significant: ``~e`` doesn't match ``é``.
|
||||||
|
|
||||||
*'Character variant' searches*
|
*'Character variant' searches*
|
||||||
|
|
||||||
@ -414,6 +414,21 @@ then these character variant searches find:
|
|||||||
* ``title:"^db"`` matches nothing
|
* ``title:"^db"`` matches nothing
|
||||||
* ``title:"^,"`` matches #1 (instead of all books) because the comma is significant
|
* ``title:"^,"`` matches #1 (instead of all books) because the comma is significant
|
||||||
|
|
||||||
|
*Search Expression Syntax*
|
||||||
|
|
||||||
|
A `search expression` is a sequence of `search terms` optionally separated by the operators ``and`` and ``or``. If two search terms occur without a separating operator, ``and`` is assumed. The ``and`` operator has priority over the ``or`` operator; for example, the expression ``a or b and c`` is the same as ``a or (b and c)``. You can use parentheses to change the priority; for example, ``(a or b) and c`` makes the ``or`` evaluate before the ``and``. You can use the operator ``not`` to negate (invert) the result of evaluating a search expression. Examples:
|
||||||
|
|
||||||
|
* ``not tag:foo`` finds all books that don't contain the tag ``foo``
|
||||||
|
* ``not (author:Asimov or author:Weber)`` finds all books not written by either Asimov or Weber.
|
||||||
|
|
||||||
|
The examples above contain `search terms`. A basic `search term` is a sequence of characters not including spaces, quotes (``"``), backslashes (``\``), or parentheses (``( )``). It can be optionally preceded by a column name specifier: the `lookup name` of a column followed by a colon (``:``), for example ``author:Asimov``. If a search term must contain a space then the entire term must be enclosed in quotes, as in ``title:"The Ring"``. If the search term must contain quotes then they must be `escaped` with backslashes. For example, to search for a series named `The "Ball" and The "Chain"`, use::
|
||||||
|
|
||||||
|
series:"The \"Ball\" and The \"Chain\""
|
||||||
|
|
||||||
|
If you need an actual backslash, something that happens frequently in `regular expression` searches, use two of them (``\\``).
|
||||||
|
|
||||||
|
It is sometimes hard to get all the escapes right so the result is what you want, especially in `regular expression` and `template` searches. In these cases use the `super-quote`: ``"""sequence of characters"""``. Super-quoted characters are used unchanged: no escape processing is done.
|
||||||
|
|
||||||
*More information*
|
*More information*
|
||||||
|
|
||||||
To search for a string that begins with an equals sign, tilde, or caret, prefix the string with a backslash.
|
To search for a string that begins with an equals sign, tilde, or caret, prefix the string with a backslash.
|
||||||
@ -524,6 +539,7 @@ Examples:
|
|||||||
* ``template:"program: connected_device_name('main')#@#:t:kindle"`` -- is true when the ``kindle`` device is connected.
|
* ``template:"program: connected_device_name('main')#@#:t:kindle"`` -- is true when the ``kindle`` device is connected.
|
||||||
* ``template:"program: select(formats_sizes(), 'EPUB')#@#:n:>1000000"`` -- finds books with EPUB files larger than 1 MB.
|
* ``template:"program: select(formats_sizes(), 'EPUB')#@#:n:>1000000"`` -- finds books with EPUB files larger than 1 MB.
|
||||||
* ``template:"program: select(formats_modtimes('iso'), 'EPUB')#@#:d:>10daysago"`` -- finds books with EPUB files newer than 10 days ago.
|
* ``template:"program: select(formats_modtimes('iso'), 'EPUB')#@#:d:>10daysago"`` -- finds books with EPUB files newer than 10 days ago.
|
||||||
|
* ``template:"""program: book_count('tags:^"' & $series & '"', 0) != 0#@#:n:1"""`` -- finds all books containing the series name in the tags. This example uses super-quoting because the template uses both single quotes (``'``) and double quotes (``"``) when constructing the search expression.
|
||||||
|
|
||||||
You can build template search queries easily using the :guilabel:`Advanced search dialog` accessed by clicking the button |sbi|. You can test templates on specific books using the calibre :guilabel:`Template tester`, which can be added to the toolbars or menus via :guilabel:`Preferences->Toolbars & menus`. It can also be assigned a keyboard shortcut via :guilabel:`Preferences->Shortcuts`.
|
You can build template search queries easily using the :guilabel:`Advanced search dialog` accessed by clicking the button |sbi|. You can test templates on specific books using the calibre :guilabel:`Template tester`, which can be added to the toolbars or menus via :guilabel:`Preferences->Toolbars & menus`. It can also be assigned a keyboard shortcut via :guilabel:`Preferences->Shortcuts`.
|
||||||
|
|
||||||
|
@ -324,6 +324,14 @@ class SearchDialog(QDialog):
|
|||||||
QDialog.__init__(self, parent)
|
QDialog.__init__(self, parent)
|
||||||
setup_ui(self, db)
|
setup_ui(self, db)
|
||||||
|
|
||||||
|
# Get metadata of some of the selected books to give to the template
|
||||||
|
# dialog to help test the template
|
||||||
|
from calibre.gui2.ui import get_gui
|
||||||
|
view = get_gui().library_view
|
||||||
|
rows = view.selectionModel().selectedRows()[0:10] # Maximum of 10 books
|
||||||
|
mi = [db.new_api.get_proxy_metadata(db.data.index_to_id(x.row())) for x in rows]
|
||||||
|
self.template_program_box.set_mi(mi)
|
||||||
|
|
||||||
current_tab = gprefs.get('advanced search dialog current tab', 0)
|
current_tab = gprefs.get('advanced search dialog current tab', 0)
|
||||||
self.tab_widget.setCurrentIndex(current_tab)
|
self.tab_widget.setCurrentIndex(current_tab)
|
||||||
if current_tab == 1:
|
if current_tab == 1:
|
||||||
@ -393,12 +401,13 @@ class SearchDialog(QDialog):
|
|||||||
|
|
||||||
def template_search_string(self):
|
def template_search_string(self):
|
||||||
template = str(self.template_program_box.text())
|
template = str(self.template_program_box.text())
|
||||||
value = str(self.template_value_box.text()).replace('"', '\\"')
|
value = str(self.template_value_box.text())
|
||||||
if template and value:
|
if template and value:
|
||||||
cb = self.template_test_type_box
|
cb = self.template_test_type_box
|
||||||
op = str(cb.itemData(cb.currentIndex()))
|
op = str(cb.itemData(cb.currentIndex()))
|
||||||
l = f'{template}#@#:{op}:{value}'
|
l = f'{template}#@#:{op}:{value}'
|
||||||
return 'template:"' + l + '"'
|
# Use docstring quoting (super-quoting) to avoid problems with escaping
|
||||||
|
return 'template:"""' + l + '"""'
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def date_search_string(self):
|
def date_search_string(self):
|
||||||
|
@ -22,6 +22,7 @@ import weakref, re
|
|||||||
from calibre.constants import preferred_encoding
|
from calibre.constants import preferred_encoding
|
||||||
from calibre.utils.icu import sort_key
|
from calibre.utils.icu import sort_key
|
||||||
from calibre import prints
|
from calibre import prints
|
||||||
|
from polyglot.binary import as_hex_unicode, from_hex_unicode
|
||||||
from polyglot.builtins import codepoint_to_chr
|
from polyglot.builtins import codepoint_to_chr
|
||||||
|
|
||||||
|
|
||||||
@ -150,6 +151,9 @@ class Parser:
|
|||||||
EOF = 4
|
EOF = 4
|
||||||
REPLACEMENTS = tuple(('\\' + x, codepoint_to_chr(i + 1)) for i, x in enumerate('\\"()'))
|
REPLACEMENTS = tuple(('\\' + x, codepoint_to_chr(i + 1)) for i, x in enumerate('\\"()'))
|
||||||
|
|
||||||
|
# the sep must be a printable character sequence that won't actually appear naturally
|
||||||
|
docstring_sep = '□ༀ؆' # Unicode white square, Tibetan Om, Arabic-Indic Cube Root
|
||||||
|
|
||||||
# Had to translate named constants to numeric values
|
# Had to translate named constants to numeric values
|
||||||
lex_scanner = re.Scanner([
|
lex_scanner = re.Scanner([
|
||||||
(r'[()]', lambda x,t: (Parser.OPCODE, t)),
|
(r'[()]', lambda x,t: (Parser.OPCODE, t)),
|
||||||
@ -187,6 +191,11 @@ class Parser:
|
|||||||
self.current_token += 1
|
self.current_token += 1
|
||||||
|
|
||||||
def tokenize(self, expr):
|
def tokenize(self, expr):
|
||||||
|
# convert docstrings to hex to avoid all processing. Change the docstring
|
||||||
|
# indicator to something unique with no characters special to the parser.
|
||||||
|
expr = re.sub('(""")(..*?)(""")',
|
||||||
|
lambda mo: self.docstring_sep + as_hex_unicode(mo.group(2)) + self.docstring_sep, expr)
|
||||||
|
|
||||||
# Strip out escaped backslashes, quotes and parens so that the
|
# Strip out escaped backslashes, quotes and parens so that the
|
||||||
# lex scanner doesn't get confused. We put them back later.
|
# lex scanner doesn't get confused. We put them back later.
|
||||||
for k, v in self.REPLACEMENTS:
|
for k, v in self.REPLACEMENTS:
|
||||||
@ -194,14 +203,14 @@ class Parser:
|
|||||||
tokens = self.lex_scanner.scan(expr)[0]
|
tokens = self.lex_scanner.scan(expr)[0]
|
||||||
|
|
||||||
def unescape(x):
|
def unescape(x):
|
||||||
|
# recover the docstrings
|
||||||
|
x = re.sub(f'({self.docstring_sep})(..*?)({self.docstring_sep})',
|
||||||
|
lambda mo: from_hex_unicode(mo.group(2)), x)
|
||||||
for k, v in self.REPLACEMENTS:
|
for k, v in self.REPLACEMENTS:
|
||||||
x = x.replace(v, k[1:])
|
x = x.replace(v, k[1:])
|
||||||
return x
|
return x
|
||||||
|
|
||||||
return [
|
return [(tt, unescape(tv)) for tt, tv in tokens]
|
||||||
(tt, unescape(tv) if tt in (self.WORD, self.QUOTED_WORD) else tv)
|
|
||||||
for tt, tv in tokens
|
|
||||||
]
|
|
||||||
|
|
||||||
def parse(self, expr, locations):
|
def parse(self, expr, locations):
|
||||||
self.locations = locations
|
self.locations = locations
|
||||||
|
@ -386,6 +386,14 @@ class TestSQP(unittest.TestCase):
|
|||||||
t('"a \\" () b"', 'Q', 'a " () b')
|
t('"a \\" () b"', 'Q', 'a " () b')
|
||||||
t('"a“b"', 'Q', 'a“b')
|
t('"a“b"', 'Q', 'a“b')
|
||||||
t('"a”b"', 'Q', 'a”b')
|
t('"a”b"', 'Q', 'a”b')
|
||||||
|
# docstring tests
|
||||||
|
t(r'"""a\1b"""', 'W', r'a\1b')
|
||||||
|
t(r'("""a\1b""" AND """c""" OR d)',
|
||||||
|
'O', '(', 'W', r'a\1b', 'W', 'AND', 'W', 'c', 'W', 'OR', 'W', 'd', 'O', ')')
|
||||||
|
t(r'template:="""a\1b"""', 'W', r'template:=a\1b')
|
||||||
|
t(r'template:"""=a\1b"""', 'W', r'template:=a\1b')
|
||||||
|
t(r'template:"""program: return ("\"1\"")#@#n:1"""', 'W',
|
||||||
|
r'template:program: return ("\"1\"")#@#n:1')
|
||||||
|
|
||||||
|
|
||||||
def find_tests():
|
def find_tests():
|
||||||
|
Loading…
x
Reference in New Issue
Block a user