Efficiently search for "all notes"

This commit is contained in:
Kovid Goyal 2023-09-26 12:46:58 +05:30
parent b88c0551ad
commit 6992f86c71
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
5 changed files with 88 additions and 7 deletions

View File

@ -1005,10 +1005,11 @@ class DB:
return self.notes.unretire(self.conn, field, item_id, item_val) return self.notes.unretire(self.conn, field, item_id, item_val)
def notes_search(self, def notes_search(self,
fts_engine_query, use_stemming, highlight_start, highlight_end, snippet_size, restrict_to_fields, return_text, process_each_result fts_engine_query, use_stemming, highlight_start, highlight_end, snippet_size, restrict_to_fields, return_text, process_each_result, limit
): ):
yield from self.notes.search( yield from self.notes.search(
self.conn, fts_engine_query, use_stemming, highlight_start, highlight_end, snippet_size, restrict_to_fields, return_text, process_each_result) self.conn, fts_engine_query, use_stemming, highlight_start, highlight_end, snippet_size, restrict_to_fields, return_text,
process_each_result, limit)
def export_notes_data(self, outfile): def export_notes_data(self, outfile):
import zipfile import zipfile

View File

@ -752,8 +752,9 @@ class Cache:
return_text=True, return_text=True,
result_type=tuple, result_type=tuple,
process_each_result=None, process_each_result=None,
limit=None,
): ):
' Search the text of notes using an FTS index ' ' Search the text of notes using an FTS index. If the query is empty return all notes. '
return result_type(self.backend.notes_search( return result_type(self.backend.notes_search(
fts_engine_query, fts_engine_query,
use_stemming=use_stemming, use_stemming=use_stemming,
@ -763,6 +764,7 @@ class Cache:
return_text=return_text, return_text=return_text,
restrict_to_fields=restrict_to_fields, restrict_to_fields=restrict_to_fields,
process_each_result=process_each_result, process_each_result=process_each_result,
limit=limit,
)) ))
# }}} # }}}

View File

@ -367,10 +367,37 @@ class Notes:
with suppress(FileNotFoundError), open(path, 'rb') as f: with suppress(FileNotFoundError), open(path, 'rb') as f:
return {'name': name, 'data': f.read(), 'hash': resource_hash} return {'name': name, 'data': f.read(), 'hash': resource_hash}
def all_notes(self, conn, restrict_to_fields=(), limit=None, snippet_size=64, return_text=True, process_each_result=None):
    '''
    Generator over the stored notes, ordered by ``ctime`` descending.

    This is the code path used when a notes search is run with an empty
    query: no FTS matching is done, the notes table is read directly.

    :param conn: Database connection; only ``conn.execute(sql, bindings)`` is used.
    :param restrict_to_fields: Iterable of ``colname`` values. When non-empty only
        notes whose ``colname`` is one of these values are returned.
    :param limit: Maximum number of rows to return, or ``None`` for no limit.
    :param snippet_size: Controls how much of the note text is returned, the
        SQL ``substr()`` length is ``snippet_size * 8``. ``None`` means 64.
    :param return_text: When false the ``text`` key of every result is ``''``.
    :param process_each_result: Optional callable applied to each result dict,
        whatever it returns is yielded in place of the dict.

    Yields dicts with the keys ``id``, ``field``, ``item_id`` and ``text``.
    The consumer can stop the iteration early by sending ``True`` into the
    generator.
    '''
    if snippet_size is None:
        snippet_size = 64
    # NOTE(review): the factor 8 presumably converts a snippet size given in
    # words into an approximate character count -- confirm.
    char_size = snippet_size * 8
    # NOTE(review): substr() starts at position 9, i.e. the first 8 characters
    # of searchable_text are skipped; presumably a fixed-size prefix written
    # when the note is stored -- confirm against the code that sets it.
    query = 'SELECT {0}.id, {0}.colname, {0}.item, substr({0}.searchable_text, 9, ?) FROM {0} '.format('notes')
    # All caller supplied values are passed as bindings rather than being
    # formatted into the SQL text. Bindings are listed in the order their
    # placeholders appear in the statement.
    bindings = (char_size,)
    fields = tuple(restrict_to_fields)
    if fields:
        query += ' WHERE notes_db.notes.colname IN ({})'.format(','.join('?' * len(fields)))
        bindings += fields
    query += ' ORDER BY ctime DESC'
    if limit is not None:
        query += ' LIMIT ?'
        bindings += (limit,)
    for record in conn.execute(query, bindings):
        result = {
            'id': record[0],
            'field': record[1],
            'item_id': record[2],
            'text': record[3] if return_text else '',
        }
        if process_each_result is not None:
            result = process_each_result(result)
        # The value of the yield expression is whatever the consumer passed
        # to send(); plain iteration gives None and simply continues.
        ret = yield result
        if ret is True:
            break
def search(self, def search(self,
conn, fts_engine_query, use_stemming, highlight_start, highlight_end, snippet_size, restrict_to_fields=(), conn, fts_engine_query, use_stemming, highlight_start, highlight_end, snippet_size, restrict_to_fields=(),
return_text=True, process_each_result=None return_text=True, process_each_result=None, limit=None
): ):
if not fts_engine_query:
yield from self.all_notes(
conn, restrict_to_fields, limit=limit, snippet_size=snippet_size, return_text=return_text, process_each_result=process_each_result)
return
fts_engine_query = unicode_normalize(fts_engine_query) fts_engine_query = unicode_normalize(fts_engine_query)
fts_table = 'notes_fts' + ('_stemmed' if use_stemming else '') fts_table = 'notes_fts' + ('_stemmed' if use_stemming else '')
if return_text: if return_text:
@ -390,6 +417,8 @@ class Notes:
query += ' notes_db.notes.colname IN ({}) AND '.format(','.join(repeat('?', len(restrict_to_fields)))) query += ' notes_db.notes.colname IN ({}) AND '.format(','.join(repeat('?', len(restrict_to_fields))))
query += f' "{fts_table}" MATCH ?' query += f' "{fts_table}" MATCH ?'
query += f' ORDER BY {fts_table}.rank ' query += f' ORDER BY {fts_table}.rank '
if limit is not None:
query += f' LIMIT {limit}'
try: try:
for record in conn.execute(query, restrict_to_fields+(fts_engine_query,)): for record in conn.execute(query, restrict_to_fields+(fts_engine_query,)):
result = { result = {

View File

@ -171,6 +171,8 @@ def test_fts(self: 'NotesTest'):
self.ae(ids_for_search('wunderbar'), {('authors', authors[0])}) self.ae(ids_for_search('wunderbar'), {('authors', authors[0])})
self.ae(ids_for_search('common'), {('authors', authors[0]), ('authors', authors[1]), ('tags', tags[0]), ('tags', tags[1])}) self.ae(ids_for_search('common'), {('authors', authors[0]), ('authors', authors[1]), ('tags', tags[0]), ('tags', tags[1])})
self.ae(ids_for_search('common', ('tags',)), {('tags', tags[0]), ('tags', tags[1])}) self.ae(ids_for_search('common', ('tags',)), {('tags', tags[0]), ('tags', tags[1])})
self.ae(ids_for_search(''), ids_for_search('common'))
self.ae(ids_for_search('', ('tags',)), ids_for_search('common', ('tags',)))
# test that searching by item value works # test that searching by item value works
an = cache.get_item_name('authors', authors[0]) an = cache.get_item_name('authors', authors[0])

View File

@ -8,9 +8,11 @@ from qt.core import (
QToolButton, QVBoxLayout, QWidget, pyqtSignal, QToolButton, QVBoxLayout, QWidget, pyqtSignal,
) )
from calibre.db.backend import FTSQueryError
from calibre.db.cache import Cache from calibre.db.cache import Cache
from calibre.gui2 import Application, gprefs from calibre.gui2 import Application, error_dialog, gprefs
from calibre.gui2.viewer.widgets import SearchBox from calibre.gui2.viewer.widgets import ResultsDelegate, SearchBox
from calibre.gui2.widgets import BusyCursor
from calibre.gui2.widgets2 import Dialog, FlowLayout from calibre.gui2.widgets2 import Dialog, FlowLayout
@ -19,6 +21,27 @@ def current_db() -> Cache:
return (getattr(current_db, 'ans', None) or get_gui().current_db).new_api return (getattr(current_db, 'ans', None) or get_gui().current_db).new_api
class NotesResultsDelegate(ResultsDelegate):
    '''
    Results delegate for the notes browser. It splits the text of a search
    result on the \\x1d character, which is what the notes browser passes as
    both highlight_start and highlight_end when it runs a search.
    '''

    add_ellipsis = False
    emphasize_text = False

    def result_data(self, result):
        '''
        Return a five item tuple for ``result``.

        For anything that is not a dict every item is None. For a dict the
        tuple is ``(False, before, text, after, False)`` where ``result['text']``
        has been cut at the first two \\x1d markers:

        * no marker: everything is ``text``, ``before`` and ``after`` are empty
        * one marker: ``before`` precedes it and ``text`` follows it
        * two or more: ``after`` is whatever follows the second marker, with
          any further markers removed

        NOTE(review): the meaning of the first and last items is defined by
        ResultsDelegate, which is not visible here.
        '''
        if not isinstance(result, dict):
            return (None,) * 5
        head, marker, remainder = result['text'].partition('\x1d')
        if not marker:
            # No highlight marker at all, the whole string is the text
            return False, '', head, '', False
        text, marker, tail = remainder.partition('\x1d')
        return False, head, text, tail.replace('\x1d', ''), False
class RestrictFields(QWidget): class RestrictFields(QWidget):
def __init__(self, parent=None): def __init__(self, parent=None):
@ -28,6 +51,7 @@ class RestrictFields(QWidget):
self.restrict_label = QLabel(_('Restrict to:')) self.restrict_label = QLabel(_('Restrict to:'))
self.restricted_fields = [] self.restricted_fields = []
self.add_button = b = QToolButton(self) self.add_button = b = QToolButton(self)
b.setToolTip(_('Add categories to which to restrict results.\nWhen no categories are specified no restriction is in effect'))
b.setIcon(QIcon.ic('plus.png')), b.setText(_('Add')), b.setToolButtonStyle(Qt.ToolButtonStyle.ToolButtonTextBesideIcon) b.setIcon(QIcon.ic('plus.png')), b.setText(_('Add')), b.setToolButtonStyle(Qt.ToolButtonStyle.ToolButtonTextBesideIcon)
b.setPopupMode(QToolButton.ToolButtonPopupMode.InstantPopup) b.setPopupMode(QToolButton.ToolButtonPopupMode.InstantPopup)
self.fields_menu = m = QMenu() self.fields_menu = m = QMenu()
@ -128,8 +152,9 @@ class SearchInput(QWidget):
@property @property
def current_query(self): def current_query(self):
return { return {
'query': self.search_box.lineEdit().text().strip(), 'fts_engine_query': self.search_box.lineEdit().text().strip(),
'restrict_to_fields': tuple(self.restrict.restricted_fields), 'restrict_to_fields': tuple(self.restrict.restricted_fields),
'use_stemming': bool(self.parent().use_stemmer.isChecked()),
} }
def cleared(self): def cleared(self):
@ -144,6 +169,8 @@ class SearchInput(QWidget):
class NotesBrowser(Dialog): class NotesBrowser(Dialog):
current_query = None
def __init__(self, parent=None): def __init__(self, parent=None):
super().__init__(_('Browse notes'), 'browse-notes-dialog', default_buttons=QDialogButtonBox.StandardButton.Close) super().__init__(_('Browse notes'), 'browse-notes-dialog', default_buttons=QDialogButtonBox.StandardButton.Close)
self.setWindowIcon(QIcon.ic('notes.png')) self.setWindowIcon(QIcon.ic('notes.png'))
@ -168,6 +195,26 @@ class NotesBrowser(Dialog):
l.addLayout(h) l.addLayout(h)
h.addWidget(us), h.addStretch(10), h.addWidget(self.bb) h.addWidget(us), h.addStretch(10), h.addWidget(self.bb)
def do_find(self, backwards=False):
    '''
    Run the search currently entered in the search input.

    If the query is the same as the one whose results are already being
    shown, just move to the next (or, with ``backwards``, the previous)
    result. Otherwise query the database and replace the contents of the
    results list. An invalid FTS expression is reported to the user in an
    error dialog and the previous query is left in place.
    '''
    query = self.search_input.current_query
    if query == self.current_query:
        # Same search as last time, only step through the existing results
        self.results_list.show_next(backwards)
        return
    try:
        with BusyCursor():
            # \x1d is used as the highlight marker on both sides of a match
            matches = current_db().search_notes(highlight_start='\x1d', highlight_end='\x1d', snippet_size=64, **query)
            has_search_text = bool(query['fts_engine_query'])
            self.results_list.set_results(matches, has_search_text)
            # Only remember the query once its results are being displayed
            self.current_query = query
    except FTSQueryError as err:
        syntax_url = 'https://www.sqlite.org/fts5.html#full_text_query_syntax'
        msg = '<p>' + _(
            'The search expression: {0} is invalid. The search syntax used is the'
            ' SQLite Full text Search Query syntax, <a href="{1}">described here</a>.').format(
                err.query, syntax_url)
        return error_dialog(self, _('Invalid search expression'), msg, det_msg=str(err), show=True)
if __name__ == '__main__': if __name__ == '__main__':