diff --git a/resources/fts_sqlite.sql b/resources/fts_sqlite.sql index d2b0bd764f..cda94b4f11 100644 --- a/resources/fts_sqlite.sql +++ b/resources/fts_sqlite.sql @@ -19,8 +19,8 @@ CREATE TABLE fts_db.books_text ( id INTEGER PRIMARY KEY, ); -CREATE VIRTUAL TABLE fts_db.books_fts USING fts5(searchable_text, content = 'fts_db.books_text', content_rowid = 'id', tokenize = 'calibre remove_diacritics 2'); -CREATE VIRTUAL TABLE fts_db.books_fts_stemmed USING fts5(searchable_text, content = 'fts_db.books_text', content_rowid = 'id', tokenize = 'porter calibre remove_diacritics 2'); +CREATE VIRTUAL TABLE fts_db.books_fts USING fts5(searchable_text, content = 'books_text', content_rowid = 'id', tokenize = 'calibre remove_diacritics 2'); +CREATE VIRTUAL TABLE fts_db.books_fts_stemmed USING fts5(searchable_text, content = 'books_text', content_rowid = 'id', tokenize = 'porter calibre remove_diacritics 2'); CREATE TRIGGER fts_db.books_fts_insert_trg AFTER INSERT ON fts_db.books_text BEGIN diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py index 371ed71a5c..4efa5a18f3 100644 --- a/src/calibre/db/backend.py +++ b/src/calibre/db/backend.py @@ -971,7 +971,7 @@ class DB: def commit_fts_result(self, book_id, fmt, fmt_size, fmt_hash, text, err_msg): return self.fts.commit_result(book_id, fmt, fmt_size, fmt_hash, text, err_msg) - def search(self, + def fts_search(self, fts_engine_query, use_stemming, highlight_start, highlight_end, snippet_size, restrict_to_book_ids, ): yield from self.fts.search(fts_engine_query, use_stemming, highlight_start, highlight_end, snippet_size, restrict_to_book_ids,) diff --git a/src/calibre/db/fts/connect.py b/src/calibre/db/fts/connect.py index 922dc68ad5..49c791fbbe 100644 --- a/src/calibre/db/fts/connect.py +++ b/src/calibre/db/fts/connect.py @@ -123,35 +123,32 @@ class FTS: fts_engine_query, use_stemming, highlight_start, highlight_end, snippet_size, restrict_to_book_ids, ): fts_engine_query = unicode_normalize(fts_engine_query) - fts_table = 'books_fts_stemmed' if use_stemming else 'books_fts' + fts_table = 'books_fts' + ('_stemmed' if use_stemming else '') text = 'books_text.searchable_text' if highlight_start is not None and highlight_end is not None: if snippet_size is not None: - text = 'snippet({fts_table}, 0, "{highlight_start}", "{highlight_end}", "…", {snippet_size})'.format( - fts_table=fts_table, highlight_start=highlight_start, highlight_end=highlight_end, - snippet_size=max(1, min(snippet_size, 64))) + text = f'snippet({fts_table}, 0, "{highlight_start}", "{highlight_end}", "…", {max(1, min(snippet_size, 64))})' else: - text = f'highlight({fts_table}, 0, "{highlight_start}", "{highlight_end}")' + text = f'highlight("{fts_table}", 0, "{highlight_start}", "{highlight_end}")' query = 'SELECT {0}.id, {0}.book, {0}.format, {1} FROM {0} ' query = query.format('books_text', text) - query += ' JOIN {fts_table} ON books_text.id = {fts_table}.rowid'.format(fts_table=fts_table) - query += f' WHERE {fts_table} MATCH ?' + query += f' JOIN {fts_table} ON fts_db.books_text.id = {fts_table}.rowid' + query += f' WHERE "{fts_table}" MATCH ?' data = [fts_engine_query] query += f' ORDER BY {fts_table}.rank ' + conn = self.get_connection() try: - for (rowid, book_id, fmt, user_type, user, annot_data, text) in self.execute(query, tuple(data)): + for (rowid, book_id, fmt, text) in conn.execute(query, tuple(data)): if restrict_to_book_ids is not None and book_id not in restrict_to_book_ids: continue yield { 'id': rowid, 'book_id': book_id, 'format': fmt, - 'user_type': user_type, - 'user': user, 'text': text, } except apsw.SQLError as e: - raise FTSQueryError(fts_engine_query, query, e) + raise FTSQueryError(fts_engine_query, query, e) from e def shutdown(self): self.pool.shutdown() diff --git a/src/calibre/db/tests/fts_api.py b/src/calibre/db/tests/fts_api.py index 1b37137f53..45b5817ff6 100644 --- a/src/calibre/db/tests/fts_api.py +++ b/src/calibre/db/tests/fts_api.py @@ -117,6 +117,24 @@ class FTSAPITest(BaseTest): for w in workers: self.assertFalse(w.is_alive()) + def test_fts_search(self): + cache = self.new_library() + fts = cache.enable_fts() + self.wait_for_fts_to_finish(fts) + self.assertFalse(fts.all_currently_dirty()) + cache.add_format(1, 'TXT', BytesIO(b'some long text to help with testing search.')) + cache.add_format(2, 'MD', BytesIO(b'some other long text that will also help with the testing of search')) + self.assertTrue(fts.all_currently_dirty()) + self.wait_for_fts_to_finish(fts) + self.assertFalse(fts.all_currently_dirty()) + self.ae({x['id'] for x in cache.fts_search('help')}, {1, 2}) + self.ae({x['format'] for x in cache.fts_search('help')}, {'TXT', 'MD'}) + self.ae({x['id'] for x in cache.fts_search('also')}, {2}) + self.ae({x['text'] for x in cache.fts_search('also', highlight_start='[', highlight_end=']')}, { + 'some other long text that will [also] help with the testing of search'}) + self.ae({x['text'] for x in cache.fts_search('also', highlight_start='[', highlight_end=']', snippet_size=3)}, { + '…will [also] help…'}) + def test_fts_triggers(self): cache = self.init_cache() # the cache fts jobs will clear dirtied flag so disable it