mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Get FTS search API working
This commit is contained in:
parent
6f3cd9cc44
commit
4502569b90
@ -19,8 +19,8 @@ CREATE TABLE fts_db.books_text ( id INTEGER PRIMARY KEY,
|
||||
);
|
||||
|
||||
|
||||
CREATE VIRTUAL TABLE fts_db.books_fts USING fts5(searchable_text, content = 'fts_db.books_text', content_rowid = 'id', tokenize = 'calibre remove_diacritics 2');
|
||||
CREATE VIRTUAL TABLE fts_db.books_fts_stemmed USING fts5(searchable_text, content = 'fts_db.books_text', content_rowid = 'id', tokenize = 'porter calibre remove_diacritics 2');
|
||||
CREATE VIRTUAL TABLE fts_db.books_fts USING fts5(searchable_text, content = 'books_text', content_rowid = 'id', tokenize = 'calibre remove_diacritics 2');
|
||||
CREATE VIRTUAL TABLE fts_db.books_fts_stemmed USING fts5(searchable_text, content = 'books_text', content_rowid = 'id', tokenize = 'porter calibre remove_diacritics 2');
|
||||
|
||||
CREATE TRIGGER fts_db.books_fts_insert_trg AFTER INSERT ON fts_db.books_text
|
||||
BEGIN
|
||||
|
@ -971,7 +971,7 @@ class DB:
|
||||
def commit_fts_result(self, book_id, fmt, fmt_size, fmt_hash, text, err_msg):
|
||||
return self.fts.commit_result(book_id, fmt, fmt_size, fmt_hash, text, err_msg)
|
||||
|
||||
def search(self,
|
||||
def fts_search(self,
|
||||
fts_engine_query, use_stemming, highlight_start, highlight_end, snippet_size, restrict_to_book_ids,
|
||||
):
|
||||
yield from self.fts.search(fts_engine_query, use_stemming, highlight_start, highlight_end, snippet_size, restrict_to_book_ids,)
|
||||
|
@ -123,35 +123,32 @@ class FTS:
|
||||
fts_engine_query, use_stemming, highlight_start, highlight_end, snippet_size, restrict_to_book_ids,
|
||||
):
|
||||
fts_engine_query = unicode_normalize(fts_engine_query)
|
||||
fts_table = 'books_fts_stemmed' if use_stemming else 'books_fts'
|
||||
fts_table = 'books_fts' + ('_stemmed' if use_stemming else '')
|
||||
text = 'books_text.searchable_text'
|
||||
if highlight_start is not None and highlight_end is not None:
|
||||
if snippet_size is not None:
|
||||
text = 'snippet({fts_table}, 0, "{highlight_start}", "{highlight_end}", "…", {snippet_size})'.format(
|
||||
fts_table=fts_table, highlight_start=highlight_start, highlight_end=highlight_end,
|
||||
snippet_size=max(1, min(snippet_size, 64)))
|
||||
text = f'snippet({fts_table}, 0, "{highlight_start}", "{highlight_end}", "…", {max(1, min(snippet_size, 64))})'
|
||||
else:
|
||||
text = f'highlight({fts_table}, 0, "{highlight_start}", "{highlight_end}")'
|
||||
text = f'highlight("{fts_table}", 0, "{highlight_start}", "{highlight_end}")'
|
||||
query = 'SELECT {0}.id, {0}.book, {0}.format, {1} FROM {0} '
|
||||
query = query.format('books_text', text)
|
||||
query += ' JOIN {fts_table} ON books_text.id = {fts_table}.rowid'.format(fts_table=fts_table)
|
||||
query += f' WHERE {fts_table} MATCH ?'
|
||||
query += f' JOIN {fts_table} ON fts_db.books_text.id = {fts_table}.rowid'
|
||||
query += f' WHERE "{fts_table}" MATCH ?'
|
||||
data = [fts_engine_query]
|
||||
query += f' ORDER BY {fts_table}.rank '
|
||||
conn = self.get_connection()
|
||||
try:
|
||||
for (rowid, book_id, fmt, user_type, user, annot_data, text) in self.execute(query, tuple(data)):
|
||||
for (rowid, book_id, fmt, text) in conn.execute(query, tuple(data)):
|
||||
if restrict_to_book_ids is not None and book_id not in restrict_to_book_ids:
|
||||
continue
|
||||
yield {
|
||||
'id': rowid,
|
||||
'book_id': book_id,
|
||||
'format': fmt,
|
||||
'user_type': user_type,
|
||||
'user': user,
|
||||
'text': text,
|
||||
}
|
||||
except apsw.SQLError as e:
|
||||
raise FTSQueryError(fts_engine_query, query, e)
|
||||
raise FTSQueryError(fts_engine_query, query, e) from e
|
||||
|
||||
def shutdown(self):
|
||||
self.pool.shutdown()
|
||||
|
@ -117,6 +117,24 @@ class FTSAPITest(BaseTest):
|
||||
for w in workers:
|
||||
self.assertFalse(w.is_alive())
|
||||
|
||||
def test_fts_search(self):
|
||||
cache = self.new_library()
|
||||
fts = cache.enable_fts()
|
||||
self.wait_for_fts_to_finish(fts)
|
||||
self.assertFalse(fts.all_currently_dirty())
|
||||
cache.add_format(1, 'TXT', BytesIO(b'some long text to help with testing search.'))
|
||||
cache.add_format(2, 'MD', BytesIO(b'some other long text that will also help with the testing of search'))
|
||||
self.assertTrue(fts.all_currently_dirty())
|
||||
self.wait_for_fts_to_finish(fts)
|
||||
self.assertFalse(fts.all_currently_dirty())
|
||||
self.ae({x['id'] for x in cache.fts_search('help')}, {1, 2})
|
||||
self.ae({x['format'] for x in cache.fts_search('help')}, {'TXT', 'MD'})
|
||||
self.ae({x['id'] for x in cache.fts_search('also')}, {2})
|
||||
self.ae({x['text'] for x in cache.fts_search('also', highlight_start='[', highlight_end=']')}, {
|
||||
'some other long text that will [also] help with the testing of search'})
|
||||
self.ae({x['text'] for x in cache.fts_search('also', highlight_start='[', highlight_end=']', snippet_size=3)}, {
|
||||
'…will [also] help…'})
|
||||
|
||||
def test_fts_triggers(self):
|
||||
cache = self.init_cache()
|
||||
# the cache fts jobs will clear dirtied flag so disable it
|
||||
|
Loading…
x
Reference in New Issue
Block a user