Add method to re-index FTS

This commit is contained in:
Kovid Goyal 2022-05-02 16:32:54 +05:30
parent 53ae7d76cf
commit 34cf27727a
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
5 changed files with 49 additions and 15 deletions

View File

@ -1,17 +1,17 @@
CREATE TEMP TRIGGER fts_db_book_deleted_trg AFTER DELETE ON main.books BEGIN CREATE TEMP TRIGGER IF NOT EXISTS fts_db_book_deleted_trg AFTER DELETE ON main.books BEGIN
DELETE FROM books_text WHERE book=OLD.id; DELETE FROM books_text WHERE book=OLD.id;
DELETE FROM dirtied_formats WHERE book=OLD.id; DELETE FROM dirtied_formats WHERE book=OLD.id;
END; END;
CREATE TEMP TRIGGER fts_db_format_deleted_trg AFTER DELETE ON main.data BEGIN CREATE TEMP TRIGGER IF NOT EXISTS fts_db_format_deleted_trg AFTER DELETE ON main.data BEGIN
DELETE FROM books_text WHERE book=OLD.book AND format=OLD.format; DELETE FROM books_text WHERE book=OLD.book AND format=OLD.format;
DELETE FROM dirtied_formats WHERE book=OLD.book AND format=OLD.format; DELETE FROM dirtied_formats WHERE book=OLD.book AND format=OLD.format;
END; END;
CREATE TEMP TRIGGER fts_db_format_added_trg AFTER INSERT ON main.data BEGIN CREATE TEMP TRIGGER IF NOT EXISTS fts_db_format_added_trg AFTER INSERT ON main.data BEGIN
INSERT OR IGNORE INTO dirtied_formats(book, format) VALUES (NEW.book, NEW.format); INSERT OR IGNORE INTO dirtied_formats(book, format) VALUES (NEW.book, NEW.format);
END; END;
CREATE TEMP TRIGGER fts_db_format_updated_trg AFTER UPDATE ON main.data BEGIN CREATE TEMP TRIGGER IF NOT EXISTS fts_db_format_updated_trg AFTER UPDATE ON main.data BEGIN
INSERT OR IGNORE INTO dirtied_formats(book, format) VALUES (NEW.book, NEW.format); INSERT OR IGNORE INTO dirtied_formats(book, format) VALUES (NEW.book, NEW.format);
END; END;

View File

@ -933,6 +933,7 @@ class DB:
return return
from .fts.connect import FTS from .fts.connect import FTS
self.fts = FTS(dbref) self.fts = FTS(dbref)
return self.fts
def enable_fts(self, dbref=None): def enable_fts(self, dbref=None):
enabled = dbref is not None enabled = dbref is not None
@ -962,6 +963,12 @@ class DB:
def get_next_fts_job(self): def get_next_fts_job(self):
return self.fts.get_next_fts_job() return self.fts.get_next_fts_job()
def reindex_fts(self):
    """Detach the FTS database from the connection and delete its file.

    Nothing happens when ``self.conn.fts_dbpath`` is falsy. Otherwise the
    ``fts_db`` schema is detached, the file at ``fts_dbpath`` is removed
    and ``fts_dbpath`` is reset to ``None``.
    """
    connection = self.conn
    if not connection.fts_dbpath:
        return
    # Detach first, then delete the file the path points at.
    connection.execute('DETACH fts_db')
    os.remove(connection.fts_dbpath)
    connection.fts_dbpath = None
def remove_dirty_fts(self, book_id, fmt): def remove_dirty_fts(self, book_id, fmt):
return self.fts.remove_dirty(book_id, fmt) return self.fts.remove_dirty(book_id, fmt)

View File

@ -431,9 +431,10 @@ class Cache:
def initialize_fts(self): def initialize_fts(self):
self.fts_queue_thread = None self.fts_queue_thread = None
self.fts_job_queue = Queue() self.fts_job_queue = Queue()
self.backend.initialize_fts(weakref.ref(self)) fts = self.backend.initialize_fts(weakref.ref(self))
if self.is_fts_enabled(): if self.is_fts_enabled():
self.start_fts_pool() self.start_fts_pool()
return fts
def start_fts_pool(self): def start_fts_pool(self):
from threading import Thread from threading import Thread
@ -531,6 +532,19 @@ class Cache:
def commit_fts_result(self, book_id, fmt, fmt_size, fmt_hash, text, err_msg): def commit_fts_result(self, book_id, fmt, fmt_size, fmt_hash, text, err_msg):
return self.backend.commit_fts_result(book_id, fmt, fmt_size, fmt_hash, text, err_msg) return self.backend.commit_fts_result(book_id, fmt, fmt_size, fmt_hash, text, err_msg)
@api
def reindex_fts(self):
    """Shut FTS down, let the backend reset it, then bring it back up.

    Returns whatever ``initialize_fts()`` returns, or ``None`` (without
    touching anything) when ``is_fts_enabled()`` is false.
    """
    if self.is_fts_enabled():
        # Shutdown happens in two stages: the first under the write lock,
        # the second with the lock released.
        with self.write_lock:
            self._shutdown_fts()
        self._shutdown_fts(stage=2)
        with self.write_lock:
            self.backend.reindex_fts()
            restarted = self.initialize_fts()
            self._queue_next_fts_job()
        return restarted
    return None
@api @api
def set_fts_num_of_workers(self, num=None): def set_fts_num_of_workers(self, num=None):
existing = self.backend.fts_num_of_workers existing = self.backend.fts_num_of_workers
@ -2381,6 +2395,19 @@ class Cache:
def __del__(self): def __del__(self):
self.close() self.close()
def _shutdown_fts(self, stage=1):
    """Run one of the two stages of shutting down FTS.

    Stage 1 calls ``backend.shutdown_fts()`` and, when a queue thread
    exists, puts ``None`` on the job queue (presumably the signal for
    that thread to exit). Any other ``stage`` value joins the queue
    thread, clears the reference to it and calls ``backend.join_fts()``.
    """
    if stage != 1:
        # The FTS supervisor thread may be part-way through committing a
        # result to the db, so this stage must run without a lock held,
        # otherwise it deadlocks.
        queue_thread = self.fts_queue_thread
        if queue_thread is not None:
            queue_thread.join()
            self.fts_queue_thread = None
        self.backend.join_fts()
        return
    self.backend.shutdown_fts()
    if self.fts_queue_thread is not None:
        self.fts_job_queue.put(None)
@api @api
def close(self): def close(self):
with self.write_lock: with self.write_lock:
@ -2389,9 +2416,7 @@ class Cache:
self.close_called = True self.close_called = True
self.shutting_down = True self.shutting_down = True
self.event_dispatcher.close() self.event_dispatcher.close()
self.backend.shutdown_fts() self._shutdown_fts()
if self.fts_queue_thread is not None:
self.fts_job_queue.put(None)
from calibre.customize.ui import available_library_closed_plugins from calibre.customize.ui import available_library_closed_plugins
for plugin in available_library_closed_plugins(): for plugin in available_library_closed_plugins():
try: try:
@ -2399,12 +2424,7 @@ class Cache:
except Exception: except Exception:
import traceback import traceback
traceback.print_exc() traceback.print_exc()
# the fts supervisor thread could be in the middle of committing a self._shutdown_fts(stage=2)
# result to the db, so holding a lock here will cause a deadlock
if self.fts_queue_thread is not None:
self.fts_queue_thread.join()
self.fts_queue_thread = None
self.backend.join_fts()
with self.write_lock: with self.write_lock:
self.backend.close() self.backend.close()

View File

@ -206,7 +206,8 @@ class Pool:
self.initialized.clear() self.initialized.clear()
def join(self): def join(self):
self.supervisor_thread.join() with suppress(AttributeError):
self.supervisor_thread.join()
for w in self.workers: for w in self.workers:
w.join() w.join()
self.workers = [] self.workers = []

View File

@ -143,6 +143,12 @@ class FTSAPITest(BaseTest):
'some other long text that will [also] help with the testing of search'}) 'some other long text that will [also] help with the testing of search'})
self.ae({x['text'] for x in cache.fts_search('also', highlight_start='[', highlight_end=']', snippet_size=3)}, { self.ae({x['text'] for x in cache.fts_search('also', highlight_start='[', highlight_end=']', snippet_size=3)}, {
'…will [also] help…'}) '…will [also] help…'})
fts = cache.reindex_fts()
self.wait_for_fts_to_finish(fts)
self.assertFalse(fts.all_currently_dirty())
self.ae({x['id'] for x in cache.fts_search('help')}, {1, 2})
cache.remove_books((1,))
self.ae({x['id'] for x in cache.fts_search('help')}, {2})
cache.close() cache.close()
def test_fts_triggers(self): def test_fts_triggers(self):