From 4f90074eebbc42a245d13a2ba51d508ebd7c49fc Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 19 Feb 2022 14:58:00 +0530 Subject: [PATCH] More fts indexing tests --- src/calibre/db/fts/pool.py | 23 ++++++++++--------- src/calibre/db/tests/fts_api.py | 40 ++++++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 12 deletions(-) diff --git a/src/calibre/db/fts/pool.py b/src/calibre/db/fts/pool.py index 2a27f96aeb..6ec411be43 100644 --- a/src/calibre/db/fts/pool.py +++ b/src/calibre/db/fts/pool.py @@ -119,17 +119,6 @@ class Pool: self.expand_workers() self.initialized = True - def shutdown(self): - if self.initialized: - self.initialized = False - self.supervise_queue.put(quit) - for w in self.workers: - w.keep_going = False - self.jobs_queue.put(quit) - self.supervisor_thread.join() - for w in self.workers: - w.join() - def prune_dead_workers(self): self.workers = [w for w in self.workers if w.is_alive()] @@ -190,6 +179,18 @@ class Pool: db = self.dbref() if db is not None: db.commit_fts_result(result.book_id, result.fmt, result.fmt_size, result.fmt_hash, text) + + def shutdown(self): + if self.initialized: + self.initialized = False + self.supervise_queue.put(quit) + for w in self.workers: + w.keep_going = False + self.jobs_queue.put(quit) + self.supervisor_thread.join() + for w in self.workers: + w.join() + self.workers = [] # }}} def do_check_for_work(self): diff --git a/src/calibre/db/tests/fts_api.py b/src/calibre/db/tests/fts_api.py index a6e4094f09..f6fad37269 100644 --- a/src/calibre/db/tests/fts_api.py +++ b/src/calibre/db/tests/fts_api.py @@ -3,9 +3,12 @@ import builtins +import os +import shutil import sys import time from io import BytesIO +from zipfile import ZipFile from calibre.db.fts.text import html_to_text from calibre.db.tests.base import BaseTest @@ -29,6 +32,13 @@ class FTSAPITest(BaseTest): from calibre_extensions.sqlite_extension import set_ui_language set_ui_language('en') + def new_library(self): + if 
os.path.exists(self.library_path): + shutil.rmtree(self.library_path) + os.makedirs(self.library_path) + self.create_db(self.library_path) + return self.init_cache() + def wait_for_fts_to_finish(self, fts, timeout=10): if fts.pool.initialized: st = time.monotonic() @@ -38,8 +48,17 @@ class FTSAPITest(BaseTest): def text_records(self, fts): return fts.get_connection().get_dict('SELECT * FROM fts_db.books_text') + def make_txtz(self, txt, **extra): + buf = BytesIO() + with ZipFile(buf, mode='w') as zf: + zf.writestr('index.txt', txt) + for key, val in extra.items(): + zf.writestr(key, val) + buf.seek(0) + return buf + def test_fts_pool(self): - cache = self.init_cache() + cache = self.new_library() fts = cache.enable_fts() self.wait_for_fts_to_finish(fts) self.assertFalse(fts.all_currently_dirty()) @@ -63,6 +82,25 @@ class FTSAPITest(BaseTest): cache.add_format(1, 'TXT', BytesIO(b'a test text2')) self.wait_for_fts_to_finish(fts) check(id=2, book=1, format='TXT', searchable_text='a test text2') + # check closing shuts down all workers + cache.close() + self.assertFalse(fts.pool.initialized) + + # TODO: check shutdown when worker hangs + # TODO: add a max scan time and check that the worker honors it + # TODO: Add a column to store failures with tracebacks in the books_text table + + # check enabling scans pre-existing formats + cache = self.new_library() + cache.add_format(1, 'TXTZ', self.make_txtz(b'a test text')) + fts = cache.enable_fts() + self.wait_for_fts_to_finish(fts) + check(id=1, book=1, format='TXTZ', searchable_text='a test text') + # check changing the format but not the text doesn't cause a rescan + cache.add_format(1, 'TXTZ', self.make_txtz(b'a test text', extra='xxx')) + self.wait_for_fts_to_finish(fts) + check(id=1, book=1, format='TXTZ', searchable_text='a test text') + cache.close() def test_fts_triggers(self): cache = self.init_cache()