More fts indexing tests

This commit is contained in:
Kovid Goyal 2022-02-19 14:58:00 +05:30
parent 81a0f1b386
commit 4f90074eeb
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 51 additions and 12 deletions

View File

@ -119,17 +119,6 @@ class Pool:
self.expand_workers() self.expand_workers()
self.initialized = True self.initialized = True
def shutdown(self):
    """Signal the supervisor and all workers to quit, then wait for them.

    A no-op when ``self.initialized`` is false, so calling it repeatedly
    (or before the pool was ever started) is harmless.
    """
    if not self.initialized:
        return
    self.initialized = False
    # Post the quit sentinel on the supervisor's queue first.
    self.supervise_queue.put(quit)
    # Flag each worker to stop and queue one quit sentinel per worker.
    for worker in self.workers:
        worker.keep_going = False
        self.jobs_queue.put(quit)
    # Block until the supervisor thread and then every worker has exited.
    self.supervisor_thread.join()
    for worker in self.workers:
        worker.join()
def prune_dead_workers(self): def prune_dead_workers(self):
self.workers = [w for w in self.workers if w.is_alive()] self.workers = [w for w in self.workers if w.is_alive()]
@ -190,6 +179,18 @@ class Pool:
db = self.dbref() db = self.dbref()
if db is not None: if db is not None:
db.commit_fts_result(result.book_id, result.fmt, result.fmt_size, result.fmt_hash, text) db.commit_fts_result(result.book_id, result.fmt, result.fmt_size, result.fmt_hash, text)
def shutdown(self):
    """Signal the supervisor and all workers to quit, wait, and forget them.

    A no-op when ``self.initialized`` is false. After a real shutdown the
    worker list is reset to empty.
    """
    if not self.initialized:
        return
    self.initialized = False
    # Post the quit sentinel on the supervisor's queue first.
    self.supervise_queue.put(quit)
    # Flag each worker to stop and queue one quit sentinel per worker.
    for worker in self.workers:
        worker.keep_going = False
        self.jobs_queue.put(quit)
    # Block until the supervisor thread and then every worker has exited.
    self.supervisor_thread.join()
    for worker in self.workers:
        worker.join()
    # Drop the references to the now-finished workers.
    self.workers = []
# }}} # }}}
def do_check_for_work(self): def do_check_for_work(self):

View File

@ -3,9 +3,12 @@
import builtins import builtins
import os
import shutil
import sys import sys
import time import time
from io import BytesIO from io import BytesIO
from zipfile import ZipFile
from calibre.db.fts.text import html_to_text from calibre.db.fts.text import html_to_text
from calibre.db.tests.base import BaseTest from calibre.db.tests.base import BaseTest
@ -29,6 +32,13 @@ class FTSAPITest(BaseTest):
from calibre_extensions.sqlite_extension import set_ui_language from calibre_extensions.sqlite_extension import set_ui_language
set_ui_language('en') set_ui_language('en')
def new_library(self):
    """Start over with a fresh library directory and return its cache.

    Removes whatever currently exists at ``self.library_path``, recreates
    the directory empty, runs ``self.create_db`` on it and returns the
    result of ``self.init_cache()``.
    """
    path = self.library_path
    if os.path.exists(path):
        shutil.rmtree(path)
    os.makedirs(path)
    self.create_db(path)
    return self.init_cache()
def wait_for_fts_to_finish(self, fts, timeout=10): def wait_for_fts_to_finish(self, fts, timeout=10):
if fts.pool.initialized: if fts.pool.initialized:
st = time.monotonic() st = time.monotonic()
@ -38,8 +48,17 @@ class FTSAPITest(BaseTest):
def text_records(self, fts): def text_records(self, fts):
return fts.get_connection().get_dict('SELECT * FROM fts_db.books_text') return fts.get_connection().get_dict('SELECT * FROM fts_db.books_text')
def make_txtz(self, txt, **extra):
    """Build an in-memory zip archive usable as a TXTZ file.

    ``txt`` is stored as ``index.txt``; every keyword argument becomes an
    additional archive member named after the keyword. The returned
    ``BytesIO`` is rewound to the start so it can be read immediately.
    """
    archive = BytesIO()
    # index.txt always goes first, followed by the extras in call order.
    entries = [('index.txt', txt), *extra.items()]
    with ZipFile(archive, mode='w') as zf:
        for name, data in entries:
            zf.writestr(name, data)
    archive.seek(0)
    return archive
def test_fts_pool(self): def test_fts_pool(self):
cache = self.init_cache() cache = self.new_library()
fts = cache.enable_fts() fts = cache.enable_fts()
self.wait_for_fts_to_finish(fts) self.wait_for_fts_to_finish(fts)
self.assertFalse(fts.all_currently_dirty()) self.assertFalse(fts.all_currently_dirty())
@ -63,6 +82,25 @@ class FTSAPITest(BaseTest):
cache.add_format(1, 'TXT', BytesIO(b'a test text2')) cache.add_format(1, 'TXT', BytesIO(b'a test text2'))
self.wait_for_fts_to_finish(fts) self.wait_for_fts_to_finish(fts)
check(id=2, book=1, format='TXT', searchable_text='a test text2') check(id=2, book=1, format='TXT', searchable_text='a test text2')
# check closing shuts down all workers
cache.close()
self.assertFalse(fts.pool.initialized)
# TODO: check shutdown when worker hangs
# TODO: add a max scan time and check that the worker honors it
# TODO: Add a column to store failures with tracebacks in the books_text table
# check enabling scans pre-existing formats
cache = self.new_library()
cache.add_format(1, 'TXTZ', self.make_txtz(b'a test text'))
fts = cache.enable_fts()
self.wait_for_fts_to_finish(fts)
check(id=1, book=1, format='TXTZ', searchable_text='a test text')
# check changing the format but not the text doesn't cause a rescan
cache.add_format(1, 'TXTZ', self.make_txtz(b'a test text', extra='xxx'))
self.wait_for_fts_to_finish(fts)
check(id=1, book=1, format='TXTZ', searchable_text='a test text')
cache.close()
def test_fts_triggers(self): def test_fts_triggers(self):
cache = self.init_cache() cache = self.init_cache()