Start work on fts

This commit is contained in:
Kovid Goyal 2022-01-30 18:59:17 +05:30
parent 380b5b4300
commit ba1707169a
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
6 changed files with 116 additions and 1 deletions

36
resources/fts_sqlite.sql Normal file
View File

@ -0,0 +1,36 @@
-- Backing (external content) table for the full-text indexes: one row per
-- (book, format) pair holding the text extracted for that format. The `id`
-- column is the rowid that the FTS5 tables reference via content_rowid.
CREATE TABLE fts_db.books_text ( id INTEGER PRIMARY KEY,
    book INTEGER NOT NULL,
    format TEXT NOT NULL COLLATE NOCASE,
    timestamp REAL NOT NULL,
    format_hash TEXT NOT NULL COLLATE NOCASE,
    format_size INTEGER NOT NULL,
    text_hash TEXT NOT NULL COLLATE NOCASE,
    -- Use a single-quoted string literal: in SQL, double quotes denote an
    -- identifier and are only accepted as a string by a legacy SQLite quirk.
    searchable_text TEXT NOT NULL DEFAULT '',
    UNIQUE(book, format)
);
-- External-content FTS5 indexes over books_text.searchable_text: one plain and
-- one stemmed (porter). The `content` option must be the bare table name,
-- because FTS5 itself qualifies it with the schema the virtual table lives in;
-- writing 'fts_db.books_text' would make it look for a table literally named
-- "fts_db.books_text".
CREATE VIRTUAL TABLE fts_db.books_fts USING fts5(searchable_text, content = 'books_text', content_rowid = 'id', tokenize = 'calibre remove_diacritics 2');
CREATE VIRTUAL TABLE fts_db.books_fts_stemmed USING fts5(searchable_text, content = 'books_text', content_rowid = 'id', tokenize = 'porter calibre remove_diacritics 2');
-- Keep both FTS indexes in sync when a row is added to books_text.
-- Statements inside a trigger body must use unqualified table names; they
-- resolve within the schema the trigger belongs to (fts_db).
CREATE TRIGGER fts_db.books_fts_insert_trg AFTER INSERT ON fts_db.books_text
BEGIN
    INSERT INTO books_fts(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
    INSERT INTO books_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
END;
-- Remove a deleted row's text from both FTS indexes. External-content FTS5
-- tables are told to drop an entry with the special 'delete' command, written
-- to the column named after the table itself (bare name, no schema prefix).
-- Table names inside a trigger body must likewise be unqualified.
CREATE TRIGGER fts_db.books_fts_delete_trg AFTER DELETE ON fts_db.books_text
BEGIN
    INSERT INTO books_fts(books_fts, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
    INSERT INTO books_fts_stemmed(books_fts_stemmed, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
END;
-- Re-index a row when books_text is updated: drop the old text from each FTS
-- index with the 'delete' command, then insert the new text. Table names (and
-- the command column, which is named after the table) must be unqualified
-- inside a trigger body.
CREATE TRIGGER fts_db.books_fts_update_trg AFTER UPDATE ON fts_db.books_text
BEGIN
    INSERT INTO books_fts(books_fts, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
    INSERT INTO books_fts(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
    INSERT INTO books_fts_stemmed(books_fts_stemmed, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
    INSERT INTO books_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
END;
-- Stamp the freshly created FTS database as schema version 1; the Python-side
-- SchemaUpgrade class reads this value to decide which upgrade steps to run.
PRAGMA fts_db.user_version=1;

View File

@ -492,6 +492,7 @@ class DB:
self.initialize_prefs(default_prefs, restore_all_prefs, progress_callback)
self.initialize_custom_columns()
self.initialize_tables()
self.initialize_fts()
self.set_user_template_functions(compile_user_template_functions(
self.prefs.get('user_template_functions', [])))
if load_user_formatter_functions:
@ -564,6 +565,7 @@ class DB:
defs['cover_browser_subtitle_field'] = 'rating'
defs['styled_columns'] = {}
defs['edit_metadata_ignore_display_order'] = False
defs['fts_enabled'] = False
# Migrate the bool tristate tweak
defs['bools_are_tristate'] = \
@ -919,6 +921,16 @@ class DB:
# }}}
def initialize_fts(self):
    """Set ``self.fts``: an FTS instance when the ``fts_enabled`` pref is on, else None."""
    # Assign the disabled value first so the attribute always exists, even if
    # constructing FTS below raises.
    self.fts = None
    if self.prefs['fts_enabled']:
        # Imported lazily so the FTS machinery is only loaded when enabled.
        from .fts.connect import FTS
        self.fts = FTS(self.get_connection)
def get_connection(self):
    """Return the database connection; handed to FTS as a callable accessor."""
    connection = self.conn
    return connection
@property
def conn(self):
if self._conn is None:
@ -1224,6 +1236,7 @@ class DB:
cur.execute(metadata_sqlite)
except:
cur.execute('ROLLBACK')
raise
else:
cur.execute('COMMIT')
if self.user_version == 0:

View File

View File

@ -0,0 +1,33 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2022, Kovid Goyal <kovid at kovidgoyal.net>
import os
from .schema_upgrade import SchemaUpgrade
# TODO: db dump+restore
# TODO: calibre export/import
# TODO: check library and vacuuming of fts db
class FTS:
    """Attach and initialise the full-text-search database.

    The FTS data lives in ``full-text-search.db`` in the same directory as the
    main library database and is attached to the library connection under the
    schema name ``fts_db``.
    """

    def __init__(self, get_connection):
        """Attach the FTS database, create its schema if missing and upgrade it.

        :param get_connection: Zero-argument callable returning the library
            database connection.
        :raises: Whatever the connection raises; a failed schema creation is
            rolled back before the exception propagates.
        """
        self.get_connection = get_connection
        conn = self.get_connection()
        main_db_path = os.path.abspath(conn.db_filename('main'))
        self.dbpath = os.path.join(os.path.dirname(main_db_path), 'full-text-search.db')
        # Pass the path as a proper single-quoted SQL string literal (doubling
        # embedded quotes) so quote characters in the library path cannot
        # break the statement.
        quoted_path = self.dbpath.replace("'", "''")
        conn.execute(f"ATTACH DATABASE '{quoted_path}' AS fts_db")
        # Use the local connection: this class has no ``conn`` attribute.
        cur = conn.cursor()
        cur.execute('BEGIN EXCLUSIVE TRANSACTION')
        try:
            # user_version is 0 only for a brand new database; the schema
            # script sets it to 1. Re-running the CREATE statements against an
            # existing database would fail, so only run them once.
            if not conn.get('PRAGMA fts_db.user_version', all=False):
                fts_sqlite = P('fts_sqlite.sql', data=True, allow_user_override=False).decode('utf-8')
                cur.execute(fts_sqlite)
        except BaseException:
            cur.execute('ROLLBACK')
            raise
        else:
            cur.execute('COMMIT')
        SchemaUpgrade(conn)

View File

@ -0,0 +1,33 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2022, Kovid Goyal <kovid at kovidgoyal.net>
class SchemaUpgrade:
    """Run pending FTS schema upgrades inside one exclusive transaction.

    Upgrade steps are methods named ``upgrade_version_<N>``, where N is the
    schema version they upgrade *from*. Steps are applied one after another,
    bumping ``user_version`` each time, until no method exists for the current
    version.
    """

    def __init__(self, conn):
        """Apply all applicable upgrade steps using *conn*.

        Any failure rolls the transaction back and re-raises; on success the
        transaction is committed and the connection reference is dropped.
        """
        self.conn = conn
        conn.execute('BEGIN EXCLUSIVE TRANSACTION')
        try:
            current = self.user_version
            step = getattr(self, f'upgrade_version_{current}', None)
            while step is not None:
                print(f'Upgrading FTS database to version {current+1}...')
                step()
                self.user_version = current + 1
                # Re-read the version from the database for the next round.
                current = self.user_version
                step = getattr(self, f'upgrade_version_{current}', None)
        except BaseException:
            conn.execute('ROLLBACK')
            raise
        conn.execute('COMMIT')
        self.conn = None

    @property
    def user_version(self):
        """Schema version stored in the attached ``fts_db`` database (0 if unset)."""
        stored = self.conn.get('PRAGMA fts_db.user_version', all=False)
        return stored or 0

    @user_version.setter
    def user_version(self, val):
        self.conn.execute(f'PRAGMA fts_db.user_version={val}')

View File

@ -14,7 +14,7 @@ from polyglot.builtins import iteritems
EBOOK_EXTENSIONS = frozenset(BOOK_EXTENSIONS)
NORMALS = frozenset({'metadata.opf', 'cover.jpg'})
# File names to ignore at the top level of the library folder. The two
# assignments below are the before/after sides of this change; the second one
# (which adds 'full-text-search.db', the FTS database stored next to the main
# database) is the one that takes effect.
IGNORE_AT_TOP_LEVEL = frozenset({'metadata.db', 'metadata_db_prefs_backup.json', 'metadata_pre_restore.db'})
IGNORE_AT_TOP_LEVEL = frozenset({'metadata.db', 'metadata_db_prefs_backup.json', 'metadata_pre_restore.db', 'full-text-search.db'})
'''
Checks fields: